Initial commit

This commit is contained in:
kuntz 2021-02-28 15:49:34 -06:00
parent 3f63a5f4c1
commit 78a9826d0a
4 changed files with 1143 additions and 0 deletions

615
full/compress.c Executable file
View File

@ -0,0 +1,615 @@
//----------------------------------------------------------------------------------------------------------------------
// Copyright © 2021 by Brett Kuntz. All rights reserved.
//----------------------------------------------------------------------------------------------------------------------
#include "shared.h"
//----------------------------------------------------------------------------------------------------------------------
si main(si argc, s8 ** argv)
{
    // Compressor entry point.
    // Usage: compress <input file> <iv file> <output file>
    // Also writes the side files "tweaks.bin" and "inverts.bin" into the current directory.
    // Returns EXIT_SUCCESS when everything was written, EXIT_FAILURE otherwise.
    if (argc != 4)
    {
        puts("param error");
        return EXIT_FAILURE;
    }
    // Zero-filled working buffers: an input file shorter than FILE_SIZE leaves a zero tail in indata
    indata = calloc(FILE_SIZE, 1);
    outdata = calloc(FILE_SIZE, 1);
    tweaks = calloc(TWEAK_SIZE, 1);
    inverts = calloc(INVERT_SIZE, 1);
    if (!indata || !outdata || !tweaks || !inverts)
    {
        puts("alloc error");
        return EXIT_FAILURE;
    }
    // Input data (up to FILE_SIZE bytes are used)
    FILE * finput = fopen(argv[1], "rb");
    if (!finput)
    {
        printf("Unable to open [%s] for input\n", argv[1]);
        return EXIT_FAILURE;
    }
    fread(indata, 1, FILE_SIZE, finput);
    const si input_failed = ferror(finput);
    fclose(finput);
    if (input_failed)
    {
        printf("Read error on [%s]\n", argv[1]);
        return EXIT_FAILURE;
    }
    printf("Opened [%s] for input\n", argv[1]);
    // 16-byte IV seed, later expanded by expand_iv()
    FILE * fiv = fopen(argv[2], "rb");
    if (!fiv)
    {
        printf("Unable to open [%s] for iv\n", argv[2]);
        return EXIT_FAILURE;
    }
    fread(iv, 1, 16, fiv);
    const si iv_failed = ferror(fiv);
    fclose(fiv);
    if (iv_failed)
    {
        printf("Read error on [%s]\n", argv[2]);
        return EXIT_FAILURE;
    }
    printf("Opened [%s] for iv\n", argv[2]);
    // The output file is opened (and truncated) up front so a bad path fails before the long computation
    FILE * foutput = fopen(argv[3], "wb");
    if (!foutput)
    {
        printf("Unable to open [%s] for output\n", argv[3]);
        return EXIT_FAILURE;
    }
    printf("Opened [%s] for output\n", argv[3]);
    // Start
    puts("Starting compression...");
    pthread_spin_init(&csjob, PTHREAD_PROCESS_PRIVATE);
    pthread_spin_init(&csmem, PTHREAD_PROCESS_PRIVATE);
    expand_iv();
    // One worker per reported processor; never size the handle array with zero
    const si nprocs = get_nprocs();
    const ui threads = nprocs > 0 ? (ui)nprocs : 1;
    pthread_t ht[threads];
    ui started = 0;
    start_tick = tick();
    for (ui i=0;i<threads;i++)
    {
        // Only successfully created handles are kept (packed at the front of ht[]) and joined later.
        // Workers pull block numbers from a shared counter, so fewer workers still process every block.
        if (pthread_create(&ht[started], 0, thread, 0) == 0)
        {
            started++;
        }
    }
    if (!started)
    {
        puts("thread error");
        return EXIT_FAILURE;
    }
    for (ui i=0;i<started;i++)
    {
        pthread_join(ht[i], 0);
    }
    // Transpose: byte i of block b moves to offset (i * BLOCKS) + b, grouping same-position bytes of all blocks
    puts("Transposing");
    memcpy(indata, outdata, FILE_SIZE);
    for (u64 i=0;i<128;i++)
    {
        for (u64 b=0;b<BLOCKS;b++)
        {
            outdata[(i * BLOCKS) + b] = indata[(b * 128) + i];
        }
    }
    printf("Saving [%s]\n", argv[3]);
    const size_t out_written = fwrite(outdata, 1, FILE_SIZE, foutput);
    if (fclose(foutput) != 0 || out_written != (size_t)FILE_SIZE)
    {
        printf("Write error on [%s]\n", argv[3]);
        return EXIT_FAILURE;
    }
    // Temp save tweaks to file
    FILE * ftweaks = fopen("tweaks.bin", "wb");
    if (!ftweaks)
    {
        puts("Unable to open [tweaks.bin]");
        return EXIT_FAILURE;
    }
    const size_t tweaks_written = fwrite(tweaks, 1, TWEAK_SIZE, ftweaks);
    if (fclose(ftweaks) != 0 || tweaks_written != (size_t)TWEAK_SIZE)
    {
        puts("Write error on [tweaks.bin]");
        return EXIT_FAILURE;
    }
    puts("Saving [tweaks.bin]");
    // Temp save inverts to file
    FILE * finverts = fopen("inverts.bin", "wb");
    if (!finverts)
    {
        puts("Unable to open [inverts.bin]");
        return EXIT_FAILURE;
    }
    const size_t inverts_written = fwrite(inverts, 1, INVERT_SIZE, finverts);
    if (fclose(finverts) != 0 || inverts_written != (size_t)INVERT_SIZE)
    {
        puts("Write error on [inverts.bin]");
        return EXIT_FAILURE;
    }
    puts("Saving [inverts.bin]");
    puts("Done :)\n");
    return EXIT_SUCCESS;
}
//----------------------------------------------------------------------------------------------------------------------
void * thread(void * UNUSED)
{
while (1)
{
// Check for and possibly grab a new job
u64 block_num = -1;
pthread_spin_lock(&csjob);
if (CS_NEXT_BLOCK_NUM < BLOCKS)
{
block_num = CS_NEXT_BLOCK_NUM++;
}
pthread_spin_unlock(&csjob);
if (block_num == -1) break;
// Do some work
printf("compressing block %04"PRIu64"...\n", block_num);
fflush(0);
u64 tweak;
ui invert;
u8 input_block[128], output_block[128];
const u64 sub_block = block_num * BLOCK_PRIME_MUL;
memcpy(input_block, &indata[block_num * 128], 128);
// Find the first collision
tweak = find_hash(output_block, input_block, sub_block, &invert);
pthread_spin_lock(&csmem);
{
set_tweak(block_num, 0, tweak);
set_bit(inverts, block_num * 2, invert);
}
pthread_spin_unlock(&csmem);
memcpy(input_block, output_block, 128);
// Find all subsequent collisions
for (ui i=0;i<CUTS_LENGTH;i++)
{
tweak = find_p_hash(output_block, input_block, sub_block + i + 1, CHAIN_CUTS[i]);
pthread_spin_lock(&csmem);
{
set_tweak(block_num, i + 1, tweak);
}
pthread_spin_unlock(&csmem);
memcpy(input_block, output_block, 128);
}
// Shuffle the resulting block
tweak = find_shuffle(output_block, input_block, sub_block + (TWEAKS - 1), &invert);
pthread_spin_lock(&csmem);
{
set_tweak(block_num, (TWEAKS - 1), tweak);
set_bit(inverts, (block_num * 2) + 1, invert);
}
pthread_spin_unlock(&csmem);
memcpy(&outdata[block_num * 128], output_block, 128);
// Progress report
const r64 ms = (tick() - start_tick) / 60000.;
const r64 pm = (block_num + 1) / ms;
const u64 rem = BLOCKS - (block_num + 1);
printf("compressed block %04"PRIu64" - %.1f mins remain\n", block_num, rem / pm);
fflush(0);
}
return 0;
}
//----------------------------------------------------------------------------------------------------------------------
u64 find_hash(u8 * const restrict output_block, u8 const * const restrict input_block, const u64 block_n, ui * const restrict invert)
{
    // Brute-force search over 2^(TWEAK_BITS - 1) tweaks for the hash() output whose bit population is
    // furthest from 512 in either direction.
    //   output_block : receives the winning 128-byte result (bit-inverted if it had more ones than zeros)
    //   input_block  : the 128-byte block being transformed
    //   block_n      : chain index mixed into the per-stage IV
    //   invert       : set to 1 when the stored result was bit-inverted, else 0
    // Returns the winning tweak. Exits the process if re-running the winner gives a different score.
    const u64 candidates = (u64)1 << (TWEAK_BITS - 1);
    u64 chain[16], msg[16];
    u64 winner = 0;
    ui winner_dist = 0;
    *invert = 0;
    // Stage IV = global IV with BLAKE_IV * block_n added to every word
    memcpy(chain, global_iv, 128);
    for (ui w = 0; w < 16; w++)
    {
        chain[w] += BLAKE_IV * block_n;
    }
    // Candidate n uses message words global_iv + n * BLAKE_IV, advanced incrementally
    memcpy(msg, global_iv, 128);
    for (u64 n = 0; n < candidates; n++)
    {
        hash(output_block, input_block, chain, msg);
        const si d = labs(get_hash_score(output_block));
        if (d > winner_dist)
        {
            winner = n;
            winner_dist = d;
        }
        for (ui w = 0; w < 16; w++)
        {
            msg[w] += BLAKE_IV;
        }
    }
    // Rebuild the winner's message words directly and regenerate its output
    memcpy(msg, global_iv, 128);
    for (ui w = 0; w < 16; w++)
    {
        msg[w] += BLAKE_IV * winner;
    }
    hash(output_block, input_block, chain, msg);
    si confirmed = get_hash_score(output_block);
    // A negative score means more ones than zeros: flip every bit and tell the decompressor via *invert
    if (confirmed < 0)
    {
        for (ui k = 0; k < 128; k++)
        {
            output_block[k] ^= 0xFF;
        }
        confirmed = -confirmed;
        *invert = 1;
    }
    // Temporary sanity check
    if (confirmed != winner_dist)
    {
        printf("ERROR: temp_distance [%d] != best_distance [%u]\nHash confirmation failed!!\n", confirmed, winner_dist);
        exit(EXIT_FAILURE);
    }
    return winner;
}
//----------------------------------------------------------------------------------------------------------------------
void hash(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m)
{
    // XORs input_block with 128 bytes of keystream produced by one blake2b() call on (RO_IV, m).
    // XOR is its own inverse, so the same call with the same RO_IV and m also undoes the transform.
    u64 state[16];
    memcpy(state, RO_IV, sizeof state); // work on a copy; the caller's IV must stay untouched
    blake2b(state, m);
    u8 const * const keystream = (u8 const *)state;
    for (ui k = 0; k < 128; k++)
    {
        output_block[k] = input_block[k] ^ keystream[k];
    }
}
//----------------------------------------------------------------------------------------------------------------------
u64 find_p_hash(u8 * const restrict output_block, u8 const * const restrict input_block, const u64 block_n, const u8 cutoff)
{
    // Brute-force search over 2^(TWEAK_BITS - 1) tweaks for the p_hash() output with the highest score
    // (most zero bits, see get_hash_score()).
    //   output_block : receives the winning 128-byte result
    //   input_block  : the 128-byte block being transformed
    //   block_n      : chain index mixed into the per-stage IV
    //   cutoff       : byte threshold forwarded to p_hash()
    // Returns the winning tweak. Exits the process if re-running the winner gives a different score.
    //
    // Unlike find_hash(), this stage has no invert flag, so candidates are ranked by their SIGNED score.
    // Ranking by magnitude would let a negative-score candidate win, which the confirmation below then
    // rejects and aborts the run. Whenever the magnitude ranking would have passed confirmation, the
    // signed ranking selects the same tweak.
    u64 tweak = 0;
    si best_score = 0;
    const u64 total_n = (u64)1 << (TWEAK_BITS - 1);
    // Stage IV = global IV with BLAKE_IV * block_n added to every word
    u64 RO_IV[16];
    memcpy(RO_IV, global_iv, 128);
    for (ui i=0;i<16;i++)
    {
        RO_IV[i] += BLAKE_IV * block_n;
    }
    // Candidate n uses message words global_iv + n * BLAKE_IV, advanced incrementally
    u64 m[16];
    memcpy(m, global_iv, 128);
    // Find the best hash collision
    for (u64 n=0;n<total_n;n++)
    {
        p_hash(output_block, input_block, RO_IV, m, cutoff);
        const si score = get_hash_score(output_block);
        if (score > best_score)
        {
            tweak = n;
            best_score = score;
        }
        for (ui i=0;i<16;i++)
        {
            m[i] += BLAKE_IV;
        }
    }
    // Confirm the hash collision: rebuild the winner's message words directly and regenerate its output
    memcpy(m, global_iv, 128);
    for (ui i=0;i<16;i++)
    {
        m[i] += BLAKE_IV * tweak;
    }
    p_hash(output_block, input_block, RO_IV, m, cutoff);
    const si temp_distance = get_hash_score(output_block);
    // Temporary sanity check
    if (temp_distance != best_score)
    {
        printf("ERROR: temp_distance [%d] != best_distance [%u]\nHash confirmation failed!!\n", temp_distance, (ui)best_score);
        exit(EXIT_FAILURE);
    }
    return tweak;
}
//----------------------------------------------------------------------------------------------------------------------
void p_hash(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m, const u8 cutoff)
{
    // XORs input_block with a sparse 1024-bit mask. Each mask bit is derived from one generated byte:
    // the bit is 1 when that byte is below `cutoff`. One blake2b() call yields 128 bytes (enough for
    // 16 output bytes), so the state is re-mixed with the same m every 16 output bytes.
    u64 state[16];
    memcpy(state, RO_IV, 128); // work on a copy; the caller's IV must stay untouched
    blake2b(state, m);
    u8 const * const pool = (u8 const *)state;
    ui used = 0; // generated bytes consumed since the last blake2b() call
    for (ui out = 0; out < 128; out++)
    {
        if (used == 128)
        {
            used = 0;
            blake2b(state, m);
        }
        // Assemble one mask byte, least significant bit first, from the next 8 generated bytes
        u8 mask = 0;
        for (ui bit = 0; bit < 8; bit++, used++)
        {
            if (pool[used] < cutoff)
            {
                mask |= (u8)(1u << bit);
            }
        }
        output_block[out] = mask ^ input_block[out];
    }
}
//----------------------------------------------------------------------------------------------------------------------
si get_hash_score(u8 const * const restrict block)
{
    // Scores a 128-byte (1024-bit) block as 512 minus its number of set bits:
    // positive when zeros outnumber ones, negative when ones outnumber zeros, range -512..512.
    si population = 0;
    for (ui i=0;i<16;i++)
    {
        u64 temp;
        memcpy(&temp, &block[i * 8], sizeof temp); // memcpy avoids an unaligned / aliasing-unsafe load
        // The ll variant takes unsigned long long (at least 64 bits), so it counts all 64 bits of the
        // word regardless of how wide `long` is on the target
        population += __builtin_popcountll(temp);
    }
    return 512 - population;
}
//----------------------------------------------------------------------------------------------------------------------
u64 find_shuffle(u8 * const restrict output_block, u8 const * const restrict input_block, const u64 block_n, ui * const restrict invert)
{
    // Brute-force search over 2^(TWEAK_BITS - 1) tweaks for the bit permutation (see shuffle()) with the
    // largest get_shuffle_score() magnitude.
    //   output_block : receives the winning permuted block (bit-mirrored if its score was negative)
    //   input_block  : the 128-byte block being permuted
    //   block_n      : chain index mixed into the per-stage IV
    //   invert       : set to 1 when the stored result was mirrored, else 0
    // Returns the winning tweak. Exits the process if re-running the winner gives a different score.
    const u64 candidates = (u64)1 << (TWEAK_BITS - 1);
    u64 chain[16], msg[16];
    u64 winner = 0;
    u32 winner_score = 0;
    *invert = 0;
    // Stage IV = global IV with BLAKE_IV * block_n added to every word
    memcpy(chain, global_iv, 128);
    for (ui w = 0; w < 16; w++)
    {
        chain[w] += BLAKE_IV * block_n;
    }
    // Candidate n uses message words global_iv + n * BLAKE_IV, advanced incrementally
    memcpy(msg, global_iv, 128);
    for (u64 n = 0; n < candidates; n++)
    {
        shuffle(output_block, input_block, chain, msg);
        const u32 magnitude = labs(get_shuffle_score(output_block));
        if (magnitude > winner_score)
        {
            winner_score = magnitude;
            winner = n;
        }
        for (ui w = 0; w < 16; w++)
        {
            msg[w] += BLAKE_IV;
        }
    }
    // Rebuild the winner's message words directly and regenerate its output
    memcpy(msg, global_iv, 128);
    for (ui w = 0; w < 16; w++)
    {
        msg[w] += BLAKE_IV * winner;
    }
    shuffle(output_block, input_block, chain, msg);
    s32 confirmed = get_shuffle_score(output_block);
    // A negative score means the mirrored orientation won: reverse the bit order and flag it
    if (confirmed < 0)
    {
        for (ui lo = 0, hi = 1023; lo < hi; lo++, hi--)
        {
            const ui low_bit = get_bit(output_block, lo);
            const ui high_bit = get_bit(output_block, hi);
            set_bit(output_block, lo, high_bit);
            set_bit(output_block, hi, low_bit);
        }
        confirmed = -confirmed;
        *invert = 1;
    }
    // Temporary sanity check
    if (confirmed != winner_score)
    {
        printf("ERROR: temp_score [%"PRIi32"] != best_score [%"PRIu32"]\nShuffle confirmation failed!!\n", confirmed, winner_score);
        exit(EXIT_FAILURE);
    }
    return winner;
}
//----------------------------------------------------------------------------------------------------------------------
void shuffle(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m)
{
    // Copies input_block to output_block and permutes its 1024 bits with a descending swap loop
    // (positions 1023 down to 1). Swap partners come from 16 independent 64-bit generators, seeded by
    // one blake2b() call on (RO_IV, m); position `hi` uses generator hi & 15.
    u64 prng[16];
    memcpy(prng, RO_IV, 128); // work on a copy; the caller's IV must stay untouched
    blake2b(prng, m);
    memcpy(output_block, input_block, 128);
    for (ui hi = 1023; ; hi--)
    {
        u64 * const word = &prng[hi & 15];
        const ui pick = *word % (hi + 1); // partner position in 0..hi
        const ui bit_hi = get_bit(output_block, hi);
        const ui bit_pick = get_bit(output_block, pick);
        set_bit(output_block, hi, bit_pick);
        set_bit(output_block, pick, bit_hi);
        if (hi == 1)
        {
            break;
        }
        // Advance the generator that was just used (xorshift steps 13 / 7 / 17)
        *word ^= *word << 13;
        *word ^= *word >> 7;
        *word ^= *word << 17;
    }
}
//----------------------------------------------------------------------------------------------------------------------
s32 get_shuffle_score(u8 const * const restrict block)
{
    // Position-weighted count of zero bits in a 1024-bit block, evaluated in both orientations:
    //   forward  = sum of i over all zero bits at position i
    //   mirrored = the same sum with the bit order reversed
    // Returns +forward when forward is strictly larger, otherwise -mirrored.
    s32 forward = 0, mirrored = 0;
    for (ui pos = 0; pos < 1024; pos++)
    {
        if (get_bit(block, pos) == 0)
        {
            forward += pos;
        }
        if (get_bit(block, 1023 - pos) == 0)
        {
            mirrored += pos;
        }
    }
    if (forward > mirrored)
    {
        return forward;
    }
    return -mirrored;
}
//----------------------------------------------------------------------------------------------------------------------
void expand_iv(void)
{
const u64 IV[8] =
{
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
0x510E527FADE682D1, 0x9B05688C2B3E6C1F, 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179,
};
u64 v[16], m[16];
memcpy(&v[0], iv, 16); // 16-byte 'iv' is loaded from file in main()
memcpy(&v[2], iv, 16);
memcpy(&v[4], iv, 16);
memcpy(&v[6], iv, 16);
memcpy(&v[8], IV, 64);
memcpy(&m[0], IV, 64);
memcpy(&m[8], IV, 64);
// Cheeseball way of expanding an IV from 16 to 128 bytes
for (ui i=0;i<128;i++)
{
v[i & 15] ^= m[i & 15];
blake2b(v, m);
}
for (ui i=0;i<128;i++)
{
u8 * const restrict v8 = (u8 *)v;
u8 const * const restrict m8 = (u8 *)m;
blake2b(v, m);
global_iv[i] = v8[i] ^ m8[i];
}
}
//----------------------------------------------------------------------------------------------------------------------
void blake2b(u64 * const restrict v, u64 const * const restrict m)
{
    // Mixes the 16-word state v in place with the 16 message words m: a single round of eight quarter-round
    // mixes (four on the columns of the 4x4 state, then four on its diagonals), using the
    // rotation amounts 32 / 24 / 16 / 63 and a fixed message-word order. This is one round only,
    // not a complete BLAKE2b hash.
    #define ROTR64(w, n) (((w) >> (n)) | ((w) << (64 - (n))))
    #define MIX(a, b, c, d, mx, my) \
        do { \
            v[a] += v[b] + m[mx]; \
            v[d] = ROTR64(v[d] ^ v[a], 32); \
            v[c] += v[d]; \
            v[b] = ROTR64(v[b] ^ v[c], 24); \
            v[a] += v[b] + m[my]; \
            v[d] = ROTR64(v[d] ^ v[a], 16); \
            v[c] += v[d]; \
            v[b] = ROTR64(v[b] ^ v[c], 63); \
        } while (0)
    // Columns
    MIX( 0,  4,  8, 12, 13, 11);
    MIX( 1,  5,  9, 13,  7, 14);
    MIX( 2,  6, 10, 14, 12,  1);
    MIX( 3,  7, 11, 15,  3,  9);
    // Diagonals
    MIX( 0,  5, 10, 15,  5,  0);
    MIX( 1,  6, 11, 12, 15,  4);
    MIX( 2,  7,  8, 13,  8,  6);
    MIX( 3,  4,  9, 14,  2, 10);
    #undef MIX
    #undef ROTR64
}
//----------------------------------------------------------------------------------------------------------------------
void set_tweak(const u64 block_num, const ui tweak_num, u64 tweak)
{
    // Stores the low TWEAK_BITS bits of `tweak` into the packed `tweaks` bit stream, most significant
    // bit first, in slot `tweak_num` of block `block_num` (the layout get_tweak() reads back).
    // Exactly TWEAK_BITS bits are written, zeros included, so the result does not depend on the slot
    // being zero beforehand, and bits above TWEAK_BITS are dropped instead of running the bit index
    // below the start of the slot.
    const u64 base_address = (block_num * TWEAKS * TWEAK_BITS) + (tweak_num * TWEAK_BITS);
    for (ui i = TWEAK_BITS; i-- > 0; tweak >>= 1)
    {
        set_bit(tweaks, base_address + i, tweak & 1);
    }
}
//----------------------------------------------------------------------------------------------------------------------
ui get_bit(u8 const * const restrict stream, const u32 address)
{
    // Returns bit number `address` (0 or 1) of a packed bit stream; bit 0 is the most significant bit
    // of stream[0].
    const u32 byte_index = address / CHAR_BIT;
    const u32 shift = (CHAR_BIT - 1) - (address % CHAR_BIT);
    return (stream[byte_index] >> shift) & 1;
}
//----------------------------------------------------------------------------------------------------------------------
void set_bit(u8 * const restrict stream, const u32 address, const ui bit)
{
    // Sets (bit != 0) or clears (bit == 0) bit number `address` of a packed bit stream; bit 0 is the
    // most significant bit of stream[0]. All other bits of the byte are preserved.
    u8 * const cell = &stream[address / CHAR_BIT];
    const u8 mask = 1 << ((CHAR_BIT - 1) - (address % CHAR_BIT));
    if (!bit)
    {
        *cell &= ~mask;
        return;
    }
    *cell |= mask;
}
//----------------------------------------------------------------------------------------------------------------------
u64 tick(void)
{
struct timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
return ((u64)now.tv_sec * 1000) + ((u64)now.tv_nsec / 1000000);
}
//----------------------------------------------------------------------------------------------------------------------

451
full/decompress.c Executable file
View File

@ -0,0 +1,451 @@
//----------------------------------------------------------------------------------------------------------------------
// Copyright © 2021 by Brett Kuntz. All rights reserved.
//----------------------------------------------------------------------------------------------------------------------
#include "shared.h"
//----------------------------------------------------------------------------------------------------------------------
si main(si argc, s8 ** argv)
{
    // Decompressor entry point.
    // Usage: decompress <input file> <iv file> <output file>
    // Also reads the side files "tweaks.bin" and "inverts.bin" from the current directory.
    // Returns EXIT_SUCCESS when the output was written, EXIT_FAILURE otherwise.
    if (argc != 4)
    {
        puts("param error");
        return EXIT_FAILURE;
    }
    // Zero-filled working buffers: an input file shorter than FILE_SIZE leaves a zero tail in indata
    indata = calloc(FILE_SIZE, 1);
    outdata = calloc(FILE_SIZE, 1);
    tweaks = calloc(TWEAK_SIZE, 1);
    inverts = calloc(INVERT_SIZE, 1);
    if (!indata || !outdata || !tweaks || !inverts)
    {
        puts("alloc error");
        return EXIT_FAILURE;
    }
    // Compressed (transposed) data
    FILE * finput = fopen(argv[1], "rb");
    if (!finput)
    {
        printf("Unable to open [%s] for input\n", argv[1]);
        return EXIT_FAILURE;
    }
    fread(indata, 1, FILE_SIZE, finput);
    const si input_failed = ferror(finput);
    fclose(finput);
    if (input_failed)
    {
        printf("Read error on [%s]\n", argv[1]);
        return EXIT_FAILURE;
    }
    printf("Opened [%s] for input\n", argv[1]);
    // 16-byte IV seed, later expanded by expand_iv()
    FILE * fiv = fopen(argv[2], "rb");
    if (!fiv)
    {
        printf("Unable to open [%s] for iv\n", argv[2]);
        return EXIT_FAILURE;
    }
    fread(iv, 1, 16, fiv);
    const si iv_failed = ferror(fiv);
    fclose(fiv);
    if (iv_failed)
    {
        printf("Read error on [%s]\n", argv[2]);
        return EXIT_FAILURE;
    }
    printf("Opened [%s] for iv\n", argv[2]);
    // The output file is opened (and truncated) up front so a bad path fails before the computation
    FILE * foutput = fopen(argv[3], "wb");
    if (!foutput)
    {
        printf("Unable to open [%s] for output\n", argv[3]);
        return EXIT_FAILURE;
    }
    printf("Opened [%s] for output\n", argv[3]);
    // Temp load tweaks from file. The file must hold all TWEAK_SIZE bytes; a truncated file would
    // silently decode to wrong data.
    FILE * ftweaks = fopen("tweaks.bin", "rb");
    if (!ftweaks)
    {
        puts("Unable to open [tweaks.bin]");
        return EXIT_FAILURE;
    }
    const size_t tweaks_len = fread(tweaks, 1, TWEAK_SIZE, ftweaks);
    fclose(ftweaks);
    if (tweaks_len != (size_t)TWEAK_SIZE)
    {
        puts("Short read on [tweaks.bin]");
        return EXIT_FAILURE;
    }
    puts("Opened [tweaks.bin] for input");
    // Temp load inverts from file (same completeness requirement)
    FILE * finverts = fopen("inverts.bin", "rb");
    if (!finverts)
    {
        puts("Unable to open [inverts.bin]");
        return EXIT_FAILURE;
    }
    const size_t inverts_len = fread(inverts, 1, INVERT_SIZE, finverts);
    fclose(finverts);
    if (inverts_len != (size_t)INVERT_SIZE)
    {
        puts("Short read on [inverts.bin]");
        return EXIT_FAILURE;
    }
    puts("Opened [inverts.bin] for input");
    // Transpose back: the byte at offset (i * BLOCKS) + b becomes byte i of block b
    puts("Transposing");
    memcpy(outdata, indata, FILE_SIZE);
    for (u64 i=0;i<128;i++)
    {
        for (u64 b=0;b<BLOCKS;b++)
        {
            indata[(b * 128) + i] = outdata[(i * BLOCKS) + b];
        }
    }
    memset(outdata, 0, FILE_SIZE);
    // Start
    puts("Starting decompression...");
    pthread_spin_init(&csjob, PTHREAD_PROCESS_PRIVATE);
    expand_iv();
    // One worker per reported processor; never size the handle array with zero
    const si nprocs = get_nprocs();
    const ui threads = nprocs > 0 ? (ui)nprocs : 1;
    pthread_t ht[threads];
    ui started = 0;
    start_tick = tick();
    for (ui i=0;i<threads;i++)
    {
        // Only successfully created handles are kept (packed at the front of ht[]) and joined later.
        // Workers pull block numbers from a shared counter, so fewer workers still process every block.
        if (pthread_create(&ht[started], 0, thread, 0) == 0)
        {
            started++;
        }
    }
    if (!started)
    {
        puts("thread error");
        return EXIT_FAILURE;
    }
    for (ui i=0;i<started;i++)
    {
        pthread_join(ht[i], 0);
    }
    printf("Saving [%s]\n", argv[3]);
    const size_t out_written = fwrite(outdata, 1, FILE_SIZE, foutput);
    if (fclose(foutput) != 0 || out_written != (size_t)FILE_SIZE)
    {
        printf("Write error on [%s]\n", argv[3]);
        return EXIT_FAILURE;
    }
    puts("Done :)\n");
    return EXIT_SUCCESS;
}
//----------------------------------------------------------------------------------------------------------------------
void * thread(void * UNUSED)
{
while (1)
{
// Check for and possibly grab a new job
u64 block_num = -1;
pthread_spin_lock(&csjob);
if (CS_NEXT_BLOCK_NUM < BLOCKS)
{
block_num = CS_NEXT_BLOCK_NUM++;
}
pthread_spin_unlock(&csjob);
if (block_num == -1) break;
// Do some work
printf("decompressing block %04"PRIu64"...\n", block_num);
fflush(0);
u64 tweak;
ui invert;
u64 v[16], m[16];
u8 input_block[128], output_block[128];
const u64 sub_block = block_num * BLOCK_PRIME_MUL;
memcpy(input_block, &indata[block_num * 128], 128);
// Invert the shuffle
tweak = get_tweak(block_num, TWEAKS - 1);
invert = get_bit(inverts, (block_num * 2) + 1);
memcpy(v, global_iv, 128);
memcpy(m, global_iv, 128);
for (ui i=0;i<16;i++)
{
v[i] += BLAKE_IV * (sub_block + (TWEAKS - 1));
m[i] += BLAKE_IV * tweak;
}
if (invert) // mirror all bits
{
for (ui i=0;i<512;i++)
{
const ui ii = get_bit(input_block, i);
const ui ij = get_bit(input_block, 1023 - i);
set_bit(input_block, i, ij);
set_bit(input_block, 1023 - i, ii);
}
}
ishuffle(output_block, input_block, v, m);
// Invert the p-hashes
for (si i=CUTS_LENGTH-1;i>=0;i--)
{
memcpy(input_block, output_block, 128);
tweak = get_tweak(block_num, i + 1);
memcpy(v, global_iv, 128);
memcpy(m, global_iv, 128);
for (ui j=0;j<16;j++)
{
v[j] += BLAKE_IV * (sub_block + i + 1);
m[j] += BLAKE_IV * tweak;
}
p_hash(output_block, input_block, v, m, CHAIN_CUTS[i]);
}
memcpy(input_block, output_block, 128);
// Invert the final hash
tweak = get_tweak(block_num, 0);
invert = get_bit(inverts, block_num * 2);
memcpy(v, global_iv, 128);
memcpy(m, global_iv, 128);
for (ui i=0;i<16;i++)
{
v[i] += BLAKE_IV * sub_block;
m[i] += BLAKE_IV * tweak;
}
if (invert) // flip all bits
{
for (ui i=0;i<128;i++)
{
input_block[i] = ~input_block[i];
}
}
hash(output_block, input_block, v, m);
memcpy(&outdata[block_num * 128], output_block, 128);
// Progress report
const r64 ms = (tick() - start_tick) / 60000.;
const r64 pm = (block_num + 1) / ms;
const u64 rem = BLOCKS - (block_num + 1);
printf("decompressed block %04"PRIu64" - %.1f mins remain\n", block_num, rem / pm);
fflush(0);
}
return 0;
}
//----------------------------------------------------------------------------------------------------------------------
void hash(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m)
{
    // XORs input_block with 128 bytes of keystream produced by one blake2b() call on (RO_IV, m).
    // XOR is its own inverse, so applying this with the compressor's RO_IV and m restores the data.
    u64 state[16];
    memcpy(state, RO_IV, sizeof state); // work on a copy; the caller's IV must stay untouched
    blake2b(state, m);
    u8 const * const keystream = (u8 const *)state;
    for (ui k = 0; k < 128; k++)
    {
        output_block[k] = input_block[k] ^ keystream[k];
    }
}
//----------------------------------------------------------------------------------------------------------------------
void p_hash(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m, const u8 cutoff)
{
    // XORs input_block with a sparse 1024-bit mask. Each mask bit is derived from one generated byte:
    // the bit is 1 when that byte is below `cutoff`. One blake2b() call yields 128 bytes (enough for
    // 16 output bytes), so the state is re-mixed with the same m every 16 output bytes.
    u64 state[16];
    memcpy(state, RO_IV, 128); // work on a copy; the caller's IV must stay untouched
    blake2b(state, m);
    u8 const * const pool = (u8 const *)state;
    ui used = 0; // generated bytes consumed since the last blake2b() call
    for (ui out = 0; out < 128; out++)
    {
        if (used == 128)
        {
            used = 0;
            blake2b(state, m);
        }
        // Assemble one mask byte, least significant bit first, from the next 8 generated bytes
        u8 mask = 0;
        for (ui bit = 0; bit < 8; bit++, used++)
        {
            if (pool[used] < cutoff)
            {
                mask |= (u8)(1u << bit);
            }
        }
        output_block[out] = mask ^ input_block[out];
    }
}
//----------------------------------------------------------------------------------------------------------------------
si get_hash_score(u8 const * const restrict block)
{
    // Scores a 128-byte (1024-bit) block as 512 minus its number of set bits:
    // positive when zeros outnumber ones, negative when ones outnumber zeros, range -512..512.
    si population = 0;
    for (ui i=0;i<16;i++)
    {
        u64 temp;
        memcpy(&temp, &block[i * 8], sizeof temp); // memcpy avoids an unaligned / aliasing-unsafe load
        // The ll variant takes unsigned long long (at least 64 bits), so it counts all 64 bits of the
        // word regardless of how wide `long` is on the target
        population += __builtin_popcountll(temp);
    }
    return 512 - population;
}
//----------------------------------------------------------------------------------------------------------------------
void ishuffle(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m)
{
    // Applies a 1024-bit permutation to input_block. The swap sequence (positions 1023 down to 1, with
    // partners drawn from 16 xorshift generators seeded by blake2b(RO_IV, m)) is replayed on an index
    // table; afterwards origin[k] names the output position that receives input bit k.
    u64 prng[16];
    memcpy(prng, RO_IV, 128); // work on a copy; the caller's IV must stay untouched
    blake2b(prng, m);
    u16 origin[1024];
    for (u16 k = 0; k < 1024; k++)
    {
        origin[k] = k;
    }
    for (ui hi = 1023; ; hi--)
    {
        u64 * const word = &prng[hi & 15];
        const ui pick = *word % (hi + 1); // partner position in 0..hi
        const u16 held = origin[hi];
        origin[hi] = origin[pick];
        origin[pick] = held;
        if (hi == 1)
        {
            break;
        }
        // Advance the generator that was just used (xorshift steps 13 / 7 / 17)
        *word ^= *word << 13;
        *word ^= *word >> 7;
        *word ^= *word << 17;
    }
    // origin[] is a permutation of 0..1023, so every output bit is written exactly once
    for (ui k = 0; k < 1024; k++)
    {
        set_bit(output_block, origin[k], get_bit(input_block, k));
    }
}
//----------------------------------------------------------------------------------------------------------------------
s32 get_shuffle_score(u8 const * const restrict block)
{
    // Position-weighted count of zero bits in a 1024-bit block, evaluated in both orientations:
    //   forward  = sum of i over all zero bits at position i
    //   mirrored = the same sum with the bit order reversed
    // Returns +forward when forward is strictly larger, otherwise -mirrored.
    s32 forward = 0, mirrored = 0;
    for (ui pos = 0; pos < 1024; pos++)
    {
        if (get_bit(block, pos) == 0)
        {
            forward += pos;
        }
        if (get_bit(block, 1023 - pos) == 0)
        {
            mirrored += pos;
        }
    }
    if (forward > mirrored)
    {
        return forward;
    }
    return -mirrored;
}
//----------------------------------------------------------------------------------------------------------------------
void expand_iv(void)
{
const u64 IV[8] =
{
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
0x510E527FADE682D1, 0x9B05688C2B3E6C1F, 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179,
};
u64 v[16], m[16];
memcpy(&v[0], iv, 16); // 16-byte 'iv' is loaded from file in main()
memcpy(&v[2], iv, 16);
memcpy(&v[4], iv, 16);
memcpy(&v[6], iv, 16);
memcpy(&v[8], IV, 64);
memcpy(&m[0], IV, 64);
memcpy(&m[8], IV, 64);
// Cheeseball way of expanding an IV from 16 to 128 bytes
for (ui i=0;i<128;i++)
{
v[i & 15] ^= m[i & 15];
blake2b(v, m);
}
for (ui i=0;i<128;i++)
{
u8 * const restrict v8 = (u8 *)v;
u8 const * const restrict m8 = (u8 *)m;
blake2b(v, m);
global_iv[i] = v8[i] ^ m8[i];
}
}
//----------------------------------------------------------------------------------------------------------------------
void blake2b(u64 * const restrict v, u64 const * const restrict m)
{
    // Mixes the 16-word state v in place with the 16 message words m: a single round of eight quarter-round
    // mixes (four on the columns of the 4x4 state, then four on its diagonals), using the
    // rotation amounts 32 / 24 / 16 / 63 and a fixed message-word order. This is one round only,
    // not a complete BLAKE2b hash.
    #define ROTR64(w, n) (((w) >> (n)) | ((w) << (64 - (n))))
    #define MIX(a, b, c, d, mx, my) \
        do { \
            v[a] += v[b] + m[mx]; \
            v[d] = ROTR64(v[d] ^ v[a], 32); \
            v[c] += v[d]; \
            v[b] = ROTR64(v[b] ^ v[c], 24); \
            v[a] += v[b] + m[my]; \
            v[d] = ROTR64(v[d] ^ v[a], 16); \
            v[c] += v[d]; \
            v[b] = ROTR64(v[b] ^ v[c], 63); \
        } while (0)
    // Columns
    MIX( 0,  4,  8, 12, 13, 11);
    MIX( 1,  5,  9, 13,  7, 14);
    MIX( 2,  6, 10, 14, 12,  1);
    MIX( 3,  7, 11, 15,  3,  9);
    // Diagonals
    MIX( 0,  5, 10, 15,  5,  0);
    MIX( 1,  6, 11, 12, 15,  4);
    MIX( 2,  7,  8, 13,  8,  6);
    MIX( 3,  4,  9, 14,  2, 10);
    #undef MIX
    #undef ROTR64
}
//----------------------------------------------------------------------------------------------------------------------
u64 get_tweak(const u64 block_num, const ui tweak_num)
{
u64 tweak = 0;
const u64 base_address = (block_num * TWEAKS * TWEAK_BITS) + (tweak_num * TWEAK_BITS);
for (ui i=0;i<TWEAK_BITS;i++)
{
tweak <<= 1;
tweak |= get_bit(tweaks, base_address + i);
}
return tweak;
}
//----------------------------------------------------------------------------------------------------------------------
ui get_bit(u8 const * const restrict stream, const u32 address)
{
    // Returns bit number `address` (0 or 1) of a packed bit stream; bit 0 is the most significant bit
    // of stream[0].
    const u32 byte_index = address / CHAR_BIT;
    const u32 shift = (CHAR_BIT - 1) - (address % CHAR_BIT);
    return (stream[byte_index] >> shift) & 1;
}
//----------------------------------------------------------------------------------------------------------------------
void set_bit(u8 * const restrict stream, const u32 address, const ui bit)
{
    // Sets (bit != 0) or clears (bit == 0) bit number `address` of a packed bit stream; bit 0 is the
    // most significant bit of stream[0]. All other bits of the byte are preserved.
    u8 * const cell = &stream[address / CHAR_BIT];
    const u8 mask = 1 << ((CHAR_BIT - 1) - (address % CHAR_BIT));
    if (!bit)
    {
        *cell &= ~mask;
        return;
    }
    *cell |= mask;
}
//----------------------------------------------------------------------------------------------------------------------
u64 tick(void)
{
struct timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
return ((u64)now.tv_sec * 1000) + ((u64)now.tv_nsec / 1000000);
}
//----------------------------------------------------------------------------------------------------------------------

11
full/make.sh Executable file
View File

@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Copyright © 2021 by Brett Kuntz. All rights reserved.
# Builds the compressor and decompressor with clang. Run from the directory containing the sources.
# Stop at the first failing command so a broken build is reported through the exit status.
set -e
clang compress.c -o compress.exe -pipe -pthread -Wall -Werror -Wfatal-errors -O3 -fomit-frame-pointer -march=native -mtune=native
clang decompress.c -o decompress.exe -pipe -pthread -Wall -Werror -Wfatal-errors -O3 -fomit-frame-pointer -march=native -mtune=native
# On Linux/POSIX systems, comment out the two lines above and uncomment the two lines below; those toolchains will likely support -flto
#clang compress.c -o compress -pipe -pthread -Wall -Werror -Wfatal-errors -O3 -fomit-frame-pointer -flto -march=native -mtune=native
#clang decompress.c -o decompress -pipe -pthread -Wall -Werror -Wfatal-errors -O3 -fomit-frame-pointer -flto -march=native -mtune=native

66
full/shared.h Executable file
View File

@ -0,0 +1,66 @@
//----------------------------------------------------------------------------------------------------------------------
// Copyright © 2021 by Brett Kuntz. All rights reserved.
//----------------------------------------------------------------------------------------------------------------------
// Shared types, tunables, global state and prototypes for compress.c and decompress.c.
// NOTE: this header DEFINES global objects (buffers, locks, constant tables), so it must be included by exactly
// one translation unit per program.
//----------------------------------------------------------------------------------------------------------------------
#ifndef SHARED_H
#define SHARED_H
//----------------------------------------------------------------------------------------------------------------------
#define _FILE_OFFSET_BITS 64
//----------------------------------------------------------------------------------------------------------------------
#include <assert.h>
#include <limits.h>
static_assert(CHAR_BIT == 8, "This code requires [char] to be exactly 8 bits.");
static_assert(sizeof(long) == 8, "This code requires [long] to be exactly 8 bytes."); // __builtin_popcountl
//----------------------------------------------------------------------------------------------------------------------
// Short fixed-width type aliases used throughout the project
#include <stdint.h>
typedef unsigned char u8 ; typedef char s8 ;
typedef uint16_t u16 ; typedef int16_t s16 ;
typedef uint32_t u32 ; typedef int32_t s32 ;
typedef uint64_t u64 ; typedef int64_t s64 ;
typedef __uint128_t u128 ; typedef __int128_t s128 ;
typedef unsigned int ui ; typedef int si ;
typedef unsigned long ul ; typedef long sl ;
typedef unsigned long long ull ; typedef long long sll ;
typedef float r32 ; typedef double r64 ;
//----------------------------------------------------------------------------------------------------------------------
// Flush all output streams, then sleep forever
#define halt do { fflush(0); while (1) sleep(-1); } while (0)
//----------------------------------------------------------------------------------------------------------------------
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <inttypes.h>
#include <sys/sysinfo.h>
//----------------------------------------------------------------------------------------------------------------------
#define FILE_SIZE 1048576 // bytes processed per run (BLOCKS blocks of 128 bytes)
#define BLOCKS 8192 // number of 128-byte blocks
#define BLOCK_PRIME_MUL 83 // block b uses chain indices starting at b * BLOCK_PRIME_MUL
#define CUTS_LENGTH 28 // number of partial-hash stages per block
#define TWEAKS (CUTS_LENGTH + 2) // 28 chains, 1 full hash, 1 shuffle
#define TWEAK_BITS 20 // bits stored per tweak; each search tries 2^(TWEAK_BITS - 1) candidates
#define TWEAK_SIZE ((TWEAKS * TWEAK_BITS * BLOCKS) / CHAR_BIT) // bytes in the packed tweak stream
#define INVERT_SIZE ((2 * BLOCKS) / CHAR_BIT) // bytes in the packed invert-flag stream (2 flags per block)
// Per-stage byte thresholds for p_hash(): a generated byte below the threshold yields a 1 bit in the XOR mask
const u8 CHAIN_CUTS[CUTS_LENGTH] = { 37, 23, 17, 14, 11, 9, 8, 7, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2 };
// Step constant: added (times block index / tweak) to every word of the stage IV / message
const u64 BLAKE_IV = UINT64_C(0xA54FF53A5F1D36F1);
// Working buffers (allocated in main), the 16-byte IV seed read from file and its 128-byte expansion
u8 * indata, * outdata, * tweaks, * inverts, iv[16], global_iv[128];
// Next block number to hand out to a worker (guarded by csjob) and the start time in ms from tick()
u64 CS_NEXT_BLOCK_NUM, start_tick;
// csjob guards CS_NEXT_BLOCK_NUM; csmem guards the packed tweaks / inverts bit streams
pthread_spinlock_t csjob, csmem;
//----------------------------------------------------------------------------------------------------------------------
u64 find_hash(u8 * const restrict output_block, u8 const * const restrict input_block, const u64 block_n, ui * const restrict invert);
void hash(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m);
u64 find_p_hash(u8 * const restrict output_block, u8 const * const restrict input_block, const u64 block_n, const u8 cutoff);
void p_hash(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m, const u8 cutoff);
si get_hash_score(u8 const * const restrict block);
//----------------------------------------------------------------------------------------------------------------------
u64 find_shuffle(u8 * const restrict output_block, u8 const * const restrict input_block, const u64 block_n, ui * const restrict invert);
void shuffle(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m);
void ishuffle(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m);
s32 get_shuffle_score(u8 const * const restrict block);
//----------------------------------------------------------------------------------------------------------------------
void * thread(void * UNUSED);
void set_tweak(const u64 block_num, const ui tweak_num, u64 tweak);
u64 get_tweak(const u64 block_num, const ui tweak_num);
void set_bit(u8 * const restrict stream, const u32 address, const ui bit);
ui get_bit(u8 const * const restrict stream, const u32 address);
void expand_iv(void);
void blake2b(u64 * const restrict v, u64 const * const restrict m);
u64 tick(void);
//----------------------------------------------------------------------------------------------------------------------
#endif // SHARED_H
//----------------------------------------------------------------------------------------------------------------------