Initial commit
This commit is contained in:
parent
3f63a5f4c1
commit
78a9826d0a
|
@ -0,0 +1,615 @@
|
|||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Copyright © 2021 by Brett Kuntz. All rights reserved.
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
#include "shared.h"
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Compressor entry point. Usage: compress <input> <iv> <output>
//
// Reads up to FILE_SIZE bytes of input and up to 16 bytes of IV, lets one worker per reported processor run
// thread() until every block has been processed, byte-transposes the result and writes it to <output>. The
// tweak and invert bit-streams are saved to "tweaks.bin" and "inverts.bin" in the current directory.
//
// Returns EXIT_SUCCESS only when all three files were written completely, EXIT_FAILURE otherwise.
si main(si argc, s8 ** argv)
{
    // Command line

    if (argc != 4)
    {
        puts("param error");
        return EXIT_FAILURE;
    }

    indata = calloc(FILE_SIZE, 1);
    outdata = calloc(FILE_SIZE, 1);
    tweaks = calloc(TWEAK_SIZE, 1);
    inverts = calloc(INVERT_SIZE, 1);

    if (!indata || !outdata || !tweaks || !inverts)
    {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    FILE * finput = fopen(argv[1], "rb");
    if (!finput)
    {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    // A short read is not treated as an error; unread bytes keep the zero fill from calloc
    fread(indata, 1, FILE_SIZE, finput);
    fclose(finput);
    printf("Opened [%s] for input\n", argv[1]);

    FILE * fiv = fopen(argv[2], "rb");
    if (!fiv)
    {
        perror(argv[2]);
        return EXIT_FAILURE;
    }

    fread(iv, 1, 16, fiv);
    fclose(fiv);
    printf("Opened [%s] for iv\n", argv[2]);

    // The output is opened before the work starts so an unwritable path fails early
    FILE * foutput = fopen(argv[3], "wb");
    if (!foutput)
    {
        perror(argv[3]);
        return EXIT_FAILURE;
    }

    printf("Opened [%s] for output\n", argv[3]);

    // Start

    puts("Starting compression...");

    pthread_spin_init(&csjob, PTHREAD_PROCESS_PRIVATE);
    pthread_spin_init(&csmem, PTHREAD_PROCESS_PRIVATE);

    expand_iv();

    const si nprocs = get_nprocs();
    const ui threads = nprocs > 0 ? (ui)nprocs : 1;
    pthread_t ht[threads];
    ui started = 0;

    start_tick = tick();

    for (ui i=0;i<threads;i++)
    {
        const si err = pthread_create(&ht[started], 0, thread, 0);

        if (err)
        {
            // Workers pull blocks from a shared counter, so the ones that did start can finish the job
            fprintf(stderr, "pthread_create failed: %s\n", strerror(err));
            break;
        }

        started++;
    }

    // Only join handles that pthread_create actually filled in
    for (ui i=0;i<started;i++)
    {
        pthread_join(ht[i], 0);
    }

    if (!started)
    {
        fclose(foutput);
        return EXIT_FAILURE;
    }

    // Transpose: byte i of block b moves to offset (i * BLOCKS) + b

    puts("Transposing");
    memcpy(indata, outdata, FILE_SIZE);

    for (u64 i=0;i<128;i++)
    {
        for (u64 b=0;b<BLOCKS;b++)
        {
            outdata[(i * BLOCKS) + b] = indata[(b * 128) + i];
        }
    }

    printf("Saving [%s]\n", argv[3]);

    const size_t file_size = FILE_SIZE;
    const size_t out_written = fwrite(outdata, 1, file_size, foutput);
    const si out_closed = fclose(foutput);

    if (out_written != file_size || out_closed != 0)
    {
        fprintf(stderr, "Failed to write [%s]\n", argv[3]);
        return EXIT_FAILURE;
    }

    // Temp save tweaks to file

    FILE * ftweaks = fopen("tweaks.bin", "wb");
    if (!ftweaks)
    {
        perror("tweaks.bin");
        return EXIT_FAILURE;
    }

    const size_t tweak_size = TWEAK_SIZE;
    const size_t tweaks_written = fwrite(tweaks, 1, tweak_size, ftweaks);
    const si tweaks_closed = fclose(ftweaks);

    if (tweaks_written != tweak_size || tweaks_closed != 0)
    {
        fprintf(stderr, "Failed to write [tweaks.bin]\n");
        return EXIT_FAILURE;
    }

    puts("Saving [tweaks.bin]");

    // Temp save inverts to file

    FILE * finverts = fopen("inverts.bin", "wb");
    if (!finverts)
    {
        perror("inverts.bin");
        return EXIT_FAILURE;
    }

    const size_t invert_size = INVERT_SIZE;
    const size_t inverts_written = fwrite(inverts, 1, invert_size, finverts);
    const si inverts_closed = fclose(finverts);

    if (inverts_written != invert_size || inverts_closed != 0)
    {
        fprintf(stderr, "Failed to write [inverts.bin]\n");
        return EXIT_FAILURE;
    }

    puts("Saving [inverts.bin]");

    puts("Done :)\n");

    return EXIT_SUCCESS;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
void * thread(void * UNUSED)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
// Check for and possibly grab a new job
|
||||
|
||||
u64 block_num = -1;
|
||||
pthread_spin_lock(&csjob);
|
||||
if (CS_NEXT_BLOCK_NUM < BLOCKS)
|
||||
{
|
||||
block_num = CS_NEXT_BLOCK_NUM++;
|
||||
}
|
||||
pthread_spin_unlock(&csjob);
|
||||
if (block_num == -1) break;
|
||||
|
||||
// Do some work
|
||||
|
||||
printf("compressing block %04"PRIu64"...\n", block_num);
|
||||
fflush(0);
|
||||
|
||||
u64 tweak;
|
||||
ui invert;
|
||||
u8 input_block[128], output_block[128];
|
||||
|
||||
const u64 sub_block = block_num * BLOCK_PRIME_MUL;
|
||||
|
||||
memcpy(input_block, &indata[block_num * 128], 128);
|
||||
|
||||
// Find the first collision
|
||||
|
||||
tweak = find_hash(output_block, input_block, sub_block, &invert);
|
||||
|
||||
pthread_spin_lock(&csmem);
|
||||
{
|
||||
set_tweak(block_num, 0, tweak);
|
||||
set_bit(inverts, block_num * 2, invert);
|
||||
}
|
||||
pthread_spin_unlock(&csmem);
|
||||
|
||||
memcpy(input_block, output_block, 128);
|
||||
|
||||
// Find all subsequent collisions
|
||||
|
||||
for (ui i=0;i<CUTS_LENGTH;i++)
|
||||
{
|
||||
tweak = find_p_hash(output_block, input_block, sub_block + i + 1, CHAIN_CUTS[i]);
|
||||
|
||||
pthread_spin_lock(&csmem);
|
||||
{
|
||||
set_tweak(block_num, i + 1, tweak);
|
||||
}
|
||||
pthread_spin_unlock(&csmem);
|
||||
|
||||
memcpy(input_block, output_block, 128);
|
||||
}
|
||||
|
||||
// Shuffle the resulting block
|
||||
|
||||
tweak = find_shuffle(output_block, input_block, sub_block + (TWEAKS - 1), &invert);
|
||||
|
||||
pthread_spin_lock(&csmem);
|
||||
{
|
||||
set_tweak(block_num, (TWEAKS - 1), tweak);
|
||||
set_bit(inverts, (block_num * 2) + 1, invert);
|
||||
}
|
||||
pthread_spin_unlock(&csmem);
|
||||
|
||||
memcpy(&outdata[block_num * 128], output_block, 128);
|
||||
|
||||
// Progress report
|
||||
|
||||
const r64 ms = (tick() - start_tick) / 60000.;
|
||||
const r64 pm = (block_num + 1) / ms;
|
||||
const u64 rem = BLOCKS - (block_num + 1);
|
||||
printf("compressed block %04"PRIu64" - %.1f mins remain\n", block_num, rem / pm);
|
||||
fflush(0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Searches all 2^(TWEAK_BITS - 1) tweaks for the one whose hash() output has the largest absolute
// get_hash_score(), then recomputes that output into output_block.
//
// output_block  receives the winning 128-byte result (bitwise inverted when its score was negative)
// input_block   128-byte block being hashed
// block_n       stage number; the IV copy is offset by BLAKE_IV * block_n
// invert        set to 1 when the result was inverted, otherwise 0
//
// Returns the winning tweak. Exits the process if the recomputed score differs from the one found.
u64 find_hash(u8 * const restrict output_block, u8 const * const restrict input_block, const u64 block_n, ui * const restrict invert)
{
    const u64 candidates = (u64)1 << (TWEAK_BITS - 1);

    *invert = 0;

    // Stage-specific IV, constant for the whole search
    u64 RO_IV[16];
    memcpy(RO_IV, global_iv, sizeof RO_IV);

    for (ui k = 0; k < 16; k++)
    {
        RO_IV[k] += BLAKE_IV * block_n;
    }

    // Message words for candidate n are global_iv + n * BLAKE_IV, advanced incrementally
    u64 m[16];
    memcpy(m, global_iv, sizeof m);

    u64 best_tweak = 0;
    ui best_distance = 0;

    for (u64 n = 0; n < candidates; n++)
    {
        hash(output_block, input_block, RO_IV, m);

        const ui distance = (ui)labs(get_hash_score(output_block));

        if (distance > best_distance)
        {
            best_distance = distance;
            best_tweak = n;
        }

        for (ui k = 0; k < 16; k++)
        {
            m[k] += BLAKE_IV;
        }
    }

    // Rebuild the winner's message words from scratch and hash once more

    memcpy(m, global_iv, sizeof m);

    for (ui k = 0; k < 16; k++)
    {
        m[k] += BLAKE_IV * best_tweak;
    }

    hash(output_block, input_block, RO_IV, m);

    si confirmed = get_hash_score(output_block);

    // A negative score is turned positive by flipping every bit; the decompressor must undo this

    if (confirmed < 0)
    {
        for (ui k = 0; k < 128; k++)
        {
            output_block[k] ^= 0xFF;
        }

        confirmed = -confirmed;
        *invert = 1;
    }

    // Temporary sanity check

    if (confirmed != best_distance)
    {
        printf("ERROR: temp_distance [%d] != best_distance [%u]\nHash confirmation failed!!\n", confirmed, best_distance);
        exit(EXIT_FAILURE);
    }

    return best_tweak;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// XORs the 128-byte input_block with the 128 bytes produced by one blake2b() pass over a copy of RO_IV
// with message words m, and stores the result in output_block. RO_IV and m are left untouched.
void hash(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m)
{
    u64 state[16];
    memcpy(state, RO_IV, sizeof state); // work on a copy; the caller reuses RO_IV

    blake2b(state, m);

    u8 const * const pad = (u8 const *)state;

    for (ui k = 0; k < 128; k++)
    {
        output_block[k] = pad[k] ^ input_block[k];
    }
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Searches all 2^(TWEAK_BITS - 1) tweaks for the one whose p_hash() output has the highest
// get_hash_score(), then recomputes that output into output_block.
//
// Candidates are ranked by the SIGNED score. This stage has no invert flag, and the confirmation below
// compares the signed score, so ranking by absolute value could pick a negative-score candidate that then
// fails the confirmation. For every input where the absolute-value ranking passed the confirmation, the
// signed ranking selects the same tweak.
//
// output_block  receives the winning 128-byte result
// input_block   128-byte block being hashed
// block_n       stage number; the IV copy is offset by BLAKE_IV * block_n
// cutoff        threshold forwarded to p_hash()
//
// Returns the winning tweak. Exits the process if the recomputed score differs from the one found.
u64 find_p_hash(u8 * const restrict output_block, u8 const * const restrict input_block, const u64 block_n, const u8 cutoff)
{
    u64 tweak = 0;
    si best_score = 0;
    const u64 total_n = (u64)1 << (TWEAK_BITS - 1);

    u64 RO_IV[16];
    memcpy(RO_IV, global_iv, 128);

    for (ui i=0;i<16;i++)
    {
        RO_IV[i] += BLAKE_IV * block_n;
    }

    u64 m[16];
    memcpy(m, global_iv, 128);

    // Find the best hash collision

    for (u64 n=0;n<total_n;n++)
    {
        p_hash(output_block, input_block, RO_IV, m, cutoff);

        const si score = get_hash_score(output_block);

        // Candidate 0 seeds the running best so an all-negative search still confirms
        if (n == 0 || score > best_score)
        {
            tweak = n;
            best_score = score;
        }

        for (ui i=0;i<16;i++)
        {
            m[i] += BLAKE_IV;
        }
    }

    // Confirm the hash collision

    memcpy(m, global_iv, 128);

    for (ui i=0;i<16;i++)
    {
        m[i] += BLAKE_IV * tweak;
    }

    p_hash(output_block, input_block, RO_IV, m, cutoff);

    const si temp_distance = get_hash_score(output_block);

    // Temporary sanity check

    if (temp_distance != best_score)
    {
        printf("ERROR: temp_distance [%d] != best_distance [%u]\nHash confirmation failed!!\n", temp_distance, (ui)best_score);
        exit(EXIT_FAILURE);
    }

    return tweak;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Builds a 128-byte mask and XORs it onto input_block, storing the result in output_block.
// Each mask byte is assembled from 8 consecutive state bytes: bit k (least significant first) is set when
// the k-th of those bytes is below cutoff. The state starts as a copy of RO_IV and is advanced with
// blake2b() once before the first byte and again after every 16 output bytes (128 state bytes consumed).
void p_hash(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m, const u8 cutoff)
{
    u64 state[16];
    memcpy(state, RO_IV, sizeof state); // work on a copy; the caller reuses RO_IV

    u8 const * const state_bytes = (u8 const *)state;

    for (ui i = 0; i < 128; i++)
    {
        const ui group = i % 16; // which 8-byte group of the current state feeds this output byte

        if (group == 0)
        {
            blake2b(state, m);
        }

        u8 const * const src = &state_bytes[group * 8];
        u8 mask = 0;

        for (ui k = 0; k < 8; k++)
        {
            if (src[k] < cutoff)
            {
                mask |= (u8)(1u << k);
            }
        }

        output_block[i] = mask ^ input_block[i];
    }
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Returns 512 minus the number of set bits in the 128-byte block: positive when the block holds more
// zero bits than one bits, negative when it holds more ones.
si get_hash_score(u8 const * const restrict block)
{
    si score = 512;

    for (u8 const * word = block; word != block + 128; word += sizeof(u64))
    {
        u64 bits;
        memcpy(&bits, word, sizeof bits); // block carries no alignment guarantee

        score -= __builtin_popcountl(bits);
    }

    return score;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Searches all 2^(TWEAK_BITS - 1) tweaks for the one whose shuffle() output has the largest absolute
// get_shuffle_score(), then recomputes that output into output_block.
//
// output_block  receives the winning 128-byte result (bit order mirrored when its score was negative)
// input_block   128-byte block being shuffled
// block_n       stage number; the IV copy is offset by BLAKE_IV * block_n
// invert        set to 1 when the result was mirrored, otherwise 0
//
// Returns the winning tweak. Exits the process if the recomputed score differs from the one found.
u64 find_shuffle(u8 * const restrict output_block, u8 const * const restrict input_block, const u64 block_n, ui * const restrict invert)
{
    const u64 candidates = (u64)1 << (TWEAK_BITS - 1);

    *invert = 0;

    // Stage-specific IV, constant for the whole search
    u64 RO_IV[16];
    memcpy(RO_IV, global_iv, sizeof RO_IV);

    for (ui k = 0; k < 16; k++)
    {
        RO_IV[k] += BLAKE_IV * block_n;
    }

    // Message words for candidate n are global_iv + n * BLAKE_IV, advanced incrementally
    u64 m[16];
    memcpy(m, global_iv, sizeof m);

    u64 best_tweak = 0;
    u32 best_score = 0;

    for (u64 n = 0; n < candidates; n++)
    {
        shuffle(output_block, input_block, RO_IV, m);

        const u32 magnitude = labs(get_shuffle_score(output_block));

        if (magnitude > best_score)
        {
            best_tweak = n;
            best_score = magnitude;
        }

        for (ui k = 0; k < 16; k++)
        {
            m[k] += BLAKE_IV;
        }
    }

    // Rebuild the winner's message words from scratch and shuffle once more

    memcpy(m, global_iv, sizeof m);

    for (ui k = 0; k < 16; k++)
    {
        m[k] += BLAKE_IV * best_tweak;
    }

    shuffle(output_block, input_block, RO_IV, m);

    s32 confirmed = get_shuffle_score(output_block);

    // A negative score means the mirrored layout scored better: reverse the order of all 1024 bits

    if (confirmed < 0)
    {
        for (ui lo = 0, hi = 1023; lo < hi; lo++, hi--)
        {
            const ui low_bit = get_bit(output_block, lo);
            const ui high_bit = get_bit(output_block, hi);

            set_bit(output_block, lo, high_bit);
            set_bit(output_block, hi, low_bit);
        }

        confirmed = -confirmed;
        *invert = 1;
    }

    // Temporary sanity check

    if (confirmed != best_score)
    {
        printf("ERROR: temp_score [%"PRIi32"] != best_score [%"PRIu32"]\nShuffle confirmation failed!!\n", confirmed, best_score);
        exit(EXIT_FAILURE);
    }

    return best_tweak;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Copies input_block to output_block and permutes its 1024 bits in place. Walking hi from 1023 down to 1,
// bit hi is swapped with bit (state[hi & 15] % (hi + 1)); after each swap except the last, the state word
// that was just used is advanced with a 13/7/17 xorshift. The state is one blake2b() pass over a copy of
// RO_IV with message words m.
void shuffle(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m)
{
    u64 state[16];
    memcpy(state, RO_IV, sizeof state); // work on a copy; the caller reuses RO_IV

    blake2b(state, m);

    memcpy(output_block, input_block, 128);

    for (ui hi = 1023; hi >= 1; hi--)
    {
        u64 * const word = &state[hi & 15];

        const ui lo = *word % (hi + 1);

        const ui high_bit = get_bit(output_block, hi);
        const ui low_bit = get_bit(output_block, lo);

        set_bit(output_block, hi, low_bit);
        set_bit(output_block, lo, high_bit);

        if (hi > 1)
        {
            // Step the word just consumed so its next use (16 positions later) differs
            *word ^= *word << 13;
            *word ^= *word >> 7;
            *word ^= *word << 17;
        }
    }
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Scores where the zero bits of a 1024-bit block sit. 'forward' sums the position of every zero bit;
// 'mirrored' sums (1023 - position), i.e. the same sum taken over the bit-reversed block.
// Returns forward when it is strictly larger, otherwise the negated mirrored sum.
s32 get_shuffle_score(u8 const * const restrict block)
{
    s32 forward = 0, mirrored = 0;

    for (ui pos = 0; pos < 1024; pos++)
    {
        if (get_bit(block, pos)) continue;

        forward += (s32)pos;
        mirrored += (s32)(1023 - pos);
    }

    if (forward > mirrored) return forward;

    return -mirrored;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
void expand_iv(void)
|
||||
{
|
||||
const u64 IV[8] =
|
||||
{
|
||||
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
|
||||
0x510E527FADE682D1, 0x9B05688C2B3E6C1F, 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179,
|
||||
};
|
||||
|
||||
u64 v[16], m[16];
|
||||
|
||||
memcpy(&v[0], iv, 16); // 16-byte 'iv' is loaded from file in main()
|
||||
memcpy(&v[2], iv, 16);
|
||||
memcpy(&v[4], iv, 16);
|
||||
memcpy(&v[6], iv, 16);
|
||||
|
||||
memcpy(&v[8], IV, 64);
|
||||
memcpy(&m[0], IV, 64);
|
||||
memcpy(&m[8], IV, 64);
|
||||
|
||||
// Cheeseball way of expanding an IV from 16 to 128 bytes
|
||||
|
||||
for (ui i=0;i<128;i++)
|
||||
{
|
||||
v[i & 15] ^= m[i & 15];
|
||||
blake2b(v, m);
|
||||
}
|
||||
|
||||
for (ui i=0;i<128;i++)
|
||||
{
|
||||
u8 * const restrict v8 = (u8 *)v;
|
||||
u8 const * const restrict m8 = (u8 *)m;
|
||||
|
||||
blake2b(v, m);
|
||||
global_iv[i] = v8[i] ^ m8[i];
|
||||
}
|
||||
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Applies a single round of BLAKE2b-style G mixing to the 16-word state v, in place: four column mixes
// followed by four diagonal mixes, using rotations 32/24/16/63 and a fixed selection of message words m.
void blake2b(u64 * const restrict v, u64 const * const restrict m)
{
    // Rotate the 64-bit value w right by n bits (0 < n < 64)
    #define BLAKE2B_ROTR64(w, n) (((w) >> (n)) | ((w) << (64 - (n))))

    // Mix state words a, b, c, d with message words mx and my
    #define BLAKE2B_G(a, b, c, d, mx, my) \
        do { \
            v[a] += v[b] + m[mx]; \
            v[d] = BLAKE2B_ROTR64(v[d] ^ v[a], 32); \
            v[c] += v[d]; \
            v[b] = BLAKE2B_ROTR64(v[b] ^ v[c], 24); \
            v[a] += v[b] + m[my]; \
            v[d] = BLAKE2B_ROTR64(v[d] ^ v[a], 16); \
            v[c] += v[d]; \
            v[b] = BLAKE2B_ROTR64(v[b] ^ v[c], 63); \
        } while (0)

    // Columns
    BLAKE2B_G(0, 4,  8, 12, 13, 11);
    BLAKE2B_G(1, 5,  9, 13,  7, 14);
    BLAKE2B_G(2, 6, 10, 14, 12,  1);
    BLAKE2B_G(3, 7, 11, 15,  3,  9);

    // Diagonals
    BLAKE2B_G(0, 5, 10, 15,  5,  0);
    BLAKE2B_G(1, 6, 11, 12, 15,  4);
    BLAKE2B_G(2, 7,  8, 13,  8,  6);
    BLAKE2B_G(3, 4,  9, 14,  2, 10);

    #undef BLAKE2B_G
    #undef BLAKE2B_ROTR64
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Stores the low TWEAK_BITS bits of 'tweak' into the 'tweaks' bit-stream, most significant bit first,
// in the slot belonging to (block_num, tweak_num). Each block owns TWEAKS consecutive slots.
//
// Exactly TWEAK_BITS bits are written, zeros included. The slot therefore does not need to be zeroed
// beforehand, and a tweak wider than TWEAK_BITS cannot run the index below the start of its slot.
void set_tweak(const u64 block_num, const ui tweak_num, u64 tweak)
{
    const u64 base_address = (block_num * TWEAKS * TWEAK_BITS) + (tweak_num * TWEAK_BITS);

    // i counts down from TWEAK_BITS - 1 to 0 while the tweak is consumed from its least significant bit
    for (ui i=TWEAK_BITS;i-->0;tweak>>=1)
    {
        set_bit(tweaks, base_address + i, tweak & 1);
    }
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Returns bit number 'address' (0 or 1) of the byte stream. Bits are numbered most significant first
// within each byte, so address 0 is the top bit of stream[0].
ui get_bit(u8 const * const restrict stream, const u32 address)
{
    const u8 mask = (u8)((1u << (CHAR_BIT - 1)) >> (address % CHAR_BIT));

    return (stream[address / CHAR_BIT] & mask) != 0;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Sets bit number 'address' of the byte stream to 1 when 'bit' is non-zero and to 0 otherwise. Bits are
// numbered most significant first within each byte. The other bits of the byte are preserved.
void set_bit(u8 * const restrict stream, const u32 address, const ui bit)
{
    u8 * const cell = &stream[address / CHAR_BIT];
    const u8 mask = (u8)((1u << (CHAR_BIT - 1)) >> (address % CHAR_BIT));

    if (bit)
    {
        *cell |= mask;
    }
    else
    {
        *cell &= ~mask;
    }
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
u64 tick(void)
|
||||
{
|
||||
struct timespec now;
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
return ((u64)now.tv_sec * 1000) + ((u64)now.tv_nsec / 1000000);
|
||||
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
|
@ -0,0 +1,451 @@
|
|||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Copyright © 2021 by Brett Kuntz. All rights reserved.
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
#include "shared.h"
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Decompressor entry point. Usage: decompress <input> <iv> <output>
//
// Reads up to FILE_SIZE bytes of compressed input and up to 16 bytes of IV, loads the tweak and invert
// bit-streams from "tweaks.bin" and "inverts.bin" in the current directory, undoes the byte transpose,
// lets one worker per reported processor run thread() until every block has been processed, and writes
// the result to <output>.
//
// Returns EXIT_SUCCESS only when the output was written completely, EXIT_FAILURE otherwise.
si main(si argc, s8 ** argv)
{
    // Command line

    if (argc != 4)
    {
        puts("param error");
        return EXIT_FAILURE;
    }

    indata = calloc(FILE_SIZE, 1);
    outdata = calloc(FILE_SIZE, 1);
    tweaks = calloc(TWEAK_SIZE, 1);
    inverts = calloc(INVERT_SIZE, 1);

    if (!indata || !outdata || !tweaks || !inverts)
    {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    FILE * finput = fopen(argv[1], "rb");
    if (!finput)
    {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    // A short read is not treated as an error; unread bytes keep the zero fill from calloc
    fread(indata, 1, FILE_SIZE, finput);
    fclose(finput);
    printf("Opened [%s] for input\n", argv[1]);

    FILE * fiv = fopen(argv[2], "rb");
    if (!fiv)
    {
        perror(argv[2]);
        return EXIT_FAILURE;
    }

    fread(iv, 1, 16, fiv);
    fclose(fiv);
    printf("Opened [%s] for iv\n", argv[2]);

    // The output is opened before the work starts so an unwritable path fails early
    FILE * foutput = fopen(argv[3], "wb");
    if (!foutput)
    {
        perror(argv[3]);
        return EXIT_FAILURE;
    }

    printf("Opened [%s] for output\n", argv[3]);

    // Temp load tweaks from file

    FILE * ftweaks = fopen("tweaks.bin", "rb");
    if (!ftweaks)
    {
        perror("tweaks.bin");
        fclose(foutput);
        return EXIT_FAILURE;
    }

    fread(tweaks, 1, TWEAK_SIZE, ftweaks);
    fclose(ftweaks);
    puts("Opened [tweaks.bin] for input");

    // Temp load inverts from file

    FILE * finverts = fopen("inverts.bin", "rb");
    if (!finverts)
    {
        perror("inverts.bin");
        fclose(foutput);
        return EXIT_FAILURE;
    }

    fread(inverts, 1, INVERT_SIZE, finverts);
    fclose(finverts);
    puts("Opened [inverts.bin] for input");

    // Transpose: the byte at offset (i * BLOCKS) + b goes back to byte i of block b

    puts("Transposing");
    memcpy(outdata, indata, FILE_SIZE);

    for (u64 i=0;i<128;i++)
    {
        for (u64 b=0;b<BLOCKS;b++)
        {
            indata[(b * 128) + i] = outdata[(i * BLOCKS) + b];
        }
    }

    memset(outdata, 0, FILE_SIZE);

    // Start

    puts("Starting decompression...");

    pthread_spin_init(&csjob, PTHREAD_PROCESS_PRIVATE);

    expand_iv();

    const si nprocs = get_nprocs();
    const ui threads = nprocs > 0 ? (ui)nprocs : 1;
    pthread_t ht[threads];
    ui started = 0;

    start_tick = tick();

    for (ui i=0;i<threads;i++)
    {
        const si err = pthread_create(&ht[started], 0, thread, 0);

        if (err)
        {
            // Workers pull blocks from a shared counter, so the ones that did start can finish the job
            fprintf(stderr, "pthread_create failed: %s\n", strerror(err));
            break;
        }

        started++;
    }

    // Only join handles that pthread_create actually filled in
    for (ui i=0;i<started;i++)
    {
        pthread_join(ht[i], 0);
    }

    if (!started)
    {
        fclose(foutput);
        return EXIT_FAILURE;
    }

    printf("Saving [%s]\n", argv[3]);

    const size_t file_size = FILE_SIZE;
    const size_t out_written = fwrite(outdata, 1, file_size, foutput);
    const si out_closed = fclose(foutput);

    if (out_written != file_size || out_closed != 0)
    {
        fprintf(stderr, "Failed to write [%s]\n", argv[3]);
        return EXIT_FAILURE;
    }

    puts("Done :)\n");

    return EXIT_SUCCESS;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
void * thread(void * UNUSED)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
// Check for and possibly grab a new job
|
||||
|
||||
u64 block_num = -1;
|
||||
pthread_spin_lock(&csjob);
|
||||
if (CS_NEXT_BLOCK_NUM < BLOCKS)
|
||||
{
|
||||
block_num = CS_NEXT_BLOCK_NUM++;
|
||||
}
|
||||
pthread_spin_unlock(&csjob);
|
||||
if (block_num == -1) break;
|
||||
|
||||
// Do some work
|
||||
|
||||
printf("decompressing block %04"PRIu64"...\n", block_num);
|
||||
fflush(0);
|
||||
|
||||
u64 tweak;
|
||||
ui invert;
|
||||
u64 v[16], m[16];
|
||||
u8 input_block[128], output_block[128];
|
||||
|
||||
const u64 sub_block = block_num * BLOCK_PRIME_MUL;
|
||||
|
||||
memcpy(input_block, &indata[block_num * 128], 128);
|
||||
|
||||
// Invert the shuffle
|
||||
|
||||
tweak = get_tweak(block_num, TWEAKS - 1);
|
||||
invert = get_bit(inverts, (block_num * 2) + 1);
|
||||
|
||||
memcpy(v, global_iv, 128);
|
||||
memcpy(m, global_iv, 128);
|
||||
|
||||
for (ui i=0;i<16;i++)
|
||||
{
|
||||
v[i] += BLAKE_IV * (sub_block + (TWEAKS - 1));
|
||||
m[i] += BLAKE_IV * tweak;
|
||||
}
|
||||
|
||||
if (invert) // mirror all bits
|
||||
{
|
||||
for (ui i=0;i<512;i++)
|
||||
{
|
||||
const ui ii = get_bit(input_block, i);
|
||||
const ui ij = get_bit(input_block, 1023 - i);
|
||||
|
||||
set_bit(input_block, i, ij);
|
||||
set_bit(input_block, 1023 - i, ii);
|
||||
}
|
||||
}
|
||||
|
||||
ishuffle(output_block, input_block, v, m);
|
||||
|
||||
// Invert the p-hashes
|
||||
|
||||
for (si i=CUTS_LENGTH-1;i>=0;i--)
|
||||
{
|
||||
memcpy(input_block, output_block, 128);
|
||||
|
||||
tweak = get_tweak(block_num, i + 1);
|
||||
|
||||
memcpy(v, global_iv, 128);
|
||||
memcpy(m, global_iv, 128);
|
||||
|
||||
for (ui j=0;j<16;j++)
|
||||
{
|
||||
v[j] += BLAKE_IV * (sub_block + i + 1);
|
||||
m[j] += BLAKE_IV * tweak;
|
||||
}
|
||||
|
||||
p_hash(output_block, input_block, v, m, CHAIN_CUTS[i]);
|
||||
}
|
||||
|
||||
memcpy(input_block, output_block, 128);
|
||||
|
||||
// Invert the final hash
|
||||
|
||||
tweak = get_tweak(block_num, 0);
|
||||
invert = get_bit(inverts, block_num * 2);
|
||||
|
||||
memcpy(v, global_iv, 128);
|
||||
memcpy(m, global_iv, 128);
|
||||
|
||||
for (ui i=0;i<16;i++)
|
||||
{
|
||||
v[i] += BLAKE_IV * sub_block;
|
||||
m[i] += BLAKE_IV * tweak;
|
||||
}
|
||||
|
||||
if (invert) // flip all bits
|
||||
{
|
||||
for (ui i=0;i<128;i++)
|
||||
{
|
||||
input_block[i] = ~input_block[i];
|
||||
}
|
||||
}
|
||||
|
||||
hash(output_block, input_block, v, m);
|
||||
|
||||
memcpy(&outdata[block_num * 128], output_block, 128);
|
||||
|
||||
// Progress report
|
||||
|
||||
const r64 ms = (tick() - start_tick) / 60000.;
|
||||
const r64 pm = (block_num + 1) / ms;
|
||||
const u64 rem = BLOCKS - (block_num + 1);
|
||||
printf("decompressed block %04"PRIu64" - %.1f mins remain\n", block_num, rem / pm);
|
||||
fflush(0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// XORs the 128-byte input_block with the 128 bytes produced by one blake2b() pass over a copy of RO_IV
// with message words m, and stores the result in output_block. RO_IV and m are left untouched.
void hash(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m)
{
    u64 state[16];
    memcpy(state, RO_IV, sizeof state); // work on a copy; the caller reuses RO_IV

    blake2b(state, m);

    u8 const * const pad = (u8 const *)state;

    for (ui k = 0; k < 128; k++)
    {
        output_block[k] = pad[k] ^ input_block[k];
    }
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Builds a 128-byte mask and XORs it onto input_block, storing the result in output_block.
// Each mask byte is assembled from 8 consecutive state bytes: bit k (least significant first) is set when
// the k-th of those bytes is below cutoff. The state starts as a copy of RO_IV and is advanced with
// blake2b() once before the first byte and again after every 16 output bytes (128 state bytes consumed).
void p_hash(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m, const u8 cutoff)
{
    u64 state[16];
    memcpy(state, RO_IV, sizeof state); // work on a copy; the caller reuses RO_IV

    u8 const * const state_bytes = (u8 const *)state;

    for (ui i = 0; i < 128; i++)
    {
        const ui group = i % 16; // which 8-byte group of the current state feeds this output byte

        if (group == 0)
        {
            blake2b(state, m);
        }

        u8 const * const src = &state_bytes[group * 8];
        u8 mask = 0;

        for (ui k = 0; k < 8; k++)
        {
            if (src[k] < cutoff)
            {
                mask |= (u8)(1u << k);
            }
        }

        output_block[i] = mask ^ input_block[i];
    }
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Returns 512 minus the number of set bits in the 128-byte block: positive when the block holds more
// zero bits than one bits, negative when it holds more ones.
si get_hash_score(u8 const * const restrict block)
{
    si score = 512;

    for (u8 const * word = block; word != block + 128; word += sizeof(u64))
    {
        u64 bits;
        memcpy(&bits, word, sizeof bits); // block carries no alignment guarantee

        score -= __builtin_popcountl(bits);
    }

    return score;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Inverse of the compressor's bit shuffle. Replays the same swap sequence (positions 1023 down to 1, partner
// state[i & 15] % (i + 1), 13/7/17 xorshift of the used state word after each swap except the last) on an
// index table instead of on the bits, then writes input bit i to output position indices[i].
// The state is one blake2b() pass over a copy of RO_IV with message words m.
void ishuffle(u8 * const restrict output_block, u8 const * const restrict input_block, u64 const * const restrict RO_IV, u64 const * const restrict m)
{
    u64 v[16];
    memcpy(v, RO_IV, 128); // A copy is needed because the IV is Read-Only

    blake2b(v, m);

    u16 indices[1024];

    for (u16 i=0;i<1024;i++)
    {
        indices[i] = i;
    }

    ui i = 1023;

    while (1)
    {
        u64 * const restrict p = &v[i & 15];

        const ui j = *p % (i + 1);

        const ui ii = indices[i];
        const ui ij = indices[j];

        indices[i] = ij;
        indices[j] = ii;

        if (i == 1) break;

        i--;

        *p ^= *p << 13;
        *p ^= *p >> 7;
        *p ^= *p << 17;
    }

    // set_bit() read-modify-writes whole bytes, so start from a defined buffer rather than from whatever
    // the caller's uninitialised array happens to contain
    memset(output_block, 0, 128);

    for (ui i=0;i<1024;i++)
    {
        set_bit(output_block, indices[i], get_bit(input_block, i));
    }
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Scores where the zero bits of a 1024-bit block sit. 'forward' sums the position of every zero bit;
// 'mirrored' sums (1023 - position), i.e. the same sum taken over the bit-reversed block.
// Returns forward when it is strictly larger, otherwise the negated mirrored sum.
s32 get_shuffle_score(u8 const * const restrict block)
{
    s32 forward = 0, mirrored = 0;

    for (ui pos = 0; pos < 1024; pos++)
    {
        if (get_bit(block, pos)) continue;

        forward += (s32)pos;
        mirrored += (s32)(1023 - pos);
    }

    if (forward > mirrored) return forward;

    return -mirrored;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
void expand_iv(void)
|
||||
{
|
||||
const u64 IV[8] =
|
||||
{
|
||||
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
|
||||
0x510E527FADE682D1, 0x9B05688C2B3E6C1F, 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179,
|
||||
};
|
||||
|
||||
u64 v[16], m[16];
|
||||
|
||||
memcpy(&v[0], iv, 16); // 16-byte 'iv' is loaded from file in main()
|
||||
memcpy(&v[2], iv, 16);
|
||||
memcpy(&v[4], iv, 16);
|
||||
memcpy(&v[6], iv, 16);
|
||||
|
||||
memcpy(&v[8], IV, 64);
|
||||
memcpy(&m[0], IV, 64);
|
||||
memcpy(&m[8], IV, 64);
|
||||
|
||||
// Cheeseball way of expanding an IV from 16 to 128 bytes
|
||||
|
||||
for (ui i=0;i<128;i++)
|
||||
{
|
||||
v[i & 15] ^= m[i & 15];
|
||||
blake2b(v, m);
|
||||
}
|
||||
|
||||
for (ui i=0;i<128;i++)
|
||||
{
|
||||
u8 * const restrict v8 = (u8 *)v;
|
||||
u8 const * const restrict m8 = (u8 *)m;
|
||||
|
||||
blake2b(v, m);
|
||||
global_iv[i] = v8[i] ^ m8[i];
|
||||
}
|
||||
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Applies a single round of BLAKE2b-style G mixing to the 16-word state v, in place: four column mixes
// followed by four diagonal mixes, using rotations 32/24/16/63 and a fixed selection of message words m.
void blake2b(u64 * const restrict v, u64 const * const restrict m)
{
    // Rotate the 64-bit value w right by n bits (0 < n < 64)
    #define BLAKE2B_ROTR64(w, n) (((w) >> (n)) | ((w) << (64 - (n))))

    // Mix state words a, b, c, d with message words mx and my
    #define BLAKE2B_G(a, b, c, d, mx, my) \
        do { \
            v[a] += v[b] + m[mx]; \
            v[d] = BLAKE2B_ROTR64(v[d] ^ v[a], 32); \
            v[c] += v[d]; \
            v[b] = BLAKE2B_ROTR64(v[b] ^ v[c], 24); \
            v[a] += v[b] + m[my]; \
            v[d] = BLAKE2B_ROTR64(v[d] ^ v[a], 16); \
            v[c] += v[d]; \
            v[b] = BLAKE2B_ROTR64(v[b] ^ v[c], 63); \
        } while (0)

    // Columns
    BLAKE2B_G(0, 4,  8, 12, 13, 11);
    BLAKE2B_G(1, 5,  9, 13,  7, 14);
    BLAKE2B_G(2, 6, 10, 14, 12,  1);
    BLAKE2B_G(3, 7, 11, 15,  3,  9);

    // Diagonals
    BLAKE2B_G(0, 5, 10, 15,  5,  0);
    BLAKE2B_G(1, 6, 11, 12, 15,  4);
    BLAKE2B_G(2, 7,  8, 13,  8,  6);
    BLAKE2B_G(3, 4,  9, 14,  2, 10);

    #undef BLAKE2B_G
    #undef BLAKE2B_ROTR64
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
u64 get_tweak(const u64 block_num, const ui tweak_num)
|
||||
{
|
||||
u64 tweak = 0;
|
||||
const u64 base_address = (block_num * TWEAKS * TWEAK_BITS) + (tweak_num * TWEAK_BITS);
|
||||
|
||||
for (ui i=0;i<TWEAK_BITS;i++)
|
||||
{
|
||||
tweak <<= 1;
|
||||
tweak |= get_bit(tweaks, base_address + i);
|
||||
}
|
||||
|
||||
return tweak;
|
||||
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Returns bit number 'address' (0 or 1) of the byte stream. Bits are numbered most significant first
// within each byte, so address 0 is the top bit of stream[0].
ui get_bit(u8 const * const restrict stream, const u32 address)
{
    const u8 mask = (u8)((1u << (CHAR_BIT - 1)) >> (address % CHAR_BIT));

    return (stream[address / CHAR_BIT] & mask) != 0;
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Sets bit number 'address' of the byte stream to 1 when 'bit' is non-zero and to 0 otherwise. Bits are
// numbered most significant first within each byte. The other bits of the byte are preserved.
void set_bit(u8 * const restrict stream, const u32 address, const ui bit)
{
    u8 * const cell = &stream[address / CHAR_BIT];
    const u8 mask = (u8)((1u << (CHAR_BIT - 1)) >> (address % CHAR_BIT));

    if (bit)
    {
        *cell |= mask;
    }
    else
    {
        *cell &= ~mask;
    }
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
u64 tick(void)
|
||||
{
|
||||
struct timespec now;
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
return ((u64)now.tv_sec * 1000) + ((u64)now.tv_nsec / 1000000);
|
||||
}
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
|
@ -0,0 +1,11 @@
|
|||
#!/usr/bin/env bash

# Copyright © 2021 by Brett Kuntz. All rights reserved.

# Builds the compressor and the decompressor with clang.
# Stop at the first failing compile instead of carrying on and hiding the error.
set -e

clang compress.c -o compress.exe -pipe -pthread -Wall -Werror -Wfatal-errors -O3 -fomit-frame-pointer -march=native -mtune=native
clang decompress.c -o decompress.exe -pipe -pthread -Wall -Werror -Wfatal-errors -O3 -fomit-frame-pointer -march=native -mtune=native

# comment the above lines & uncomment the below lines on Linux/POSIX OS's as they will likely support -flto

#clang compress.c -o compress -pipe -pthread -Wall -Werror -Wfatal-errors -O3 -fomit-frame-pointer -flto -march=native -mtune=native
#clang decompress.c -o decompress -pipe -pthread -Wall -Werror -Wfatal-errors -O3 -fomit-frame-pointer -flto -march=native -mtune=native
|
|
@ -0,0 +1,66 @@
|
|||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Copyright © 2021 by Brett Kuntz. All rights reserved.
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Feature-test macro, defined ahead of every #include in this header so the system headers see it.
// On glibc it selects the 64-bit file-offset interface (64-bit off_t) even on 32-bit targets.
#define _FILE_OFFSET_BITS 64
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
// Compile-time platform checks: the build fails with the quoted message if either assumption does not hold.
// The bit-addressing arithmetic in this project divides by CHAR_BIT and relies on it being 8.
static_assert(CHAR_BIT == 8, "This code requires [char] to be exactly 8 bits.");
|
||||
// [long] must be 64 bits wide because of __builtin_popcountl, which operates on an unsigned long.
static_assert(sizeof(long) == 8, "This code requires [long] to be exactly 8 bytes."); // __builtin_popcountl
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
#include <stdint.h>
|
||||
// Short type aliases used throughout the project. Left column is unsigned (u*), right column is signed (s*);
// a numeric suffix is the width in bits.
// Note: s8 is plain [char] (signedness is implementation-defined), not [signed char]; main() uses it for argv.
typedef unsigned char u8 ; typedef char s8 ;
|
||||
typedef uint16_t u16 ; typedef int16_t s16 ;
|
||||
typedef uint32_t u32 ; typedef int32_t s32 ;
|
||||
typedef uint64_t u64 ; typedef int64_t s64 ;
|
||||
// 128-bit integers are a GCC/Clang compiler extension, not standard C.
typedef __uint128_t u128 ; typedef __int128_t s128 ;
|
||||
// Aliases for the native integer types (width depends on the platform ABI).
typedef unsigned int ui ; typedef int si ;
|
||||
typedef unsigned long ul ; typedef long sl ;
|
||||
typedef unsigned long long ull ; typedef long long sll ;
|
||||
// Floating point: r32 = float, r64 = double.
typedef float r32 ; typedef double r64 ;
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// halt: flush every open output stream (fflush(0)) and then block forever; control never continues past it.
// sleep() takes an unsigned int, so -1 converts to the largest possible interval, and the surrounding while (1)
// simply sleeps again if a call ever returns. Wrapped in do { } while (0) so that [halt;] acts as one statement.
// Expands to calls of fflush and sleep, so <stdio.h> and <unistd.h> (both included by this header) must be visible.
#define halt do { fflush(0); while (1) sleep(-1); } while (0)
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <pthread.h>
|
||||
#include <inttypes.h>
|
||||
#include <sys/sysinfo.h>
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Size in bytes of the input and output buffers: main() allocates indata/outdata with it and reads at most this many
// bytes from the input file (1048576 = 2^20).
#define FILE_SIZE 1048576
|
||||
// Number of blocks the data is processed in. FILE_SIZE / BLOCKS = 128 -- presumably the bytes per block; confirm.
#define BLOCKS 8192
|
||||
// NOTE(review): the use of this multiplier is not visible in this header -- see its users for its meaning.
#define BLOCK_PRIME_MUL 83
|
||||
// Number of entries in CHAIN_CUTS.
#define CUTS_LENGTH 28
|
||||
// Number of tweak values per block.
#define TWEAKS (CUTS_LENGTH + 2) // 28 chains, 1 full hash, 1 shuffle
|
||||
// Bits per tweak as counted by TWEAK_SIZE below.
#define TWEAK_BITS 20
|
||||
// Byte size of the [tweaks] buffer: TWEAKS * TWEAK_BITS bits for each of BLOCKS blocks = 614400 bytes.
#define TWEAK_SIZE ((TWEAKS * TWEAK_BITS * BLOCKS) / CHAR_BIT)
|
||||
// Byte size of the [inverts] buffer: 2 bits for each of BLOCKS blocks = 2048 bytes.
#define INVERT_SIZE ((2 * BLOCKS) / CHAR_BIT)
|
||||
// NOTE: everything below is a definition, not an extern declaration, so this header can be included by only one
// translation unit per program. The build script compiles compress.c and decompress.c as separate programs.
//
// Table of CUTS_LENGTH values in non-increasing order (they sum to 196).
// NOTE(review): presumably one cut per chain stage -- confirm against the code that indexes it.
const u8 CHAIN_CUTS[CUTS_LENGTH] = { 37, 23, 17, 14, 11, 9, 8, 7, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2 };
|
||||
// NOTE(review): this value appears to match the fourth BLAKE2b/SHA-512 initialization word -- verify before relying on it.
const u64 BLAKE_IV = UINT64_C(0xA54FF53A5F1D36F1);
|
||||
// indata/outdata/tweaks/inverts: zero-initialised heap buffers allocated in main() with FILE_SIZE, FILE_SIZE,
// TWEAK_SIZE and INVERT_SIZE bytes respectively. iv: 16 bytes that main() reads from the iv file.
// global_iv: 128-byte buffer -- presumably filled from iv by expand_iv(); confirm.
u8 * indata, * outdata, * tweaks, * inverts, iv[16], global_iv[128];
|
||||
// start_tick: value of tick() recorded by main() just before the worker threads are created.
// CS_NEXT_BLOCK_NUM: presumably the next block number to hand to a worker, guarded by csjob -- confirm.
u64 CS_NEXT_BLOCK_NUM, start_tick;
|
||||
// Spinlocks initialised in main() with PTHREAD_PROCESS_PRIVATE before any worker thread starts.
pthread_spinlock_t csjob, csmem;
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Forward declarations for the hash-related routines. The definitions live in the including .c file; the prototypes
// carry no parameter names, so consult each definition for what the arguments mean.
// NOTE(review): by name, the find_* functions look like searches that return a tweak value -- confirm.
u64 find_hash(u8 * const restrict, u8 const * const restrict, const u64, ui * const restrict);
|
||||
void hash(u8 * const restrict, u8 const * const restrict, u64 const * const restrict, u64 const * const restrict);
|
||||
// The p_ variants take one extra trailing u8 argument compared with their counterparts above.
u64 find_p_hash(u8 * const restrict, u8 const * const restrict, const u64, const u8);
|
||||
void p_hash(u8 * const restrict, u8 const * const restrict, u64 const * const restrict, u64 const * const restrict, const u8);
|
||||
si get_hash_score(u8 const * const restrict);
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Forward declarations for the shuffle-related routines; their signatures mirror find_hash/hash above.
// NOTE(review): ishuffle is presumably the inverse of shuffle -- confirm against the definitions.
u64 find_shuffle(u8 * const restrict, u8 const * const restrict, const u64, ui * const restrict);
|
||||
void shuffle(u8 * const restrict, u8 const * const restrict, u64 const * const restrict, u64 const * const restrict);
|
||||
void ishuffle(u8 * const restrict, u8 const * const restrict, u64 const * const restrict, u64 const * const restrict);
|
||||
s32 get_shuffle_score(u8 const * const restrict);
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Worker entry point: main() passes this function to pthread_create once per processor, with a null argument.
void * thread(void *);
|
||||
// Tweak accessors. NOTE(review): the arguments are presumably (block number, tweak index[, value]) -- confirm.
void set_tweak(const u64, const ui, u64);
|
||||
u64 get_tweak(const u64, const ui);
|
||||
// Single-bit access into a packed byte stream: (stream, bit address[, bit value]). Bits are addressed
// most-significant-first within each byte.
void set_bit(u8 * const restrict, const u32, const ui);
|
||||
ui get_bit(u8 const * const restrict, const u32);
|
||||
// Called once by main() after the spinlocks are initialised and before the worker threads are created.
void expand_iv(void);
|
||||
void blake2b(u64 * const restrict, u64 const * const restrict);
|
||||
// Current CLOCK_MONOTONIC time in whole milliseconds.
u64 tick(void);
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
Loading…
Reference in New Issue