2020-11-28 19:05:27 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdint.h>
|
2020-12-14 22:15:20 +00:00
|
|
|
#include <algorithm>
|
|
|
|
#include <unordered_map>
|
|
|
|
#include <list>
|
2020-11-28 19:05:27 +00:00
|
|
|
|
|
|
|
#define FLAG_COPY 1
|
|
|
|
#define FLAG_BACKREF 0
|
|
|
|
|
2020-12-14 22:15:20 +00:00
|
|
|
#define MAX_BACKREF ((unsigned int)18)
|
|
|
|
#define RING_LEN 0x1000
|
|
|
|
|
2020-11-28 19:05:27 +00:00
|
|
|
extern "C"
|
|
|
|
{
|
|
|
|
// Decompress a flag-byte / back-reference stream from indata into outdata.
//
// Stream layout, as consumed by this function:
//   * A flag byte introduces up to eight items; bits are read from the least
//     significant bit upwards.
//   * A bit equal to FLAG_COPY means the item is a single literal byte.
//   * Any other bit means the item is a two-byte back-reference (hi, lo):
//       distance = (hi << 4) | (lo >> 4)   -- counted back from the write position
//       length   = (lo & 0xF) + 3
//     A distance of zero marks the end of the stream.
//   * Back-references that reach before the start of the output read as zero.
//
// Parameters:
//   indata / inlen   - compressed bytes and their count.
//   outdata / outlen - destination buffer and its capacity in bytes.
//
// Returns the number of bytes written to outdata, or a negative error code:
//   -1  outlen is too small for even the most compact possible expansion,
//   -2  the input ended in the middle of an item,
//   -3  the next item would not fit in outdata.
int decompress(uint8_t *indata, unsigned int inlen, uint8_t *outdata, unsigned int outlen)
{
    // Quick rejection. The least output per input byte comes from an all-literal
    // stream: 9 input bytes (flag + 8 literals) yield 8 output bytes. A stream
    // may additionally carry an end marker (a flag byte plus a two-byte zero
    // back-reference) that yields no output at all, so those 3 bytes are left
    // out of the estimate; otherwise a destination sized exactly for the data
    // would be refused. The math is done in 64 bits so it cannot wrap.
    const unsigned int terminator_overhead = 3;
    if (inlen > terminator_overhead)
    {
        const uint64_t min_output = (static_cast<uint64_t>(inlen - terminator_overhead) * 8) / 9;
        if (outlen < min_output)
        {
            // We cannot decompress, we will run out of room!
            return -1;
        }
    }

    unsigned int inloc = 0;
    unsigned int outloc = 0;
    bool eof = false;

    // Invariants inside the loops: inloc <= inlen and outloc <= outlen, which is
    // what makes the subtraction-based bounds checks below safe.
    while (inloc < inlen && !eof)
    {
        const uint8_t flags = indata[inloc++];

        for (unsigned int flagpos = 0; flagpos < 8; flagpos++)
        {
            if (((flags >> flagpos) & 1) == FLAG_COPY)
            {
                // Literal: move one byte straight across.
                if (inloc >= inlen)
                {
                    // The input ended where a literal byte was expected.
                    return -2;
                }
                if (outloc >= outlen)
                {
                    // No room left for the literal.
                    return -3;
                }

                outdata[outloc++] = indata[inloc++];
                continue;
            }

            // Back-reference: needs two more input bytes.
            if (inlen - inloc < 2)
            {
                // The input ended in the middle of a back-reference.
                return -2;
            }

            const unsigned int hi = indata[inloc++];
            const unsigned int lo = indata[inloc++];
            const unsigned int copy_len = (lo & 0xF) + 3;
            const unsigned int copy_pos = (hi << 4) | (lo >> 4);

            if (copy_pos == 0)
            {
                // A zero distance is the end-of-stream marker.
                eof = true;
                break;
            }

            if (outlen - outloc < copy_len)
            {
                // The referenced run would not fit in the output buffer.
                return -3;
            }

            // Copy one byte at a time: the referenced run may overlap the bytes
            // being produced right now, so each byte has to be written before a
            // later one can read it.
            const int64_t source_start = static_cast<int64_t>(outloc) - static_cast<int64_t>(copy_pos);
            for (unsigned int i = 0; i < copy_len; i++)
            {
                const int64_t source = source_start + i;
                // Positions before the start of the output read as zero.
                outdata[outloc++] = (source < 0) ? 0 : outdata[source];
            }
        }
    }

    // Report how many bytes were actually produced.
    return static_cast<int>(outloc);
}
|
2020-12-14 22:15:20 +00:00
|
|
|
|
|
|
|
// Compress indata into outdata using flag bytes, literals and back-references.
//
// Output layout:
//   * A flag byte introduces up to eight items; item N of the group owns bit N
//     (least significant bit first).
//   * A FLAG_COPY bit marks a single literal byte.
//   * A FLAG_BACKREF bit marks a two-byte back-reference:
//       byte 0 = distance >> 4
//       byte 1 = ((distance & 0xF) << 4) | (length - 3)
//     where distance counts back from the current position and length is
//     between 3 and MAX_BACKREF.
//   * A back-reference with distance zero terminates the stream.
//
// Parameters:
//   indata / inlen   - bytes to compress and their count.
//   outdata / outlen - destination buffer and its capacity in bytes.
//
// Returns the number of bytes written to outdata, or a negative error code:
//   -2  internal consistency failure (candidates existed but none matched),
//   -3  the compressed stream does not fit in outdata.
int compress(uint8_t *indata, unsigned int inlen, uint8_t *outdata, unsigned int outlen)
{
    // Rolling value of the last three input bytes consumed.
    uint32_t key = 0;
    // For every three-byte sequence seen so far, the input offsets it starts at.
    std::unordered_map<uint32_t, std::list<unsigned int> > starts;
    unsigned int outloc = 0;
    unsigned int inloc = 0;
    bool eof = false;

    // Consume the input byte at inloc: fold it into the rolling key and, once
    // three bytes have been seen, remember where that three-byte run began.
    auto consume_input_byte = [&]()
    {
        key = ((key << 8) | indata[inloc]) & 0xFFFFFF;
        if (inloc >= 2)
        {
            starts[key].push_back(inloc - 2);
        }
        inloc++;
    };

    // Invariant everywhere below: outloc <= outlen, so (outlen - outloc) is the
    // free space remaining and can be computed without wrapping.
    while (!eof)
    {
        if (outloc >= outlen)
        {
            // No room for the next flag byte.
            return -3;
        }

        // Reserve the flag byte; its bits are filled in as items are emitted.
        const unsigned int flagsloc = outloc;
        outdata[outloc++] = 0;

        for (unsigned int flagpos = 0; flagpos < 8; flagpos++)
        {
            if (inloc == inlen)
            {
                // All input consumed: emit the end marker, a back-reference with
                // distance zero. Checking the free space by subtraction matters:
                // "outloc > outlen - 2" wraps around when outlen < 2 and would
                // let the two writes below run past the buffer.
                if (outlen - outloc < 2)
                {
                    return -3;
                }

                outdata[flagsloc] |= (FLAG_BACKREF << flagpos);
                outdata[outloc++] = 0;
                outdata[outloc++] = 0;

                // Bail out of the loop, we're done!
                eof = true;
                break;
            }

            // A back-reference is only attempted once three bytes have been
            // emitted and while more than three input bytes remain.
            unsigned int best_distance = 0;
            unsigned int best_length = 1;
            bool have_candidates = false;

            if (inloc >= 3 && inlen - inloc > 3)
            {
                // Longest run a single back-reference may describe from here.
                const unsigned int backref_amount = std::min(inlen - inloc, MAX_BACKREF);
                // Oldest input offset still reachable by a back-reference.
                const unsigned int window = RING_LEN - 1;
                const unsigned int earliest_backref = (inloc > window) ? (inloc - window) : 0;
                const uint32_t search_key = (indata[inloc] << 16) | (indata[inloc + 1] << 8) | (indata[inloc + 2]);

                // Look the candidate list up once, then drop entries that have
                // fallen out of reach.
                std::list<unsigned int> &candidates = starts[search_key];
                candidates.remove_if([earliest_backref](unsigned int start)
                {
                    return start < earliest_backref;
                });
                have_candidates = !candidates.empty();

                for (const unsigned int candidate : candidates)
                {
                    // A candidate that disagrees at the last byte of the current
                    // best match cannot beat it, so skip it cheaply.
                    if (indata[candidate + (best_length - 1)] != indata[inloc + (best_length - 1)])
                    {
                        continue;
                    }

                    // The first three bytes are known to match via the key.
                    unsigned int current_length = 3;
                    while (current_length < backref_amount &&
                           indata[candidate + current_length] == indata[inloc + current_length])
                    {
                        current_length++;
                    }

                    if (best_length < current_length)
                    {
                        best_length = current_length;
                        best_distance = (inloc - candidate) & 0xFFF;
                    }
                    if (best_length == backref_amount)
                    {
                        // Nothing can be longer than this; stop searching.
                        break;
                    }
                }
            }

            if (have_candidates)
            {
                if (best_distance == 0)
                {
                    // Double check, since we know we should have found a backref.
                    return -2;
                }
                if (outlen - outloc < 2)
                {
                    // No room for the two back-reference bytes.
                    return -3;
                }

                outdata[flagsloc] |= (FLAG_BACKREF << flagpos);
                outdata[outloc++] = static_cast<uint8_t>((best_distance >> 4) & 0xFF);
                outdata[outloc++] = static_cast<uint8_t>(((best_distance & 0xF) << 4) | ((best_length - 3) & 0xF));

                // Every byte covered by the back-reference still has to be
                // recorded so later input can refer to it.
                for (unsigned int i = 0; i < best_length; i++)
                {
                    consume_input_byte();
                }
            }
            else
            {
                // Either too close to an end of the input for a back-reference,
                // or nothing reachable matches: emit the byte as a literal.
                if (outloc >= outlen)
                {
                    return -3;
                }

                outdata[flagsloc] |= (FLAG_COPY << flagpos);
                outdata[outloc++] = indata[inloc];
                consume_input_byte();
            }
        }
    }

    return static_cast<int>(outloc);
}
|
2020-11-28 19:05:27 +00:00
|
|
|
}
|