/* vgmstream/src/coding/g7221_decoder_aes.c */

#include <stdlib.h>
#include "g7221_decoder_aes.h"
/* Namco's NUS AES is just standard AES-192 in ECB mode, so this can be swapped with another lib,
* if more code needs AES. Most implementations out there either use pre-calculated look-up tables,
* or calculate manually every AES op. Namco's code calculates tables first in a slightly different
* layout, so it may be interesting as a sort of doc piece. */
/* Decryption context: key-independent tables built once by aes_init_state(), plus the
 * per-key round key schedule written by aes_init_key(). */
struct s14aes_handle {
/* substitution box look-up table and the inverse */
uint8_t sbox[256];
uint8_t ibox[256];
/* GF(2^8) reduction helper: xors[h] is (h * x^8) reduced by the 0x11B polynomial, so a 16-bit
 * carry-less product 'v' is folded back into a byte with: (uint8_t)v ^ xors[v >> 8] */
uint8_t xors[256];
/* round constants; only the first 8 entries are written, which is all the key expansion reads */
uint8_t rcon[16];
/* MixColumn(?) LUTs, unlike normal Rijndael which uses 4 tables: Td0a Td0b..., Td1a Td1b..., ...
 * layout is: Td0a Td1a Td2a Td3a, Td0b Td1b Td2b Td3b, ... (better for CPU cache?)
 * i.e. 4 consecutive words per byte value, each one the previous word rotated right by 8 bits */
uint32_t tds[256*4];
/* expanded roundkey, actual final key (already transformed into the form the decryptor reads) */
uint32_t rk[52];
} ;
/* Byte <-> 32-bit word helpers.
 * GET_U32LE/GET_U32BE assemble 4 consecutive bytes at 'p' into a little/big endian word.
 * Each byte is widened to uint32_t *before* shifting: a plain uint8_t operand is promoted to
 * (signed) int, and shifting a value >= 0x80 left by 24 would overflow into the sign bit,
 * which is undefined behavior. Note that 'p' is evaluated 4 times.
 * GET_B0..GET_B3 extract byte 0 (lowest) to byte 3 (highest) of a word. */
#define GET_U32LE(p) (((uint32_t)(p)[0] << 0 ) | ((uint32_t)(p)[1] << 8 ) | ((uint32_t)(p)[2] << 16) | ((uint32_t)(p)[3] << 24))
#define GET_U32BE(p) (((uint32_t)(p)[3] << 0 ) | ((uint32_t)(p)[2] << 8 ) | ((uint32_t)(p)[1] << 16) | ((uint32_t)(p)[0] << 24))
#define GET_B0(x) (((x) >> 0 ) & 0xFF)
#define GET_B1(x) (((x) >> 8 ) & 0xFF)
#define GET_B2(x) (((x) >> 16) & 0xFF)
#define GET_B3(x) (((x) >> 24) & 0xFF)
/* Expands a 24-byte (192-bit) key into 52 round key words.
 *  ctx: provides the sbox and rcon tables (must have been built already)
 *  key: 24 input bytes, loaded as 6 little endian words
 *  rk:  output, room for 52 words
 * Each new word is the word 6 positions back XORed with the previous word; on every 6th
 * word the previous word is first rotated by one byte, run through the sbox and has the
 * round constant mixed into its lowest byte. */
static void aes_key_expand(s14aes_handle* ctx, const uint8_t* key, uint32_t* rk) {
    int w;

    /* first 6 words are the key itself */
    for (w = 0; w < 6; w++) {
        rk[w] = GET_U32LE(key + 4 * w);
    }

    for (w = 6; w < 52; w++) {
        uint32_t prev = rk[w - 1];
        uint8_t b0 = (uint8_t)(prev >> 0 );
        uint8_t b1 = (uint8_t)(prev >> 8 );
        uint8_t b2 = (uint8_t)(prev >> 16);
        uint8_t b3 = (uint8_t)(prev >> 24);

        if (w % 6 == 0) {
            /* rotate bytes down by one position, substitute, add round constant */
            uint8_t first = ctx->sbox[b1];
            b1 = ctx->sbox[b2];
            b2 = ctx->sbox[b3];
            b3 = ctx->sbox[b0];
            b0 = ctx->rcon[w / 6 - 1] ^ first;
        }

        rk[w] = rk[w - 6] ^ (((uint32_t)b0 << 0 ) |
                             ((uint32_t)b1 << 8 ) |
                             ((uint32_t)b2 << 16) |
                             ((uint32_t)b3 << 24));
    }
}
/* Reverses the byte order of a 32-bit word. */
static uint32_t aes_swap32(uint32_t v) {
    return ((v << 24) & 0xFF000000u) |
           ((v << 8 ) & 0x00FF0000u) |
           ((v >> 8 ) & 0x0000FF00u) |
           ((v >> 24) & 0x000000FFu);
}

/* Builds the decryption key schedule in ctx->rk from a 24-byte key.
 *  ctx: handle whose tables were already built by aes_init_state (sbox/rcon/xors are read)
 *  key: 24 bytes of key material
 * The key is expanded into 52 words, then converted to the form aes_decrypt_block reads:
 * - words 0..3 and 48..51 (outermost round keys) are only byte-swapped
 * - words 4..47 go through the inverse column mix (0E 0B 0D 09 matrix), so the decryptor
 *   can XOR them directly with its table look-ups */
static void aes_init_key(s14aes_handle* ctx, const uint8_t* key) {
    static const uint8_t invcols[4][4] = {
        {0x0E,0x0B,0x0D,0x09},
        {0x09,0x0E,0x0B,0x0D},
        {0x0D,0x09,0x0E,0x0B},
        {0x0B,0x0D,0x09,0x0E}
    };
    /* must be uint32_t (not 'unsigned int') to match aes_key_expand's parameter type
     * on platforms where those are distinct types */
    uint32_t roundkey[52];
    int i, row, col, b;

    aes_key_expand(ctx, key, roundkey);

    for (i = 0; i < 4; i++) {
        ctx->rk[i] = aes_swap32(roundkey[i]);
    }

    for (i = 4; i < 48; i++) {
        uint32_t val = 0;

        for (row = 0; row < 4; row++) {
            uint8_t xv = 0;

            for (col = 0; col < 4; col++) {
                /* carry-less multiply of the key byte by the matrix coefficient */
                uint16_t rv1 = 0;
                uint16_t rv2 = (roundkey[i] >> (col * 8)) & 0xFF;
                uint8_t ic = invcols[row][col];

                for (b = 0; b < 8; b++) {
                    if (ic & (1 << b))
                        rv1 ^= rv2;
                    rv2 *= 2;
                }

                /* fold bits above the low byte back in via the reduction table */
                xv ^= rv1 ^ ctx->xors[rv1 >> 8];
            }

            /* row 0 ends up in the highest byte */
            val = (val << 8) | xv;
        }

        ctx->rk[i] = val;
    }

    for (i = 48; i < 52; i++) {
        ctx->rk[i] = aes_swap32(roundkey[i]);
    }
}
/* Builds all key-independent tables of ctx:
 * - xors: reduction helper for 16-bit carry-less products (polynomial 0x11B)
 * - tds:  decryption look-up words, 4 rotations per byte value
 * - sbox/ibox: substitution box and its inverse
 * - rcon: first 8 round constants
 * Returns early (leaving sbox/ibox/rcon unwritten) if some value has no multiplicative
 * inverse; every non-zero byte has one in this field, so that path isn't expected to run. */
static void aes_init_state(s14aes_handle* ctx) {
    static const uint8_t invcol[4] = {
        0x0E, 0x0B, 0x0D, 0x09
    };
    uint8_t xpow[16];   /* x^i reduced by 0x11B: 1,2,4..0x80, then 0x1B,0x36... */
    uint8_t box[256];   /* multiplicative inverse of each byte (0 maps to 0) */
    uint16_t rv;
    int i, j, k, b;

    /* powers of x; only entries 0..15 are ever read below */
    rv = 1;
    for (i = 0; i < 16; i++) {
        xpow[i] = (uint8_t)rv;
        rv *= 2;
        if (rv & 0x100)
            rv ^= 0x11Bu;
    }

    /* xors[h] = reduced value of (h << 8), built from the x^8..x^15 entries */
    for (i = 0; i < 256; i++) {
        uint8_t xv = 0;
        for (j = 0; j < 8; j++) {
            if (i & (1 << j))
                xv ^= xpow[j + 8];
        }
        ctx->xors[i] = xv;
    }

    /* tds: for each byte value, pack its products with 0E/0B/0D/09 into a word,
     * then store that word and its next 3 right-rotations consecutively */
    for (i = 0; i < 256; i++) {
        uint32_t val = 0;

        for (j = 0; j < 4; j++) {
            uint16_t tv1 = 0;
            uint16_t tv2 = invcol[j];
            for (b = 0; b < 8; b++) {
                if (i & (1 << b))
                    tv1 ^= tv2;
                tv2 *= 2;
            }

            val = (val >> 8) | (val << 24);
            val |= (uint32_t)((uint8_t)tv1 ^ ctx->xors[tv1 >> 8]);
        }

        val = (val >> 16) | (val << 16);
        for (j = 0; j < 4; j++) {
            ctx->tds[4 * i + j] = val;
            val = (val >> 8) | (val << 24);
        }
    }

    /* brute-force search of multiplicative inverses: find j where i * j == 1 */
    box[0] = 0;
    for (i = 1; i < 256; i++) {
        for (j = 1; j < 256; j++) {
            uint16_t bv1 = 0;
            uint16_t bv2 = (uint16_t)j;
            for (k = 0; k < 8; k++) {
                if (i & (1 << k))
                    bv1 ^= bv2;
                bv2 *= 2;
            }

            if (((uint8_t)bv1 ^ ctx->xors[bv1 >> 8]) == 1)
                break;
        }

        if (j == 256) /* no inverse found (kept from the original code) */
            return;
        box[i] = (uint8_t)j;
    }

    /* sbox: each output bit k is inverse bit k XOR inverse bits k+4..k+7 (mod 8),
     * then XOR 0x63; ibox is filled as the reverse mapping */
    for (i = 0; i < 256; i++) {
        uint8_t inv = box[i];
        uint8_t val = 0;

        for (k = 0; k < 8; k++) {
            val |= inv & (1 << k);
            for (b = 0; b < 4; b++) {
                /* same bit as (b + k - 4) mod 8, without masking a negative value */
                if (inv & (1 << ((b + k + 4) & 7)))
                    val ^= 1 << k;
            }
        }

        ctx->sbox[i] = val ^ 0x63;
        ctx->ibox[val ^ 0x63] = (uint8_t)i;
    }

    /* originally recalculated in Namco's code (inlined?) */
    for (i = 0; i < 8; i++) {
        ctx->rcon[i] = xpow[i];
    }
}
/* Decrypts one 16-byte block in place.
 *  ctx: handle with tables and round keys already set up
 *  buf: 16 bytes, read as 4 big endian words and overwritten with the result
 * Runs the initial key addition (rk[48..51]), 11 table-driven rounds (rk[44..47] down to
 * rk[4..7]) and a final round that only uses the inverse sbox and rk[0..3]. */
static void aes_decrypt_block(s14aes_handle* ctx, uint8_t* buf) {
    const uint8_t* inv = ctx->ibox;
    const uint32_t* lut = ctx->tds;
    const uint32_t* keys = ctx->rk;
    uint32_t cur[4], nxt[4];
    int round, c;

    /* load state and add the last round key */
    for (c = 0; c < 4; c++) {
        cur[c] = keys[48 + c] ^ GET_U32BE(buf + 4 * c);
    }

    /* main rounds: each output column mixes one byte from each input column,
     * taking successive bytes from successive columns (wrapping around) */
    for (round = 11; round >= 1; round--) {
        const uint32_t* rkey = keys + 4 * round;

        for (c = 0; c < 4; c++) {
            nxt[c] = rkey[c]
                   ^ lut[4 * inv[GET_B0(cur[(c + 1) & 3])] + 3]
                   ^ lut[4 * inv[GET_B1(cur[(c + 2) & 3])] + 2]
                   ^ lut[4 * inv[GET_B2(cur[(c + 3) & 3])] + 1]
                   ^ lut[4 * inv[GET_B3(cur[c])] + 0];
        }

        for (c = 0; c < 4; c++) {
            cur[c] = nxt[c];
        }
    }

    /* final round: inverse sbox + first round key, written back highest byte first */
    for (c = 0; c < 4; c++) {
        buf[4 * c + 0] = GET_B3(keys[c]) ^ inv[GET_B3(cur[c])];
        buf[4 * c + 1] = GET_B2(keys[c]) ^ inv[GET_B2(cur[(c + 3) & 3])];
        buf[4 * c + 2] = GET_B1(keys[c]) ^ inv[GET_B1(cur[(c + 2) & 3])];
        buf[4 * c + 3] = GET_B0(keys[c]) ^ inv[GET_B0(cur[(c + 1) & 3])];
    }
}
/* **************************** */
/* Allocates a handle and builds its key-independent tables.
 * Returns NULL if allocation fails. The caller owns the result and releases it with
 * s14aes_close(). Memory is zero-initialized so the round keys have a defined (all-zero)
 * value until s14aes_set_key() is called. */
s14aes_handle* s14aes_init(void) {
    s14aes_handle* ctx = calloc(1, sizeof(*ctx));
    if (!ctx)
        return NULL;

    aes_init_state(ctx);
    return ctx;
}
/* Releases a handle created by s14aes_init(). Passing NULL is fine (free(NULL) is a no-op). */
void s14aes_close(s14aes_handle* ctx) {
free(ctx);
}
/* Sets the decryption key: expands 'key' and stores the resulting round keys in ctx.
 * 'key' must point to 24 readable bytes (the key expansion reads key[0..23]). */
void s14aes_set_key(s14aes_handle* ctx, const uint8_t* key) {
aes_init_key(ctx, key);
}
/* Decrypts a single 16-byte block in place (buf[0..15] is read, then overwritten),
 * using the round keys stored in ctx by s14aes_set_key(). */
void s14aes_decrypt(s14aes_handle* ctx, uint8_t* buf) {
aes_decrypt_block(ctx, buf);
}