From fcb0bdbb8bd2fc927b2649895958bad0635ec00f Mon Sep 17 00:00:00 2001
From: bnnm <bananaman255@gmail.com>
Date: Sat, 20 Jan 2024 14:35:50 +0100
Subject: [PATCH] Fix minor UTK issues + cleanup/move

---
 src/CMakeLists.txt               |   6 +
 src/Makefile                     |   2 +-
 src/coding/coding.h              |   3 +-
 src/coding/ea_mt_decoder.c       |  67 ++--
 src/coding/ea_mt_decoder_utk.h   | 469 ------------------------
 src/coding/libs/utkdec.c         | 604 +++++++++++++++++++++++++++++++
 src/coding/libs/utkdec.h         |  48 +++
 src/libvgmstream.vcxproj         |   3 +-
 src/libvgmstream.vcxproj.filters |   9 +-
 9 files changed, 700 insertions(+), 511 deletions(-)
 delete mode 100644 src/coding/ea_mt_decoder_utk.h
 create mode 100644 src/coding/libs/utkdec.c
 create mode 100644 src/coding/libs/utkdec.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 675eb364..3e707614 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,6 +2,8 @@ file(GLOB BASE_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/base/*.h")
 file(GLOB BASE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/base/*.c")
 file(GLOB CODING_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/coding/*.h")
 file(GLOB CODING_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/coding/*.c")
+file(GLOB CLIBS_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/coding/libs/*.h")
+file(GLOB CLIBS_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/coding/libs/*.c")
 file(GLOB LAYOUT_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/layout/*.h")
 file(GLOB LAYOUT_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/layout/*.c")
 file(GLOB META_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/meta/*.h")
@@ -16,6 +18,7 @@ file(GLOB MAIN_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.c")
 # Setup source groups, mainly for Visual Studio
 source_group("Header Files\\base" FILES ${BASE_HEADERS})
 source_group("Header Files\\coding" FILES ${CODING_HEADERS})
+source_group("Header Files\\coding\\libs" FILES ${CLIBS_HEADERS})
 source_group("Header Files\\layout" FILES ${LAYOUT_HEADERS})
 source_group("Header Files\\meta" FILES ${META_HEADERS})
 source_group("Header Files\\util" FILES ${UTIL_HEADERS})
@@ -23,6 +26,7 @@ source_group("Header Files\\ext" FILES ${EXT_HEADERS})
 
 source_group("Source Files\\base" FILES ${BASE_SOURCES})
 source_group("Source Files\\coding" FILES ${CODING_SOURCES})
+source_group("Source Files\\coding\\libs" FILES ${CLIBS_SOURCES})
 source_group("Source Files\\layout" FILES ${LAYOUT_SOURCES})
 source_group("Source Files\\meta" FILES ${META_SOURCES})
 source_group("Source Files\\util" FILES ${UTIL_SOURCES})
@@ -32,6 +36,8 @@ set(libvgmstream_sources
 	${BASE_SOURCES}
 	${CODING_HEADERS}
 	${CODING_SOURCES}
+	${CLIBS_HEADERS}
+	${CLIBS_SOURCES}
 	${LAYOUT_HEADERS}
 	${LAYOUT_SOURCES}
 	${META_HEADERS}
diff --git a/src/Makefile b/src/Makefile
index 06e299a1..2596cd63 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -6,7 +6,7 @@
 OBJECTS =
 
 #SRCS = $(wildcard **/*.c) #GNUMake 3.81?
-SRCS = $(wildcard *.c) $(wildcard */*.c)
+SRCS = $(wildcard *.c) $(wildcard */*.c) $(wildcard */*/*.c)
 OBJECTS = $(patsubst %.c,%.o,$(SRCS))
 
 
diff --git a/src/coding/coding.h b/src/coding/coding.h
index d1642dab..2cc218ce 100644
--- a/src/coding/coding.h
+++ b/src/coding/coding.h
@@ -306,8 +306,9 @@ STREAMFILE* compresswave_get_streamfile(compresswave_codec_data* data);
 /* ea_mt_decoder*/
 typedef struct ea_mt_codec_data ea_mt_codec_data;
 
-ea_mt_codec_data* init_ea_mt(int channels, int type);
+ea_mt_codec_data* init_ea_mt(int channels, int pcm_blocks);
 ea_mt_codec_data* init_ea_mt_loops(int channels, int pcm_blocks, int loop_sample, off_t* loop_offsets);
+ea_mt_codec_data* init_ea_mt_cbx(int channels);
 void decode_ea_mt(VGMSTREAM* vgmstream, sample * outbuf, int channelspacing, int32_t samples_to_do, int channel);
 void reset_ea_mt(VGMSTREAM* vgmstream);
 void flush_ea_mt(VGMSTREAM* vgmstream);
diff --git a/src/coding/ea_mt_decoder.c b/src/coding/ea_mt_decoder.c
index b7b71b2a..38881924 100644
--- a/src/coding/ea_mt_decoder.c
+++ b/src/coding/ea_mt_decoder.c
@@ -1,17 +1,8 @@
 #include "coding.h"
+#include "libs/utkdec.h"
 
-#include "ea_mt_decoder_utk.h"
+/* Decodes EA MicroTalk */
 
-/* Decodes EA MicroTalk (speech codec) using utkencode lib (slightly modified for vgmstream).
- * EA separates MT10:1 and MT5:1 (bigger frames), but apparently are the same
- * with different encoding parameters. Later revisions may have PCM blocks (rare).
- *
- * Decoder by Andrew D'Addesio: https://github.com/daddesio/utkencode
- * Info: http://wiki.niotso.org/UTK
- */
-
-
-//#define UTK_MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24))
 #define UTK_ROUND(x) ((x) >= 0.0f ? ((x)+0.5f) : ((x)-0.5f))
 #define UTK_MIN(x,y) ((x)<(y)?(x):(y))
 #define UTK_MAX(x,y) ((x)>(y)?(x):(y))
@@ -26,21 +17,30 @@ struct ea_mt_codec_data {
     off_t loop_offset;
     int loop_sample;
 
-    int pcm_blocks;
     int samples_filled;
     int samples_used;
     int samples_done;
     int samples_discard;
-    void* utk_context;
+    void* ctx;
 };
 
 static size_t ea_mt_read_callback(void *dest, int size, void *arg);
+static ea_mt_codec_data* init_ea_mt_internal(utk_type_t type, int channels, int loop_sample, off_t* loop_offsets);
+
 
 ea_mt_codec_data* init_ea_mt(int channels, int pcm_blocks) {
     return init_ea_mt_loops(channels, pcm_blocks, 0, NULL);
 }
 
 ea_mt_codec_data* init_ea_mt_loops(int channels, int pcm_blocks, int loop_sample, off_t *loop_offsets) {
+    return init_ea_mt_internal(pcm_blocks ? UTK_EA_PCM : UTK_EA, channels, loop_sample, loop_offsets);
+}
+
+ea_mt_codec_data* init_ea_mt_cbx(int channels) {
+    return init_ea_mt_internal(UTK_CBX, channels, 0, NULL); /* CBX variant: no loop sample, no per-channel loop offsets */
+}
+
+static ea_mt_codec_data* init_ea_mt_internal(utk_type_t type, int channels, int loop_sample, off_t* loop_offsets) {
     ea_mt_codec_data* data = NULL;
     int i;
 
@@ -48,16 +48,14 @@ ea_mt_codec_data* init_ea_mt_loops(int channels, int pcm_blocks, int loop_sample
     if (!data) goto fail;
 
     for (i = 0; i < channels; i++) {
-        data[i].utk_context = calloc(1, sizeof(UTKContext));
-        if (!data[i].utk_context) goto fail;
-        utk_init(data[i].utk_context);
+        data[i].ctx = utk_init(type);
+        if (!data[i].ctx) goto fail;
 
-        data[i].pcm_blocks = pcm_blocks;
         data[i].loop_sample = loop_sample;
         if (loop_offsets)
             data[i].loop_offset = loop_offsets[i];
 
-        utk_set_callback(data[i].utk_context, data[i].buffer, UTK_BUFFER_SIZE, &data[i], &ea_mt_read_callback);
+        utk_set_callback(data[i].ctx, data[i].buffer, UTK_BUFFER_SIZE, &data[i], &ea_mt_read_callback);
     }
 
     return data;
@@ -71,10 +69,9 @@ void decode_ea_mt(VGMSTREAM* vgmstream, sample_t* outbuf, int channelspacing, in
     int i;
     ea_mt_codec_data* data = vgmstream->codec_data;
     ea_mt_codec_data* ch_data = &data[channel];
-    UTKContext* ctx = ch_data->utk_context;
     int samples_done = 0;
 
-
+    float* fbuf = utk_get_samples(ch_data->ctx);
     while (samples_done < samples_to_do) {
 
         if (ch_data->samples_filled) {
@@ -98,7 +95,7 @@ void decode_ea_mt(VGMSTREAM* vgmstream, sample_t* outbuf, int channelspacing, in
                     samples_to_get = samples_to_do - samples_done;
 
                 for (i = ch_data->samples_used; i < ch_data->samples_used + samples_to_get; i++) {
-                    int pcm = UTK_ROUND(ctx->decompressed_frame[i]);
+                    int pcm = UTK_ROUND(fbuf[i]);
                     outbuf[0] = (int16_t)UTK_CLAMP(pcm, -32768, 32767);
                     outbuf += channelspacing;
                 }
@@ -119,19 +116,20 @@ void decode_ea_mt(VGMSTREAM* vgmstream, sample_t* outbuf, int channelspacing, in
 
                 /* offset is usually at loop_offset here, but not always (ex. loop_sample < 432) */
                 ch_data->offset = ch_data->loop_offset;
-                utk_set_ptr(ctx, 0, 0); /* reset the buffer reader */
-                utk_reset(ctx); /* decoder init (all fields must be reset, for some edge cases) */
+                utk_set_buffer(ch_data->ctx, 0, 0); /* reset the buffer reader */
+                utk_reset(ch_data->ctx); /* decoder init (all fields must be reset, for some edge cases) */
             }
         }
         else {
             /* new frame */
-            if (ch_data->pcm_blocks)
-                utk_rev3_decode_frame(ctx);
-            else
-                utk_decode_frame(ctx);
+            int samples = utk_decode_frame(ch_data->ctx);
+            if (samples < 0) {
+                VGM_LOG("wrong decode: %i\n", samples);
+                samples = 432;
+            }
 
             ch_data->samples_used = 0;
-            ch_data->samples_filled = 432;
+            ch_data->samples_filled = samples;
         }
     }
 }
@@ -143,23 +141,20 @@ static void flush_ea_mt_offsets(VGMSTREAM* vgmstream, int is_start, int samples_
     if (!data) return;
 
 
-    /* EA-MT frames are VBR (not byte-aligned?), so utk_decoder reads new buffer data automatically.
+    /* EA-MT frames are VBR and not byte-aligned, so utk_decoder reads new buffer data automatically.
      * When decoding starts or a SCHl block changes, flush_ea_mt must be called to reset the state.
      * A bit hacky but would need some restructuring otherwise. */
 
     for (i = 0; i < vgmstream->channels; i++) {
-        UTKContext* ctx = data[i].utk_context;
-
-        data[i].streamfile = vgmstream->ch[i].streamfile; /* maybe should keep its own STREAMFILE? */
+        data[i].streamfile = vgmstream->ch[i].streamfile;
         if (is_start)
             data[i].offset = vgmstream->ch[i].channel_start_offset;
         else
             data[i].offset = vgmstream->ch[i].offset;
-        utk_set_ptr(ctx, 0, 0); /* reset the buffer reader */
+        utk_set_buffer(data[i].ctx, 0, 0); /* reset the buffer reader */
 
         if (is_start) {
-            utk_reset(ctx);
-            ctx->parsed_header = 0;
+            utk_reset(data[i].ctx);
             data[i].samples_done = 0;
         }
 
@@ -187,7 +182,7 @@ void free_ea_mt(ea_mt_codec_data* data, int channels) {
         return;
 
     for (i = 0; i < channels; i++) {
-        free(data[i].utk_context);
+        utk_free(data[i].ctx);
     }
     free(data);
 }
diff --git a/src/coding/ea_mt_decoder_utk.h b/src/coding/ea_mt_decoder_utk.h
deleted file mode 100644
index 3034bc94..00000000
--- a/src/coding/ea_mt_decoder_utk.h
+++ /dev/null
@@ -1,469 +0,0 @@
-#ifndef _EA_MT_DECODER_UTK_H_
-#define _EA_MT_DECODER_UTK_H_
-
-#include <stdint.h>
-#include <string.h>
-
-/* Note: This struct assumes a member alignment of 4 bytes.
-** This matters when pitch_lag > 216 on the first subframe of any given frame. */
-typedef struct UTKContext {
-    uint8_t *buffer;
-    size_t buffer_size;
-    void *arg;
-    size_t (*read_callback)(void *dest, int size, void *arg);
-    const uint8_t *ptr, *end;
-
-    int parsed_header;
-    unsigned int bits_value;
-    int bits_count;
-    int reduced_bw;
-    int multipulse_thresh;
-    float fixed_gains[64];
-    float rc[12];
-    float synth_history[12];
-    float adapt_cb[324];
-    float decompressed_frame[432];
-} UTKContext;
-
-enum {
-    MDL_NORMAL = 0,
-    MDL_LARGEPULSE = 1
-};
-
-static const float utk_rc_table[64] = {
-    +0.0f,
-    -.99677598476409912109375f, -.99032700061798095703125f, -.983879029750823974609375f, -.977430999279022216796875f,
-    -.970982015132904052734375f, -.964533984661102294921875f, -.958085000514984130859375f, -.9516370296478271484375f,
-    -.930754005908966064453125f, -.904959976673126220703125f, -.879167020320892333984375f, -.853372991085052490234375f,
-    -.827579021453857421875f, -.801786005496978759765625f, -.775991976261138916015625f, -.75019800662994384765625f,
-    -.724404990673065185546875f, -.6986110210418701171875f, -.6706349849700927734375f, -.61904799938201904296875f,
-    -.567460000514984130859375f, -.515873014926910400390625f, -.4642859995365142822265625f, -.4126980006694793701171875f,
-    -.361110985279083251953125f, -.309523999691009521484375f, -.257937014102935791015625f, -.20634900033473968505859375f,
-    -.1547619998455047607421875f, -.10317499935626983642578125f, -.05158700048923492431640625f,
-    +0.0f,
-    +.05158700048923492431640625f, +.10317499935626983642578125f, +.1547619998455047607421875f, +.20634900033473968505859375f,
-    +.257937014102935791015625f, +.309523999691009521484375f, +.361110985279083251953125f, +.4126980006694793701171875f,
-    +.4642859995365142822265625f, +.515873014926910400390625f, +.567460000514984130859375f, +.61904799938201904296875f,
-    +.6706349849700927734375f, +.6986110210418701171875f, +.724404990673065185546875f, +.75019800662994384765625f,
-    +.775991976261138916015625f, +.801786005496978759765625f, +.827579021453857421875f, +.853372991085052490234375f,
-    +.879167020320892333984375f, +.904959976673126220703125f, +.930754005908966064453125f, +.9516370296478271484375f,
-    +.958085000514984130859375f, +.964533984661102294921875f, +.970982015132904052734375f, +.977430999279022216796875f,
-    +.983879029750823974609375f, +.99032700061798095703125f, +.99677598476409912109375f
-};
-
-static const uint8_t utk_codebooks[2][256] = {
-    { /* normal model */
-        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
-        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 21,
-        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
-        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 25,
-        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
-        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 22,
-        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
-        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5,  0,
-        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
-        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 21,
-        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
-        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 26,
-        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
-        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 22,
-        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
-        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5,  2
-    }, { /* large-pulse model */
-        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
-        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 27,
-        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
-        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  1,
-        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
-        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 28,
-        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
-        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  3,
-        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
-        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 27,
-        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
-        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  1,
-        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
-        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 28,
-        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
-        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  3
-    }
-};
-
-static const struct {
-    int next_model;
-    int code_size;
-    float pulse_value;
-} utk_commands[29] = {
-    {MDL_LARGEPULSE, 8,  0.0f},
-    {MDL_LARGEPULSE, 7,  0.0f},
-    {MDL_NORMAL,     8,  0.0f},
-    {MDL_NORMAL,     7,  0.0f},
-    {MDL_NORMAL,     2,  0.0f},
-    {MDL_NORMAL,     2, -1.0f},
-    {MDL_NORMAL,     2, +1.0f},
-    {MDL_NORMAL,     3, -1.0f},
-    {MDL_NORMAL,     3, +1.0f},
-    {MDL_LARGEPULSE, 4, -2.0f},
-    {MDL_LARGEPULSE, 4, +2.0f},
-    {MDL_LARGEPULSE, 3, -2.0f},
-    {MDL_LARGEPULSE, 3, +2.0f},
-    {MDL_LARGEPULSE, 5, -3.0f},
-    {MDL_LARGEPULSE, 5, +3.0f},
-    {MDL_LARGEPULSE, 4, -3.0f},
-    {MDL_LARGEPULSE, 4, +3.0f},
-    {MDL_LARGEPULSE, 6, -4.0f},
-    {MDL_LARGEPULSE, 6, +4.0f},
-    {MDL_LARGEPULSE, 5, -4.0f},
-    {MDL_LARGEPULSE, 5, +4.0f},
-    {MDL_LARGEPULSE, 7, -5.0f},
-    {MDL_LARGEPULSE, 7, +5.0f},
-    {MDL_LARGEPULSE, 6, -5.0f},
-    {MDL_LARGEPULSE, 6, +5.0f},
-    {MDL_LARGEPULSE, 8, -6.0f},
-    {MDL_LARGEPULSE, 8, +6.0f},
-    {MDL_LARGEPULSE, 7, -6.0f},
-    {MDL_LARGEPULSE, 7, +6.0f}
-};
-
-static int utk_read_byte(UTKContext *ctx)
-{
-    if (ctx->ptr < ctx->end)
-        return *ctx->ptr++;
-
-    if (ctx->read_callback) {
-        size_t bytes_copied = ctx->read_callback(ctx->buffer, ctx->buffer_size, ctx->arg);
-        if (bytes_copied > 0 && bytes_copied <= ctx->buffer_size) {
-            ctx->ptr = ctx->buffer;
-            ctx->end = ctx->buffer + bytes_copied;
-            return *ctx->ptr++;
-        }
-    }
-
-    return 0;
-}
-
-static int16_t utk_read_i16(UTKContext *ctx)
-{
-    int x = utk_read_byte(ctx);
-    x = (x << 8) | utk_read_byte(ctx);
-    return x;
-}
-
-static int utk_read_bits(UTKContext *ctx, int count)
-{
-    int ret = ctx->bits_value & ((1 << count) - 1);
-    ctx->bits_value >>= count;
-    ctx->bits_count -= count;
-
-    if (ctx->bits_count < 8) {
-        /* read another byte */
-        ctx->bits_value |= utk_read_byte(ctx) << ctx->bits_count;
-        ctx->bits_count += 8;
-    }
-
-    return ret;
-}
-
-static void utk_parse_header(UTKContext *ctx)
-{
-    int i;
-    float multiplier;
-
-    ctx->reduced_bw = utk_read_bits(ctx, 1);
-    ctx->multipulse_thresh = 32 - utk_read_bits(ctx, 4);
-    ctx->fixed_gains[0] = 8.0f * (1 + utk_read_bits(ctx, 4));
-    multiplier = 1.04f + utk_read_bits(ctx, 6)*0.001f;
-
-    for (i = 1; i < 64; i++)
-        ctx->fixed_gains[i] = ctx->fixed_gains[i-1] * multiplier;
-}
-
-static void utk_decode_excitation(UTKContext *ctx, int use_multipulse, float *out, int stride)
-{
-    int i;
-
-    if (use_multipulse) {
-        /* multi-pulse model: n pulses are coded explicitly; the rest are zero */
-        int model, cmd;
-        model = 0;
-        i = 0;
-        while (i < 108) {
-            cmd = utk_codebooks[model][ctx->bits_value & 0xff];
-            model = utk_commands[cmd].next_model;
-            utk_read_bits(ctx, utk_commands[cmd].code_size);
-
-            if (cmd > 3) {
-                /* insert a pulse with magnitude <= 6.0f */
-                out[i] = utk_commands[cmd].pulse_value;
-                i += stride;
-            } else if (cmd > 1) {
-                /* insert between 7 and 70 zeros */
-                int count = 7 + utk_read_bits(ctx, 6);
-                if (i + count * stride > 108)
-                    count = (108 - i)/stride;
-
-                while (count > 0) {
-                    out[i] = 0.0f;
-                    i += stride;
-                    count--;
-                }
-            } else {
-                /* insert a pulse with magnitude >= 7.0f */
-                int x = 7;
-
-                while (utk_read_bits(ctx, 1))
-                    x++;
-
-                if (!utk_read_bits(ctx, 1))
-                    x *= -1;
-
-                out[i] = (float)x;
-                i += stride;
-            }
-        }
-    } else {
-        /* RELP model: entire residual (excitation) signal is coded explicitly */
-        i = 0;
-        while (i < 108) {
-            if (!utk_read_bits(ctx, 1))
-                out[i] = 0.0f;
-            else if (!utk_read_bits(ctx, 1))
-                out[i] = -2.0f;
-            else
-                out[i] = 2.0f;
-
-            i += stride;
-        }
-    }
-}
-
-static void rc_to_lpc(const float *rc, float *lpc)
-{
-    int i, j;
-    float tmp1[12];
-    float tmp2[12];
-
-    for (i = 10; i >= 0; i--)
-        tmp2[1+i] = rc[i];
-
-    tmp2[0] = 1.0f;
-
-    for (i = 0; i < 12; i++) {
-        float x = -tmp2[11] * rc[11];
-
-        for (j = 10; j >= 0; j--) {
-            x -= tmp2[j] * rc[j];
-            tmp2[j+1] = x * rc[j] + tmp2[j];
-        }
-
-        tmp1[i] = tmp2[0] = x;
-
-        for (j = 0; j < i; j++)
-            x -= tmp1[i-1-j] * lpc[j];
-
-        lpc[i] = x;
-    }
-}
-
-static void utk_lp_synthesis_filter(UTKContext *ctx, int offset, int num_blocks)
-{
-    int i, j, k;
-    float lpc[12];
-    float *ptr = &ctx->decompressed_frame[offset];
-
-    rc_to_lpc(ctx->rc, lpc);
-
-    for (i = 0; i < num_blocks; i++) {
-        for (j = 0; j < 12; j++) {
-            float x = *ptr;
-
-            for (k = 0; k < j; k++)
-                x += lpc[k] * ctx->synth_history[k-j+12];
-            for (; k < 12; k++)
-                x += lpc[k] * ctx->synth_history[k-j];
-
-            ctx->synth_history[11-j] = x;
-            *ptr++ = x;
-        }
-    }
-}
-
-/*
-** Public functions.
-*/
-
-static int utk_decode_frame(UTKContext *ctx)
-{
-    int i, j;
-    int use_multipulse = 0;
-    float excitation[5+108+5];
-    float rc_delta[12];
-
-    if (!ctx->bits_count) {
-        ctx->bits_value = utk_read_byte(ctx);
-        ctx->bits_count = 8;
-    }
-
-    if (!ctx->parsed_header) {
-        utk_parse_header(ctx);
-        ctx->parsed_header = 1;
-    }
-
-    memset(&excitation[0], 0, 5*sizeof(float));
-    memset(&excitation[5+108], 0, 5*sizeof(float));
-
-    /* read the reflection coefficients */
-    for (i = 0; i < 12; i++) {
-        int idx;
-        if (i == 0) {
-            idx = utk_read_bits(ctx, 6);
-            if (idx < ctx->multipulse_thresh)
-                use_multipulse = 1;
-        } else if (i < 4) {
-            idx = utk_read_bits(ctx, 6);
-        } else {
-            idx = 16 + utk_read_bits(ctx, 5);
-        }
-
-        rc_delta[i] = (utk_rc_table[idx] - ctx->rc[i])*0.25f;
-    }
-
-    /* decode four subframes */
-    for (i = 0; i < 4; i++) {
-        int pitch_lag = utk_read_bits(ctx, 8);
-        float pitch_gain = (float)utk_read_bits(ctx, 4)/15.0f;
-        float fixed_gain = ctx->fixed_gains[utk_read_bits(ctx, 6)];
-
-        if (!ctx->reduced_bw) {
-            utk_decode_excitation(ctx, use_multipulse, &excitation[5], 1);
-        } else {
-            /* residual (excitation) signal is encoded at reduced bandwidth */
-            int align = utk_read_bits(ctx, 1);
-            int zero = utk_read_bits(ctx, 1);
-
-            utk_decode_excitation(ctx, use_multipulse, &excitation[5+align], 2);
-
-            if (zero) {
-                /* fill the remaining samples with zero
-                ** (spectrum is duplicated into high frequencies) */
-                for (j = 0; j < 54; j++)
-                    excitation[5+(1-align)+2*j] = 0.0f;
-            } else {
-                /* interpolate the remaining samples
-                ** (spectrum is low-pass filtered) */
-                float *ptr = &excitation[5+(1-align)];
-                for (j = 0; j < 108; j += 2)
-                    ptr[j] =   ptr[j-5] * 0.01803267933428287506103515625f
-                             - ptr[j-3] * 0.114591561257839202880859375f
-                             + ptr[j-1] * 0.597385942935943603515625f
-                             + ptr[j+1] * 0.597385942935943603515625f
-                             - ptr[j+3] * 0.114591561257839202880859375f
-                             + ptr[j+5] * 0.01803267933428287506103515625f;
-
-                /* scale by 0.5f to give the sinc impulse response unit energy */
-                fixed_gain *= 0.5f;
-            }
-        }
-
-        for (j = 0; j < 108; j++)
-            ctx->decompressed_frame[108*i+j] =   fixed_gain * excitation[5+j]
-                                               + pitch_gain * ctx->adapt_cb[108*i+216-pitch_lag+j];
-    }
-
-    for (i = 0; i < 324; i++)
-        ctx->adapt_cb[i] = ctx->decompressed_frame[108+i];
-
-    for (i = 0; i < 4; i++) {
-        for (j = 0; j < 12; j++)
-            ctx->rc[j] += rc_delta[j];
-
-        utk_lp_synthesis_filter(ctx, 12*i, i < 3 ? 1 : 33);
-    }
-
-    return 0;
-}
-
-static void utk_init(UTKContext *ctx)
-{
-    memset(ctx, 0, sizeof(*ctx));
-}
-
-static void utk_reset(UTKContext *ctx)
-{
-    /* resets the internal state, leaving the external config/buffers
-     * untouched (could be reset externally or using utk_set_x) */
-    ctx->parsed_header = 0;
-    ctx->bits_value = 0;
-    ctx->bits_count = 0;
-    ctx->reduced_bw = 0;
-    ctx->multipulse_thresh = 0;
-    memset(ctx->fixed_gains, 0, sizeof(ctx->fixed_gains));
-    memset(ctx->rc, 0, sizeof(ctx->rc));
-    memset(ctx->synth_history, 0, sizeof(ctx->synth_history));
-    memset(ctx->adapt_cb, 0, sizeof(ctx->adapt_cb));
-    memset(ctx->decompressed_frame, 0, sizeof(ctx->decompressed_frame));
-}
-
-static void utk_set_callback(UTKContext *ctx, uint8_t *buffer, size_t buffer_size, void *arg, size_t (*read_callback)(void *, int , void *))
-{
-    /* prepares for external reading */
-    ctx->buffer = buffer;
-    ctx->buffer_size = buffer_size;
-    ctx->arg = arg;
-    ctx->read_callback = read_callback;
-
-    /* reset the bit reader */
-    ctx->bits_count = 0;
-}
-
-static void utk_set_ptr(UTKContext *ctx, const uint8_t *ptr, const uint8_t *end)
-{
-    /* sets the pointer to an external data buffer (can also be used to
-     * reset the buffered data if set to ptr/end 0) */
-    ctx->ptr = ptr;
-    ctx->end = end;
-
-    /* reset the bit reader */
-    ctx->bits_count = 0;
-}
-
-/*
-** MicroTalk Revision 3 decoding function.
-*/
-
-static int utk_rev3_decode_frame(UTKContext *ctx)
-{
-    int pcm_data_present = (utk_read_byte(ctx) == 0xee);
-    int i;
-
-    utk_decode_frame(ctx);
-
-    /* unread the last 8 bits and reset the bit reader */
-    ctx->ptr--;
-    ctx->bits_count = 0;
-
-    if (pcm_data_present) {
-        /* Overwrite n samples at a given offset in the decoded frame with
-        ** raw PCM data. */
-        int offset = utk_read_i16(ctx);
-        int count = utk_read_i16(ctx);
-
-        /* sx.exe does not do any bounds checking or clamping of these two
-        ** fields (see 004274D1 in sx.exe v3.01.01), which means a specially
-        ** crafted MT5:1 file can crash sx.exe.
-        ** We will throw an error instead. */
-        if (offset < 0 || offset > 432) {
-            return -1; /* invalid PCM offset */
-        }
-        if (count < 0 || count > 432 - offset) {
-            return -2; /* invalid PCM count */
-        }
-
-        for (i = 0; i < count; i++)
-            ctx->decompressed_frame[offset+i] = (float)utk_read_i16(ctx);
-    }
-
-    return 0;
-}
-
-#endif /* _EA_MT_DECODER_UTK_H_ */
diff --git a/src/coding/libs/utkdec.c b/src/coding/libs/utkdec.c
new file mode 100644
index 00000000..f93f9dbd
--- /dev/null
+++ b/src/coding/libs/utkdec.c
@@ -0,0 +1,604 @@
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include "utkdec.h"
+
+
+struct utk_context_t { /* whole decoder state, opaque to callers (only declared in utkdec.h) */
+    /* config */ 
+    utk_type_t type; /* codec variant, fixed at utk_init */
+    int parsed_header; /* set once parse_header has run, cleared by utk_reset */
+
+    /* state */
+    struct bitreader_t {
+        const uint8_t* ptr; /* next input byte to load */
+        uint32_t bits_value; /* buffered bits, consumed from the low end */
+        int bits_count; /* valid bits in bits_value (0 = needs init_bits) */
+        /* extra (OG MT/CBX just loads ptr memory externally) */
+        const uint8_t* end; /* one past the last readable byte */
+        void* arg; /* passed through to read_callback */
+        uint8_t* buffer; /* destination for read_callback refills */
+        size_t buffer_size;
+        size_t (*read_callback)(void* dst, int size, void* arg); /* returns bytes written to dst */
+    } br;
+    bool reduced_bandwidth; /* excitation coded for every other sample only */
+    int multipulse_threshold; /* first RC index below this selects the multi-pulse model */
+
+    float fixed_gains[64]; /* indexed by the 6-bit per-subframe gain index */
+    float rc_data[12]; /* current reflection coefficients */
+    float synth_history[12]; /* LP synthesis filter memory */
+    float subframes[324 + 432]; /* backing store for adapt_cb + samples */
+    /* adapt_cb indexes may read from samples, join both + ptr to avoid
+     * struct alignment issues (typically doesn't matter but for completeness) */
+    float* adapt_cb; /* subframes + 0 */
+    float* samples; /* subframes + 324 */
+};
+
+
+/* bit mask; (1 << count) - 1 is probably faster now but OG code uses a table */
+static const uint8_t mask_table[8] = {
+    0x01,0x03,0x07,0x0F,0x1F,0x3F,0x7F,0xFF
+};
+
+/* reflection coefficients, as rounded decimals that correspond to the hex values in exes (actual float is longer);
+ * note this table is mirrored: for i = 1 .. 32, t[64 - i] = -t[i] */
+static const float utk_rc_table[64] = {
+    /* 6b index start */
+    +0.000000, -0.996776, -0.990327, -0.983879,
+    -0.977431, -0.970982, -0.964534, -0.958085,
+    -0.951637, -0.930754, -0.904960, -0.879167,
+    -0.853373, -0.827579, -0.801786, -0.775992,
+    /* 5b index start */
+    -0.750198, -0.724405, -0.698611, -0.670635,
+    -0.619048, -0.567460, -0.515873, -0.464286,
+    -0.412698, -0.361111, -0.309524, -0.257937,
+    -0.206349, -0.154762, -0.103175, -0.051587,
+    +0.000000, +0.051587, +0.103175, +0.154762,
+    +0.206349, +0.257937, +0.309524, +0.361111,
+    +0.412698, +0.464286, +0.515873, +0.567460,
+    +0.619048, +0.670635, +0.698611, +0.724405,
+    +0.750198, +0.775992, +0.801786, +0.827579,
+    +0.853373, +0.879167, +0.904960, +0.930754,
+    +0.951637, +0.958085, +0.964534, +0.970982,
+    +0.977431, +0.983879, +0.990327, +0.996776,
+};
+
+static const uint8_t utk_codebooks[2][256] = {
+    /* normal model */
+    {
+        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
+        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 21,
+        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
+        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 25,
+        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
+        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 22,
+        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
+        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5,  0,
+        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
+        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 21,
+        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
+        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 26,
+        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
+        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 22,
+        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
+        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5,  2
+    },
+    /* large-pulse model */
+    {
+        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
+        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 27,
+        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
+        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  1,
+        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
+        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 28,
+        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
+        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  3,
+        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
+        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 27,
+        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
+        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  1,
+        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
+        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 28,
+        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
+        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  3
+    },
+};
+
+enum {
+    MDL_NORMAL = 0,
+    MDL_LARGEPULSE = 1
+};
+
+static const struct {
+    int next_model;
+    int code_size;
+    float pulse_value;
+} utk_commands[29] = {
+    {MDL_LARGEPULSE, 8,  0.0f},
+    {MDL_LARGEPULSE, 7,  0.0f},
+    {MDL_NORMAL,     8,  0.0f},
+    {MDL_NORMAL,     7,  0.0f},
+    {MDL_NORMAL,     2,  0.0f},
+    {MDL_NORMAL,     2, -1.0f},
+    {MDL_NORMAL,     2, +1.0f},
+    {MDL_NORMAL,     3, -1.0f},
+    {MDL_NORMAL,     3, +1.0f},
+    {MDL_LARGEPULSE, 4, -2.0f},
+    {MDL_LARGEPULSE, 4, +2.0f},
+    {MDL_LARGEPULSE, 3, -2.0f},
+    {MDL_LARGEPULSE, 3, +2.0f},
+    {MDL_LARGEPULSE, 5, -3.0f},
+    {MDL_LARGEPULSE, 5, +3.0f},
+    {MDL_LARGEPULSE, 4, -3.0f},
+    {MDL_LARGEPULSE, 4, +3.0f},
+    {MDL_LARGEPULSE, 6, -4.0f},
+    {MDL_LARGEPULSE, 6, +4.0f},
+    {MDL_LARGEPULSE, 5, -4.0f},
+    {MDL_LARGEPULSE, 5, +4.0f},
+    {MDL_LARGEPULSE, 7, -5.0f},
+    {MDL_LARGEPULSE, 7, +5.0f},
+    {MDL_LARGEPULSE, 6, -5.0f},
+    {MDL_LARGEPULSE, 6, +5.0f},
+    {MDL_LARGEPULSE, 8, -6.0f},
+    {MDL_LARGEPULSE, 8, +6.0f},
+    {MDL_LARGEPULSE, 7, -6.0f},
+    {MDL_LARGEPULSE, 7, +6.0f}
+};
+
+static const float cbx_fixed_gains[64] = {
+    1.068,      68.351997,  72.999931,  77.963921,
+    83.265465,  88.927513,  94.974579,  101.43285,
+    108.33028,  115.69673,  123.5641,   131.96646,
+    140.94017,  150.52409,  160.75972,  171.69138,
+    183.36638,  195.83528,  209.15207,  223.3744,
+    238.56386,  254.78619,  272.11163,  290.6152,
+    310.37701,  331.48264,  354.02344,  378.09702,
+    403.80759,  431.26648,  460.59259,  491.91287,
+    525.36292,  561.08759,  599.24152,  639.98993,
+    683.50922,  729.98779,  779.62695,  832.64154,
+    889.26111,  949.73083,  1014.3125,  1083.2858,
+    1156.9491,  1235.6216,  1319.6438,  1409.3795,
+    1505.2173,  1607.572,   1716.8868,  1833.6351,
+    1958.3223,  2091.488,   2233.7092,  2385.6013,
+    2547.822,   2721.0737,  2906.1067,  3103.7219,
+    3314.7749,  3540.1794,  3780.9116,  4038.0134,
+};
+
+/* Returns the next byte, refilling via read_callback once [ptr, end) is used up, or 0 when no data is left.
+ * OG bitreader can only read from a set ptr (without bounds checks) and seems used only in MT, not other EA stuff. */
+static uint8_t read_byte(struct bitreader_t* br) {
+    if (br->ptr < br->end)
+        return *br->ptr++;
+
+    if (br->read_callback) { /* current data exhausted: ask the caller for more */
+        size_t bytes_copied = br->read_callback(br->buffer, br->buffer_size, br->arg);
+        if (bytes_copied > 0 && bytes_copied <= br->buffer_size) { /* ignore empty or oversized results */
+            br->ptr = br->buffer;
+            br->end = br->buffer + bytes_copied;
+            return *br->ptr++;
+        }
+    }
+
+    return 0; /* out of data: ptr/end are left untouched */
+}
+
+static int16_t read_s16(struct bitreader_t* br) { /* reads 2 bytes straight from ptr, bypassing the bit buffer */
+    int x = read_byte(br); /* high byte first (big-endian) */
+    x = (x << 8) | read_byte(br);
+    return x; /* 0..65535 narrowed by the int16_t return type */
+}
+
+static void init_bits(struct bitreader_t* br) {
+    if (!br->bits_count) {
+        br->bits_value = read_byte(br);
+        br->bits_count = 8;
+    }
+}
+
+static uint8_t peek_bits(struct bitreader_t* br, int count) { /* low 'count' buffered bits, nothing consumed */
+    uint8_t mask = mask_table[count - 1]; /* count must be 1..8 (table has 8 entries) */
+    return br->bits_value & mask;
+}
+
+/* Consumes and returns the low 'count' bits; assumes 1 <= count <= 8 (mask index is count - 1), true for all fixed sizes/tables used here. */
+static uint8_t read_bits(struct bitreader_t* br, int count) {
+    uint8_t mask = mask_table[count - 1];
+    uint8_t ret = br->bits_value & mask;
+    br->bits_value >>= count;
+    br->bits_count -= count;
+
+    if (br->bits_count < 8) {
+        /* read another byte, placed above the bits still buffered (keeps >= 8 bits ready once init_bits ran) */
+        br->bits_value |= read_byte(br) << br->bits_count;
+        br->bits_count += 8;
+    }
+
+    return ret;
+}
+
+/* for clarity, as found in OG code (no return) */
+static void consume_bits(struct bitreader_t* br, int count) {
+    read_bits(br, count);
+}
+
+static void parse_header(utk_context_t* ctx) {
+    if (ctx->type == UTK_CBX) {
+        /* CBX uses fixed parameters unlike EA-MT, probably encoder defaults for MT10:1 */
+        ctx->reduced_bandwidth = true;
+        ctx->multipulse_threshold = 32 - 8;
+
+        /* equivalent to EA-MT with base_gain = 8, base_mult = 28 (plus rounding diffs)
+         * then fixed_gain[0] is set to 1.068 afterwards.
+         * OG CBX code uses config/tables directly rather than copying though */
+        for (int i = 0; i < 64; i++) {
+            ctx->fixed_gains[i] = cbx_fixed_gains[i];
+        }
+    }
+    else {
+        ctx->reduced_bandwidth = read_bits(&ctx->br, 1) == 1;
+
+        int base_thre = read_bits(&ctx->br, 4);
+        int base_gain = read_bits(&ctx->br, 4);
+        int base_mult = read_bits(&ctx->br, 6);
+
+        ctx->multipulse_threshold = 32 - base_thre;
+        ctx->fixed_gains[0] = 8.0f * (1 + base_gain);
+
+        float multiplier = 1.04f + base_mult * 0.001f;
+        for (int i = 1; i < 64; i++) {
+            ctx->fixed_gains[i] = ctx->fixed_gains[i-1] * multiplier;
+        }
+    }
+}
+
+static void decode_excitation(utk_context_t* ctx, bool use_multipulse, float* out, int stride) {
+    int i = 0;
+
+    if (use_multipulse) {
+        /* multi-pulse model: n pulses are coded explicitly; the rest are zero */
+        int model = 0;
+        while (i < 108) {
+            int huffman_code = peek_bits(&ctx->br, 8); /* variable-length, may consume less */
+
+            int cmd = utk_codebooks[model][huffman_code];
+            model = utk_commands[cmd].next_model;
+
+            consume_bits(&ctx->br, utk_commands[cmd].code_size);
+
+            if (cmd > 3) {
+                /* insert a pulse with magnitude <= 6.0f */
+                out[i] = utk_commands[cmd].pulse_value;
+                i += stride;
+            }
+            else if (cmd > 1) {
+                /* insert between 7 and 70 zeros */
+                int count = 7 + read_bits(&ctx->br, 6);
+                if (i + count * stride > 108)
+                    count = (108 - i) / stride;
+
+                while (count > 0) {
+                    out[i] = 0.0f;
+                    i += stride;
+                    count--;
+                }
+            }
+            else {
+                /* insert a pulse with magnitude >= 7.0f */
+                int x = 7;
+
+                while (read_bits(&ctx->br, 1)) {
+                    x++;
+                }
+
+                if (!read_bits(&ctx->br, 1))
+                    x *= -1;
+
+                out[i] = (float)x;
+                i += stride;
+            }
+        }
+    }
+    else {
+        /* RELP model: entire residual (excitation) signal is coded explicitly */
+        while (i < 108) {
+            int bits = 0;
+            float val = 0.0f;
+
+            /* peek + partial consume code (odd to use 2 codes for 0.0 but seen in multiple exes) */
+            int huffman_code = peek_bits(&ctx->br, 2); /* variable-length, may consume less */
+            switch (huffman_code) {
+                case 0: //code: 0
+                case 2: //code: 1 (maybe meant to be -0.0?)
+                    val = 0.0f;
+                    bits = 1;
+                    break;
+                case 1: //code: 01
+                    val = -2.0f;
+                    bits = 2;
+                    break;
+                case 3: //code: 11
+                    val = 2.0f;
+                    bits = 2;
+                    break;
+                default:
+                    break;
+            }
+            consume_bits(&ctx->br, bits);
+
+            out[i] = val;
+            i += stride;
+        }
+    }
+}
+
+static void rc_to_lpc(const float* rc_data, float* lpc) {
+    int j;
+    float tmp1[12];
+    float tmp2[12];
+
+    for (int i = 10; i >= 0; i--) {
+        tmp2[i + 1] = rc_data[i];
+    }
+
+    tmp2[0] = 1.0f;
+
+    for (int i = 0; i < 12; i++) {
+        float x = -(rc_data[11] * tmp2[11]);
+
+        for (j = 10; j >= 0; j--) {
+            x -= (rc_data[j] * tmp2[j]);
+            tmp2[j + 1] = x * rc_data[j] + tmp2[j];
+        }
+
+        tmp2[0] = x;
+        tmp1[i] = x;
+
+        for (j = 0; j < i; j++) {
+            x -= tmp1[i - 1 - j] * lpc[j];
+        }
+
+        lpc[i] = x;
+    }
+}
+
+static void lp_synthesis_filter(utk_context_t* ctx, int offset, int blocks) {
+    int i, j, k;
+    float lpc[12];
+    float* ptr = &ctx->samples[offset];
+
+    rc_to_lpc(ctx->rc_data, lpc);
+
+    for (i = 0; i < blocks; i++) {
+        /* OG: unrolled x12*12 */
+        for (j = 0; j < 12; j++) {
+            float x = *ptr;
+
+            for (k = 0; k < j; k++) {
+                x += lpc[k] * ctx->synth_history[k - j + 12];
+            }
+            for (; k < 12; k++) {
+                x += lpc[k] * ctx->synth_history[k - j + 0];
+            }
+
+            ctx->synth_history[11 - j] = x;
+
+            *ptr++ = x;
+
+            /* CBX only: samples are multiplied by 12582912.0, then coerce_int(sample[i]) on output
+             * to get final int16, as a pseudo-optimization; not sure if worth replicating */
+        }
+    }
+}
+
+/* Rebuilds every other sample from its neighbors at distance 1/3/5. OG sometimes inlines this (sx3, not B&B/CBX) */
+static void interpolate_rest(float* excitation) {
+    for (int i = 0; i < 108; i += 2) { /* touches excitation[-5 .. 111]: caller must point into a padded buffer */
+        float tmp1 = (excitation[i - 5] + excitation[i + 5]) * 0.01803268f;
+        float tmp2 = (excitation[i - 3] + excitation[i + 3]) * 0.11459156f;
+        float tmp3 = (excitation[i - 1] + excitation[i + 1]) * 0.59738597f;
+        excitation[i] = tmp1 - tmp2 + tmp3; /* only even offsets are written, odd ones are inputs */
+    }
+}
+
+static void decode_frame_main(utk_context_t* ctx) {
+    bool use_multipulse = false;
+    float excitation[5 + 108 + 5]; /* extra +5*2 for interpolation */
+    float rc_delta[12];
+
+    /* OG code usually calls this init/parse header after creation rather than on frame decode,
+     * but use a flag for now since buffer can be set/reset after init */
+    init_bits(&ctx->br);
+
+    if (!ctx->parsed_header) {
+        parse_header(ctx);
+        ctx->parsed_header = 1;
+    }
+
+
+    /* read the reflection coefficients (OG unrolled) */
+    for (int i = 0; i < 12; i++) {
+        int idx;
+        if (i == 0) {
+            idx = read_bits(&ctx->br, 6);
+            if (idx < ctx->multipulse_threshold)
+                use_multipulse = true;
+        }
+        else if (i < 4) {
+            idx = read_bits(&ctx->br, 6);
+        }
+        else {
+            idx = 16 + read_bits(&ctx->br, 5);
+        }
+
+        rc_delta[i] = (utk_rc_table[idx] - ctx->rc_data[i]) * 0.25f;
+    }
+
+    /* decode four subframes */
+    for (int i = 0; i < 4; i++) {
+        int pitch_lag = read_bits(&ctx->br, 8);
+        int pitch_value = read_bits(&ctx->br, 4);
+        int gain_index = read_bits(&ctx->br, 6);
+
+        float pitch_gain = (float)pitch_value / 15.0f; /* may be compiled as: value * 0.6..67 (float or double) */
+        float fixed_gain = ctx->fixed_gains[gain_index];
+
+        if (!ctx->reduced_bandwidth) {
+            /* full bandwidth (probably MT5:1) */
+            decode_excitation(ctx, use_multipulse, &excitation[5 + 0], 1);
+            /* OG: CBX doesn't have this flag and removes the if (so not 100% same code as MT) */
+        }
+        else {
+            /* residual (excitation) signal is encoded at reduced bandwidth */
+            int align = read_bits(&ctx->br, 1);
+            int zero_flag = read_bits(&ctx->br, 1);
+
+            decode_excitation(ctx, use_multipulse, &excitation[5 + align], 2);
+
+            if (zero_flag) {
+                /* fill the remaining samples with zero (spectrum is duplicated into high frequencies) */
+                for (int j = 0; j < 54; j++) {
+                    excitation[5 + (1 - align) + 2 * j] = 0.0f;
+                }
+            }
+            else {
+                /* 0'd first + last samples for interpolation */
+                memset(&excitation[0], 0, 5 * sizeof(float));
+                memset(&excitation[5 + 108], 0, 5 * sizeof(float));
+                
+                /* interpolate the remaining samples (spectrum is low-pass filtered) */
+                interpolate_rest(&excitation[5 + (1 - align)]);
+
+                /* scale by 0.5f to give the sinc impulse response unit energy */
+                fixed_gain *= 0.5f;
+            }
+        }
+
+        /* OG: sometimes unrolled */
+        for (int j = 0; j < 108; j++) {
+            /* This has potential to read garbage from fixed_gains/samples (-39 ~ +647). The former
+             * seems avoided by the encoder but we'll clamp it just in case, while the latter is common
+             * and seemingly used on purpose, so it's allowed via joining adapt_cb + samples bufs. */
+            int idx = 108 * i + 216 - pitch_lag + j;
+            if (idx < 0) /* OG: not done but shouldn't matter */
+                idx = 0;
+
+            float tmp1 = fixed_gain * excitation[5 + j];
+            float tmp2 = pitch_gain * ctx->adapt_cb[idx];
+            ctx->samples[108 * i + j] = tmp1 + tmp2;
+        }
+    }
+
+    /* OG: may be compiler-optimized to memcpy */
+    for (int i = 0; i < 324; i++) {
+        ctx->adapt_cb[i] = ctx->samples[108 + i];
+    }
+
+    /* OG: unrolled x4 */
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 12; j++) {
+            ctx->rc_data[j] += rc_delta[j];
+        }
+
+        int blocks = i < 3 ? 1 : 33;
+        lp_synthesis_filter(ctx, 12 * i, blocks);
+    }
+}
+
+/* EA-MT + PCM frame: flag byte (0xEE = PCM block follows), regular frame, optional raw PCM; returns 432, or < 0 on bad PCM fields */
+static int decode_frame_pcm(utk_context_t* ctx) {
+    int pcm_data_present = (read_byte(&ctx->br) == 0xEE);
+
+    decode_frame_main(ctx);
+
+    /* unread the last 8 bits (the byte prefetched by the bit reader) and reset it, so PCM fields
+     * are read byte-aligned; skipped while ptr is unset, as stepping back from NULL is undefined */
+    if (ctx->br.ptr) ctx->br.ptr--;
+    ctx->br.bits_count = 0;
+
+    if (pcm_data_present) {
+        /* Overwrite n samples at a given offset in the decoded frame with raw PCM data. */
+        int offset = read_s16(&ctx->br);
+        int count = read_s16(&ctx->br);
+
+        /* sx.exe does not do any bounds checking or clamping of these two
+         * fields (see 004274D1 in sx.exe v3.01.01), which means a specially
+         * crafted MT5:1 file can crash it. We will throw an error instead. */
+        if (offset < 0 || offset > 432) {
+            return -1; /* invalid PCM offset */
+        }
+        if (count < 0 || count > 432 - offset) {
+            return -2; /* invalid PCM count */
+        }
+
+        for (int i = 0; i < count; i++) {
+            ctx->samples[offset+i] = (float)read_s16(&ctx->br);
+        }
+    }
+
+    return 432;
+}
+
+//
+
+int utk_decode_frame(utk_context_t* ctx) {
+    if (ctx->type == UTK_EA_PCM) {
+        return decode_frame_pcm(ctx);
+    }
+    else {
+        decode_frame_main(ctx);
+        return 432;
+    }
+}
+
+utk_context_t* utk_init(utk_type_t type) { /* heap-allocates a context; pair with utk_free */
+    utk_context_t* ctx = calloc(1, sizeof(utk_context_t));
+    if (!ctx) return NULL;
+    
+    /* calloc already zeroed all state, only config/pointers need setup */
+    ctx->type = type;
+    
+    ctx->adapt_cb = ctx->subframes + 0;
+    ctx->samples = ctx->subframes + 324; /* samples start right after the 324-float adaptive codebook */
+    
+    return ctx;
+}
+
+void utk_free(utk_context_t* ctx) {
+    free(ctx);    
+}
+
+void utk_reset(utk_context_t* ctx) {
+    /* resets the internal state, leaving the external config/buffers
+     * untouched (could be reset externally or using utk_set_x) */
+    ctx->parsed_header = 0;
+    ctx->br.bits_value = 0;
+    ctx->br.bits_count = 0;
+    ctx->reduced_bandwidth = 0;
+    ctx->multipulse_threshold = 0;
+    memset(ctx->fixed_gains, 0, sizeof(ctx->fixed_gains));
+    memset(ctx->rc_data, 0, sizeof(ctx->rc_data));
+    memset(ctx->synth_history, 0, sizeof(ctx->synth_history));
+    memset(ctx->subframes, 0, sizeof(ctx->subframes));
+}
+
+void utk_set_callback(utk_context_t* ctx, uint8_t* buffer, size_t buffer_size, void *arg, size_t (*read_callback)(void *, int , void *)) {
+    ctx->br.buffer = buffer;
+    ctx->br.buffer_size = buffer_size;
+    ctx->br.arg = arg;
+    ctx->br.read_callback = read_callback;
+
+    /* reset the bit reader */
+    ctx->br.bits_count = 0;
+}
+
+void utk_set_buffer(utk_context_t* ctx, const uint8_t* buf, size_t buf_size) { /* reader now covers [buf, buf + buf_size) */
+    ctx->br.ptr = buf;
+    ctx->br.end = buf ? buf + buf_size : NULL; /* NULL buf = no data, whatever buf_size says (no arithmetic on NULL) */
+
+    /* reset the bit reader, so the next decode starts from the first byte of buf */
+    ctx->br.bits_count = 0;
+}
+
+float* utk_get_samples(utk_context_t* ctx) {
+    return ctx->samples;
+}
diff --git a/src/coding/libs/utkdec.h b/src/coding/libs/utkdec.h
new file mode 100644
index 00000000..fac7e1ac
--- /dev/null
+++ b/src/coding/libs/utkdec.h
@@ -0,0 +1,48 @@
+#ifndef _UTKDEK_H_
+#define _UTKDEK_H_
+#include <stddef.h>
+#include <stdint.h>
+/* Decodes Electronic Arts' MicroTalk (a multipulse CELP/RELP speech codec) using utkencode lib,
+ * slightly modified for vgmstream based on decompilation of EA and CBX code.
+ * Original by Andrew D'Addesio: https://github.com/daddesio/utkencode (UNLICENSE/public domain)
+ * Info: http://wiki.niotso.org/UTK
+ *
+ * EA classifies MT as MT10:1 (smaller frames) and MT5:1 (bigger frames), but both are the same
+ * with different encoding parameters. Later revisions may have PCM blocks (rare). This codec was
+ * also reused by Traveller's Tales in CBX (same devs?) with minor modifications.
+ *
+ * TODO:
+ * - lazy/avoid peeking/overreading when no bits left (OG code does it though, shouldn't matter)
+ * - same with read_callback (doesn't affect anything but cleaner)
+ */
+
+typedef enum {
+    UTK_EA,     // standard EA-MT (MT10 or MT5)
+    UTK_EA_PCM, // EA-MT with PCM blocks
+    UTK_CBX,    // Traveller's Tales Chatterbox
+} utk_type_t;
+
+/* opaque struct */
+typedef struct utk_context_t utk_context_t;
+
+/* allocates and inits a UTK context for the given type; returns NULL on alloc failure (release with utk_free) */
+utk_context_t* utk_init(utk_type_t type);
+
+void utk_free(utk_context_t*);
+
+/* reset/flush */
+void utk_reset(utk_context_t* ctx);
+
+/* loads current data (can also be used to reset buffered data if set to 0) */
+void utk_set_buffer(utk_context_t* ctx, const uint8_t* buf, size_t buf_size);
+
+/* prepares for external streaming (buf is where reads store data, arg is any external params for the callback) */
+void utk_set_callback(utk_context_t* ctx, uint8_t* buf, size_t buf_size, void* arg, size_t (*read_callback)(void*, int , void*));
+
+/* main decode; returns decoded samples on ok (always >0), < 0 on error */
+int utk_decode_frame(utk_context_t* ctx);
+
+/* get sample buf (shouldn't change between calls); sample type is PCM float (+-32768 but not clamped) */
+float* utk_get_samples(utk_context_t* ctx);
+
+#endif
diff --git a/src/libvgmstream.vcxproj b/src/libvgmstream.vcxproj
index 7a3ee985..f4d1fa2d 100644
--- a/src/libvgmstream.vcxproj
+++ b/src/libvgmstream.vcxproj
@@ -99,7 +99,6 @@
     <ClInclude Include="coding\coding.h" />
     <ClInclude Include="coding\coding_utils_samples.h" />
     <ClInclude Include="coding\compresswave_decoder_lib.h" />
-    <ClInclude Include="coding\ea_mt_decoder_utk.h" />
     <ClInclude Include="coding\g7221_decoder_aes.h" />
     <ClInclude Include="coding\g7221_decoder_lib.h" />
     <ClInclude Include="coding\g7221_decoder_lib_data.h" />
@@ -115,6 +114,7 @@
     <ClInclude Include="coding\vorbis_custom_data_fsb.h" />
     <ClInclude Include="coding\vorbis_custom_data_wwise.h" />
     <ClInclude Include="coding\vorbis_custom_decoder.h" />
+    <ClInclude Include="coding\libs\utkdec.h" />
     <ClInclude Include="layout\layout.h" />
     <ClInclude Include="meta\9tav_streamfile.h" />
     <ClInclude Include="meta\adx_keys.h" />
@@ -290,6 +290,7 @@
     <ClCompile Include="coding\xa_decoder.c" />
     <ClCompile Include="coding\xmd_decoder.c" />
     <ClCompile Include="coding\yamaha_decoder.c" />
+    <ClCompile Include="coding\libs\utkdec.c" />
     <ClCompile Include="layout\blocked.c" />
     <ClCompile Include="layout\blocked_adm.c" />
     <ClCompile Include="layout\blocked_ast.c" />
diff --git a/src/libvgmstream.vcxproj.filters b/src/libvgmstream.vcxproj.filters
index bd3fcaef..6cc23e0e 100644
--- a/src/libvgmstream.vcxproj.filters
+++ b/src/libvgmstream.vcxproj.filters
@@ -122,9 +122,6 @@
     <ClInclude Include="coding\compresswave_decoder_lib.h">
       <Filter>coding\Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="coding\ea_mt_decoder_utk.h">
-      <Filter>coding\Header Files</Filter>
-    </ClInclude>
     <ClInclude Include="coding\g7221_decoder_aes.h">
       <Filter>coding\Header Files</Filter>
     </ClInclude>
@@ -170,6 +167,9 @@
     <ClInclude Include="coding\vorbis_custom_decoder.h">
       <Filter>coding\Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="coding\libs\utkdec.h">
+      <Filter>coding\libs\Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="layout\layout.h">
       <Filter>layout\Header Files</Filter>
     </ClInclude>
@@ -691,6 +691,9 @@
     <ClCompile Include="coding\yamaha_decoder.c">
       <Filter>coding\Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="coding\libs\utkdec.c">
+      <Filter>coding\libs\Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="layout\blocked.c">
       <Filter>layout\Source Files</Filter>
     </ClCompile>