diff --git a/library/extra/d20/d20.h b/library/extra/d20/d20.h index 5c8b980..a0e4c14 100644 --- a/library/extra/d20/d20.h +++ b/library/extra/d20/d20.h @@ -1,10 +1,8 @@ /** @file d20.h * @brief Implements Dungeons & Dragons style dice in C * - * d20.h is a reimplementation of https://github.com/opensourcedoc/d20-c, - * but following the principles of being a single header/file library with - * a minimal API - * + * d20.h is a reimplementation of https://github.com/opensourcedoc/d20-c, but following the principles of being a single header/file library with a minimal API. + * @source https://github.com/adamml/d20 * @author adamml * @date 2022-11-07 */ diff --git a/library/tools/utkencode/README.md b/library/tools/utkencode/README.md new file mode 100644 index 0000000..2159196 --- /dev/null +++ b/library/tools/utkencode/README.md @@ -0,0 +1,49 @@ +## EA MicroTalk + +EA MicroTalk (also UTalk or UTK) is a linear-predictive speech codec used in +various games by Electronic Arts. The earliest known game to use it is +Beasts & Bumpkins (1997). The codec has a bandwidth of 11.025kHz (sampling rate +22.05kHz) and frame size of 20ms (432 samples) and only supports mono. It is +typically encoded at 32 kbit/s. + +Docs: http://wiki.niotso.org/UTK + +In this repository, I have created a set of open source (public domain +via the UNLICENSE) MicroTalk decoders/encoders. + +* Use utkdecode to decode Maxis UTK (The Sims Online, SimCity 4). +* Use utkdecode-bnb to decode PT/M10 (Beasts & Bumpkins). +* Use utkdecode-fifa to decode FIFA 2001/2002 (PS2) speech samples. This tool + supports regular MicroTalk and MicroTalk Revision 3 + [SCxl files](https://wiki.multimedia.cx/index.php/Electronic_Arts_SCxl).(*) +* Use utkencode to encode Maxis UTK. (This is the simplest container format and + is currently the only one supported for encoding.) + +(*) I wasn't able to find any real-world MicroTalk Rev. 3 samples in any games. +However, you can transcode a FIFA MicroTalk Rev. 2 file to Rev. 
3 using +[EA's Sound eXchange tool](https://wiki.multimedia.cx/index.php/Electronic_Arts_Sound_eXchange) +(`sx -mt_blk input.dat -=output.dat`). + +## Compiling + +``` +gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode utkdecode.c +gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode-fifa utkdecode-fifa.c +gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode-bnb utkdecode-bnb.c +gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkencode utkencode.c +``` + +## How the encoder works + +The encoder for now is very simple. It does LPC analysis using the Levinson +algorithm and transmits the entire excitation signal explicitly. Compression is +achieved by choosing a large fixed codebook gain, such that each excitation +sample has a large (coarse) quantization step size. Error is minimized in the +excitation domain, and the quality is somewhat poor for bitrates below about +48 kbit/s. + +However, MicroTalk is a multi-pulse codec (it is cheap to code long runs of +zeros in the excitation signal). Hence, a much better design (and indeed the +standard practice for multi-pulse speech codecs) is to search for the positions +and amplitudes of n pulses such that error is minimized in the output domain +(or the perceptually weighted domain). This new encoder is still in the works. \ No newline at end of file diff --git a/library/tools/utkencode/UNLICENSE b/library/tools/utkencode/UNLICENSE new file mode 100644 index 0000000..68a49da --- /dev/null +++ b/library/tools/utkencode/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. 
+ +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
/*
 * eachunk.h - in-memory reader for EA "chunk" records.
 *
 * read_chunk() loads one record from a stream: a 32-bit type tag, a 32-bit
 * size that counts those eight header bytes, and then the payload.  The
 * chunk_read_*() helpers consume that payload front to back.  Every helper
 * prints a diagnostic to stderr and calls exit(EXIT_FAILURE) on malformed
 * input, so callers never observe a partially read value.
 *
 * This header calls read_u32() and read_bytes(), which are defined in io.h;
 * include io.h before this file.
 *
 * Patch context that shared this line of the originating diff (kept here so
 * that nothing is dropped):
 *   UNLICENSE, last line: "For more information, please refer to"
 *   diff --git a/library/tools/utkencode/eachunk.h b/library/tools/utkencode/eachunk.h
 *   new file mode 100644
 *   index 0000000..183bccd
 *   --- /dev/null
 *   +++ b/library/tools/utkencode/eachunk.h
 *   @@ -0,0 +1,77 @@   (file recorded without a trailing newline)
 */
#ifndef UTKENCODE_EACHUNK_H
#define UTKENCODE_EACHUNK_H

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One loaded chunk: its type tag and a read cursor over the payload bytes. */
typedef struct EAChunk {
    uint32_t type;  /* tag read from the stream by read_chunk() */
    uint8_t *start; /* first payload byte */
    uint8_t *ptr;   /* next unread payload byte */
    uint8_t *end;   /* one past the last payload byte */
} EAChunk;

/*
 * Copy the next `size` payload bytes into `dest` and advance the cursor.
 * Exits the process if fewer than `size` bytes remain in the chunk.
 */
static void chunk_read_bytes(EAChunk *chunk, uint8_t *dest, size_t size)
{
    size_t bytes_remaining = (size_t)(chunk->end - chunk->ptr);

    if (bytes_remaining < size) {
        fprintf(stderr, "error: unexpected end of chunk\n");
        exit(EXIT_FAILURE);
    }

    memcpy(dest, chunk->ptr, size);
    chunk->ptr += size;
}

/* Read a little-endian 32-bit value from the chunk payload. */
static uint32_t chunk_read_u32(EAChunk *chunk)
{
    uint8_t dest[4];

    chunk_read_bytes(chunk, dest, sizeof(dest));

    /* Widen before shifting: a uint8_t promotes to int, and shifting a
    ** byte >= 0x80 left by 24 would overflow that signed int. */
    return (uint32_t)dest[0]
         | ((uint32_t)dest[1] << 8)
         | ((uint32_t)dest[2] << 16)
         | ((uint32_t)dest[3] << 24);
}

/* Read a single byte from the chunk payload (returned widened to 32 bits). */
static uint32_t chunk_read_u8(EAChunk *chunk)
{
    uint8_t dest;

    chunk_read_bytes(chunk, &dest, sizeof(dest));
    return dest;
}

/*
 * Read a length-prefixed integer: one byte giving the length (0 to 4),
 * followed by that many bytes in big-endian order.  A length of 0 yields 0;
 * a length above 4 is reported as an error and terminates the process.
 */
static uint32_t chunk_read_var_int(EAChunk *chunk)
{
    uint8_t dest[4];
    uint8_t size = chunk_read_u8(chunk);

    if (size > 4) {
        fprintf(stderr, "error: invalid varint size %u\n", (unsigned)size);
        exit(EXIT_FAILURE);
    }

    chunk_read_bytes(chunk, dest, size);

    /* read a big-endian integer of variable length */
    switch (size) {
    case 1:
        return dest[0];
    case 2:
        return ((uint32_t)dest[0] << 8) | dest[1];
    case 3:
        return ((uint32_t)dest[0] << 16) | ((uint32_t)dest[1] << 8) | dest[2];
    case 4:
        return ((uint32_t)dest[0] << 24) | ((uint32_t)dest[1] << 16)
             | ((uint32_t)dest[2] << 8) | dest[3];
    default:
        return 0;
    }
}

/*
 * Read the next chunk from `fp`: type, then size, then (size - 8) payload
 * bytes.  Sizes below 8 or payloads larger than the 4096-byte buffer are
 * rejected with an error and process exit.
 *
 * The returned chunk and its payload live in static storage, so each call
 * overwrites the result of the previous one.
 */
static EAChunk *read_chunk(FILE *fp)
{
    uint32_t size;
    static EAChunk chunk;
    static uint8_t buffer[4096];

    chunk.type = read_u32(fp);

    size = read_u32(fp);
    if (size < 8 || size - 8 > sizeof(buffer)) {
        fprintf(stderr, "error: invalid chunk size %u\n", (unsigned)size);
        exit(EXIT_FAILURE);
    }

    /* the stored size includes the 8 header bytes already consumed */
    size -= 8;
    read_bytes(fp, buffer, size);
    chunk.start = chunk.ptr = buffer;
    chunk.end = buffer + size;

    return &chunk;
}

#endif /* UTKENCODE_EACHUNK_H */
/*
 * io.h - little-endian binary stream helpers shared by the utk* tools.
 *
 * Every helper is all-or-nothing: if fread()/fwrite() transfers fewer bytes
 * than requested, a diagnostic is printed to stderr and the process exits
 * with EXIT_FAILURE, so callers never have to check for short transfers.
 *
 * Patch context that shared this line of the originating diff (kept here so
 * that nothing is dropped):
 *   diff --git a/library/tools/utkencode/io.h b/library/tools/utkencode/io.h
 *   new file mode 100644
 *   index 0000000..4aa5c90
 *   --- /dev/null
 *   +++ b/library/tools/utkencode/io.h
 *   @@ -0,0 +1,78 @@   (file recorded without a trailing newline)
 *   diff --git a/library/tools/utkencode/samples/DS1.M10 b/library/tools/utkencode/samples/DS1.M10
 *   new file mode 100644
 *   index 0000000..bfe38b9
 *   Binary files /dev/null and b/library/tools/utkencode/samples/DS1.M10 differ
 */
#ifndef UTKENCODE_IO_H
#define UTKENCODE_IO_H

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read exactly `size` bytes from `fp` into `dest`.
 * A zero `size` is a no-op.  On a short read the process exits, reporting
 * either the stream error (via errno) or an unexpected end of file.
 */
static void read_bytes(FILE *fp, uint8_t *dest, size_t size)
{
    size_t bytes_copied;

    if (!size)
        return;

    bytes_copied = fread(dest, 1, size, fp);
    if (bytes_copied < size) {
        if (ferror(fp))
            fprintf(stderr, "error: fread failed: %s\n", strerror(errno));
        else
            fprintf(stderr, "error: unexpected end of file\n");

        exit(EXIT_FAILURE);
    }
}

/* Read a little-endian 32-bit value from `fp`. */
static uint32_t read_u32(FILE *fp)
{
    uint8_t dest[4];

    read_bytes(fp, dest, sizeof(dest));

    /* Widen before shifting: a uint8_t promotes to int, and shifting a
    ** byte >= 0x80 left by 24 would overflow that signed int. */
    return (uint32_t)dest[0]
         | ((uint32_t)dest[1] << 8)
         | ((uint32_t)dest[2] << 16)
         | ((uint32_t)dest[3] << 24);
}

/* Read a little-endian 16-bit value from `fp`. */
static uint16_t read_u16(FILE *fp)
{
    uint8_t dest[2];

    read_bytes(fp, dest, sizeof(dest));
    return (uint16_t)(dest[0] | (dest[1] << 8));
}

/* Read a single byte from `fp` (returned widened to 16 bits). */
static uint16_t read_u8(FILE *fp)
{
    uint8_t dest;

    read_bytes(fp, &dest, sizeof(dest));
    return dest;
}

/*
 * Write exactly `size` bytes starting at `dest` to `fp`.
 * A zero `size` is a no-op.  On a short write the process exits after
 * reporting the error from errno.
 */
static void write_bytes(FILE *fp, const uint8_t *dest, size_t size)
{
    if (!size)
        return;

    if (fwrite(dest, 1, size, fp) != size) {
        fprintf(stderr, "error: fwrite failed: %s\n", strerror(errno));
        exit(EXIT_FAILURE);
    }
}

/* Write `x` to `fp` as four bytes, least significant byte first. */
static void write_u32(FILE *fp, uint32_t x)
{
    uint8_t dest[4];

    dest[0] = (uint8_t)x;
    dest[1] = (uint8_t)(x >> 8);
    dest[2] = (uint8_t)(x >> 16);
    dest[3] = (uint8_t)(x >> 24);
    write_bytes(fp, dest, sizeof(dest));
}

/* Write `x` to `fp` as two bytes, least significant byte first. */
static void write_u16(FILE *fp, uint16_t x)
{
    uint8_t dest[2];

    dest[0] = (uint8_t)x;
    dest[1] = (uint8_t)(x >> 8);
    write_bytes(fp, dest, sizeof(dest));
}

/* Write the single byte `x` to `fp`. */
static void write_u8(FILE *fp, uint8_t x)
{
    write_bytes(fp, &x, sizeof(x));
}

#endif /* UTKENCODE_IO_H */
diff --git a/library/tools/utkencode/samples/fifa2001-mt5.dat b/library/tools/utkencode/samples/fifa2001-mt5.dat new file mode 100644 index 0000000..0c0b918 Binary files /dev/null and b/library/tools/utkencode/samples/fifa2001-mt5.dat differ diff --git a/library/tools/utkencode/samples/fifa2001.dat b/library/tools/utkencode/samples/fifa2001.dat new file mode 100644 index 0000000..ad7e294 Binary files /dev/null and b/library/tools/utkencode/samples/fifa2001.dat differ diff --git a/library/tools/utkencode/samples/male.utk b/library/tools/utkencode/samples/male.utk new file mode 100644 index 0000000..bf25a93 Binary files /dev/null and b/library/tools/utkencode/samples/male.utk differ diff --git a/library/tools/utkencode/utk.h b/library/tools/utkencode/utk.h new file mode 100644 index 0000000..6bc6217 --- /dev/null +++ b/library/tools/utkencode/utk.h @@ -0,0 +1,446 @@ +#include +#include +#include + +/* Note: This struct assumes a member alignment of 4 bytes. +** This matters when pitch_lag > 216 on the first subframe of any given frame. 
+ */ +typedef struct UTKContext { + FILE *fp; + const uint8_t *ptr, *end; + int parsed_header; + unsigned int bits_value; + int bits_count; + int reduced_bw; + int multipulse_thresh; + float fixed_gains[64]; + float rc[12]; + float synth_history[12]; + float adapt_cb[324]; + float decompressed_frame[432]; +} UTKContext; + +enum { MDL_NORMAL = 0, MDL_LARGEPULSE = 1 }; + +static const float utk_rc_table[64] = {+0.0f, + -.99677598476409912109375f, + -.99032700061798095703125f, + -.983879029750823974609375f, + -.977430999279022216796875f, + -.970982015132904052734375f, + -.964533984661102294921875f, + -.958085000514984130859375f, + -.9516370296478271484375f, + -.930754005908966064453125f, + -.904959976673126220703125f, + -.879167020320892333984375f, + -.853372991085052490234375f, + -.827579021453857421875f, + -.801786005496978759765625f, + -.775991976261138916015625f, + -.75019800662994384765625f, + -.724404990673065185546875f, + -.6986110210418701171875f, + -.6706349849700927734375f, + -.61904799938201904296875f, + -.567460000514984130859375f, + -.515873014926910400390625f, + -.4642859995365142822265625f, + -.4126980006694793701171875f, + -.361110985279083251953125f, + -.309523999691009521484375f, + -.257937014102935791015625f, + -.20634900033473968505859375f, + -.1547619998455047607421875f, + -.10317499935626983642578125f, + -.05158700048923492431640625f, + +0.0f, + +.05158700048923492431640625f, + +.10317499935626983642578125f, + +.1547619998455047607421875f, + +.20634900033473968505859375f, + +.257937014102935791015625f, + +.309523999691009521484375f, + +.361110985279083251953125f, + +.4126980006694793701171875f, + +.4642859995365142822265625f, + +.515873014926910400390625f, + +.567460000514984130859375f, + +.61904799938201904296875f, + +.6706349849700927734375f, + +.6986110210418701171875f, + +.724404990673065185546875f, + +.75019800662994384765625f, + +.775991976261138916015625f, + +.801786005496978759765625f, + +.827579021453857421875f, + 
+.853372991085052490234375f, + +.879167020320892333984375f, + +.904959976673126220703125f, + +.930754005908966064453125f, + +.9516370296478271484375f, + +.958085000514984130859375f, + +.964533984661102294921875f, + +.970982015132904052734375f, + +.977430999279022216796875f, + +.983879029750823974609375f, + +.99032700061798095703125f, + +.99677598476409912109375f}; + +static const uint8_t utk_codebooks[2][256] = { + {/* normal model */ + 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 17, 4, 6, 5, 9, + 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 21, 4, 6, 5, 9, 4, 6, 5, 13, + 4, 6, 5, 10, 4, 6, 5, 18, 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, + 4, 6, 5, 25, 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 17, + 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 22, 4, 6, 5, 9, + 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18, 4, 6, 5, 9, 4, 6, 5, 14, + 4, 6, 5, 10, 4, 6, 5, 0, 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, + 4, 6, 5, 17, 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 21, + 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18, 4, 6, 5, 9, + 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 26, 4, 6, 5, 9, 4, 6, 5, 13, + 4, 6, 5, 10, 4, 6, 5, 17, 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, + 4, 6, 5, 22, 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18, + 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 2}, + {/* large-pulse model */ + 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 23, 4, 11, 7, 15, + 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 27, 4, 11, 7, 15, 4, 12, 8, 19, + 4, 11, 7, 16, 4, 12, 8, 24, 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, + 4, 12, 8, 1, 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 23, + 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 28, 4, 11, 7, 15, + 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24, 4, 11, 7, 15, 4, 12, 8, 20, + 4, 11, 7, 16, 4, 12, 8, 3, 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, + 4, 12, 8, 23, 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 27, + 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24, 4, 11, 7, 15, + 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 1, 4, 
11, 7, 15, 4, 12, 8, 19, + 4, 11, 7, 16, 4, 12, 8, 23, 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, + 4, 12, 8, 28, 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24, + 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 3}}; + +static const struct { + int next_model; + int code_size; + float pulse_value; +} utk_commands[29] = {{MDL_LARGEPULSE, 8, 0.0f}, {MDL_LARGEPULSE, 7, 0.0f}, + {MDL_NORMAL, 8, 0.0f}, {MDL_NORMAL, 7, 0.0f}, + {MDL_NORMAL, 2, 0.0f}, {MDL_NORMAL, 2, -1.0f}, + {MDL_NORMAL, 2, +1.0f}, {MDL_NORMAL, 3, -1.0f}, + {MDL_NORMAL, 3, +1.0f}, {MDL_LARGEPULSE, 4, -2.0f}, + {MDL_LARGEPULSE, 4, +2.0f}, {MDL_LARGEPULSE, 3, -2.0f}, + {MDL_LARGEPULSE, 3, +2.0f}, {MDL_LARGEPULSE, 5, -3.0f}, + {MDL_LARGEPULSE, 5, +3.0f}, {MDL_LARGEPULSE, 4, -3.0f}, + {MDL_LARGEPULSE, 4, +3.0f}, {MDL_LARGEPULSE, 6, -4.0f}, + {MDL_LARGEPULSE, 6, +4.0f}, {MDL_LARGEPULSE, 5, -4.0f}, + {MDL_LARGEPULSE, 5, +4.0f}, {MDL_LARGEPULSE, 7, -5.0f}, + {MDL_LARGEPULSE, 7, +5.0f}, {MDL_LARGEPULSE, 6, -5.0f}, + {MDL_LARGEPULSE, 6, +5.0f}, {MDL_LARGEPULSE, 8, -6.0f}, + {MDL_LARGEPULSE, 8, +6.0f}, {MDL_LARGEPULSE, 7, -6.0f}, + {MDL_LARGEPULSE, 7, +6.0f}}; + +static int utk_read_byte(UTKContext *ctx) { + if (ctx->ptr < ctx->end) + return *ctx->ptr++; + + if (ctx->fp) { + static uint8_t buffer[4096]; + size_t bytes_copied = fread(buffer, 1, sizeof(buffer), ctx->fp); + if (bytes_copied > 0 && bytes_copied <= sizeof(buffer)) { + ctx->ptr = buffer; + ctx->end = buffer + bytes_copied; + return *ctx->ptr++; + } + } + + return 0; +} + +static int16_t utk_read_i16(UTKContext *ctx) { + int x = utk_read_byte(ctx); + x = (x << 8) | utk_read_byte(ctx); + return x; +} + +static int utk_read_bits(UTKContext *ctx, int count) { + int ret = ctx->bits_value & ((1 << count) - 1); + ctx->bits_value >>= count; + ctx->bits_count -= count; + + if (ctx->bits_count < 8) { + /* read another byte */ + ctx->bits_value |= utk_read_byte(ctx) << ctx->bits_count; + ctx->bits_count += 8; + } + + return ret; +} + +static void 
utk_parse_header(UTKContext *ctx) { + int i; + float multiplier; + + ctx->reduced_bw = utk_read_bits(ctx, 1); + ctx->multipulse_thresh = 32 - utk_read_bits(ctx, 4); + ctx->fixed_gains[0] = 8.0f * (1 + utk_read_bits(ctx, 4)); + multiplier = 1.04f + utk_read_bits(ctx, 6) * 0.001f; + + for (i = 1; i < 64; i++) + ctx->fixed_gains[i] = ctx->fixed_gains[i - 1] * multiplier; +} + +static void utk_decode_excitation(UTKContext *ctx, int use_multipulse, + float *out, int stride) { + int i; + + if (use_multipulse) { + /* multi-pulse model: n pulses are coded explicitly; the rest are zero */ + int model, cmd; + model = 0; + i = 0; + while (i < 108) { + cmd = utk_codebooks[model][ctx->bits_value & 0xff]; + model = utk_commands[cmd].next_model; + utk_read_bits(ctx, utk_commands[cmd].code_size); + + if (cmd > 3) { + /* insert a pulse with magnitude <= 6.0f */ + out[i] = utk_commands[cmd].pulse_value; + i += stride; + } else if (cmd > 1) { + /* insert between 7 and 70 zeros */ + int count = 7 + utk_read_bits(ctx, 6); + if (i + count * stride > 108) + count = (108 - i) / stride; + + while (count > 0) { + out[i] = 0.0f; + i += stride; + count--; + } + } else { + /* insert a pulse with magnitude >= 7.0f */ + int x = 7; + + while (utk_read_bits(ctx, 1)) + x++; + + if (!utk_read_bits(ctx, 1)) + x *= -1; + + out[i] = (float)x; + i += stride; + } + } + } else { + /* RELP model: entire residual (excitation) signal is coded explicitly */ + i = 0; + while (i < 108) { + if (!utk_read_bits(ctx, 1)) + out[i] = 0.0f; + else if (!utk_read_bits(ctx, 1)) + out[i] = -2.0f; + else + out[i] = 2.0f; + + i += stride; + } + } +} + +static void rc_to_lpc(const float *rc, float *lpc) { + int i, j; + float tmp1[12]; + float tmp2[12]; + + for (i = 10; i >= 0; i--) + tmp2[1 + i] = rc[i]; + + tmp2[0] = 1.0f; + + for (i = 0; i < 12; i++) { + float x = -tmp2[11] * rc[11]; + + for (j = 10; j >= 0; j--) { + x -= tmp2[j] * rc[j]; + tmp2[j + 1] = x * rc[j] + tmp2[j]; + } + + tmp1[i] = tmp2[0] = x; + + for (j = 0; j 
< i; j++) + x -= tmp1[i - 1 - j] * lpc[j]; + + lpc[i] = x; + } +} + +static void utk_lp_synthesis_filter(UTKContext *ctx, int offset, + int num_blocks) { + int i, j, k; + float lpc[12]; + float *ptr = &ctx->decompressed_frame[offset]; + + rc_to_lpc(ctx->rc, lpc); + + for (i = 0; i < num_blocks; i++) { + for (j = 0; j < 12; j++) { + float x = *ptr; + + for (k = 0; k < j; k++) + x += lpc[k] * ctx->synth_history[k - j + 12]; + for (; k < 12; k++) + x += lpc[k] * ctx->synth_history[k - j]; + + ctx->synth_history[11 - j] = x; + *ptr++ = x; + } + } +} + +/* +** Public functions. +*/ + +static void utk_decode_frame(UTKContext *ctx) { + int i, j; + int use_multipulse = 0; + float excitation[5 + 108 + 5]; + float rc_delta[12]; + + if (!ctx->bits_count) { + ctx->bits_value = utk_read_byte(ctx); + ctx->bits_count = 8; + } + + if (!ctx->parsed_header) { + utk_parse_header(ctx); + ctx->parsed_header = 1; + } + + memset(&excitation[0], 0, 5 * sizeof(float)); + memset(&excitation[5 + 108], 0, 5 * sizeof(float)); + + /* read the reflection coefficients */ + for (i = 0; i < 12; i++) { + int idx; + if (i == 0) { + idx = utk_read_bits(ctx, 6); + if (idx < ctx->multipulse_thresh) + use_multipulse = 1; + } else if (i < 4) { + idx = utk_read_bits(ctx, 6); + } else { + idx = 16 + utk_read_bits(ctx, 5); + } + + rc_delta[i] = (utk_rc_table[idx] - ctx->rc[i]) * 0.25f; + } + + /* decode four subframes */ + for (i = 0; i < 4; i++) { + int pitch_lag = utk_read_bits(ctx, 8); + float pitch_gain = (float)utk_read_bits(ctx, 4) / 15.0f; + float fixed_gain = ctx->fixed_gains[utk_read_bits(ctx, 6)]; + + if (!ctx->reduced_bw) { + utk_decode_excitation(ctx, use_multipulse, &excitation[5], 1); + } else { + /* residual (excitation) signal is encoded at reduced bandwidth */ + int align = utk_read_bits(ctx, 1); + int zero = utk_read_bits(ctx, 1); + + utk_decode_excitation(ctx, use_multipulse, &excitation[5 + align], 2); + + if (zero) { + /* fill the remaining samples with zero + ** (spectrum is duplicated 
into high frequencies) */ + for (j = 0; j < 54; j++) + excitation[5 + (1 - align) + 2 * j] = 0.0f; + } else { + /* interpolate the remaining samples + ** (spectrum is low-pass filtered) */ + float *ptr = &excitation[5 + (1 - align)]; + for (j = 0; j < 108; j += 2) + ptr[j] = ptr[j - 5] * 0.01803267933428287506103515625f - + ptr[j - 3] * 0.114591561257839202880859375f + + ptr[j - 1] * 0.597385942935943603515625f + + ptr[j + 1] * 0.597385942935943603515625f - + ptr[j + 3] * 0.114591561257839202880859375f + + ptr[j + 5] * 0.01803267933428287506103515625f; + + /* scale by 0.5f to give the sinc impulse response unit energy */ + fixed_gain *= 0.5f; + } + } + + for (j = 0; j < 108; j++) + ctx->decompressed_frame[108 * i + j] = + fixed_gain * excitation[5 + j] + + pitch_gain * ctx->adapt_cb[108 * i + 216 - pitch_lag + j]; + } + + for (i = 0; i < 324; i++) + ctx->adapt_cb[i] = ctx->decompressed_frame[108 + i]; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 12; j++) + ctx->rc[j] += rc_delta[j]; + + utk_lp_synthesis_filter(ctx, 12 * i, i < 3 ? 1 : 33); + } +} + +static void utk_init(UTKContext *ctx) { memset(ctx, 0, sizeof(*ctx)); } + +static void utk_set_fp(UTKContext *ctx, FILE *fp) { + ctx->fp = fp; + + /* reset the bit reader */ + ctx->bits_count = 0; +} + +static void utk_set_ptr(UTKContext *ctx, const uint8_t *ptr, + const uint8_t *end) { + ctx->ptr = ptr; + ctx->end = end; + + /* reset the bit reader */ + ctx->bits_count = 0; +} + +/* +** MicroTalk Revision 3 decoding function. +*/ + +static void utk_rev3_decode_frame(UTKContext *ctx) { + int pcm_data_present = (utk_read_byte(ctx) == 0xee); + int i; + + utk_decode_frame(ctx); + + /* unread the last 8 bits and reset the bit reader */ + ctx->ptr--; + ctx->bits_count = 0; + + if (pcm_data_present) { + /* Overwrite n samples at a given offset in the decoded frame with + ** raw PCM data. 
*/ + int offset = utk_read_i16(ctx); + int count = utk_read_i16(ctx); + + /* sx.exe does not do any bounds checking or clamping of these two + ** fields (see 004274D1 in sx.exe v3.01.01), which means a specially + ** crafted MT5:1 file can crash sx.exe. + ** We will throw an error instead. */ + if (offset < 0 || offset > 432) { + fprintf(stderr, "error: invalid PCM offset %d\n", offset); + exit(EXIT_FAILURE); + } + if (count < 0 || count > 432 - offset) { + fprintf(stderr, "error: invalid PCM count %d\n", count); + exit(EXIT_FAILURE); + } + + for (i = 0; i < count; i++) + ctx->decompressed_frame[offset + i] = (float)utk_read_i16(ctx); + } +} diff --git a/library/tools/utkencode/utkdecode-bnb.c b/library/tools/utkencode/utkdecode-bnb.c new file mode 100644 index 0000000..35383bb --- /dev/null +++ b/library/tools/utkencode/utkdecode-bnb.c @@ -0,0 +1,167 @@ +/* +** utkdecode-bnb +** Decode Beasts & Bumpkins M10 to wav. +** Authors: Andrew D'Addesio +** License: Public domain +** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math +** -fwhole-program -g0 -s -o utkdecode-bnb utkdecode-bnb.c +*/ +#include +#include +#include +#include +#include +#include "utk.h" +#include "io.h" +#include "eachunk.h" + +#define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24)) +#define ROUND(x) ((x) >= 0.0f ? 
((x)+0.5f) : ((x)-0.5f)) +#define MIN(x,y) ((x)<(y)?(x):(y)) +#define MAX(x,y) ((x)>(y)?(x):(y)) +#define CLAMP(x,min,max) MIN(MAX(x,min),max) + +typedef struct PTContext { + FILE *infp, *outfp; + uint32_t num_samples; + uint32_t compression_type; + UTKContext utk; +} PTContext; + +static void pt_read_header(PTContext *pt) +{ + EAChunk *chunk = read_chunk(pt->infp); + + if ((chunk->type & 0xffff) != MAKE_U32('P','T','\x00','\x00')) { + fprintf(stderr, "error: expected PT chunk\n"); + exit(EXIT_FAILURE); + } + + while (1) { + uint8_t cmd = chunk_read_u8(chunk); + if (cmd == 0xFD) { + while (1) { + uint8_t key = chunk_read_u8(chunk); + uint32_t value = chunk_read_var_int(chunk); + + if (key == 0xFF) + break; + else if (key == 0x85) + pt->num_samples = value; + else if (key == 0x83) + pt->compression_type = value; + } + break; + } else { + chunk_read_var_int(chunk); + } + } + + if (pt->compression_type != 9) { + fprintf(stderr, "error: invalid compression type %u (expected 9 for MicroTalk 10:1)\n", + (unsigned)pt->compression_type); + exit(EXIT_FAILURE); + } + + if (pt->num_samples >= 0x01000000) { + fprintf(stderr, "error: invalid num_samples %u\n", pt->num_samples); + exit(EXIT_FAILURE); + } + + /* Initialize the decoder. */ + utk_init(&pt->utk); + + /* Write the WAV header. 
*/ + write_u32(pt->outfp, MAKE_U32('R','I','F','F')); + write_u32(pt->outfp, 36 + pt->num_samples*2); + write_u32(pt->outfp, MAKE_U32('W','A','V','E')); + write_u32(pt->outfp, MAKE_U32('f','m','t',' ')); + write_u32(pt->outfp, 16); + write_u16(pt->outfp, 1); + write_u16(pt->outfp, 1); + write_u32(pt->outfp, 22050); + write_u32(pt->outfp, 22050*2); + write_u16(pt->outfp, 2); + write_u16(pt->outfp, 16); + write_u32(pt->outfp, MAKE_U32('d','a','t','a')); + write_u32(pt->outfp, pt->num_samples*2); +} + +static void pt_decode(PTContext *pt) +{ + UTKContext *utk = &pt->utk; + uint32_t num_samples = pt->num_samples; + + utk_set_fp(utk, pt->infp); + + while (num_samples > 0) { + int count = MIN(num_samples, 432); + int i; + + utk_decode_frame(utk); + + for (i = 0; i < count; i++) { + int x = ROUND(pt->utk.decompressed_frame[i]); + write_u16(pt->outfp, (int16_t)CLAMP(x, -32768, 32767)); + } + + num_samples -= count; + } +} + +int main(int argc, char *argv[]) +{ + PTContext pt; + const char *infile, *outfile; + FILE *infp, *outfp; + int force = 0; + + /* Parse arguments. */ + if (argc == 4 && !strcmp(argv[1], "-f")) { + force = 1; + argv++, argc--; + } + + if (argc != 3) { + printf("Usage: utkdecode-bnb [-f] infile outfile\n"); + printf("Decode Beasts & Bumpkins M10 to wav.\n"); + return EXIT_FAILURE; + } + + infile = argv[1]; + outfile = argv[2]; + + /* Open the input/output files. 
*/ + infp = fopen(infile, "rb"); + if (!infp) { + fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno)); + return EXIT_FAILURE; + } + + if (!force && fopen(outfile, "rb")) { + fprintf(stderr, "error: '%s' already exists\n", outfile); + return EXIT_FAILURE; + } + + outfp = fopen(outfile, "wb"); + if (!outfp) { + fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno)); + return EXIT_FAILURE; + } + + memset(&pt, 0, sizeof(pt)); + pt.infp = infp; + pt.outfp = outfp; + + pt_read_header(&pt); + pt_decode(&pt); + + if (fclose(outfp) != 0) { + fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno)); + return EXIT_FAILURE; + } + + fclose(infp); + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/library/tools/utkencode/utkdecode-fifa.c b/library/tools/utkencode/utkdecode-fifa.c new file mode 100644 index 0000000..06a3abb --- /dev/null +++ b/library/tools/utkencode/utkdecode-fifa.c @@ -0,0 +1,229 @@ +/* +** utkdecode-fifa +** Decode FIFA 2001/2002 MicroTalk to wav. +** Authors: Andrew D'Addesio +** License: Public domain +** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math +** -fwhole-program -g0 -s -o utkdecode-fifa utkdecode-fifa.c +*/ +#include +#include +#include +#include +#include +#include "utk.h" +#include "io.h" +#include "eachunk.h" + +#define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24)) +#define ROUND(x) ((x) >= 0.0f ? 
((x)+0.5f) : ((x)-0.5f)) +#define MIN(x,y) ((x)<(y)?(x):(y)) +#define MAX(x,y) ((x)>(y)?(x):(y)) +#define CLAMP(x,min,max) MIN(MAX(x,min),max) + +typedef struct EAContext { + FILE *infp, *outfp; + uint32_t audio_pos; + uint32_t num_samples; + uint32_t num_data_chunks; + uint32_t compression_type; + uint32_t codec_revision; + UTKContext utk; +} EAContext; + +static void ea_read_schl(EAContext *ea) +{ + uint32_t id; + EAChunk *chunk = read_chunk(ea->infp); + + if (chunk->type != MAKE_U32('S','C','H','l')) { + fprintf(stderr, "error: expected SCHl chunk\n"); + exit(EXIT_FAILURE); + } + + id = chunk_read_u32(chunk); + if ((id & 0xffff) != MAKE_U32('P','T','\x00','\x00')) { + fprintf(stderr, "error: expected PT chunk in SCHl header\n"); + exit(EXIT_FAILURE); + } + + while (1) { + uint8_t cmd = chunk_read_u8(chunk); + if (cmd == 0xFD) { + while (1) { + uint8_t key = chunk_read_u8(chunk); + uint32_t value = chunk_read_var_int(chunk); + + if (key == 0xFF) + break; + else if (key == 0x80) + ea->codec_revision = value; + else if (key == 0x85) + ea->num_samples = value; + else if (key == 0xA0) + ea->compression_type = value; + } + break; + } else { + chunk_read_var_int(chunk); + } + } + + if (ea->compression_type != 4 && ea->compression_type != 22) { + fprintf(stderr, "error: invalid compression type %u (expected 4 for MicroTalk 10:1 or 22 for MicroTalk 5:1)\n", + (unsigned)ea->compression_type); + exit(EXIT_FAILURE); + } + + if (ea->num_samples >= 0x01000000) { + fprintf(stderr, "error: invalid num_samples %u\n", ea->num_samples); + exit(EXIT_FAILURE); + } + + /* Initialize the decoder. */ + utk_init(&ea->utk); + + /* Write the WAV header. 
*/ + write_u32(ea->outfp, MAKE_U32('R','I','F','F')); + write_u32(ea->outfp, 36 + ea->num_samples*2); + write_u32(ea->outfp, MAKE_U32('W','A','V','E')); + write_u32(ea->outfp, MAKE_U32('f','m','t',' ')); + write_u32(ea->outfp, 16); + write_u16(ea->outfp, 1); + write_u16(ea->outfp, 1); + write_u32(ea->outfp, 22050); + write_u32(ea->outfp, 22050*2); + write_u16(ea->outfp, 2); + write_u16(ea->outfp, 16); + write_u32(ea->outfp, MAKE_U32('d','a','t','a')); + write_u32(ea->outfp, ea->num_samples*2); +} + +static void ea_read_sccl(EAContext *ea) +{ + EAChunk *chunk = read_chunk(ea->infp); + + if (chunk->type != MAKE_U32('S','C','C','l')) { + fprintf(stderr, "error: expected SCCl chunk\n"); + exit(EXIT_FAILURE); + } + + ea->num_data_chunks = chunk_read_u32(chunk); + if (ea->num_data_chunks >= 0x01000000) { + fprintf(stderr, "error: invalid num_data_chunks %u\n", (unsigned)ea->num_data_chunks); + exit(EXIT_FAILURE); + } +} + +static void ea_read_scdl(EAContext *ea) +{ + EAChunk *chunk = read_chunk(ea->infp); + UTKContext *utk = &ea->utk; + uint32_t num_samples; + + if (chunk->type != MAKE_U32('S','C','D','l')) { + fprintf(stderr, "error: expected SCDl chunk\n"); + exit(EXIT_FAILURE); + } + + num_samples = chunk_read_u32(chunk); + chunk_read_u32(chunk); /* unknown */ + chunk_read_u8(chunk); /* unknown */ + + if (num_samples > ea->num_samples - ea->audio_pos) + num_samples = ea->num_samples - ea->audio_pos; + + utk_set_ptr(utk, chunk->ptr, chunk->end); + + while (num_samples > 0) { + int count = MIN(num_samples, 432); + int i; + + if (ea->codec_revision >= 3) + utk_rev3_decode_frame(utk); + else + utk_decode_frame(utk); + + for (i = 0; i < count; i++) { + int x = ROUND(ea->utk.decompressed_frame[i]); + write_u16(ea->outfp, (int16_t)CLAMP(x, -32768, 32767)); + } + + ea->audio_pos += count; + num_samples -= count; + } +} + +static void ea_read_scel(const EAContext *ea) +{ + EAChunk *chunk = read_chunk(ea->infp); + + if (chunk->type != MAKE_U32('S','C','E','l')) { + 
fprintf(stderr, "error: expected SCEl chunk\n"); + exit(EXIT_FAILURE); + } + + if (ea->audio_pos != ea->num_samples) { + fprintf(stderr, "error: failed to decode the correct number of samples\n"); + exit(EXIT_FAILURE); + } +} + +int main(int argc, char *argv[]) +{ + EAContext ea; + const char *infile, *outfile; + FILE *infp, *outfp; + int force = 0; + unsigned int i; + + if (argc == 4 && !strcmp(argv[1], "-f")) { + force = 1; + argv++, argc--; + } + + if (argc != 3) { + printf("Usage: utkdecode-fifa [-f] infile outfile\n"); + printf("Decode FIFA 2001/2002 MicroTalk to wav.\n"); + return EXIT_FAILURE; + } + + infile = argv[1]; + outfile = argv[2]; + + infp = fopen(infile, "rb"); + if (!infp) { + fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno)); + return EXIT_FAILURE; + } + + if (!force && fopen(outfile, "rb")) { + fprintf(stderr, "error: '%s' already exists\n", outfile); + return EXIT_FAILURE; + } + + outfp = fopen(outfile, "wb"); + if (!outfp) { + fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno)); + return EXIT_FAILURE; + } + + memset(&ea, 0, sizeof(ea)); + ea.infp = infp; + ea.outfp = outfp; + + ea_read_schl(&ea); + ea_read_sccl(&ea); + + for (i = 0; i < ea.num_data_chunks; i++) + ea_read_scdl(&ea); + + ea_read_scel(&ea); + + if (!outfp) { + fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno)); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/library/tools/utkencode/utkdecode.c b/library/tools/utkencode/utkdecode.c new file mode 100644 index 0000000..8eb5a51 --- /dev/null +++ b/library/tools/utkencode/utkdecode.c @@ -0,0 +1,172 @@ +/* +** utkdecode +** Decode Maxis UTK to wav. 
+** Authors: Andrew D'Addesio
+** License: Public domain
+** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math
+**          -fwhole-program -g0 -s -o utkdecode utkdecode.c
+*/
+#include <stdio.h>  /* FILE, fopen, fclose, printf, fprintf */
+#include <stdlib.h> /* EXIT_SUCCESS, EXIT_FAILURE */
+#include <stdint.h> /* uint16_t, uint32_t, int16_t */
+#include <string.h> /* strcmp, strerror */
+#include <errno.h>  /* errno */
+#include "utk.h"
+#include "io.h"
+
+#define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24))
+#define ROUND(x) ((x) >= 0.0f ? ((x)+0.5f) : ((x)-0.5f))
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#define MAX(x,y) ((x)>(y)?(x):(y))
+#define CLAMP(x,min,max) MIN(MAX(x,min),max)
+
+int main(int argc, char *argv[]) /* usage: utkdecode [-f] infile outfile */
+{
+    const char *infile, *outfile;
+    UTKContext ctx;
+    uint32_t sID;
+    uint32_t dwOutSize;
+    uint32_t dwWfxSize;
+    uint16_t wFormatTag;
+    uint16_t nChannels;
+    uint32_t nSamplesPerSec;
+    uint32_t nAvgBytesPerSec;
+    uint16_t nBlockAlign;
+    uint16_t wBitsPerSample;
+    uint16_t cbSize;
+    uint32_t num_samples;
+    FILE *infp, *outfp;
+    int force = 0;
+    int error = 0;
+    int i;
+
+    /* Parse arguments. */
+    if (argc == 4 && !strcmp(argv[1], "-f")) {
+        force = 1;
+        argv++, argc--;
+    }
+
+    if (argc != 3) {
+        printf("Usage: utkdecode [-f] infile outfile\n");
+        printf("Decode Maxis UTK to wav.\n");
+        return EXIT_FAILURE;
+    }
+
+    infile = argv[1];
+    outfile = argv[2];
+
+    /* Open the input/output files. */
+    infp = fopen(infile, "rb");
+    if (!infp) {
+        fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno));
+        return EXIT_FAILURE;
+    }
+
+    if (!force && fopen(outfile, "rb")) {
+        fprintf(stderr, "error: '%s' already exists\n", outfile);
+        return EXIT_FAILURE;
+    }
+
+    outfp = fopen(outfile, "wb");
+    if (!outfp) {
+        fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno));
+        return EXIT_FAILURE;
+    }
+
+    /* Parse the UTK header.
*/ + sID = read_u32(infp); + dwOutSize = read_u32(infp); + dwWfxSize = read_u32(infp); + wFormatTag = read_u16(infp); + nChannels = read_u16(infp); + nSamplesPerSec = read_u32(infp); + nAvgBytesPerSec = read_u32(infp); + nBlockAlign = read_u16(infp); + wBitsPerSample = read_u16(infp); + cbSize = read_u16(infp); + read_u16(infp); /* padding */ + + if (sID != MAKE_U32('U','T','M','0')) { + fprintf(stderr, "error: not a valid UTK file (expected UTM0 signature)\n"); + return EXIT_FAILURE; + } else if ((dwOutSize & 0x01) != 0 || dwOutSize >= 0x01000000) { + fprintf(stderr, "error: invalid dwOutSize %u\n", (unsigned)dwOutSize); + return EXIT_FAILURE; + } else if (dwWfxSize != 20) { + fprintf(stderr, "error: invalid dwWfxSize %u (expected 20)\n", (unsigned)dwWfxSize); + return EXIT_FAILURE; + } else if (wFormatTag != 1) { + fprintf(stderr, "error: invalid wFormatTag %u (expected 1)\n", (unsigned)wFormatTag); + return EXIT_FAILURE; + } + + if (nChannels != 1) { + fprintf(stderr, "error: invalid nChannels %u (only mono is supported)\n", (unsigned)nChannels); + error = 1; + } + if (nSamplesPerSec < 8000 || nSamplesPerSec > 192000) { + fprintf(stderr, "error: invalid nSamplesPerSec %u\n", (unsigned)nSamplesPerSec); + error = 1; + } + if (nAvgBytesPerSec != nSamplesPerSec * nBlockAlign) { + fprintf(stderr, "error: invalid nAvgBytesPerSec %u (expected nSamplesPerSec * nBlockAlign)\n", (unsigned)nAvgBytesPerSec); + error = 1; + } + if (nBlockAlign != 2) { + fprintf(stderr, "error: invalid nBlockAlign %u (expected 2)\n", (unsigned)nBlockAlign); + error = 1; + } + if (wBitsPerSample != 16) { + fprintf(stderr, "error: invalid wBitsPerSample %u (expected 16)\n", (unsigned)wBitsPerSample); + error = 1; + } + if (cbSize != 0) { + fprintf(stderr, "error: invalid cbSize %u (expected 0)\n", (unsigned)cbSize); + error = 1; + } + if (error) + return EXIT_FAILURE; + + num_samples = dwOutSize/2; + + /* Write the WAV header. 
*/ + write_u32(outfp, MAKE_U32('R','I','F','F')); + write_u32(outfp, 36 + num_samples*2); + write_u32(outfp, MAKE_U32('W','A','V','E')); + write_u32(outfp, MAKE_U32('f','m','t',' ')); + write_u32(outfp, 16); + write_u16(outfp, wFormatTag); + write_u16(outfp, nChannels); + write_u32(outfp, nSamplesPerSec); + write_u32(outfp, nAvgBytesPerSec); + write_u16(outfp, nBlockAlign); + write_u16(outfp, wBitsPerSample); + write_u32(outfp, MAKE_U32('d','a','t','a')); + write_u32(outfp, num_samples*2); + + /* Decode. */ + utk_init(&ctx); + utk_set_fp(&ctx, infp); + + while (num_samples > 0) { + int count = MIN(num_samples, 432); + + utk_decode_frame(&ctx); + + for (i = 0; i < count; i++) { + int x = ROUND(ctx.decompressed_frame[i]); + write_u16(outfp, (int16_t)CLAMP(x, -32768, 32767)); + } + + num_samples -= count; + } + + if (fclose(outfp) != 0) { + fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno)); + return EXIT_FAILURE; + } + + fclose(infp); + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/library/tools/utkencode/utkencode.c b/library/tools/utkencode/utkencode.c new file mode 100644 index 0000000..146c687 --- /dev/null +++ b/library/tools/utkencode/utkencode.c @@ -0,0 +1,1130 @@ +/* +** utkencode +** Encode wav to Maxis UTalk. 
+** Authors: Fatbag
+** License: Public domain (no warranties)
+** Compile: gcc -Wall -Wextra -ansi -pedantic -O2 -ffast-math -g0 -s
+**          -o utkencode utkencode.c
+*/
+
+#include <stdio.h>  /* FILE, fopen, fread, fwrite, printf, fprintf */
+#include <stdlib.h> /* exit, strtol, EXIT_SUCCESS, EXIT_FAILURE */
+#include <stdint.h> /* uint8_t, uint16_t, int16_t */
+#include <string.h> /* memcpy, memcmp, strcmp, strerror */
+#include <errno.h>  /* errno */
+#include <getopt.h> /* getopt_long, struct option */
+
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#define MAX(x,y) ((x)>(y)?(x):(y))
+#define CLAMP(x,min,max) ((x)<(min)?(min):(x)>(max)?(max):(x))
+#define ROUND(x) ((int)((x)>=0?((x)+0.5):((x)-0.5)))
+#define ABS(x) ((x)>=0?(x):-(x))
+
+#define READ16(x) ((x)[0]|((x)[1]<<8))
+#define READ32(x) ((x)[0]|((x)[1]<<8)|((x)[2]<<16)|((x)[3]<<24))
+
+#define WRITE16(d,s) (d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8)
+#define WRITE32(d,s) (d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8),\
+    (d)[2]=(uint8_t)((s)>>16),(d)[3]=(uint8_t)((s)>>24)
+
+const float utk_rc_table[64] = { /* quantization alphabet for the reflection coefficients; second half mirrors the first with opposite sign */
+    0,
+    -.99677598476409912109375, -.99032700061798095703125, -.983879029750823974609375, -.977430999279022216796875,
+    -.970982015132904052734375, -.964533984661102294921875, -.958085000514984130859375, -.9516370296478271484375,
+    -.930754005908966064453125, -.904959976673126220703125, -.879167020320892333984375, -.853372991085052490234375,
+    -.827579021453857421875, -.801786005496978759765625, -.775991976261138916015625, -.75019800662994384765625,
+    -.724404990673065185546875, -.6986110210418701171875, -.6706349849700927734375, -.61904799938201904296875,
+    -.567460000514984130859375, -.515873014926910400390625, -.4642859995365142822265625, -.4126980006694793701171875,
+    -.361110985279083251953125, -.309523999691009521484375, -.257937014102935791015625, -.20634900033473968505859375,
+    -.1547619998455047607421875, -.10317499935626983642578125, -.05158700048923492431640625,
+    0,
+    +.05158700048923492431640625, +.10317499935626983642578125, +.1547619998455047607421875, +.20634900033473968505859375,
+    +.257937014102935791015625, +.309523999691009521484375, +.361110985279083251953125, +.4126980006694793701171875,
+    +.4642859995365142822265625, +.515873014926910400390625,
+.567460000514984130859375, +.61904799938201904296875, + +.6706349849700927734375, +.6986110210418701171875, +.724404990673065185546875, +.75019800662994384765625, + +.775991976261138916015625, +.801786005496978759765625, +.827579021453857421875, +.853372991085052490234375, + +.879167020320892333984375, +.904959976673126220703125, +.930754005908966064453125, +.9516370296478271484375, + +.958085000514984130859375, +.964533984661102294921875, +.970982015132904052734375, +.977430999279022216796875, + +.983879029750823974609375, +.99032700061798095703125, +.99677598476409912109375 +}; + +static const char *prog_name; + +static void print_help(void) +{ + printf("Usage: %s [options] infile.wav outfile.utk\n", prog_name); + printf("Encode wav to Maxis UTalk.\n"); + printf("\n"); + printf("General options:\n"); + printf(" -f, --force overwrite without prompting\n"); + printf(" -q, --quiet suppress normal output and do not prompt\n"); + printf(" -h, --help display this help and exit\n"); + printf(" -V, --version output version information and exit\n"); + printf("\n"); + printf("Encoding options:\n"); + printf(" -b, --bitrate=N target bitrate in bits/sec (default 32000)\n"); + printf(" -H, --halved-inn encode innovation using half bandwidth\n"); + printf(" (default)\n"); + printf(" -F, --full-inn encode innovation using full bandwidth\n"); + printf(" -T, --huff-threshold=N use the Huffman codebook with threshold N where\n"); + printf(" N is an integer between 16 and 32 (inclusive)\n"); + printf(" (default 24)\n"); + printf(" -S, --inngain-sig=N use innovation gain significand N where N is\n"); + printf(" between 8 and 128 (inclusive) in steps of 8\n"); + printf(" (default 64)\n"); + printf(" -B, --inngain-base=N use innovation gain base N where N is between\n"); + printf(" 1.040 and 1.103 (inclusive) in steps of 0.001\n"); + printf(" (default 1.068)\n"); + printf("\n"); + printf("If infile is \"-\", read from standard input.\n"); + printf("If outfile is \"-\", write to 
standard output.\n"); +} + +static void print_version(void) +{ + printf("utkencode 0.0\n"); +} + +static void print_usage_error(void) +{ + fprintf(stderr, "Usage: %s [options] infile.wav outfile.utk\n", + prog_name); + fprintf(stderr, "Try '%s --help' for more options.\n", prog_name); +} + +static const char short_options[] = "fqhVb:HFT:S:B:"; +static const struct option long_options[] = { + {"force", no_argument, 0, 'f'}, + {"quiet", no_argument, 0, 'q'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + {"bitrate", required_argument, 0, 'b'}, + {"halved-inn", no_argument, 0, 'H'}, + {"full-inn", no_argument, 0, 'F'}, + {"huff-threshold", required_argument, 0, 'T'}, + {"inngain-sig", required_argument, 0, 'S'}, + {"inngain-base", required_argument, 0, 'B'}, + {0, 0, 0, 0} +}; + +static int bitrate = 32000; +static int force = 0; +static int quiet = 0; +static int halved_innovation = 1; +static int huffman_threshold = 24; +static int inngain_sig = 64; +static float inngain_base = 1.068f; +static const char *infile = ""; +static const char *outfile = ""; +static FILE *infp = NULL; +static FILE *outfp = NULL; + +static uint8_t wav_buffer[432*2]; +static float input_samples[12+432]; +static float adaptive_codebook[324+432]; +static uint8_t compressed_buffer[1024]; +static uint8_t inn_buffers[2][256]; +static float prev_rc[12]; +static float innovation[5+108+5]; +static float inn_gains[64]; + +struct bit_writer_context { + uint8_t written_bits_count; + size_t pos; + uint8_t *buffer; +}; + +static void read_data(FILE *fp, uint8_t *buffer, size_t size) +{ + if (fread(buffer, 1, size, fp) != size) { + fprintf(stderr, "%s: failed to read '%s': %s\n", + prog_name, infile, ferror(fp) + ? 
strerror(errno) : "reached end of file"); + exit(EXIT_FAILURE); + } +} + +static void write_data(FILE *fp, const uint8_t *buffer, size_t size) +{ + if (fwrite(buffer, 1, size, fp) != size) { + fprintf(stderr, "%s: failed to write to '%s': %s\n", + prog_name, outfile, ferror(fp) + ? strerror(errno) : "reached end of file"); + exit(EXIT_FAILURE); + } +} + +static void flush_data(FILE *fp) +{ + if (fflush(fp) != 0) { + fprintf(stderr, "%s: failed to flush '%s': %s\n", + prog_name, outfile, strerror(errno)); + exit(EXIT_FAILURE); + } +} + +static void bwc_init(struct bit_writer_context *bwc, uint8_t *buffer) +{ + bwc->written_bits_count = 0; + bwc->pos = 0; + bwc->buffer = buffer; + bwc->buffer[0] = 0; +} + +static void bwc_write_bits(struct bit_writer_context *bwc, unsigned value, + uint8_t count) +{ + unsigned x = value << bwc->written_bits_count; + + bwc->buffer[bwc->pos] |= (uint8_t)x; + bwc->written_bits_count += count; + + while (bwc->written_bits_count >= 8) { + x >>= 8; + bwc->buffer[++bwc->pos] = (uint8_t)x; + bwc->written_bits_count -= 8; + } +} + +static void bwc_pad(struct bit_writer_context *bwc) +{ + if (bwc->written_bits_count != 0) { + bwc->buffer[++bwc->pos] = 0; + bwc->written_bits_count = 0; + } +} + +static void bwc_flush(struct bit_writer_context *bwc, FILE *fp) +{ + write_data(fp, bwc->buffer, bwc->pos); + bwc->buffer[0] = bwc->buffer[bwc->pos]; + bwc->pos = 0; +} + +static unsigned quantize(float value, const float *alphabet, size_t alphabet_size) +{ + unsigned i; + unsigned min_idx = 0; + float min_distance = ABS(value - alphabet[0]); + + for (i = 1; i < alphabet_size; i++) { + float distance = ABS(value - alphabet[i]); + + if (distance < min_distance) { + min_distance = distance; + min_idx = i; + } + } + + return min_idx; +} + +/* used in the parsing of some arguments */ +static int read_dec_places(const char *string, int n) +{ + int i; + int value = 0; + int pows_10[10]; + + pows_10[0] = 1; + for (i = 1; i < n; i++) + pows_10[i] = pows_10[i-1] 
* 10; + + for (i = 0; i < n && string[i] >= '0' && string[i] <= '9'; i++) + value += pows_10[n-1-i] * (string[i]-'0'); + + return (string[i] == '\0') ? value : -1; +} + +static int file_exists(const char *filename) +{ + FILE *fp; + + fp = fopen(filename, "rb"); + if (fp) { + fclose(fp); + return 1; + } + + return 0; +} + +static void find_autocorrelations(float *r, const float *samples) +{ + int i, j; + + for (i = 0; i < 13; i++) { + r[i] = 0; + for (j = 0; j < 432 - i; j++) + r[i] += samples[j]*samples[j+i]; + } +} + +static void levinson_durbin_symmetric(float *x, float *k, + const float *r, const float *y) +{ + float a[12]; /* the forward vector */ + float e; /* prediction error */ + int i; + + if (r[0] <= 1.0f/32768.0f && r[0] >= -1.0f/32768.0f) + goto zero; + + a[0] = 1; + e = r[0]; + x[0] = y[0]/r[0]; + + for (i = 1; i < 12; i++) { + float u, m; + float a_temp[12]; + int j; + + u = 0.0f; + for (j = 0; j < i; j++) + u += a[j]*r[i-j]; + + k[i-1] = -u/e; /* reflection coefficient i-1 */ + e += u*k[i-1]; /* update e to the new value e - u*u/e */ + + if (e <= 1.0f/32768.0f && e >= -1.0f/32768.0f) + goto zero; + + memcpy(a_temp, a, i*sizeof(float)); + a[i] = 0.0f; + for (j = 1; j <= i; j++) + a[j] += k[i-1]*a_temp[i-j]; + + m = y[i]; + for (j = 0; j < i; j++) + m -= x[j]*r[i-j]; + m /= e; + + x[i] = 0.0f; + for (j = 0; j <= i; j++) + x[j] += m*a[i-j]; + } + + k[11] = -x[11]; + + return; + +zero: + for (i = 0; i < 12; i++) + x[i] = 0.0f; + for (i = 0; i < 12; i++) + k[i] = 0.0f; +} + +static void rc_to_lpc(float *x, const float *k) +{ + float a[13]; /* the forward vector */ + unsigned i, j; + a[0] = 1; + + for (i = 1; i < 13; i++) { + float a_temp[12]; + memcpy(a_temp, a, i*sizeof(float)); + a[i] = 0.0f; + for (j = 1; j <= i; j++) + a[j] += k[i-1]*a_temp[i-j]; + } + + for (i = 1; i < 13; i++) + x[i-1] = -a[i]; +} + +static void find_rc(float *rc, const float *samples) +{ + float r[13]; + float lpc[12]; + find_autocorrelations(r, samples); + 
levinson_durbin_symmetric(lpc, rc, r, r+1); +} + +static void find_excitation(float *excitation, const float *source, + int length, const float *lpc) +{ + int i, j; + + for (i = 0; i < length; i++) { + float prediction = 0.0f; + for (j = 0; j < 12; j++) + prediction += lpc[j]*source[i-1-j]; + excitation[i] = source[i] - prediction; + } +} + +static void find_pitch(int *pitch_lag, float *pitch_gain, + const float *excitation) +{ + int max_corr_offset = 108; + float max_corr_value = 0.0f; + float history_energy; + float gain; + int i, j; + + /* Find the optimal pitch lag. */ + for (i = 108; i < 324; i++) { + float corr = 0.0f; + for (j = 0; j < 108; j++) + corr += excitation[j]*excitation[j-i]; + if (corr > max_corr_value) { + max_corr_offset = i; + max_corr_value = corr; + } + } + + /* Find the optimal pitch gain. */ + history_energy = 0.0f; + for (i = 0; i < 108; i++) { + float value = excitation[i-max_corr_offset]; + history_energy += value*value; + } + + if (history_energy >= 1/32768.0f) { + gain = max_corr_value / history_energy; + gain = CLAMP(gain, 0.0f, 1.0f); + + *pitch_lag = max_corr_offset; + *pitch_gain = gain; + } else { + *pitch_lag = 108; + *pitch_gain = 0.0f; + } +} + +static void interpolate(float *x, int a, int z) +{ + int i; + + if (z) { + for (i = !a; i < 108; i+=2) + x[i] = 0.0f; + } else { + for (i = !a; i < 108; i+=2) + x[i] + = (x[i-1]+x[i+1]) * .5973859429f + - (x[i-3]+x[i+3]) * .1145915613f + + (x[i-5]+x[i+5]) * .0180326793f; + } +} + +static float interpolation_error(int a, int z, const float *x) +{ + float error = 0.0f; + int i; + + if (z) { + for (i = !a; i < 108; i+=2) + error += x[i]*x[i]; + } else { + for (i = !a; i < 108; i+=2) { + float prediction + = (x[i-1]+x[i+1]) * .5973859429f + - (x[i-3]+x[i+3]) * .1145915613f + + (x[i-5]+x[i+5]) * .0180326793f; + error += (prediction - x[i])*(prediction - x[i]); + } + } + + return error; +} + +static void find_a_z_flags(int *a, int *z, const float *innovation) +{ + /* Find the a and z flags 
such that the least error is introduced
+    ** in the downsampling step. In case of a tie (e.g. in silence),
+    ** prefer using the zero flag. Thus, we will test in the order:
+    ** (a=0,z=1), (a=1,z=1), (a=0,z=0), (a=1,z=0). */
+    float error;
+    float best_error;
+    int best_a = 0, best_z = 1; /* first candidate; later ones replace it only when strictly better, so ties keep the earlier choice */
+
+    best_error = interpolation_error(0, 1, innovation);
+
+    error = interpolation_error(1, 1, innovation);
+    if (error < best_error) {
+        best_error = error;
+        best_a = 1, best_z = 1;
+    }
+
+    error = interpolation_error(0, 0, innovation);
+    if (error < best_error) {
+        best_error = error;
+        best_a = 0, best_z = 0;
+    }
+
+    error = interpolation_error(1, 0, innovation);
+    if (error < best_error) {
+        best_error = error;
+        best_a = 1, best_z = 0;
+    }
+
+    *a = best_a;
+    *z = best_z;
+}
+
+struct huffman_code {
+    uint16_t bits_value; /* code word, passed as the value argument of bwc_write_bits */
+    uint16_t bits_count; /* code word length in bits */
+};
+
+static const struct huffman_code huffman_models[2][13+1+13] = { /* indexed [model][13+value] for value in -13..+13 */
+    /* model 0 */
+    {
+        /* -13 */ {16255, 16},
+        /* -12 */ {8063, 15},
+        /* -11 */ {3967, 14},
+        /* -10 */ {1919, 13},
+        /* -9 */ {895, 12},
+        /* -8 */ {383, 11},
+        /* -7 */ {127, 10},
+        /* -6 */ {63, 8},
+        /* -5 */ {31, 7},
+        /* -4 */ {15, 6},
+        /* -3 */ {7, 5},
+        /* -2 */ {3, 4},
+        /* -1 */ {2, 2},
+        /* 0 */ {0, 2},
+        /* +1 */ {1, 2},
+        /* +2 */ {11, 4},
+        /* +3 */ {23, 5},
+        /* +4 */ {47, 6},
+        /* +5 */ {95, 7},
+        /* +6 */ {191, 8},
+        /* +7 */ {639, 10},
+        /* +8 */ {1407, 11},
+        /* +9 */ {2943, 12},
+        /* +10 */ {6015, 13},
+        /* +11 */ {12159, 14},
+        /* +12 */ {24447, 15},
+        /* +13 */ {49023, 16}
+    },
+
+    /* model 1 */
+    {
+        /* -13 */ {8127, 15},
+        /* -12 */ {4031, 14},
+        /* -11 */ {1983, 13},
+        /* -10 */ {959, 12},
+        /* -9 */ {447, 11},
+        /* -8 */ {191, 10},
+        /* -7 */ {63, 9},
+        /* -6 */ {31, 7},
+        /* -5 */ {15, 6},
+        /* -4 */ {7, 5},
+        /* -3 */ {3, 4},
+        /* -2 */ {1, 3},
+        /* -1 */ {2, 3},
+        /* 0 */ {0, 2},
+        /* +1 */ {6, 3},
+        /* +2 */ {5, 3},
+        /* +3 */ {11, 4},
+        /* +4 */ {23, 5},
+        /* +5 */ {47, 6},
+        /* +6 */ {95, 7},
+        /* +7 */ {319, 9},
+        /* +8 */ {703,
10},
+        /* +9 */ {1471, 11},
+        /* +10 */ {3007, 12},
+        /* +11 */ {6079, 13},
+        /* +12 */ {12223, 14},
+        /* +13 */ {24511, 15}
+    }
+};
+
+static void encode_huffman(struct bit_writer_context *bwc, /* quantizes innovation_in with gain index pow and appends the Huffman-coded subframe to bwc */
+    float *innovation_out, int *bits_used_out, float *error_out, /* outputs: quantized signal, bits appended, summed squared quantization error */
+    const float *innovation_in, int halved_innovation,
+    int pow, int a, int z)
+{
+    int interval = halved_innovation ? 2 : 1; /* halved mode codes every other sample, starting at offset a */
+    float inn_gain;
+    float total_error = 0.0f;
+    int counter;
+    int values[108];
+    int zero_counts[108] = {0}; /* zero-filled: with interval=2 and a=1 the run scan stops at i=105, yet the emit loop still reads slot 107, which must not look like a zero run */
+    int model;
+    int bits_start, bits_end;
+    int i;
+
+    inn_gain = inn_gains[pow];
+    if (!z)
+        inn_gain *= 0.5f;
+
+    bits_start = 8*bwc->pos + bwc->written_bits_count;
+
+    if (halved_innovation)
+        bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8); /* 6-bit gain index plus the a and z flags */
+    else
+        bwc_write_bits(bwc, pow, 6);
+
+    for (i = a; i < 108; i += interval) {
+        float e;
+
+        values[i] = ROUND(CLAMP(
+            innovation_in[i]/inn_gain, -13.0f, 13.0f)); /* the code tables cover -13..+13 only */
+
+        innovation_out[i] = inn_gain*values[i];
+
+        e = innovation_out[i] - innovation_in[i];
+        total_error += e*e;
+    }
+
+    *error_out = total_error;
+
+    /* Find the zero runs at each position i (how many zeros
+    ** in a row there are at position i).
+    ** When interval=2 and a=1, start the search from i=105 instead
+    ** of 107 in order to duplicate the off-by-one mistake in the
+    ** decoder. (Thus, we will subtract a instead of adding.)
+ ** For details, see: http://wiki.niotso.org/UTK */ + counter = 0; + for (i = 108 - interval - a; i >= 0; i -= interval) { + if (values[i] == 0) + counter++; + else + counter = 0; + zero_counts[i] = counter; + } + + i = a; + model = 0; + while (i < 108) { + if (zero_counts[i] >= 7) { + int length = MIN(zero_counts[i], 70); + + if (model == 0) + bwc_write_bits(bwc, 255 | ((length-7)<<8), 14); + else + bwc_write_bits(bwc, 127 | ((length-7)<<7), 13); + + model = 0; + i += length * interval; + } else { + int value = values[i]; + + bwc_write_bits(bwc, + huffman_models[model][13+value].bits_value, + huffman_models[model][13+value].bits_count); + + model = (value < -1 || value > 1); + i += interval; + } + } + + bits_end = 8*bwc->pos + bwc->written_bits_count; + *bits_used_out = bits_end - bits_start; +} + +static void encode_triangular(struct bit_writer_context *bwc, + float *innovation_out, int *bits_used_out, float *error_out, + const float *innovation_in, int halved_innovation, + int pow, int a, int z) +{ + int interval = halved_innovation ? 
2 : 1; + float inn_gain; + float total_error = 0.0f; + int bits_start, bits_end; + int i; + + inn_gain = 2.0f*inn_gains[pow]; + if (!z) + inn_gain *= 0.5f; + + bits_start = 8*bwc->pos + bwc->written_bits_count; + + if (halved_innovation) + bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8); + else + bwc_write_bits(bwc, pow, 6); + + for (i = a; i < 108; i += interval) { + float e; + int value = ROUND(CLAMP( + innovation_in[i]/inn_gain, -1.0f, 1.0f)); + + if (value > 0) + bwc_write_bits(bwc, 3, 2); + else if (value < 0) + bwc_write_bits(bwc, 1, 2); + else + bwc_write_bits(bwc, 0, 1); + + innovation_out[i] = inn_gain*value; + + e = innovation_out[i] - innovation_in[i]; + total_error += e*e; + } + + bits_end = 8*bwc->pos + bwc->written_bits_count; + *bits_used_out = bits_end - bits_start; + + *error_out = total_error; +} + +static void low_pass_innovation(float *x, int a, int z) +{ + /* Apply a weak low-pass filter to the innovation signal suitable for + ** downsampling it by 1/2. Note that, since we are throwing out all + ** x[m] samples where m != a+2*k for integer k, we only have to filter + ** the x[n] samples where n = a+2*k. */ + int i; + + /* filter coeffs: (GNU Octave) + ** n = 10; b = sinc((-n/4):.5:(n/4)).*hamming(n+9)(5:(n+5))' */ + for (i = a; i < 108; i+=2) + x[i] = (z ? 1.0f : 0.5f)*(x[i] + + (x[i-1]+x[i+1]) * 0.6189590521549956f + + (x[i-3]+x[i+3]) * -0.1633990749076792f + + (x[i-5]+x[i+5]) * 0.05858453198856907f); +} + +struct innovation_encoding { + struct bit_writer_context bwc; + float innovation[108]; + int bits_used; + float error; +}; + +static void encode_innovation(struct bit_writer_context *bwc, + float *innovation, int halved_innovation, int use_huffman, + int *bits_used, int target_bit_count) +{ + int a = 0, z = 1; + struct innovation_encoding encodings[2]; + int m = 0; + + if (halved_innovation) { + find_a_z_flags(&a, &z, innovation); + low_pass_innovation(innovation, a, z); + } + + if (use_huffman) { + /* Encode using the Huffman model. 
*/ + int interval = halved_innovation ? 2 : 1; + float max_value = 0.0f; + int min_pow; + int best_distance = 0; + int pow; + int i; + + /* Find the minimum innovation power such that the innovation + ** signal doesn't clip anywhere in time. (We consider clipping + ** a sample by <=0.5 of a quantization level to be okay since + ** the sample already rounds down [towards zero].) */ + for (i = a; i < 108; i += interval) { + float value = ABS(innovation[i]); + if (value > max_value) + max_value = value; + } + for (i = 62; i >= 0; i--) { + if (inn_gains[i]*(!z ? 0.5f : 1.0f)*13.5f + < max_value) + break; + } + min_pow = i+1; + + /* Find the innovation gain that results in the closest + ** to the target bitrate without clipping occurring. */ + for (pow = min_pow; pow <= 63; pow++) { + int distance; + + bwc_init(&encodings[m].bwc, inn_buffers[m]); + bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos], + bwc->written_bits_count); + + encode_huffman(&encodings[m].bwc, + encodings[m].innovation, + &encodings[m].bits_used, + &encodings[m].error, + innovation, halved_innovation, + pow, a, z); + + distance = ABS(encodings[m].bits_used + - target_bit_count); + if (pow == min_pow || distance < best_distance) { + best_distance = distance; + m = !m; /* swap the buffers */ + } + } + } else { + /* Encode using the triangular noise model. */ + float best_error = 0.0f; + int pow; + + /* Find the innovation gain that results in + ** the highest quality. 
*/ + for (pow = 0; pow <= 63; pow++) { + bwc_init(&encodings[m].bwc, inn_buffers[m]); + bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos], + bwc->written_bits_count); + + encode_triangular(&encodings[m].bwc, + encodings[m].innovation, + &encodings[m].bits_used, + &encodings[m].error, + innovation, halved_innovation, + pow, a, z); + + if (pow == 0 || encodings[m].error < best_error) { + best_error = encodings[m].error; + m = !m; /* swap the buffers */ + } + } + } + + /* Swap the buffers again to return back to our best encoding. */ + m = !m; + + /* Write this encoding out to the UTK bitstream. */ + memcpy(&bwc->buffer[bwc->pos], encodings[m].bwc.buffer, + encodings[m].bwc.pos+1); + bwc->pos += encodings[m].bwc.pos; + bwc->written_bits_count = encodings[m].bwc.written_bits_count; + + /* Update the innovation signal with the quantized version. */ + memcpy(innovation, encodings[m].innovation, 108*sizeof(float)); + if (halved_innovation) + interpolate(innovation, a, z); + + *bits_used = encodings[m].bits_used; +} + +static int parse_arguments(int argc, char *argv[]) +{ + int c; + int value; + char *endptr; + + prog_name = (argc >= 1 && argv[0][0] != '\0') ? 
argv[0] : "utkencode"; + + while ((c = getopt_long(argc, argv, short_options, + long_options, NULL)) != -1) { + switch (c) { + case 'b': + bitrate = (int)strtol(optarg, &endptr, 10); + if (*endptr != '\0' + || bitrate < 1000 + || bitrate > 1000000) { + fprintf(stderr, "%s: invalid bitrate -- %s\n", + prog_name, optarg); + print_usage_error(); + return -1; + } + break; + case 'f': + force = 1; + break; + case 'q': + quiet = 1; + break; + case 'h': + print_help(); + return 1; + case 'V': + print_version(); + return 1; + case 'H': + halved_innovation = 1; + break; + case 'F': + halved_innovation = 0; + break; + case 'T': + huffman_threshold = (int)strtol(optarg, &endptr, 10); + if (*endptr != '\0' + || huffman_threshold < 16 + || huffman_threshold > 32) { + fprintf(stderr, "%s: invalid Huffman " + "threshold -- %s\n", prog_name, optarg); + print_usage_error(); + return -1; + } + break; + case 'S': + inngain_sig = (int)strtol(optarg, &endptr, 10); + if (*endptr != '\0' + || inngain_sig < 8 + || inngain_sig > 128 + || (inngain_sig & 7) != 0) { + fprintf(stderr, "%s: invalid innovation gain" + " significand -- %s\n", prog_name, + optarg); + print_usage_error(); + return -1; + } + break; + case 'B': + if (optarg[0] != '1' || optarg[1] != '.' 
+ || (value = read_dec_places(optarg+2, 3)) < 0 + || value < 40 + || value > 103) { + fprintf(stderr, "%s: invalid innovation gain" + " base -- %s\n", prog_name, optarg); + print_usage_error(); + return -1; + } + inngain_base = 1.0f + (float)value/1000.0f; + break; + default: + print_usage_error(); + return -1; + } + } + + if (argc - optind == 0) { + fprintf(stderr, "%s: missing infile\n", prog_name); + print_usage_error(); + return -1; + } else if (argc - optind == 1) { + fprintf(stderr, "%s: missing outfile\n", prog_name); + print_usage_error(); + return -1; + } else if (argc - optind >= 3) { + fprintf(stderr, "%s: too many arguments passed\n", prog_name); + print_usage_error(); + return -1; + } + + infile = argv[optind]; + outfile = argv[optind+1]; + + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret; + uint8_t wav_header[44]; + uint8_t utk_header[32]; + unsigned bytes_remaining; + int sampling_rate; + struct bit_writer_context bwc; + int i, j; + + ret = parse_arguments(argc, argv); + if (ret < 0) + return EXIT_FAILURE; + else if (ret > 0) + return EXIT_SUCCESS; + + if (!strcmp(infile, "-")) { + infp = stdin; + } else { + infp = fopen(infile, "rb"); + if (!infp) { + fprintf(stderr, "%s: failed to open '%s' for" + " reading: %s\n", prog_name, infile, + strerror(errno)); + return EXIT_FAILURE; + } + } + setvbuf(infp, NULL, _IOFBF, BUFSIZ); + + if (!strcmp(outfile, "-")) { + outfp = stdout; + } else { + if (!force && file_exists(outfile)) { + if (quiet) { + fprintf(stderr, "%s: failed to open '%s' for" + " writing: file already exists\n", + prog_name, outfile); + return EXIT_FAILURE; + } else { + fprintf(stderr, "%s: overwrite '%s'? 
", + prog_name, outfile); + if (getchar() != 'y') + return EXIT_FAILURE; + } + } + + outfp = fopen(outfile, "wb"); + if (!outfp) { + fprintf(stderr, "%s: failed to open '%s' for" + " writing: %s\n", prog_name, outfile, + strerror(errno)); + return EXIT_FAILURE; + } + } + setvbuf(outfp, NULL, _IOFBF, BUFSIZ); + + if (fread(wav_header, 1, 44, infp) != 44) { + if (ferror(infp)) + fprintf(stderr, "%s: failed to read '%s': %s\n", + prog_name, infile, strerror(errno)); + else + fprintf(stderr, "%s: '%s' is not a valid wav file\n", + prog_name, infile); /* (reached end of file) */ + return EXIT_FAILURE; + } + + if (memcmp(wav_header, "RIFF", 4) != 0 + || memcmp(wav_header+8, "WAVEfmt ", 8) != 0) { + fprintf(stderr, "%s: '%s' is not a valid wav file\n", + prog_name, infile); + return EXIT_FAILURE; + } + + if (READ16(wav_header+20) != 1 /* wFormatTag */ + || READ16(wav_header+22) != 1 /* nChannels */ + || READ16(wav_header+32) != 2 /* nBlockAlign */ + || READ16(wav_header+34) != 16 /* wBitsPerSample */) { + fprintf(stderr, "%s: wav file must be 1-channel 16-bit LPCM\n", + prog_name); + return EXIT_FAILURE; + } + + sampling_rate = READ32(wav_header+24); /* nSamplesPerSec */ + if (sampling_rate < 1000 || sampling_rate > 1000000) { + fprintf(stderr, "%s: unsupported sampling rate %d\n", + prog_name, sampling_rate); + return EXIT_FAILURE; + } + + memcpy(utk_header, "UTM0", 4); /* sID */ + + /* Drop the last byte from the wav file if there are an odd + ** number of sample bytes. 
*/ + bytes_remaining = READ32(wav_header+40) & (~1); + WRITE32(utk_header+4, bytes_remaining); /* dwOutSize */ + + WRITE32(utk_header+8, 20); /* dwWfxSize */ + memcpy(utk_header+12, wav_header+20, 16); /* WAVEFORMATEX */ + WRITE32(utk_header+28, 0); /* cbSize */ + + write_data(outfp, utk_header, 32); + + bwc_init(&bwc, compressed_buffer); + + bwc_write_bits(&bwc, halved_innovation, 1); + bwc_write_bits(&bwc, 32 - huffman_threshold, 4); + bwc_write_bits(&bwc, inngain_sig/8 - 1, 4); + bwc_write_bits(&bwc, ROUND((inngain_base - 1.04f)*1000.0f), 6); + bwc_flush(&bwc, outfp); + + for (i = 0; i < 12; i++) + input_samples[i] = 0.0f; + for (i = 0; i < 324; i++) + adaptive_codebook[i] = 0.0f; + for (i = 0; i < 12; i++) + prev_rc[i] = 0.0f; + for (i = 0; i < 5; i++) + innovation[i] = 0.0f; + for (i = 5+108; i < 5+108+5; i++) + innovation[i] = 0.0f; + + inn_gains[0] = inngain_sig; + for (i = 1; i < 64; i++) + inn_gains[i] = inn_gains[i-1]*inngain_base; + + while (bytes_remaining != 0) { + /* Encode the next frame of 432 samples. */ + int bytes_to_read; + int samples_to_read; + float rc[12]; + float rc_delta[12]; + int use_huffman = 0; + + bytes_to_read = (int)MIN(bytes_remaining, 432*2); + samples_to_read = bytes_to_read >> 1; + + read_data(infp, wav_buffer, bytes_to_read); + bytes_remaining -= bytes_to_read; + + for (i = 0; i < samples_to_read; i++) { + int16_t x = READ16(wav_buffer+2*i); + input_samples[12+i] = (float)x; + } + for (i = samples_to_read; i < 432; i++) + input_samples[12+i] = 0.0f; + + find_rc(rc, input_samples+12); + + /* Quantize the reflection coefficients. + ** In our encoder, we will not make use of utk_rc_table[0]. 
*/ + for (i = 0; i < 4; i++) { + int idx = 1+quantize(rc[i], utk_rc_table+1, 63); + bwc_write_bits(&bwc, idx, 6); + rc[i] = utk_rc_table[idx]; + if (i == 0 && idx < huffman_threshold) + use_huffman = 1; + } + for (i = 4; i < 12; i++) { + int idx = quantize(rc[i], utk_rc_table+16, 32); + bwc_write_bits(&bwc, idx, 5); + rc[i] = utk_rc_table[16+idx]; + } + + for (i = 0; i < 12; i++) + rc_delta[i] = (rc[i] - prev_rc[i])/4.0f; + + memcpy(rc, prev_rc, 12*sizeof(float)); + + for (i = 0; i < 4; i++) { + /* Linearly interpolate the reflection coefficients over + ** the four subframes and find the excitation signal. */ + float lpc[12]; + + for (j = 0; j < 12; j++) + rc[j] += rc_delta[j]; + + rc_to_lpc(lpc, rc); + + find_excitation(adaptive_codebook+324+12*i, + input_samples+12+12*i, + i < 3 ? 12 : 396, lpc); + } + + memcpy(input_samples, &input_samples[432], 12*sizeof(float)); + memcpy(prev_rc, rc, 12*sizeof(float)); + + for (i = 0; i < 4; i++) { + /* Encode the i'th subframe. */ + float *excitation = adaptive_codebook+324+108*i; + int pitch_lag; + float pitch_gain; + int idx; + int bits_used; + + find_pitch(&pitch_lag, &pitch_gain, excitation); + + bwc_write_bits(&bwc, pitch_lag - 108, 8); + + idx = ROUND(pitch_gain*15.0f); + bwc_write_bits(&bwc, idx, 4); + pitch_gain = (float)idx/15.0f; + + for (j = 0; j < 108; j++) + innovation[5+j] = excitation[j] + - pitch_gain*excitation[j-pitch_lag]; + + encode_innovation(&bwc, &innovation[5], + halved_innovation, use_huffman, &bits_used, + ROUND(bitrate * 432 / sampling_rate / 4) - 18); + + /* Update the adaptive codebook using the quantized + ** innovation signal. */ + for (j = 0; j < 108; j++) + excitation[j] = innovation[5+j] + + pitch_gain*excitation[j-pitch_lag]; + } + + /* Copy the last 3 subframes to the beginning of the + ** adaptive codebook. 
*/ + memcpy(adaptive_codebook, &adaptive_codebook[432], + 324*sizeof(float)); + + bwc_flush(&bwc, outfp); + } + + bwc_pad(&bwc); + bwc_flush(&bwc, outfp); + + flush_data(outfp); + + fclose(outfp); + fclose(infp); + + return EXIT_SUCCESS; +} \ No newline at end of file