mysimulation/library/tools/utkencode/utkencode.c

1130 lines
No EOL
28 KiB
C

/*
** utkencode
** Encode wav to Maxis UTalk.
** Authors: Fatbag
** License: Public domain (no warranties)
** Compile: gcc -Wall -Wextra -ansi -pedantic -O2 -ffast-math -g0 -s
** -o utkencode utkencode.c
*/
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
/*
** Small arithmetic helpers. These are function-like macros, so an argument
** may be evaluated more than once: never pass an expression with side
** effects.
*/
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))
#define CLAMP(x,min,max) ((x)<(min)?(min):(x)>(max)?(max):(x))
/* Round to the nearest int; halfway cases are rounded away from zero. */
#define ROUND(x) ((int)((x)>=0?((x)+0.5):((x)-0.5)))
#define ABS(x) ((x)>=0?(x):-(x))
/*
** Little-endian loads from a uint8_t buffer.
** READ32 widens every byte to uint32_t before shifting: shifting a byte
** >= 0x80 left by 24 as a (promoted) int overflows a 32-bit int, which is
** undefined behavior. The result type of READ32 is therefore uint32_t.
*/
#define READ16(x) ((x)[0]|((x)[1]<<8))
#define READ32(x) (((uint32_t)((x)[0]))|(((uint32_t)((x)[1]))<<8)|\
	(((uint32_t)((x)[2]))<<16)|(((uint32_t)((x)[3]))<<24))
/* Little-endian stores into a uint8_t buffer. */
#define WRITE16(d,s) (d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8)
#define WRITE32(d,s) (d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8),\
	(d)[2]=(uint8_t)((s)>>16),(d)[3]=(uint8_t)((s)>>24)
/*
** Quantization alphabet for the reflection coefficients.
** main() quantizes the first four coefficients of a frame against
** entries 1..63 (written as a 6-bit index; entry 0 is never selected by
** this encoder) and the remaining eight against entries 16..47 (written
** as a 5-bit index relative to entry 16).
** Entries 1..31 are negative, entry 32 is zero, and entries 33..63 mirror
** entries 31..1 with the opposite sign.
*/
const float utk_rc_table[64] = {
0,
-.99677598476409912109375, -.99032700061798095703125, -.983879029750823974609375, -.977430999279022216796875,
-.970982015132904052734375, -.964533984661102294921875, -.958085000514984130859375, -.9516370296478271484375,
-.930754005908966064453125, -.904959976673126220703125, -.879167020320892333984375, -.853372991085052490234375,
-.827579021453857421875, -.801786005496978759765625, -.775991976261138916015625, -.75019800662994384765625,
-.724404990673065185546875, -.6986110210418701171875, -.6706349849700927734375, -.61904799938201904296875,
-.567460000514984130859375, -.515873014926910400390625, -.4642859995365142822265625, -.4126980006694793701171875,
-.361110985279083251953125, -.309523999691009521484375, -.257937014102935791015625, -.20634900033473968505859375,
-.1547619998455047607421875, -.10317499935626983642578125, -.05158700048923492431640625,
0,
+.05158700048923492431640625, +.10317499935626983642578125, +.1547619998455047607421875, +.20634900033473968505859375,
+.257937014102935791015625, +.309523999691009521484375, +.361110985279083251953125, +.4126980006694793701171875,
+.4642859995365142822265625, +.515873014926910400390625, +.567460000514984130859375, +.61904799938201904296875,
+.6706349849700927734375, +.6986110210418701171875, +.724404990673065185546875, +.75019800662994384765625,
+.775991976261138916015625, +.801786005496978759765625, +.827579021453857421875, +.853372991085052490234375,
+.879167020320892333984375, +.904959976673126220703125, +.930754005908966064453125, +.9516370296478271484375,
+.958085000514984130859375, +.964533984661102294921875, +.970982015132904052734375, +.977430999279022216796875,
+.983879029750823974609375, +.99032700061798095703125, +.99677598476409912109375
};
/* Name used as the prefix of every diagnostic; set by parse_arguments()
** to argv[0], or to "utkencode" when argv[0] is unavailable or empty. */
static const char *prog_name;
/*
** Print the full usage text to standard output.
** The Huffman threshold is documented as 17..32: the stream header stores
** it as (32 - N) in a 4-bit field, so N = 16 cannot be represented.
*/
static void print_help(void)
{
	printf("Usage: %s [options] infile.wav outfile.utk\n", prog_name);
	printf("Encode wav to Maxis UTalk.\n");
	printf("\n");
	printf("General options:\n");
	printf(" -f, --force overwrite without prompting\n");
	printf(" -q, --quiet suppress normal output and do not prompt\n");
	printf(" -h, --help display this help and exit\n");
	printf(" -V, --version output version information and exit\n");
	printf("\n");
	printf("Encoding options:\n");
	printf(" -b, --bitrate=N target bitrate in bits/sec (default 32000)\n");
	printf(" -H, --halved-inn encode innovation using half bandwidth\n");
	printf(" (default)\n");
	printf(" -F, --full-inn encode innovation using full bandwidth\n");
	printf(" -T, --huff-threshold=N use the Huffman codebook with threshold N where\n");
	printf(" N is an integer between 17 and 32 (inclusive)\n");
	printf(" (default 24)\n");
	printf(" -S, --inngain-sig=N use innovation gain significand N where N is\n");
	printf(" between 8 and 128 (inclusive) in steps of 8\n");
	printf(" (default 64)\n");
	printf(" -B, --inngain-base=N use innovation gain base N where N is between\n");
	printf(" 1.040 and 1.103 (inclusive) in steps of 0.001\n");
	printf(" (default 1.068)\n");
	printf("\n");
	printf("If infile is \"-\", read from standard input.\n");
	printf("If outfile is \"-\", write to standard output.\n");
}
/* Print the program name and version number to standard output. */
static void print_version(void)
{
	fputs("utkencode 0.0\n", stdout);
}
/* Print the short usage synopsis and a pointer to --help on standard
** error. Called after an argument error has already been reported. */
static void print_usage_error(void)
{
	fprintf(stderr,
		"Usage: %s [options] infile.wav outfile.utk\n"
		"Try '%s --help' for more options.\n",
		prog_name, prog_name);
}
/*
** Option tables passed to getopt_long() by parse_arguments().
** Every long option maps to the short option letter in its last field, so
** both spellings are handled by the same switch case. A ':' after a letter
** in short_options marks an option that requires an argument.
*/
static const char short_options[] = "fqhVb:HFT:S:B:";
static const struct option long_options[] = {
{"force", no_argument, 0, 'f'},
{"quiet", no_argument, 0, 'q'},
{"help", no_argument, 0, 'h'},
{"version", no_argument, 0, 'V'},
{"bitrate", required_argument, 0, 'b'},
{"halved-inn", no_argument, 0, 'H'},
{"full-inn", no_argument, 0, 'F'},
{"huff-threshold", required_argument, 0, 'T'},
{"inngain-sig", required_argument, 0, 'S'},
{"inngain-base", required_argument, 0, 'B'},
/* all-zero entry terminates the table */
{0, 0, 0, 0}
};
/* Encoder settings; the initializers are the defaults, and
** parse_arguments() overwrites them from the command line. */
/* target bitrate in bits/sec; main() derives the per-subframe bit target */
static int bitrate = 32000;
/* nonzero: overwrite an existing outfile without asking */
static int force = 0;
/* nonzero: never prompt; an existing outfile is an error unless forced */
static int quiet = 0;
/* nonzero: code only every other innovation sample (see encode_innovation) */
static int halved_innovation = 1;
/* a frame uses the Huffman model when its first quantized reflection
** coefficient index is below this value; otherwise the triangular model */
static int huffman_threshold = 24;
/* inn_gains[0] is inngain_sig and each following entry is the previous one
** multiplied by inngain_base (see main) */
static int inngain_sig = 64;
static float inngain_base = 1.068f;
/* file names as given on the command line ("-" selects stdin/stdout) */
static const char *infile = "";
static const char *outfile = "";
static FILE *infp = NULL;
static FILE *outfp = NULL;
/* raw bytes of one frame: 432 samples of 2 bytes each */
static uint8_t wav_buffer[432*2];
/* 12 samples carried over from the previous frame, then the 432 samples
** of the current frame */
static float input_samples[12+432];
/* 324 excitation samples kept from the previous frame, then the 432
** excitation samples of the current frame */
static float adaptive_codebook[324+432];
/* backing store of the bit writer that produces the output stream */
static uint8_t compressed_buffer[1024];
/* two scratch buffers for the trial encodings made by encode_innovation() */
static uint8_t inn_buffers[2][256];
/* reflection coefficients reached at the end of the previous frame */
static float prev_rc[12];
/* one subframe of 108 samples with 5 samples of padding on each side;
** main() zeroes the padding once, and the interpolation and low-pass
** filters read up to 5 samples beyond either end of the subframe */
static float innovation[5+108+5];
/* table of the 64 selectable innovation gains */
static float inn_gains[64];
/*
** State of an LSB-first bit writer over a caller-supplied byte buffer.
** buffer[pos] is the byte currently being filled; written_bits_count is
** the number of bits (0..7) already occupied in that byte.
*/
struct bit_writer_context {
uint8_t written_bits_count;
size_t pos;
uint8_t *buffer;
};
/*
** Read exactly size bytes from fp into buffer.
** On a short read, report the failure (naming the global infile) on
** standard error and terminate the process with EXIT_FAILURE.
*/
static void read_data(FILE *fp, uint8_t *buffer, size_t size)
{
	const char *reason;

	if (fread(buffer, 1, size, fp) == size)
		return;

	reason = ferror(fp) ? strerror(errno) : "reached end of file";
	fprintf(stderr, "%s: failed to read '%s': %s\n",
		prog_name, infile, reason);
	exit(EXIT_FAILURE);
}
/*
** Write exactly size bytes from buffer to fp.
** On a short write, report the failure (naming the global outfile) on
** standard error and terminate the process with EXIT_FAILURE.
*/
static void write_data(FILE *fp, const uint8_t *buffer, size_t size)
{
	const char *reason;

	if (fwrite(buffer, 1, size, fp) == size)
		return;

	reason = ferror(fp) ? strerror(errno) : "reached end of file";
	fprintf(stderr, "%s: failed to write to '%s': %s\n",
		prog_name, outfile, reason);
	exit(EXIT_FAILURE);
}
/*
** Flush the buffered output of fp.
** On failure, report it (naming the global outfile) on standard error and
** terminate the process with EXIT_FAILURE.
*/
static void flush_data(FILE *fp)
{
	if (fflush(fp) == 0)
		return;

	fprintf(stderr, "%s: failed to flush '%s': %s\n",
		prog_name, outfile, strerror(errno));
	exit(EXIT_FAILURE);
}
/*
** Attach the bit writer to buffer and reset it to an empty state.
** The first byte of buffer is cleared because bits are OR-ed into the
** current byte by bwc_write_bits().
*/
static void bwc_init(struct bit_writer_context *bwc, uint8_t *buffer)
{
	buffer[0] = 0;
	bwc->buffer = buffer;
	bwc->pos = 0;
	bwc->written_bits_count = 0;
}
/*
** Append count bits of value to the stream, least significant bit first.
** value is not masked: the caller must make sure it fits in count bits,
** otherwise the excess bits spill into the bits written next.
*/
static void bwc_write_bits(struct bit_writer_context *bwc, unsigned value,
	uint8_t count)
{
	/* Align the new bits with the first free bit of the current byte. */
	unsigned shifted = value << bwc->written_bits_count;
	uint8_t pending = (uint8_t)(bwc->written_bits_count + count);

	bwc->buffer[bwc->pos] |= (uint8_t)shifted;

	/* Every completed byte starts a new one holding the next 8 bits. */
	for (; pending >= 8; pending -= 8) {
		shifted >>= 8;
		bwc->pos++;
		bwc->buffer[bwc->pos] = (uint8_t)shifted;
	}
	bwc->written_bits_count = pending;
}
/*
** Close a partially filled byte: if any bits are pending, step past the
** current byte and start a fresh, cleared one. Does nothing when the
** writer is already byte-aligned.
*/
static void bwc_pad(struct bit_writer_context *bwc)
{
	if (bwc->written_bits_count == 0)
		return;

	bwc->pos++;
	bwc->buffer[bwc->pos] = 0;
	bwc->written_bits_count = 0;
}
/*
** Write all completed bytes to fp (via write_data, which exits on error)
** and move the byte still being filled to the start of the buffer.
** written_bits_count is left untouched, so writing can simply continue.
*/
static void bwc_flush(struct bit_writer_context *bwc, FILE *fp)
{
	size_t complete_bytes = bwc->pos;

	write_data(fp, bwc->buffer, complete_bytes);
	bwc->buffer[0] = bwc->buffer[complete_bytes];
	bwc->pos = 0;
}
/*
** Return the index of the alphabet entry closest to value.
** On a tie the lowest index wins. alphabet must hold at least one entry.
*/
static unsigned quantize(float value, const float *alphabet, size_t alphabet_size)
{
	unsigned best = 0;
	float best_gap = ABS(value - alphabet[0]);
	unsigned candidate;

	for (candidate = 1; candidate < alphabet_size; candidate++) {
		float gap = ABS(value - alphabet[candidate]);

		if (!(gap < best_gap))
			continue;
		best_gap = gap;
		best = candidate;
	}
	return best;
}
/*
** Parse up to n decimal digits that follow a decimal point and return them
** scaled to exactly n places, e.g. with n=3: "068" -> 68, "04" -> 40,
** "1" -> 100, "" -> 0.
** Returns -1 if string holds a non-digit or more than n digits.
** Used by parse_arguments() for the fractional part of --inngain-base.
*/
static int read_dec_places(const char *string, int n)
{
	int weight = 1;
	int total = 0;
	int pos;

	/* Weight of the first digit: 10^(n-1). */
	for (pos = 1; pos < n; pos++)
		weight *= 10;

	for (pos = 0; pos < n; pos++) {
		char c = string[pos];

		if (c < '0' || c > '9')
			break;
		total += weight * (c - '0');
		weight /= 10;
	}
	/* Anything left over is either junk or a digit too many. */
	return (string[pos] != '\0') ? -1 : total;
}
/*
** Return 1 if filename can be opened for reading, 0 otherwise.
** A file that exists but is not readable is reported as absent.
*/
static int file_exists(const char *filename)
{
	FILE *probe = fopen(filename, "rb");

	if (probe == NULL)
		return 0;
	fclose(probe);
	return 1;
}
/*
** Compute the autocorrelation of a 432-sample frame for lags 0..12:
** r[lag] = sum over n of samples[n]*samples[n+lag], using only products
** whose two samples both lie inside the frame.
** r must have room for 13 values.
*/
static void find_autocorrelations(float *r, const float *samples)
{
	int lag;

	for (lag = 0; lag <= 12; lag++) {
		const float *shifted = samples + lag;
		int count = 432 - lag;
		float sum = 0;
		int n;

		for (n = 0; n < count; n++)
			sum += samples[n]*shifted[n];
		r[lag] = sum;
	}
}
/*
** Solve the 12x12 symmetric Toeplitz system whose first column is r[0..11]
** with right-hand side y[0..11], using Levinson-Durbin recursion.
** x receives the 12 solution values and k the 12 reflection coefficients
** produced along the way (k[11] is set to -x[11]).
** If r[0] or any intermediate prediction error falls within
** +/- 1/32768, both x and k are set to all zeros instead.
*/
static void levinson_durbin_symmetric(float *x, float *k,
	const float *r, const float *y)
{
	const float epsilon = 1.0f/32768.0f;
	float forward[12]; /* the forward vector */
	float err = r[0]; /* prediction error */
	int degenerate = (err <= epsilon && err >= -epsilon);
	int order;

	if (!degenerate) {
		forward[0] = 1;
		x[0] = y[0]/r[0];
	}

	for (order = 1; !degenerate && order < 12; order++) {
		float previous[12];
		float dot = 0.0f;
		float refl;
		float residual;
		int j;

		for (j = 0; j < order; j++)
			dot += forward[j]*r[order-j];

		/* reflection coefficient order-1 */
		refl = -dot/err;
		k[order-1] = refl;

		/* update err to the new value err - dot*dot/err */
		err += dot*refl;
		if (err <= epsilon && err >= -epsilon) {
			degenerate = 1;
			break;
		}

		/* Extend the forward vector by one coefficient. */
		memcpy(previous, forward, order*sizeof(float));
		forward[order] = 0.0f;
		for (j = 1; j <= order; j++)
			forward[j] += refl*previous[order-j];

		/* Extend the solution by one unknown. */
		residual = y[order];
		for (j = 0; j < order; j++)
			residual -= x[j]*r[order-j];
		residual /= err;

		x[order] = 0.0f;
		for (j = 0; j <= order; j++)
			x[j] += residual*forward[order-j];
	}

	if (degenerate) {
		for (order = 0; order < 12; order++)
			x[order] = 0.0f;
		for (order = 0; order < 12; order++)
			k[order] = 0.0f;
		return;
	}

	k[11] = -x[11];
}
/*
** Convert 12 reflection coefficients k into 12 linear prediction
** coefficients x by running the step-up recursion on the forward vector
** and negating its coefficients 1..12.
*/
static void rc_to_lpc(float *x, const float *k)
{
	float poly[13]; /* the forward vector */
	float snapshot[12];
	unsigned order, tap;

	poly[0] = 1;
	for (order = 1; order <= 12; order++) {
		float refl = k[order-1];

		/* The update reads the vector as it was before this step. */
		memcpy(snapshot, poly, order*sizeof(float));
		poly[order] = 0.0f;
		for (tap = 1; tap <= order; tap++)
			poly[tap] += refl*snapshot[order-tap];
	}

	for (tap = 0; tap < 12; tap++)
		x[tap] = -poly[tap+1];
}
/*
** Compute the 12 reflection coefficients of a 432-sample frame from its
** autocorrelation. The prediction coefficients that the solver produces
** alongside them are discarded.
*/
static void find_rc(float *rc, const float *samples)
{
	float autocorr[13];
	float discarded_lpc[12];

	find_autocorrelations(autocorr, samples);
	levinson_durbin_symmetric(discarded_lpc, rc, autocorr, &autocorr[1]);
}
/*
** Compute the prediction residual of source under a 12-tap linear
** predictor: excitation[n] = source[n] - sum(lpc[t]*source[n-1-t]).
** source must be preceded by at least 12 valid samples of history,
** because the predictor reads source[-12..-1] for the first outputs.
*/
static void find_excitation(float *excitation, const float *source,
	int length, const float *lpc)
{
	int n;

	for (n = 0; n < length; n++) {
		const float *past = source + n - 1;
		float predicted = 0.0f;
		int tap;

		for (tap = 0; tap < 12; tap++)
			predicted += lpc[tap]*past[-tap];
		excitation[n] = source[n] - predicted;
	}
}
/*
** Find the pitch lag and gain for a 108-sample subframe of excitation.
** The lag (108..323) is the one whose delayed copy of the signal has the
** largest positive correlation with the subframe; the gain is that
** correlation divided by the energy of the delayed segment, clamped to
** [0, 1]. If the delayed segment has (almost) no energy, the result is
** lag 108 with gain 0.
** excitation must be preceded by at least 323 samples of history.
*/
static void find_pitch(int *pitch_lag, float *pitch_gain,
	const float *excitation)
{
	int best_lag = 108;
	float best_corr = 0.0f;
	float energy = 0.0f;
	float ratio;
	const float *past;
	int lag, n;

	/* Find the optimal pitch lag. */
	for (lag = 108; lag < 324; lag++) {
		float corr = 0.0f;

		past = excitation - lag;
		for (n = 0; n < 108; n++)
			corr += excitation[n]*past[n];
		if (corr > best_corr) {
			best_lag = lag;
			best_corr = corr;
		}
	}

	/* Find the optimal pitch gain. */
	past = excitation - best_lag;
	for (n = 0; n < 108; n++) {
		float sample = past[n];
		energy += sample*sample;
	}

	if (!(energy >= 1/32768.0f)) {
		*pitch_lag = 108;
		*pitch_gain = 0.0f;
		return;
	}

	ratio = best_corr / energy;
	ratio = CLAMP(ratio, 0.0f, 1.0f);
	*pitch_lag = best_lag;
	*pitch_gain = ratio;
}
/*
** Rebuild the samples of a 108-sample subframe that were not coded.
** The coded samples are those at x[a], x[a+2], ...; this function fills
** the other parity (starting at index !a). With z set the missing samples
** become zero; otherwise each one is interpolated from its six nearest
** coded neighbours, which requires 5 valid samples before x[0] and after
** x[107].
*/
static void interpolate(float *x, int a, int z)
{
	int n = !a;

	if (z) {
		while (n < 108) {
			x[n] = 0.0f;
			n += 2;
		}
		return;
	}

	while (n < 108) {
		x[n]
			= (x[n-1]+x[n+1]) * .5973859429f
			- (x[n-3]+x[n+3]) * .1145915613f
			+ (x[n-5]+x[n+5]) * .0180326793f;
		n += 2;
	}
}
/*
** Return the squared error that interpolate(x, a, z) would introduce on
** the samples it replaces (indices !a, !a+2, ...), without modifying x.
** With z set this is the energy of those samples; otherwise it is the
** squared difference between each sample and its interpolated value.
** x needs 5 valid samples on either side of the 108-sample subframe.
*/
static float interpolation_error(int a, int z, const float *x)
{
	float total = 0.0f;
	int n;

	for (n = !a; n < 108; n += 2) {
		if (z) {
			total += x[n]*x[n];
		} else {
			float estimate
				= (x[n-1]+x[n+1]) * .5973859429f
				- (x[n-3]+x[n+3]) * .1145915613f
				+ (x[n-5]+x[n+5]) * .0180326793f;
			total += (estimate - x[n])*(estimate - x[n]);
		}
	}
	return total;
}
/*
** Choose the a and z flags that introduce the least error in the
** downsampling step. In case of a tie (e.g. in silence), prefer using the
** zero flag. Thus the candidates are tested in the order
** (a=0,z=1), (a=1,z=1), (a=0,z=0), (a=1,z=0), and a later candidate only
** replaces the current best when its error is strictly smaller.
*/
static void find_a_z_flags(int *a, int *z, const float *innovation)
{
	static const int candidates[4][2] = {
		{0, 1}, {1, 1}, {0, 0}, {1, 0}
	};
	float lowest = interpolation_error(candidates[0][0],
		candidates[0][1], innovation);
	int winner = 0;
	int c;

	for (c = 1; c < 4; c++) {
		float err = interpolation_error(candidates[c][0],
			candidates[c][1], innovation);

		if (err < lowest) {
			lowest = err;
			winner = c;
		}
	}

	*a = candidates[winner][0];
	*z = candidates[winner][1];
}
/* One variable-length code word: bits_value holds the bit pattern and
** bits_count its length in bits, in the form passed to bwc_write_bits(). */
struct huffman_code {
uint16_t bits_value;
uint16_t bits_count;
};
/*
** Code words for quantized innovation values -13..+13, indexed as
** huffman_models[model][13+value].
** encode_huffman() starts in model 0, switches to model 1 after emitting a
** value below -1 or above +1, and goes back to model 0 after any other
** value or after a run of zeros.
*/
static const struct huffman_code huffman_models[2][13+1+13] = {
/* model 0 */
{
/* -13 */ {16255, 16},
/* -12 */ {8063, 15},
/* -11 */ {3967, 14},
/* -10 */ {1919, 13},
/* -9 */ {895, 12},
/* -8 */ {383, 11},
/* -7 */ {127, 10},
/* -6 */ {63, 8},
/* -5 */ {31, 7},
/* -4 */ {15, 6},
/* -3 */ {7, 5},
/* -2 */ {3, 4},
/* -1 */ {2, 2},
/* 0 */ {0, 2},
/* +1 */ {1, 2},
/* +2 */ {11, 4},
/* +3 */ {23, 5},
/* +4 */ {47, 6},
/* +5 */ {95, 7},
/* +6 */ {191, 8},
/* +7 */ {639, 10},
/* +8 */ {1407, 11},
/* +9 */ {2943, 12},
/* +10 */ {6015, 13},
/* +11 */ {12159, 14},
/* +12 */ {24447, 15},
/* +13 */ {49023, 16}
},
/* model 1 */
{
/* -13 */ {8127, 15},
/* -12 */ {4031, 14},
/* -11 */ {1983, 13},
/* -10 */ {959, 12},
/* -9 */ {447, 11},
/* -8 */ {191, 10},
/* -7 */ {63, 9},
/* -6 */ {31, 7},
/* -5 */ {15, 6},
/* -4 */ {7, 5},
/* -3 */ {3, 4},
/* -2 */ {1, 3},
/* -1 */ {2, 3},
/* 0 */ {0, 2},
/* +1 */ {6, 3},
/* +2 */ {5, 3},
/* +3 */ {11, 4},
/* +4 */ {23, 5},
/* +5 */ {47, 6},
/* +6 */ {95, 7},
/* +7 */ {319, 9},
/* +8 */ {703, 10},
/* +9 */ {1471, 11},
/* +10 */ {3007, 12},
/* +11 */ {6079, 13},
/* +12 */ {12223, 14},
/* +13 */ {24511, 15}
}
};
/*
** Quantize one 108-sample innovation subframe with gain inn_gains[pow]
** and write it to bwc using the Huffman models and zero-run codes.
**
** bwc               bit writer receiving the subframe header and codes
** innovation_out    receives the dequantized value of every coded sample
**                   (indices a, a+interval, ...); other entries are left
**                   untouched
** bits_used_out     receives the number of bits written by this call
** error_out         receives the summed squared quantization error
** innovation_in     the 108 samples to encode
** halved_innovation nonzero: code every other sample and write an 8-bit
**                   header (pow, a, z); zero: code every sample and write
**                   a 6-bit header (pow)
** pow               index into inn_gains (must fit in 6 bits)
** a                 index of the first coded sample (0 or 1)
** z                 when zero, the gain is halved
*/
static void encode_huffman(struct bit_writer_context *bwc,
	float *innovation_out, int *bits_used_out, float *error_out,
	const float *innovation_in, int halved_innovation,
	int pow, int a, int z)
{
	int interval = halved_innovation ? 2 : 1;
	float inn_gain;
	float total_error = 0.0f;
	int counter;
	int values[108];
	int zero_counts[108];
	int model;
	int bits_start, bits_end;
	int i;

	inn_gain = inn_gains[pow];
	if (!z)
		inn_gain *= 0.5f;

	bits_start = 8*bwc->pos + bwc->written_bits_count;

	if (halved_innovation)
		bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8);
	else
		bwc_write_bits(bwc, pow, 6);

	/* Quantize to integer levels in -13..13 and measure the error. */
	for (i = a; i < 108; i += interval) {
		float e;
		values[i] = ROUND(CLAMP(
			innovation_in[i]/inn_gain, -13.0f, 13.0f));
		innovation_out[i] = inn_gain*values[i];
		e = innovation_out[i] - innovation_in[i];
		total_error += e*e;
	}
	*error_out = total_error;

	/* Start with "no zero run" everywhere. The backward scan below does
	** not visit every coded position: when interval=2 and a=1 it begins
	** at i=105, yet the emit loop further down always reaches i=107.
	** Without this initialization that read would be of an
	** uninitialized value. A count of 0 makes the sample at 107 be
	** written as an ordinary code word, never as part of a run. */
	for (i = 0; i < 108; i++)
		zero_counts[i] = 0;

	/* Find the zero runs at each position i (how many zeros
	** in a row there are at position i).
	** When interval=2 and a=1, start the search from i=105 instead
	** of 107 in order to duplicate the off-by-one mistake in the
	** decoder. (Thus, we will subtract a instead of adding.)
	** For details, see: http://wiki.niotso.org/UTK */
	counter = 0;
	for (i = 108 - interval - a; i >= 0; i -= interval) {
		if (values[i] == 0)
			counter++;
		else
			counter = 0;
		zero_counts[i] = counter;
	}

	i = a;
	model = 0;
	while (i < 108) {
		if (zero_counts[i] >= 7) {
			/* A run code covers 7..70 zeros (6-bit length field)
			** behind an escape prefix that depends on the model. */
			int length = MIN(zero_counts[i], 70);
			if (model == 0)
				bwc_write_bits(bwc, 255 | ((length-7)<<8), 14);
			else
				bwc_write_bits(bwc, 127 | ((length-7)<<7), 13);
			model = 0;
			i += length * interval;
		} else {
			int value = values[i];
			bwc_write_bits(bwc,
				huffman_models[model][13+value].bits_value,
				huffman_models[model][13+value].bits_count);
			/* Values beyond +/-1 select model 1 for the next one. */
			model = (value < -1 || value > 1);
			i += interval;
		}
	}

	bits_end = 8*bwc->pos + bwc->written_bits_count;
	*bits_used_out = bits_end - bits_start;
}
static void encode_triangular(struct bit_writer_context *bwc,
float *innovation_out, int *bits_used_out, float *error_out,
const float *innovation_in, int halved_innovation,
int pow, int a, int z)
{
int interval = halved_innovation ? 2 : 1;
float inn_gain;
float total_error = 0.0f;
int bits_start, bits_end;
int i;
inn_gain = 2.0f*inn_gains[pow];
if (!z)
inn_gain *= 0.5f;
bits_start = 8*bwc->pos + bwc->written_bits_count;
if (halved_innovation)
bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8);
else
bwc_write_bits(bwc, pow, 6);
for (i = a; i < 108; i += interval) {
float e;
int value = ROUND(CLAMP(
innovation_in[i]/inn_gain, -1.0f, 1.0f));
if (value > 0)
bwc_write_bits(bwc, 3, 2);
else if (value < 0)
bwc_write_bits(bwc, 1, 2);
else
bwc_write_bits(bwc, 0, 1);
innovation_out[i] = inn_gain*value;
e = innovation_out[i] - innovation_in[i];
total_error += e*e;
}
bits_end = 8*bwc->pos + bwc->written_bits_count;
*bits_used_out = bits_end - bits_start;
*error_out = total_error;
}
/*
** Apply a weak low-pass filter to the innovation signal in preparation
** for downsampling it by 1/2. Only the samples that survive the
** downsampling, x[a], x[a+2], ..., are filtered; every other sample is
** discarded later and is therefore left as it is. The result is scaled by
** 1 when z is set and by 0.5 otherwise.
** x needs 5 valid samples on either side of the 108-sample subframe.
*/
static void low_pass_innovation(float *x, int a, int z)
{
	/* filter coeffs: (GNU Octave)
	** n = 10; b = sinc((-n/4):.5:(n/4)).*hamming(n+9)(5:(n+5))' */
	float scale = z ? 1.0f : 0.5f;
	int n = a;

	while (n < 108) {
		x[n] = scale*(x[n]
			+ (x[n-1]+x[n+1]) * 0.6189590521549956f
			+ (x[n-3]+x[n+3]) * -0.1633990749076792f
			+ (x[n-5]+x[n+5]) * 0.05858453198856907f);
		n += 2;
	}
}
/*
** Result of one trial encoding made by encode_innovation(): the bit
** writer holding the encoded bits, the dequantized innovation, the number
** of bits the encoding used, and its summed squared quantization error.
*/
struct innovation_encoding {
struct bit_writer_context bwc;
float innovation[108];
int bits_used;
float error;
};
/*
** Encode one 108-sample innovation subframe and append it to bwc.
**
** Every candidate gain index is encoded into one of two scratch buffers;
** the best candidate is kept in one buffer while the next trial goes into
** the other. With use_huffman set, "best" means the bit count closest to
** target_bit_count among the gains that do not clip; otherwise (triangular
** model) it means the smallest squared error.
**
** innovation is both input and output: on return it holds the quantized
** signal (with the uncoded samples reconstructed when halved_innovation
** is set). It needs 5 valid samples on either side of the subframe.
** *bits_used receives the number of bits the chosen encoding occupies.
*/
static void encode_innovation(struct bit_writer_context *bwc,
float *innovation, int halved_innovation, int use_huffman,
int *bits_used, int target_bit_count)
{
int a = 0, z = 1;
struct innovation_encoding encodings[2];
/* index of the scratch slot that the next trial encoding will use */
int m = 0;
if (halved_innovation) {
find_a_z_flags(&a, &z, innovation);
low_pass_innovation(innovation, a, z);
}
if (use_huffman) {
/* Encode using the Huffman model. */
int interval = halved_innovation ? 2 : 1;
float max_value = 0.0f;
int min_pow;
int best_distance = 0;
int pow;
int i;
/* Find the minimum innovation power such that the innovation
** signal doesn't clip anywhere in time. (We consider clipping
** a sample by <=0.5 of a quantization level to be okay since
** the sample already rounds down [towards zero].) */
for (i = a; i < 108; i += interval) {
float value = ABS(innovation[i]);
if (value > max_value)
max_value = value;
}
/* Search downwards for the largest gain index that clips; if none
** clips, i ends at -1 and min_pow becomes 0. */
for (i = 62; i >= 0; i--) {
if (inn_gains[i]*(!z ? 0.5f : 1.0f)*13.5f
< max_value)
break;
}
min_pow = i+1;
/* Find the innovation gain that results in the closest
** to the target bitrate without clipping occurring. */
for (pow = min_pow; pow <= 63; pow++) {
int distance;
/* Seed the scratch writer with the bits already pending in the
** current byte of the real stream, so that the trial output is
** bit-aligned with it. */
bwc_init(&encodings[m].bwc, inn_buffers[m]);
bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos],
bwc->written_bits_count);
encode_huffman(&encodings[m].bwc,
encodings[m].innovation,
&encodings[m].bits_used,
&encodings[m].error,
innovation, halved_innovation,
pow, a, z);
distance = ABS(encodings[m].bits_used
- target_bit_count);
/* The first trial is always kept; a later one only when it is
** strictly closer to the target. */
if (pow == min_pow || distance < best_distance) {
best_distance = distance;
m = !m; /* swap the buffers */
}
}
} else {
/* Encode using the triangular noise model. */
float best_error = 0.0f;
int pow;
/* Find the innovation gain that results in
** the highest quality. */
for (pow = 0; pow <= 63; pow++) {
/* Seed the scratch writer exactly as in the Huffman branch. */
bwc_init(&encodings[m].bwc, inn_buffers[m]);
bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos],
bwc->written_bits_count);
encode_triangular(&encodings[m].bwc,
encodings[m].innovation,
&encodings[m].bits_used,
&encodings[m].error,
innovation, halved_innovation,
pow, a, z);
if (pow == 0 || encodings[m].error < best_error) {
best_error = encodings[m].error;
m = !m; /* swap the buffers */
}
}
}
/* Swap the buffers again to return back to our best encoding. */
m = !m;
/* Write this encoding out to the UTK bitstream. */
/* The copy starts at the current (partially filled) byte of the real
** stream, which the scratch buffer reproduces in its first byte, and
** includes the scratch writer's own partially filled last byte. */
memcpy(&bwc->buffer[bwc->pos], encodings[m].bwc.buffer,
encodings[m].bwc.pos+1);
bwc->pos += encodings[m].bwc.pos;
bwc->written_bits_count = encodings[m].bwc.written_bits_count;
/* Update the innovation signal with the quantized version. */
/* Only the coded samples of encodings[m].innovation were written by the
** encoder; with halved_innovation the remaining ones are overwritten by
** interpolate() right below. */
memcpy(innovation, encodings[m].innovation, 108*sizeof(float));
if (halved_innovation)
interpolate(innovation, a, z);
*bits_used = encodings[m].bits_used;
}
/*
** Parse the command line into the file-scope option variables, prog_name,
** infile and outfile.
**
** Returns 0 when encoding should proceed, 1 when --help or --version was
** handled (the caller should exit successfully), and -1 on a usage error
** (a diagnostic has already been printed on standard error).
*/
static int parse_arguments(int argc, char *argv[])
{
	int c;
	int value;
	long number;
	char *endptr;

	prog_name = (argc >= 1 && argv[0] != NULL && argv[0][0] != '\0')
		? argv[0] : "utkencode";

	while ((c = getopt_long(argc, argv, short_options,
		long_options, NULL)) != -1) {
		switch (c) {
		case 'b':
			/* Check the range on the long value before narrowing
			** it, so that a huge number cannot wrap around into
			** the accepted range. */
			number = strtol(optarg, &endptr, 10);
			if (endptr == optarg || *endptr != '\0'
				|| number < 1000
				|| number > 1000000) {
				fprintf(stderr, "%s: invalid bitrate -- %s\n",
					prog_name, optarg);
				print_usage_error();
				return -1;
			}
			bitrate = (int)number;
			break;
		case 'f':
			force = 1;
			break;
		case 'q':
			quiet = 1;
			break;
		case 'h':
			print_help();
			return 1;
		case 'V':
			print_version();
			return 1;
		case 'H':
			halved_innovation = 1;
			break;
		case 'F':
			halved_innovation = 0;
			break;
		case 'T':
			/* The stream header stores 32 - threshold in 4 bits,
			** so the smallest representable threshold is 17; a
			** value of 16 would overflow into the next field. */
			number = strtol(optarg, &endptr, 10);
			if (endptr == optarg || *endptr != '\0'
				|| number < 17
				|| number > 32) {
				fprintf(stderr, "%s: invalid Huffman "
					"threshold -- %s\n", prog_name, optarg);
				print_usage_error();
				return -1;
			}
			huffman_threshold = (int)number;
			break;
		case 'S':
			number = strtol(optarg, &endptr, 10);
			if (endptr == optarg || *endptr != '\0'
				|| number < 8
				|| number > 128
				|| (number & 7) != 0) {
				fprintf(stderr, "%s: invalid innovation gain"
					" significand -- %s\n", prog_name,
					optarg);
				print_usage_error();
				return -1;
			}
			inngain_sig = (int)number;
			break;
		case 'B':
			/* Accept "1." followed by at most three decimals;
			** value is the fraction in thousandths. */
			if (optarg[0] != '1' || optarg[1] != '.'
				|| (value = read_dec_places(optarg+2, 3)) < 0
				|| value < 40
				|| value > 103) {
				fprintf(stderr, "%s: invalid innovation gain"
					" base -- %s\n", prog_name, optarg);
				print_usage_error();
				return -1;
			}
			inngain_base = 1.0f + (float)value/1000.0f;
			break;
		default:
			print_usage_error();
			return -1;
		}
	}

	/* Exactly two positional arguments are required. */
	if (argc - optind == 0) {
		fprintf(stderr, "%s: missing infile\n", prog_name);
		print_usage_error();
		return -1;
	} else if (argc - optind == 1) {
		fprintf(stderr, "%s: missing outfile\n", prog_name);
		print_usage_error();
		return -1;
	} else if (argc - optind >= 3) {
		fprintf(stderr, "%s: too many arguments passed\n", prog_name);
		print_usage_error();
		return -1;
	}

	infile = argv[optind];
	outfile = argv[optind+1];
	return 0;
}
/*
** Program entry point: encode a 1-channel 16-bit LPCM wav file to UTalk.
**
** Steps: parse the arguments, open both files, validate the 44-byte wav
** header, write the 32-byte UTK header and the 15-bit stream parameter
** header, then encode the audio in frames of 432 samples (four subframes
** of 108 samples each) until the declared amount of sample data has been
** consumed.
**
** Returns EXIT_SUCCESS or EXIT_FAILURE. I/O errors inside the encoding
** loop terminate the process from within read_data()/write_data().
**
** NOTE(review): the wav header is assumed to be exactly 44 bytes with the
** data size at offset 40; the chunk tag at offset 36 is not checked, so
** files with additional chunks before the sample data are not detected.
*/
int main(int argc, char *argv[])
{
int ret;
uint8_t wav_header[44];
uint8_t utk_header[32];
unsigned bytes_remaining;
int sampling_rate;
struct bit_writer_context bwc;
int i, j;
ret = parse_arguments(argc, argv);
if (ret < 0)
return EXIT_FAILURE;
else if (ret > 0)
return EXIT_SUCCESS;
/* Open the input; "-" selects standard input. */
if (!strcmp(infile, "-")) {
infp = stdin;
} else {
infp = fopen(infile, "rb");
if (!infp) {
fprintf(stderr, "%s: failed to open '%s' for"
" reading: %s\n", prog_name, infile,
strerror(errno));
return EXIT_FAILURE;
}
}
setvbuf(infp, NULL, _IOFBF, BUFSIZ);
/* Open the output; "-" selects standard output. An existing file is
** only replaced with --force or after the user answers 'y'. */
if (!strcmp(outfile, "-")) {
outfp = stdout;
} else {
if (!force && file_exists(outfile)) {
if (quiet) {
fprintf(stderr, "%s: failed to open '%s' for"
" writing: file already exists\n",
prog_name, outfile);
return EXIT_FAILURE;
} else {
fprintf(stderr, "%s: overwrite '%s'? ",
prog_name, outfile);
if (getchar() != 'y')
return EXIT_FAILURE;
}
}
outfp = fopen(outfile, "wb");
if (!outfp) {
fprintf(stderr, "%s: failed to open '%s' for"
" writing: %s\n", prog_name, outfile,
strerror(errno));
return EXIT_FAILURE;
}
}
setvbuf(outfp, NULL, _IOFBF, BUFSIZ);
/* Read and validate the wav header. */
if (fread(wav_header, 1, 44, infp) != 44) {
if (ferror(infp))
fprintf(stderr, "%s: failed to read '%s': %s\n",
prog_name, infile, strerror(errno));
else
fprintf(stderr, "%s: '%s' is not a valid wav file\n",
prog_name, infile); /* (reached end of file) */
return EXIT_FAILURE;
}
if (memcmp(wav_header, "RIFF", 4) != 0
|| memcmp(wav_header+8, "WAVEfmt ", 8) != 0) {
fprintf(stderr, "%s: '%s' is not a valid wav file\n",
prog_name, infile);
return EXIT_FAILURE;
}
if (READ16(wav_header+20) != 1 /* wFormatTag */
|| READ16(wav_header+22) != 1 /* nChannels */
|| READ16(wav_header+32) != 2 /* nBlockAlign */
|| READ16(wav_header+34) != 16 /* wBitsPerSample */) {
fprintf(stderr, "%s: wav file must be 1-channel 16-bit LPCM\n",
prog_name);
return EXIT_FAILURE;
}
sampling_rate = READ32(wav_header+24); /* nSamplesPerSec */
if (sampling_rate < 1000 || sampling_rate > 1000000) {
fprintf(stderr, "%s: unsupported sampling rate %d\n",
prog_name, sampling_rate);
return EXIT_FAILURE;
}
/* Build and write the 32-byte UTK header. */
memcpy(utk_header, "UTM0", 4); /* sID */
/* Drop the last byte from the wav file if there are an odd
** number of sample bytes. */
bytes_remaining = READ32(wav_header+40) & (~1);
WRITE32(utk_header+4, bytes_remaining); /* dwOutSize */
WRITE32(utk_header+8, 20); /* dwWfxSize */
memcpy(utk_header+12, wav_header+20, 16); /* WAVEFORMATEX */
WRITE32(utk_header+28, 0); /* cbSize */
write_data(outfp, utk_header, 32);
/* Stream parameter header, 15 bits in total: the halved-innovation flag
** (1 bit), 32 minus the Huffman threshold (4 bits), the gain significand
** as inngain_sig/8 - 1 (4 bits), and the gain base as thousandths above
** 1.04 (6 bits). */
bwc_init(&bwc, compressed_buffer);
bwc_write_bits(&bwc, halved_innovation, 1);
bwc_write_bits(&bwc, 32 - huffman_threshold, 4);
bwc_write_bits(&bwc, inngain_sig/8 - 1, 4);
bwc_write_bits(&bwc, ROUND((inngain_base - 1.04f)*1000.0f), 6);
bwc_flush(&bwc, outfp);
/* Clear all state that is carried from one frame to the next, and the
** padding on both sides of the innovation buffer. */
for (i = 0; i < 12; i++)
input_samples[i] = 0.0f;
for (i = 0; i < 324; i++)
adaptive_codebook[i] = 0.0f;
for (i = 0; i < 12; i++)
prev_rc[i] = 0.0f;
for (i = 0; i < 5; i++)
innovation[i] = 0.0f;
for (i = 5+108; i < 5+108+5; i++)
innovation[i] = 0.0f;
/* Build the gain table: a geometric series starting at inngain_sig with
** ratio inngain_base. */
inn_gains[0] = inngain_sig;
for (i = 1; i < 64; i++)
inn_gains[i] = inn_gains[i-1]*inngain_base;
while (bytes_remaining != 0) {
/* Encode the next frame of 432 samples. */
int bytes_to_read;
int samples_to_read;
float rc[12];
float rc_delta[12];
int use_huffman = 0;
bytes_to_read = (int)MIN(bytes_remaining, 432*2);
samples_to_read = bytes_to_read >> 1;
read_data(infp, wav_buffer, bytes_to_read);
bytes_remaining -= bytes_to_read;
/* Convert the little-endian 16-bit samples to float; a short final
** frame is padded with silence. */
for (i = 0; i < samples_to_read; i++) {
int16_t x = READ16(wav_buffer+2*i);
input_samples[12+i] = (float)x;
}
for (i = samples_to_read; i < 432; i++)
input_samples[12+i] = 0.0f;
find_rc(rc, input_samples+12);
/* Quantize the reflection coefficients.
** In our encoder, we will not make use of utk_rc_table[0]. */
for (i = 0; i < 4; i++) {
int idx = 1+quantize(rc[i], utk_rc_table+1, 63);
bwc_write_bits(&bwc, idx, 6);
rc[i] = utk_rc_table[idx];
/* The index of the first coefficient selects the innovation model
** for the whole frame. */
if (i == 0 && idx < huffman_threshold)
use_huffman = 1;
}
for (i = 4; i < 12; i++) {
int idx = quantize(rc[i], utk_rc_table+16, 32);
bwc_write_bits(&bwc, idx, 5);
rc[i] = utk_rc_table[16+idx];
}
/* Step from the previous frame's coefficients towards the new ones in
** four equal increments; rc is restarted from prev_rc for that. */
for (i = 0; i < 12; i++)
rc_delta[i] = (rc[i] - prev_rc[i])/4.0f;
memcpy(rc, prev_rc, 12*sizeof(float));
for (i = 0; i < 4; i++) {
/* Linearly interpolate the reflection coefficients over
** the four subframes and find the excitation signal. */
float lpc[12];
for (j = 0; j < 12; j++)
rc[j] += rc_delta[j];
rc_to_lpc(lpc, rc);
/* The first three coefficient sets each cover 12 samples; the last
** one covers the remaining 396 samples of the frame. */
find_excitation(adaptive_codebook+324+12*i,
input_samples+12+12*i,
i < 3 ? 12 : 396, lpc);
}
/* Keep the last 12 input samples and the final coefficients as history
** for the next frame. */
memcpy(input_samples, &input_samples[432], 12*sizeof(float));
memcpy(prev_rc, rc, 12*sizeof(float));
for (i = 0; i < 4; i++) {
/* Encode the i'th subframe. */
float *excitation = adaptive_codebook+324+108*i;
int pitch_lag;
float pitch_gain;
int idx;
int bits_used;
find_pitch(&pitch_lag, &pitch_gain, excitation);
bwc_write_bits(&bwc, pitch_lag - 108, 8);
/* Quantize the pitch gain to 4 bits and continue with the quantized
** value. */
idx = ROUND(pitch_gain*15.0f);
bwc_write_bits(&bwc, idx, 4);
pitch_gain = (float)idx/15.0f;
/* The innovation is what remains of the excitation after removing
** the pitch prediction. */
for (j = 0; j < 108; j++)
innovation[5+j] = excitation[j]
- pitch_gain*excitation[j-pitch_lag];
/* NOTE(review): the constant 18 subtracted from the per-subframe bit
** budget is presumably an allowance for the fixed per-subframe
** fields; confirm. */
encode_innovation(&bwc, &innovation[5],
halved_innovation, use_huffman, &bits_used,
ROUND(bitrate * 432 / sampling_rate / 4) - 18);
/* Update the adaptive codebook using the quantized
** innovation signal. */
for (j = 0; j < 108; j++)
excitation[j] = innovation[5+j]
+ pitch_gain*excitation[j-pitch_lag];
}
/* Copy the last 3 subframes to the beginning of the
** adaptive codebook. */
memcpy(adaptive_codebook, &adaptive_codebook[432],
324*sizeof(float));
bwc_flush(&bwc, outfp);
}
/* Emit the final partially filled byte, if any, and finish the output. */
bwc_pad(&bwc);
bwc_flush(&bwc, outfp);
flush_data(outfp);
fclose(outfp);
fclose(infp);
return EXIT_SUCCESS;
}