/* ** utkencode ** Encode wav to Maxis UTalk. ** Authors: Fatbag ** License: Public domain (no warranties) ** Compile: gcc -Wall -Wextra -ansi -pedantic -O2 -ffast-math -g0 -s ** -o utkencode utkencode.c */ #include #include #include #include #include #include #define MIN(x,y) ((x)<(y)?(x):(y)) #define MAX(x,y) ((x)>(y)?(x):(y)) #define CLAMP(x,min,max) ((x)<(min)?(min):(x)>(max)?(max):(x)) #define ROUND(x) ((int)((x)>=0?((x)+0.5):((x)-0.5))) #define ABS(x) ((x)>=0?(x):-(x)) #define READ16(x) ((x)[0]|((x)[1]<<8)) #define READ32(x) ((x)[0]|((x)[1]<<8)|((x)[2]<<16)|((x)[3]<<24)) #define WRITE16(d,s) (d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8) #define WRITE32(d,s) (d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8),\ (d)[2]=(uint8_t)((s)>>16),(d)[3]=(uint8_t)((s)>>24) const float utk_rc_table[64] = { 0, -.99677598476409912109375, -.99032700061798095703125, -.983879029750823974609375, -.977430999279022216796875, -.970982015132904052734375, -.964533984661102294921875, -.958085000514984130859375, -.9516370296478271484375, -.930754005908966064453125, -.904959976673126220703125, -.879167020320892333984375, -.853372991085052490234375, -.827579021453857421875, -.801786005496978759765625, -.775991976261138916015625, -.75019800662994384765625, -.724404990673065185546875, -.6986110210418701171875, -.6706349849700927734375, -.61904799938201904296875, -.567460000514984130859375, -.515873014926910400390625, -.4642859995365142822265625, -.4126980006694793701171875, -.361110985279083251953125, -.309523999691009521484375, -.257937014102935791015625, -.20634900033473968505859375, -.1547619998455047607421875, -.10317499935626983642578125, -.05158700048923492431640625, 0, +.05158700048923492431640625, +.10317499935626983642578125, +.1547619998455047607421875, +.20634900033473968505859375, +.257937014102935791015625, +.309523999691009521484375, +.361110985279083251953125, +.4126980006694793701171875, +.4642859995365142822265625, +.515873014926910400390625, +.567460000514984130859375, +.61904799938201904296875, +.6706349849700927734375, +.6986110210418701171875, +.724404990673065185546875, +.75019800662994384765625, +.775991976261138916015625, +.801786005496978759765625, +.827579021453857421875, +.853372991085052490234375, +.879167020320892333984375, +.904959976673126220703125, +.930754005908966064453125, +.9516370296478271484375, +.958085000514984130859375, +.964533984661102294921875, +.970982015132904052734375, +.977430999279022216796875, +.983879029750823974609375, +.99032700061798095703125, +.99677598476409912109375 }; static const char *prog_name; static void print_help(void) { printf("Usage: %s [options] infile.wav outfile.utk\n", prog_name); printf("Encode wav to Maxis UTalk.\n"); printf("\n"); printf("General options:\n"); printf(" -f, --force overwrite without prompting\n"); printf(" -q, --quiet suppress normal output and do not prompt\n"); printf(" -h, --help display this help and exit\n"); printf(" -V, --version output version information and exit\n"); printf("\n"); printf("Encoding options:\n"); printf(" -b, --bitrate=N target bitrate in bits/sec (default 32000)\n"); printf(" -H, --halved-inn encode innovation using half bandwidth\n"); printf(" (default)\n"); printf(" -F, --full-inn encode innovation using full bandwidth\n"); printf(" -T, --huff-threshold=N use the Huffman codebook with threshold N where\n"); printf(" N is an integer between 16 and 32 (inclusive)\n"); printf(" (default 24)\n"); printf(" -S, --inngain-sig=N use innovation gain significand N where N is\n"); printf(" between 8 and 128 (inclusive) in steps of 8\n"); printf(" (default 64)\n"); printf(" -B, --inngain-base=N use innovation gain base N where N is between\n"); printf(" 1.040 and 1.103 (inclusive) in steps of 0.001\n"); printf(" (default 1.068)\n"); printf("\n"); printf("If infile is \"-\", read from standard input.\n"); printf("If outfile is \"-\", write to standard output.\n"); } static void print_version(void) { printf("utkencode 0.0\n"); } static void print_usage_error(void) { fprintf(stderr, "Usage: %s [options] infile.wav outfile.utk\n", prog_name); fprintf(stderr, "Try '%s --help' for more options.\n", prog_name); } static const char short_options[] = "fqhVb:HFT:S:B:"; static const struct option long_options[] = { {"force", no_argument, 0, 'f'}, {"quiet", no_argument, 0, 'q'}, {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'V'}, {"bitrate", required_argument, 0, 'b'}, {"halved-inn", no_argument, 0, 'H'}, {"full-inn", no_argument, 0, 'F'}, {"huff-threshold", required_argument, 0, 'T'}, {"inngain-sig", required_argument, 0, 'S'}, {"inngain-base", required_argument, 0, 'B'}, {0, 0, 0, 0} }; static int bitrate = 32000; static int force = 0; static int quiet = 0; static int halved_innovation = 1; static int huffman_threshold = 24; static int inngain_sig = 64; static float inngain_base = 1.068f; static const char *infile = ""; static const char *outfile = ""; static FILE *infp = NULL; static FILE *outfp = NULL; static uint8_t wav_buffer[432*2]; static float input_samples[12+432]; static float adaptive_codebook[324+432]; static uint8_t compressed_buffer[1024]; static uint8_t inn_buffers[2][256]; static float prev_rc[12]; static float innovation[5+108+5]; static float inn_gains[64]; struct bit_writer_context { uint8_t written_bits_count; size_t pos; uint8_t *buffer; }; static void read_data(FILE *fp, uint8_t *buffer, size_t size) { if (fread(buffer, 1, size, fp) != size) { fprintf(stderr, "%s: failed to read '%s': %s\n", prog_name, infile, ferror(fp) ? strerror(errno) : "reached end of file"); exit(EXIT_FAILURE); } } static void write_data(FILE *fp, const uint8_t *buffer, size_t size) { if (fwrite(buffer, 1, size, fp) != size) { fprintf(stderr, "%s: failed to write to '%s': %s\n", prog_name, outfile, ferror(fp) ? strerror(errno) : "reached end of file"); exit(EXIT_FAILURE); } } static void flush_data(FILE *fp) { if (fflush(fp) != 0) { fprintf(stderr, "%s: failed to flush '%s': %s\n", prog_name, outfile, strerror(errno)); exit(EXIT_FAILURE); } } static void bwc_init(struct bit_writer_context *bwc, uint8_t *buffer) { bwc->written_bits_count = 0; bwc->pos = 0; bwc->buffer = buffer; bwc->buffer[0] = 0; } static void bwc_write_bits(struct bit_writer_context *bwc, unsigned value, uint8_t count) { unsigned x = value << bwc->written_bits_count; bwc->buffer[bwc->pos] |= (uint8_t)x; bwc->written_bits_count += count; while (bwc->written_bits_count >= 8) { x >>= 8; bwc->buffer[++bwc->pos] = (uint8_t)x; bwc->written_bits_count -= 8; } } static void bwc_pad(struct bit_writer_context *bwc) { if (bwc->written_bits_count != 0) { bwc->buffer[++bwc->pos] = 0; bwc->written_bits_count = 0; } } static void bwc_flush(struct bit_writer_context *bwc, FILE *fp) { write_data(fp, bwc->buffer, bwc->pos); bwc->buffer[0] = bwc->buffer[bwc->pos]; bwc->pos = 0; } static unsigned quantize(float value, const float *alphabet, size_t alphabet_size) { unsigned i; unsigned min_idx = 0; float min_distance = ABS(value - alphabet[0]); for (i = 1; i < alphabet_size; i++) { float distance = ABS(value - alphabet[i]); if (distance < min_distance) { min_distance = distance; min_idx = i; } } return min_idx; } /* used in the parsing of some arguments */ static int read_dec_places(const char *string, int n) { int i; int value = 0; int pows_10[10]; pows_10[0] = 1; for (i = 1; i < n; i++) pows_10[i] = pows_10[i-1] * 10; for (i = 0; i < n && string[i] >= '0' && string[i] <= '9'; i++) value += pows_10[n-1-i] * (string[i]-'0'); return (string[i] == '\0') ? value : -1; } static int file_exists(const char *filename) { FILE *fp; fp = fopen(filename, "rb"); if (fp) { fclose(fp); return 1; } return 0; } static void find_autocorrelations(float *r, const float *samples) { int i, j; for (i = 0; i < 13; i++) { r[i] = 0; for (j = 0; j < 432 - i; j++) r[i] += samples[j]*samples[j+i]; } } static void levinson_durbin_symmetric(float *x, float *k, const float *r, const float *y) { float a[12]; /* the forward vector */ float e; /* prediction error */ int i; if (r[0] <= 1.0f/32768.0f && r[0] >= -1.0f/32768.0f) goto zero; a[0] = 1; e = r[0]; x[0] = y[0]/r[0]; for (i = 1; i < 12; i++) { float u, m; float a_temp[12]; int j; u = 0.0f; for (j = 0; j < i; j++) u += a[j]*r[i-j]; k[i-1] = -u/e; /* reflection coefficient i-1 */ e += u*k[i-1]; /* update e to the new value e - u*u/e */ if (e <= 1.0f/32768.0f && e >= -1.0f/32768.0f) goto zero; memcpy(a_temp, a, i*sizeof(float)); a[i] = 0.0f; for (j = 1; j <= i; j++) a[j] += k[i-1]*a_temp[i-j]; m = y[i]; for (j = 0; j < i; j++) m -= x[j]*r[i-j]; m /= e; x[i] = 0.0f; for (j = 0; j <= i; j++) x[j] += m*a[i-j]; } k[11] = -x[11]; return; zero: for (i = 0; i < 12; i++) x[i] = 0.0f; for (i = 0; i < 12; i++) k[i] = 0.0f; } static void rc_to_lpc(float *x, const float *k) { float a[13]; /* the forward vector */ unsigned i, j; a[0] = 1; for (i = 1; i < 13; i++) { float a_temp[12]; memcpy(a_temp, a, i*sizeof(float)); a[i] = 0.0f; for (j = 1; j <= i; j++) a[j] += k[i-1]*a_temp[i-j]; } for (i = 1; i < 13; i++) x[i-1] = -a[i]; } static void find_rc(float *rc, const float *samples) { float r[13]; float lpc[12]; find_autocorrelations(r, samples); levinson_durbin_symmetric(lpc, rc, r, r+1); } static void find_excitation(float *excitation, const float *source, int length, const float *lpc) { int i, j; for (i = 0; i < length; i++) { float prediction = 0.0f; for (j = 0; j < 12; j++) prediction += lpc[j]*source[i-1-j]; excitation[i] = source[i] - prediction; } } static void find_pitch(int *pitch_lag, float *pitch_gain, const float *excitation) { int max_corr_offset = 108; float max_corr_value = 0.0f; float history_energy; float gain; int i, j; /* Find the optimal pitch lag. */ for (i = 108; i < 324; i++) { float corr = 0.0f; for (j = 0; j < 108; j++) corr += excitation[j]*excitation[j-i]; if (corr > max_corr_value) { max_corr_offset = i; max_corr_value = corr; } } /* Find the optimal pitch gain. */ history_energy = 0.0f; for (i = 0; i < 108; i++) { float value = excitation[i-max_corr_offset]; history_energy += value*value; } if (history_energy >= 1/32768.0f) { gain = max_corr_value / history_energy; gain = CLAMP(gain, 0.0f, 1.0f); *pitch_lag = max_corr_offset; *pitch_gain = gain; } else { *pitch_lag = 108; *pitch_gain = 0.0f; } } static void interpolate(float *x, int a, int z) { int i; if (z) { for (i = !a; i < 108; i+=2) x[i] = 0.0f; } else { for (i = !a; i < 108; i+=2) x[i] = (x[i-1]+x[i+1]) * .5973859429f - (x[i-3]+x[i+3]) * .1145915613f + (x[i-5]+x[i+5]) * .0180326793f; } } static float interpolation_error(int a, int z, const float *x) { float error = 0.0f; int i; if (z) { for (i = !a; i < 108; i+=2) error += x[i]*x[i]; } else { for (i = !a; i < 108; i+=2) { float prediction = (x[i-1]+x[i+1]) * .5973859429f - (x[i-3]+x[i+3]) * .1145915613f + (x[i-5]+x[i+5]) * .0180326793f; error += (prediction - x[i])*(prediction - x[i]); } } return error; } static void find_a_z_flags(int *a, int *z, const float *innovation) { /* Find the a and z flags such that the least error is introduced ** in the downsampling step. In case of a tie (e.g. in silence), ** prefer using the zero flag. Thus, we will test in the order: ** (a=0,z=1), (a=1,z=1), (a=0,z=0), (a=1,z=1). */ float error; float best_error; int best_a = 0, best_z = 1; best_error = interpolation_error(0, 1, innovation); error = interpolation_error(1, 1, innovation); if (error < best_error) { best_error = error; best_a = 1, best_z = 1; } error = interpolation_error(0, 0, innovation); if (error < best_error) { best_error = error; best_a = 0, best_z = 0; } error = interpolation_error(1, 0, innovation); if (error < best_error) { best_error = error; best_a = 1, best_z = 0; } *a = best_a; *z = best_z; } struct huffman_code { uint16_t bits_value; uint16_t bits_count; }; static const struct huffman_code huffman_models[2][13+1+13] = { /* model 0 */ { /* -13 */ {16255, 16}, /* -12 */ {8063, 15}, /* -11 */ {3967, 14}, /* -10 */ {1919, 13}, /* -9 */ {895, 12}, /* -8 */ {383, 11}, /* -7 */ {127, 10}, /* -6 */ {63, 8}, /* -5 */ {31, 7}, /* -4 */ {15, 6}, /* -3 */ {7, 5}, /* -2 */ {3, 4}, /* -1 */ {2, 2}, /* 0 */ {0, 2}, /* +1 */ {1, 2}, /* +2 */ {11, 4}, /* +3 */ {23, 5}, /* +4 */ {47, 6}, /* +5 */ {95, 7}, /* +6 */ {191, 8}, /* +7 */ {639, 10}, /* +8 */ {1407, 11}, /* +9 */ {2943, 12}, /* +10 */ {6015, 13}, /* +11 */ {12159, 14}, /* +12 */ {24447, 15}, /* +13 */ {49023, 16} }, /* model 1 */ { /* -13 */ {8127, 15}, /* -12 */ {4031, 14}, /* -11 */ {1983, 13}, /* -10 */ {959, 12}, /* -9 */ {447, 11}, /* -8 */ {191, 10}, /* -7 */ {63, 9}, /* -6 */ {31, 7}, /* -5 */ {15, 6}, /* -4 */ {7, 5}, /* -3 */ {3, 4}, /* -2 */ {1, 3}, /* -1 */ {2, 3}, /* 0 */ {0, 2}, /* +1 */ {6, 3}, /* +2 */ {5, 3}, /* +3 */ {11, 4}, /* +4 */ {23, 5}, /* +5 */ {47, 6}, /* +6 */ {95, 7}, /* +7 */ {319, 9}, /* +8 */ {703, 10}, /* +9 */ {1471, 11}, /* +10 */ {3007, 12}, /* +11 */ {6079, 13}, /* +12 */ {12223, 14}, /* +13 */ {24511, 15} } }; static void encode_huffman(struct bit_writer_context *bwc, float *innovation_out, int *bits_used_out, float *error_out, const float *innovation_in, int halved_innovation, int pow, int a, int z) { int interval = halved_innovation ? 2 : 1; float inn_gain; float total_error = 0.0f; int counter; int values[108]; int zero_counts[108]; int model; int bits_start, bits_end; int i; inn_gain = inn_gains[pow]; if (!z) inn_gain *= 0.5f; bits_start = 8*bwc->pos + bwc->written_bits_count; if (halved_innovation) bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8); else bwc_write_bits(bwc, pow, 6); for (i = a; i < 108; i += interval) { float e; values[i] = ROUND(CLAMP( innovation_in[i]/inn_gain, -13.0f, 13.0f)); innovation_out[i] = inn_gain*values[i]; e = innovation_out[i] - innovation_in[i]; total_error += e*e; } *error_out = total_error; /* Find the zero runs at each position i (how many zeros ** in a row there are at position i). ** When interval=2 and a=1, start the search from i=105 instead ** of 107 in order to duplicate the off-by-one mistake in the ** decoder. (Thus, we will subtract a instead of adding.) ** For details, see: http://wiki.niotso.org/UTK */ counter = 0; for (i = 108 - interval - a; i >= 0; i -= interval) { if (values[i] == 0) counter++; else counter = 0; zero_counts[i] = counter; } i = a; model = 0; while (i < 108) { if (zero_counts[i] >= 7) { int length = MIN(zero_counts[i], 70); if (model == 0) bwc_write_bits(bwc, 255 | ((length-7)<<8), 14); else bwc_write_bits(bwc, 127 | ((length-7)<<7), 13); model = 0; i += length * interval; } else { int value = values[i]; bwc_write_bits(bwc, huffman_models[model][13+value].bits_value, huffman_models[model][13+value].bits_count); model = (value < -1 || value > 1); i += interval; } } bits_end = 8*bwc->pos + bwc->written_bits_count; *bits_used_out = bits_end - bits_start; } static void encode_triangular(struct bit_writer_context *bwc, float *innovation_out, int *bits_used_out, float *error_out, const float *innovation_in, int halved_innovation, int pow, int a, int z) { int interval = halved_innovation ? 2 : 1; float inn_gain; float total_error = 0.0f; int bits_start, bits_end; int i; inn_gain = 2.0f*inn_gains[pow]; if (!z) inn_gain *= 0.5f; bits_start = 8*bwc->pos + bwc->written_bits_count; if (halved_innovation) bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8); else bwc_write_bits(bwc, pow, 6); for (i = a; i < 108; i += interval) { float e; int value = ROUND(CLAMP( innovation_in[i]/inn_gain, -1.0f, 1.0f)); if (value > 0) bwc_write_bits(bwc, 3, 2); else if (value < 0) bwc_write_bits(bwc, 1, 2); else bwc_write_bits(bwc, 0, 1); innovation_out[i] = inn_gain*value; e = innovation_out[i] - innovation_in[i]; total_error += e*e; } bits_end = 8*bwc->pos + bwc->written_bits_count; *bits_used_out = bits_end - bits_start; *error_out = total_error; } static void low_pass_innovation(float *x, int a, int z) { /* Apply a weak low-pass filter to the innovation signal suitable for ** downsampling it by 1/2. Note that, since we are throwing out all ** x[m] samples where m != a+2*k for integer k, we only have to filter ** the x[n] samples where n = a+2*k. */ int i; /* filter coeffs: (GNU Octave) ** n = 10; b = sinc((-n/4):.5:(n/4)).*hamming(n+9)(5:(n+5))' */ for (i = a; i < 108; i+=2) x[i] = (z ? 1.0f : 0.5f)*(x[i] + (x[i-1]+x[i+1]) * 0.6189590521549956f + (x[i-3]+x[i+3]) * -0.1633990749076792f + (x[i-5]+x[i+5]) * 0.05858453198856907f); } struct innovation_encoding { struct bit_writer_context bwc; float innovation[108]; int bits_used; float error; }; static void encode_innovation(struct bit_writer_context *bwc, float *innovation, int halved_innovation, int use_huffman, int *bits_used, int target_bit_count) { int a = 0, z = 1; struct innovation_encoding encodings[2]; int m = 0; if (halved_innovation) { find_a_z_flags(&a, &z, innovation); low_pass_innovation(innovation, a, z); } if (use_huffman) { /* Encode using the Huffman model. */ int interval = halved_innovation ? 2 : 1; float max_value = 0.0f; int min_pow; int best_distance = 0; int pow; int i; /* Find the minimum innovation power such that the innovation ** signal doesn't clip anywhere in time. (We consider clipping ** a sample by <=0.5 of a quantization level to be okay since ** the sample already rounds down [towards zero].) */ for (i = a; i < 108; i += interval) { float value = ABS(innovation[i]); if (value > max_value) max_value = value; } for (i = 62; i >= 0; i--) { if (inn_gains[i]*(!z ? 0.5f : 1.0f)*13.5f < max_value) break; } min_pow = i+1; /* Find the innovation gain that results in the closest ** to the target bitrate without clipping occurring. */ for (pow = min_pow; pow <= 63; pow++) { int distance; bwc_init(&encodings[m].bwc, inn_buffers[m]); bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos], bwc->written_bits_count); encode_huffman(&encodings[m].bwc, encodings[m].innovation, &encodings[m].bits_used, &encodings[m].error, innovation, halved_innovation, pow, a, z); distance = ABS(encodings[m].bits_used - target_bit_count); if (pow == min_pow || distance < best_distance) { best_distance = distance; m = !m; /* swap the buffers */ } } } else { /* Encode using the triangular noise model. */ float best_error = 0.0f; int pow; /* Find the innovation gain that results in ** the highest quality. */ for (pow = 0; pow <= 63; pow++) { bwc_init(&encodings[m].bwc, inn_buffers[m]); bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos], bwc->written_bits_count); encode_triangular(&encodings[m].bwc, encodings[m].innovation, &encodings[m].bits_used, &encodings[m].error, innovation, halved_innovation, pow, a, z); if (pow == 0 || encodings[m].error < best_error) { best_error = encodings[m].error; m = !m; /* swap the buffers */ } } } /* Swap the buffers again to return back to our best encoding. */ m = !m; /* Write this encoding out to the UTK bitstream. */ memcpy(&bwc->buffer[bwc->pos], encodings[m].bwc.buffer, encodings[m].bwc.pos+1); bwc->pos += encodings[m].bwc.pos; bwc->written_bits_count = encodings[m].bwc.written_bits_count; /* Update the innovation signal with the quantized version. */ memcpy(innovation, encodings[m].innovation, 108*sizeof(float)); if (halved_innovation) interpolate(innovation, a, z); *bits_used = encodings[m].bits_used; } static int parse_arguments(int argc, char *argv[]) { int c; int value; char *endptr; prog_name = (argc >= 1 && argv[0][0] != '\0') ? argv[0] : "utkencode"; while ((c = getopt_long(argc, argv, short_options, long_options, NULL)) != -1) { switch (c) { case 'b': bitrate = (int)strtol(optarg, &endptr, 10); if (*endptr != '\0' || bitrate < 1000 || bitrate > 1000000) { fprintf(stderr, "%s: invalid bitrate -- %s\n", prog_name, optarg); print_usage_error(); return -1; } break; case 'f': force = 1; break; case 'q': quiet = 1; break; case 'h': print_help(); return 1; case 'V': print_version(); return 1; case 'H': halved_innovation = 1; break; case 'F': halved_innovation = 0; break; case 'T': huffman_threshold = (int)strtol(optarg, &endptr, 10); if (*endptr != '\0' || huffman_threshold < 16 || huffman_threshold > 32) { fprintf(stderr, "%s: invalid Huffman " "threshold -- %s\n", prog_name, optarg); print_usage_error(); return -1; } break; case 'S': inngain_sig = (int)strtol(optarg, &endptr, 10); if (*endptr != '\0' || inngain_sig < 8 || inngain_sig > 128 || (inngain_sig & 7) != 0) { fprintf(stderr, "%s: invalid innovation gain" " significand -- %s\n", prog_name, optarg); print_usage_error(); return -1; } break; case 'B': if (optarg[0] != '1' || optarg[1] != '.' || (value = read_dec_places(optarg+2, 3)) < 0 || value < 40 || value > 103) { fprintf(stderr, "%s: invalid innovation gain" " base -- %s\n", prog_name, optarg); print_usage_error(); return -1; } inngain_base = 1.0f + (float)value/1000.0f; break; default: print_usage_error(); return -1; } } if (argc - optind == 0) { fprintf(stderr, "%s: missing infile\n", prog_name); print_usage_error(); return -1; } else if (argc - optind == 1) { fprintf(stderr, "%s: missing outfile\n", prog_name); print_usage_error(); return -1; } else if (argc - optind >= 3) { fprintf(stderr, "%s: too many arguments passed\n", prog_name); print_usage_error(); return -1; } infile = argv[optind]; outfile = argv[optind+1]; return 0; } int main(int argc, char *argv[]) { int ret; uint8_t wav_header[44]; uint8_t utk_header[32]; unsigned bytes_remaining; int sampling_rate; struct bit_writer_context bwc; int i, j; ret = parse_arguments(argc, argv); if (ret < 0) return EXIT_FAILURE; else if (ret > 0) return EXIT_SUCCESS; if (!strcmp(infile, "-")) { infp = stdin; } else { infp = fopen(infile, "rb"); if (!infp) { fprintf(stderr, "%s: failed to open '%s' for" " reading: %s\n", prog_name, infile, strerror(errno)); return EXIT_FAILURE; } } setvbuf(infp, NULL, _IOFBF, BUFSIZ); if (!strcmp(outfile, "-")) { outfp = stdout; } else { if (!force && file_exists(outfile)) { if (quiet) { fprintf(stderr, "%s: failed to open '%s' for" " writing: file already exists\n", prog_name, outfile); return EXIT_FAILURE; } else { fprintf(stderr, "%s: overwrite '%s'? ", prog_name, outfile); if (getchar() != 'y') return EXIT_FAILURE; } } outfp = fopen(outfile, "wb"); if (!outfp) { fprintf(stderr, "%s: failed to open '%s' for" " writing: %s\n", prog_name, outfile, strerror(errno)); return EXIT_FAILURE; } } setvbuf(outfp, NULL, _IOFBF, BUFSIZ); if (fread(wav_header, 1, 44, infp) != 44) { if (ferror(infp)) fprintf(stderr, "%s: failed to read '%s': %s\n", prog_name, infile, strerror(errno)); else fprintf(stderr, "%s: '%s' is not a valid wav file\n", prog_name, infile); /* (reached end of file) */ return EXIT_FAILURE; } if (memcmp(wav_header, "RIFF", 4) != 0 || memcmp(wav_header+8, "WAVEfmt ", 8) != 0) { fprintf(stderr, "%s: '%s' is not a valid wav file\n", prog_name, infile); return EXIT_FAILURE; } if (READ16(wav_header+20) != 1 /* wFormatTag */ || READ16(wav_header+22) != 1 /* nChannels */ || READ16(wav_header+32) != 2 /* nBlockAlign */ || READ16(wav_header+34) != 16 /* wBitsPerSample */) { fprintf(stderr, "%s: wav file must be 1-channel 16-bit LPCM\n", prog_name); return EXIT_FAILURE; } sampling_rate = READ32(wav_header+24); /* nSamplesPerSec */ if (sampling_rate < 1000 || sampling_rate > 1000000) { fprintf(stderr, "%s: unsupported sampling rate %d\n", prog_name, sampling_rate); return EXIT_FAILURE; } memcpy(utk_header, "UTM0", 4); /* sID */ /* Drop the last byte from the wav file if there are an odd ** number of sample bytes. */ bytes_remaining = READ32(wav_header+40) & (~1); WRITE32(utk_header+4, bytes_remaining); /* dwOutSize */ WRITE32(utk_header+8, 20); /* dwWfxSize */ memcpy(utk_header+12, wav_header+20, 16); /* WAVEFORMATEX */ WRITE32(utk_header+28, 0); /* cbSize */ write_data(outfp, utk_header, 32); bwc_init(&bwc, compressed_buffer); bwc_write_bits(&bwc, halved_innovation, 1); bwc_write_bits(&bwc, 32 - huffman_threshold, 4); bwc_write_bits(&bwc, inngain_sig/8 - 1, 4); bwc_write_bits(&bwc, ROUND((inngain_base - 1.04f)*1000.0f), 6); bwc_flush(&bwc, outfp); for (i = 0; i < 12; i++) input_samples[i] = 0.0f; for (i = 0; i < 324; i++) adaptive_codebook[i] = 0.0f; for (i = 0; i < 12; i++) prev_rc[i] = 0.0f; for (i = 0; i < 5; i++) innovation[i] = 0.0f; for (i = 5+108; i < 5+108+5; i++) innovation[i] = 0.0f; inn_gains[0] = inngain_sig; for (i = 1; i < 64; i++) inn_gains[i] = inn_gains[i-1]*inngain_base; while (bytes_remaining != 0) { /* Encode the next frame of 432 samples. */ int bytes_to_read; int samples_to_read; float rc[12]; float rc_delta[12]; int use_huffman = 0; bytes_to_read = (int)MIN(bytes_remaining, 432*2); samples_to_read = bytes_to_read >> 1; read_data(infp, wav_buffer, bytes_to_read); bytes_remaining -= bytes_to_read; for (i = 0; i < samples_to_read; i++) { int16_t x = READ16(wav_buffer+2*i); input_samples[12+i] = (float)x; } for (i = samples_to_read; i < 432; i++) input_samples[12+i] = 0.0f; find_rc(rc, input_samples+12); /* Quantize the reflection coefficients. ** In our encoder, we will not make use of utk_rc_table[0]. */ for (i = 0; i < 4; i++) { int idx = 1+quantize(rc[i], utk_rc_table+1, 63); bwc_write_bits(&bwc, idx, 6); rc[i] = utk_rc_table[idx]; if (i == 0 && idx < huffman_threshold) use_huffman = 1; } for (i = 4; i < 12; i++) { int idx = quantize(rc[i], utk_rc_table+16, 32); bwc_write_bits(&bwc, idx, 5); rc[i] = utk_rc_table[16+idx]; } for (i = 0; i < 12; i++) rc_delta[i] = (rc[i] - prev_rc[i])/4.0f; memcpy(rc, prev_rc, 12*sizeof(float)); for (i = 0; i < 4; i++) { /* Linearly interpolate the reflection coefficients over ** the four subframes and find the excitation signal. */ float lpc[12]; for (j = 0; j < 12; j++) rc[j] += rc_delta[j]; rc_to_lpc(lpc, rc); find_excitation(adaptive_codebook+324+12*i, input_samples+12+12*i, i < 3 ? 12 : 396, lpc); } memcpy(input_samples, &input_samples[432], 12*sizeof(float)); memcpy(prev_rc, rc, 12*sizeof(float)); for (i = 0; i < 4; i++) { /* Encode the i'th subframe. */ float *excitation = adaptive_codebook+324+108*i; int pitch_lag; float pitch_gain; int idx; int bits_used; find_pitch(&pitch_lag, &pitch_gain, excitation); bwc_write_bits(&bwc, pitch_lag - 108, 8); idx = ROUND(pitch_gain*15.0f); bwc_write_bits(&bwc, idx, 4); pitch_gain = (float)idx/15.0f; for (j = 0; j < 108; j++) innovation[5+j] = excitation[j] - pitch_gain*excitation[j-pitch_lag]; encode_innovation(&bwc, &innovation[5], halved_innovation, use_huffman, &bits_used, ROUND(bitrate * 432 / sampling_rate / 4) - 18); /* Update the adaptive codebook using the quantized ** innovation signal. */ for (j = 0; j < 108; j++) excitation[j] = innovation[5+j] + pitch_gain*excitation[j-pitch_lag]; } /* Copy the last 3 subframes to the beginning of the ** adaptive codebook. */ memcpy(adaptive_codebook, &adaptive_codebook[432], 324*sizeof(float)); bwc_flush(&bwc, outfp); } bwc_pad(&bwc); bwc_flush(&bwc, outfp); flush_data(outfp); fclose(outfp); fclose(infp); return EXIT_SUCCESS; }