pebble/tests/fw/test_utf8.c
2025-01-27 11:38:16 -08:00

214 lines
7.4 KiB
C

/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "applib/graphics/utf8.h"
#include "utf8_test_data.h"
#include "clar.h"
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
// Stubs
///////////////////////////////////////////////////////////
#include "stubs_logging.h"
#include "stubs_passert.h"
// Tests
///////////////////////////////////////////////////////////
// Checks that the valid reference string passes utf8_is_valid_string() and that
// stepping through it with utf8_get_next() yields every entry of
// s_valid_test_codepoints, in order.
void test_utf8__decode_test_string_valid(void) {
  const int expected_count = sizeof(s_valid_test_codepoints) / sizeof(uint32_t);

  // The string as a whole has to validate before it is decoded piece by piece.
  const bool string_ok = utf8_is_valid_string(s_valid_test_string);
  cl_assert(string_ok);

  utf8_t *cursor = (utf8_t *)s_valid_test_string;
  int index = 0;
  while (index < expected_count) {
    // Peek does not move the cursor (no "next" out-pointer is requested), so the
    // cursor is advanced explicitly after the comparison.
    const uint32_t decoded = utf8_peek_codepoint(cursor, NULL);
    const uint32_t expected = s_valid_test_codepoints[index];
    cl_assert_equal_i(decoded, expected);
    cursor = utf8_get_next(cursor);
    index++;
  }
}
// Checks how the decoder treats the malformed reference string: bounds
// computation must report failure, the codepoints ahead of the malformed one
// must still decode to the reference values, and peeking at the malformed
// sequence must return 0.
void test_utf8__decode_malformed_test_string(void) {
  // utf8_get_bounds() reports its outcome through the flag; its return value is
  // not used here.
  bool bounds_ok = false;
  utf8_get_bounds(&bounds_ok, s_malformed_test_string);
  cl_assert(!bounds_ok);

  // Walk the leading part of the string, comparing against the valid table.
  utf8_t *cursor = (utf8_t *)s_malformed_test_string;
  int index = 0;
  while (index < (UTF8_TEST_MALFORMED_CODEPOINT_INDEX - 1)) {
    const uint32_t decoded = utf8_peek_codepoint(cursor, NULL);
    const uint32_t expected = s_valid_test_codepoints[index];
    cl_assert_equal_i(decoded, expected);
    cursor = utf8_get_next(cursor);
    index++;
  }

  // Peeking at the position reached after the loop must yield codepoint 0.
  // The cursor is passed by value, so it stays where it is; the byte under it
  // is expected to be 0xcd.
  cl_assert_equal_i(utf8_peek_codepoint(cursor, NULL), 0);
  cl_assert_(*cursor == 0xcd, "Failed to invalidate an invalid UTF-8 test string");
}
// Validates s_valid_gothic_codepoints_string, then decodes it one character at
// a time and compares each result with the matching entry of the
// s_valid_gothic_codepoints table.
void test_utf8__decode_all_gothic_codepoints(void) {
  const int table_length = sizeof(s_valid_gothic_codepoints) / sizeof(uint32_t);

  const bool string_ok = utf8_is_valid_string(s_valid_gothic_codepoints_string);
  cl_assert(string_ok);

  utf8_t *cursor = (utf8_t *)s_valid_gothic_codepoints_string;
  // The cursor is advanced in the loop header, after each comparison.
  for (int n = 0; n < table_length; cursor = utf8_get_next(cursor), n++) {
    const uint32_t from_string = utf8_peek_codepoint(cursor, NULL);
    const uint32_t from_table = s_valid_gothic_codepoints[n];
    cl_assert_equal_i(from_string, from_table);
  }
}
// Emoji must be accepted by the validator, whether spelled as escaped bytes or
// written literally in the source.
void test_utf8__emoji_codepoints(void) {
  // Four-byte sequence F0 9F 98 84 (encodes U+1F604), given as hex escapes.
  const bool escaped_emoji_ok = utf8_is_valid_string("\xF0\x9F\x98\x84");
  cl_assert(escaped_emoji_ok);

  // An emoji embedded directly in the string literal.
  const bool literal_emoji_ok = utf8_is_valid_string("😃");
  cl_assert(literal_emoji_ok);
}
// Copying one character from an ASCII string must transfer exactly one byte,
// leaving "h" in the (pre-zeroed) destination.
void test_utf8__copy_single_byte_char(void) {
  enum { BUFFER_SIZE = 5 };
  utf8_t buffer[BUFFER_SIZE] = {0};

  const size_t bytes_copied = utf8_copy_character(buffer, (utf8_t *)"hello", BUFFER_SIZE);

  cl_assert_equal_i(bytes_copied, 1);
  cl_assert_equal_s((char *)buffer, "h");
}
// Copying one character from NIHAO must transfer the whole first character
// (NIHAO_FIRST_CHARACTER_BYTES bytes) and nothing after it.
void test_utf8__copy_multibyte_char(void) {
  enum { BUFFER_SIZE = 5 };
  utf8_t buffer[BUFFER_SIZE] = {0};

  const size_t bytes_copied = utf8_copy_character(buffer, (utf8_t *)NIHAO, BUFFER_SIZE);

  cl_assert_equal_i(bytes_copied, NIHAO_FIRST_CHARACTER_BYTES);
  cl_assert_equal_s((char *)buffer, NIHAO_FIRST_CHARACTER);
}
// When the length limit (2) is too small for the first character of NIHAO,
// nothing may be copied: the call must return 0 and leave the destination an
// empty string.
void test_utf8__copy_insufficient_space(void) {
  // Zero the whole buffer, not just the first byte: if a partial character were
  // ever written by mistake, the string comparison below still finds a
  // terminator inside the buffer and fails cleanly instead of reading
  // indeterminate stack bytes.
  utf8_t dest[5] = {0};

  size_t copied = utf8_copy_character(dest, (utf8_t *)NIHAO, 2);

  cl_assert_equal_i(copied, 0);
  cl_assert_equal_s((char *)dest, "");
}
// With a length limit of 3 the first character of NIHAO must still be copied in
// full (presumably the limit equals the character's size, hence "fill buffer").
void test_utf8__copy_fill_buffer(void) {
  enum { BUFFER_SIZE = 5, COPY_LIMIT = 3 };
  utf8_t buffer[BUFFER_SIZE] = {0};

  const size_t bytes_copied = utf8_copy_character(buffer, (utf8_t *)NIHAO, COPY_LIMIT);

  cl_assert_equal_i(bytes_copied, NIHAO_FIRST_CHARACTER_BYTES);
  cl_assert_equal_s((char *)buffer, NIHAO_FIRST_CHARACTER);
}
// The source holds a single character followed by the terminator; that
// character must be copied completely.
void test_utf8__copy_last_character(void) {
  enum { BUFFER_SIZE = 5 };
  utf8_t buffer[BUFFER_SIZE] = {0};

  const size_t bytes_copied =
      utf8_copy_character(buffer, (utf8_t *)NIHAO_FIRST_CHARACTER, BUFFER_SIZE);

  cl_assert_equal_i(bytes_copied, NIHAO_FIRST_CHARACTER_BYTES);
  cl_assert_equal_s((char *)buffer, NIHAO_FIRST_CHARACTER);
}
// A lone 0xF0 lead byte directly followed by the terminator is not a complete
// character: nothing may be copied and the destination must stay empty.
void test_utf8__copy_invalid_last_character(void) {
  enum { BUFFER_SIZE = 5 };
  utf8_t buffer[BUFFER_SIZE] = {0};

  const size_t bytes_copied = utf8_copy_character(buffer, (utf8_t *)"\xf0", BUFFER_SIZE);

  cl_assert_equal_i(bytes_copied, 0);
  cl_assert_equal_s((char *)buffer, "");
}
// A 0xF0 lead byte followed by plain ASCII ("hi") instead of continuation bytes
// must be rejected: nothing is copied and the destination stays empty.
void test_utf8__invalid_character(void) {
  enum { BUFFER_SIZE = 5 };
  utf8_t buffer[BUFFER_SIZE] = {0};

  const size_t bytes_copied = utf8_copy_character(buffer, (utf8_t *)"\xf0hi", BUFFER_SIZE);

  cl_assert_equal_i(bytes_copied, 0);
  cl_assert_equal_s((char *)buffer, "");
}
// Table of expectations for utf8_get_size_truncate(text, max_size).
// The expected values are consistent with max_size including room for a
// terminator (e.g. "abc" with max_size 3 yields 2).
void test_utf8__get_size_truncate(void) {
  // 13 ASCII bytes followed by one four-byte sequence: 17 bytes in total.
  const char *const text_with_emoji = "Hello World! \xF0\x9F\x98\x84";
  // Same text with the last byte of the four-byte sequence missing: 16 bytes.
  const char *const text_with_cut_emoji = "Hello World! \xF0\x9F\x98";

  // Empty input, or no room for anything but the terminator.
  cl_assert_equal_i(0, utf8_get_size_truncate("", 1));
  cl_assert_equal_i(0, utf8_get_size_truncate("", 100));
  cl_assert_equal_i(0, utf8_get_size_truncate(" ", 1));

  // Plain ASCII: fits entirely / loses its last character.
  cl_assert_equal_i(2, utf8_get_size_truncate("ab", 3));
  cl_assert_equal_i(2, utf8_get_size_truncate("abc", 3));

  // Multibyte tail: kept whole when there is room, dropped whole when not.
  cl_assert_equal_i(17, utf8_get_size_truncate(text_with_emoji, 100));
  cl_assert_equal_i(13, utf8_get_size_truncate(text_with_emoji, 17));
  cl_assert_equal_i(16, utf8_get_size_truncate(text_with_cut_emoji, 17));
  cl_assert_equal_i(13, utf8_get_size_truncate(text_with_emoji, 16));

  // A max_size of 0 is expected to trip an assertion.
  cl_assert_passert(utf8_get_size_truncate("Hi", 0));
}
// Resizes *buffer to exactly new_size bytes (allocating it when *buffer is NULL).
// The realloc() result is checked before the old pointer is overwritten, so a
// failed allocation fails the test instead of leading to a NULL dereference.
static void prv_resize_output_buffer(char **buffer, size_t new_size) {
  char *resized = realloc(*buffer, new_size);
  cl_assert(resized != NULL);
  *buffer = resized;
}

// Exercises utf8_truncate_with_ellipsis(in, out, max_length) and checks both the
// text written to the output buffer and the returned size. The ellipsis is the
// three-byte sequence E2 80 A6 (U+2026). The output buffer is resized to exactly
// max_length bytes before each case (presumably so that memory checkers can
// catch an overrun).
void test_utf8__truncate_with_ellipsis(void) {
  char *output_buffer = NULL;

  // basic smoke test
  prv_resize_output_buffer(&output_buffer, 6);
  size_t trunc_size = utf8_truncate_with_ellipsis("WWWWWWWWWWWWWWW", output_buffer, 6);
  cl_assert_equal_s(output_buffer, "WW\xe2\x80\xa6");
  cl_assert_equal_i(trunc_size, 6);

  // test where max_length < ellipsis_length: only the returned size is checked
  prv_resize_output_buffer(&output_buffer, 3);
  trunc_size = utf8_truncate_with_ellipsis("Hey", output_buffer, 3);
  cl_assert_equal_i(trunc_size, 0);

  // test where max_length == ellipsis_length
  prv_resize_output_buffer(&output_buffer, 4);
  trunc_size = utf8_truncate_with_ellipsis("Hello", output_buffer, 4);
  cl_assert_equal_s(output_buffer, "\xe2\x80\xa6");
  cl_assert_equal_i(trunc_size, 4);

  // test where max_length == ellipsis_length + 1
  prv_resize_output_buffer(&output_buffer, 5);
  trunc_size = utf8_truncate_with_ellipsis("Hello", output_buffer, 5);
  cl_assert_equal_s(output_buffer, "H\xe2\x80\xa6");
  cl_assert_equal_i(trunc_size, 5);

  // test that if we don't need to truncate, we don't
  prv_resize_output_buffer(&output_buffer, 12);
  trunc_size = utf8_truncate_with_ellipsis("Hello there", output_buffer, 12);
  cl_assert_equal_s(output_buffer, "Hello there");
  cl_assert_equal_i(trunc_size, 12);

  // test that our utf8 support works properly and doesn't split multibyte characters
  prv_resize_output_buffer(&output_buffer, 19);
  trunc_size = utf8_truncate_with_ellipsis("Hello World! \xF0\x9F\x98\x84 11111", output_buffer, 19);
  cl_assert_equal_s(output_buffer, "Hello World! \xe2\x80\xa6");
  cl_assert_equal_i(trunc_size, 17);

  // test that the call trips an assertion when max_length (6) is larger than the
  // 5-byte output buffer, rather than silently writing past the allocation
  prv_resize_output_buffer(&output_buffer, 5);
  cl_assert_passert(utf8_truncate_with_ellipsis("Hello", output_buffer, 6));

  free(output_buffer);
}