diff --git a/.github/actions/run-rust-python-tests/action.yml b/.github/actions/run-rust-python-tests/action.yml
index 6ff2650..b06aefb 100644
--- a/.github/actions/run-rust-python-tests/action.yml
+++ b/.github/actions/run-rust-python-tests/action.yml
@@ -63,5 +63,7 @@ runs:
 
     - name: Run pytest
       shell: bash
+      env:
+        PYTHONUTF8: "1"
       run: |
         pytest -q
diff --git a/tests/test_harmony.py b/tests/test_harmony.py
index 8392d7f..07d5562 100644
--- a/tests/test_harmony.py
+++ b/tests/test_harmony.py
@@ -59,7 +59,7 @@ def _assert_tokens_eq(encoding, expected: List[int], actual: List[int]):  # type
 
 
 def read_expected_tokens(file_path: Path) -> List[int]:
-    with open(file_path, "r") as f:
+    with open(file_path, "r", encoding="utf-8") as f:
         return [int(x) for x in f.read().split()]
 
 
@@ -78,7 +78,9 @@ def test_simple_convo(encoding_name):
     encoding = load_harmony_encoding(encoding_name)
 
     expected_text = (
-        (ROOT_DIR / "test-data" / "test_simple_convo.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_simple_convo.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
     )
     expected_tokens = encoding.encode(expected_text, allowed_special="all")
 
@@ -143,7 +145,7 @@ def test_simple_convo_with_effort(encoding_name):
     ]
 
     for effort, tokens_path, use_instruction in test_cases:
-        expected_text = tokens_path.read_text().rstrip()
+        expected_text = tokens_path.read_text(encoding="utf-8").rstrip()
         expected_tokens = encoding.encode(expected_text, allowed_special="all")
         sys = (
             SystemContent.new()
@@ -299,7 +301,7 @@ def test_reasoning_system_message(encoding_name):
 
     expected_text = (
         (ROOT_DIR / "test-data" / "test_reasoning_system_message.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
         .rstrip()
     )
     expected = encoding.encode(expected_text, allowed_special="all")
@@ -336,7 +338,7 @@ def test_reasoning_system_message_no_instruction(encoding_name):
 
     expected_text = (
         (ROOT_DIR / "test-data" / "test_reasoning_system_message_no_instruction.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
         .rstrip()
     )
     expected = encoding.encode(expected_text, allowed_special="all")
@@ -376,7 +378,7 @@ def test_reasoning_system_message_with_dates(encoding_name):
 
     expected_text = (
         (ROOT_DIR / "test-data" / "test_reasoning_system_message_with_dates.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
         .rstrip()
     )
     expected = encoding.encode(expected_text, allowed_special="all")
@@ -409,7 +411,7 @@ def test_render_functions_with_parameters():
 
     expected_output = (
         (ROOT_DIR / "test-data" / "test_render_functions_with_parameters.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
         .rstrip()
     )
 
@@ -526,7 +528,9 @@ def test_render_functions_with_parameters():
 def test_no_tools():
     encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
     expected_output = (
-        (ROOT_DIR / "test-data" / "test_no_tools.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_no_tools.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
     )
 
     convo = Conversation.from_messages(
@@ -546,7 +550,9 @@ def test_no_tools():
 def test_browser_tool_only():
     encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
     expected_output = (
-        (ROOT_DIR / "test-data" / "test_browser_tool_only.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_browser_tool_only.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
     )
 
     convo = Conversation.from_messages(
@@ -569,7 +575,7 @@ def test_browser_and_function_tool():
     encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
     expected_output = (
         (ROOT_DIR / "test-data" / "test_browser_and_function_tool.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
         .rstrip()
     )
 
@@ -611,7 +617,7 @@ def test_browser_and_python_tool():
     encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
     expected_output = (
         (ROOT_DIR / "test-data" / "test_browser_and_python_tool.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
         .rstrip()
     )
 
@@ -637,7 +643,7 @@ def test_dropping_cot_by_default():
 
     expected_output = (
         (ROOT_DIR / "test-data" / "test_dropping_cot_by_default.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
         .rstrip()
     )
 
@@ -667,7 +673,7 @@ def test_does_not_drop_if_ongoing_analysis():
 
     expected_output = (
         (ROOT_DIR / "test-data" / "test_does_not_drop_if_ongoing_analysis.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
         .rstrip()
     )
 
@@ -702,7 +708,9 @@ def test_preserve_cot():
     encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
 
     expected_output = (
-        (ROOT_DIR / "test-data" / "test_preserve_cot.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_preserve_cot.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
     )
 
     convo = Conversation.from_messages(
@@ -738,7 +746,7 @@ def test_keep_analysis_between_final_messages():
 
     expected_output = (
         (ROOT_DIR / "test-data" / "test_keep_analysis_between_finals.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
         .rstrip()
     )
 
@@ -880,7 +888,9 @@ def test_tool_response_parsing():
     encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
 
     text_tokens = (
-        (ROOT_DIR / "test-data" / "test_tool_response_parsing.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_tool_response_parsing.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
     )
 
     expected_message = (
@@ -904,7 +914,9 @@ def test_streamable_parser():
     encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
 
     text_tokens = (
-        (ROOT_DIR / "test-data" / "test_streamable_parser.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_streamable_parser.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
     )
 
     tokens = encoding.encode(text_tokens, allowed_special="all")