Merge pull request #18 from openai/dev/scl/better-ci

better ci
2025-08-23 01:17:09 -04:00 · 2025-08-05 13:44:48 -07:00 · 2025-08-05 13:44:48 -07:00 · b255cbeb62
commit b255cbeb62
parent 058ef3257c 3c06d6fbca
6 changed files with 162 additions and 62 deletions
--- a/.github/actions/run-rust-python-tests/action.yml
+++ b/.github/actions/run-rust-python-tests/action.yml
@ -0,0 +1,69 @@
 # Composite action that runs the project's checks in order: rustfmt, clippy,
 # Rust tests, Rust doctests, then installs the Python package and runs pytest.
 name: Run Rust and Python tests
 description: Format, clippy, Rust tests (incl. doctests), build Python extension and run pytest
 inputs:
  # Version string forwarded to actions/setup-python.
  python-version:
    description: Python version to use
    required: false
    default: "3.11"
  # Channel forwarded to the rust-toolchain action's `toolchain` input.
  rust-toolchain:
    description: Rust toolchain channel
    required: false
    default: stable
 runs:
  using: composite
  steps:
    # The action ref is pinned to `@stable`; the installed channel comes from the
    # `rust-toolchain` input. clippy and rustfmt are needed by the steps below.
    - name: Setup Rust toolchain
      uses: dtolnay/rust-toolchain@stable
      with:
        toolchain: ${{ inputs.rust-toolchain }}
        components: clippy,rustfmt
    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ inputs.python-version }}
    # Composite `run` steps must declare `shell` explicitly; bash is used on every step.
    - name: Upgrade pip
      shell: bash
      run: |
        python -m pip install --upgrade pip
    - name: Install test deps
      shell: bash
      run: |
        python -m pip install pytest
    # `--check` reports formatting differences without rewriting files.
    - name: Check rustfmt
      shell: bash
      run: |
        cargo fmt --all --check
    # `-D warnings` turns every compiler/clippy warning into an error.
    - name: Run clippy
      shell: bash
      run: |
        cargo clippy --all-targets --all-features -- -D warnings
    - name: Run Rust tests (unit/integration)
      shell: bash
      run: |
        cargo test --all-targets --all-features
    # Doctests are run as their own step, separately from the `--all-targets` run above.
    - name: Run Rust doctests
      shell: bash
      run: |
        cargo test --doc
    # Installs the package from the repository root so pytest imports the built extension.
    - name: Build and install Python package
      shell: bash
      run: |
        pip install .
    # PYTHONUTF8=1 enables Python's UTF-8 mode for the test run.
    - name: Run pytest
      shell: bash
      env:
        PYTHONUTF8: "1"
      run: |
        pytest -q
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@ -19,7 +19,23 @@ permissions:
  contents: read
 jobs:
  # Runs the shared composite test action once per operating system in the matrix.
  tests:
    name: Tests (fmt, clippy, cargo test, doctest, pytest)
    runs-on: ${{ matrix.os }}
    strategy:
      # Keep the other OS jobs running when one of them fails.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-14, windows-latest]
    steps:
      # The composite action is referenced by a local path, so the repository
      # must be checked out before it can be used.
      - uses: actions/checkout@v4
      - name: Run composite test suite
        uses: ./.github/actions/run-rust-python-tests
        with:
          python-version: "3.11"
          rust-toolchain: stable
  linux:
    needs: [tests]
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
@ -54,6 +70,7 @@ jobs:
          path: dist
  musllinux:
    needs: [tests]
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
@ -88,6 +105,7 @@ jobs:
          path: dist
  windows:
    needs: [tests]
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
@ -115,6 +133,7 @@ jobs:
          path: dist
  macos:
    needs: [tests]
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
@ -141,6 +160,7 @@ jobs:
          path: dist
  sdist:
    needs: [tests]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
--- a/src/py_module.rs
+++ b/src/py_module.rs
@ -12,9 +12,6 @@
 //! A thin, typed, user-facing Python wrapper around these low-level bindings is
 //! provided in `harmony/__init__.py`.
 // Only compile when the `python-binding` feature is enabled.
 #![cfg(feature = "python-binding")]
 use pyo3::prelude::*;
 // We need the `Python` type later on.
@ -34,8 +31,6 @@ use crate::{
    load_harmony_encoding, HarmonyEncodingName,
 };
 use serde_json;
 /// A thin PyO3 wrapper around the Rust `HarmonyEncoding` struct.
 #[pyclass]
 struct PyHarmonyEncoding {
@ -393,8 +388,7 @@ fn openai_harmony(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
            "python" => ToolNamespaceConfig::python(),
            _ => {
                return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
-                    "Unknown tool namespace: {}",
+                    "Unknown tool namespace: {tool}"
-                    tool
                )));
            }
        };
--- a/src/tests.rs
+++ b/src/tests.rs
@ -1,3 +1,5 @@
 use std::path::Path;
 use crate::{
    chat::{
        Author, Conversation, DeveloperContent, Message, ReasoningEffort, Role, SystemContent,
@ -10,12 +12,25 @@ use crate::{
 use pretty_assertions::{assert_eq, Comparison};
 use serde_json::json;
-fn parse_tokens(text: &str) -> Vec<Rank> {
+fn parse_tokens(text: impl AsRef<str>) -> Vec<Rank> {
-    text.split_whitespace()
+    text.as_ref()
+        .split_whitespace()
        .map(|s| s.parse().unwrap())
        .collect()
 }
 /// Reads a test fixture and returns its normalized contents.
 ///
 /// `path` is resolved relative to `$CARGO_MANIFEST_DIR/src` (for example
 /// `"../test-data/foo.txt"`). `\r\n` sequences are replaced with `\n` and
 /// trailing whitespace is trimmed before the text is returned.
 ///
 /// # Panics
 ///
 /// Panics, naming the resolved path, if the file cannot be read as UTF-8 text.
 fn load_test_data(path: impl AsRef<Path>) -> String {
    let src_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("src");
    let path = src_dir.join(path);
    std::fs::read_to_string(&path)
        // Include the path so a missing or unreadable fixture is easy to identify.
        .unwrap_or_else(|err| panic!("failed to read test data {}: {err}", path.display()))
        // on windows, we need to replace \r\n with \n
        .replace("\r\n", "\n")
        .trim_end()
        .to_string()
 }
 const ENCODINGS: [HarmonyEncodingName; 1] = [HarmonyEncodingName::HarmonyGptOss];
 #[test]
@ -25,7 +40,7 @@ fn test_simple_convo() {
        let expected_tokens = encoding
            .tokenizer
            .encode(
-                include_str!("../test-data/test_simple_convo.txt").trim_end(),
+                load_test_data("../test-data/test_simple_convo.txt").as_str(),
                &encoding.tokenizer.special_tokens(),
            )
            .0;
@ -50,45 +65,42 @@ fn test_simple_convo_with_effort() {
    let test_cases = [
        (
            ReasoningEffort::Low,
-            include_str!("../test-data/test_simple_convo_low_effort.txt"),
+            load_test_data("../test-data/test_simple_convo_low_effort.txt"),
            true,
        ),
        (
            ReasoningEffort::Medium,
-            include_str!("../test-data/test_simple_convo_medium_effort.txt"),
+            load_test_data("../test-data/test_simple_convo_medium_effort.txt"),
            true,
        ),
        (
            ReasoningEffort::High,
-            include_str!("../test-data/test_simple_convo_high_effort.txt"),
+            load_test_data("../test-data/test_simple_convo_high_effort.txt"),
            true,
        ),
        (
            ReasoningEffort::Low,
-            include_str!("../test-data/test_simple_convo_low_effort_no_instruction.txt"),
+            load_test_data("../test-data/test_simple_convo_low_effort_no_instruction.txt"),
            false,
        ),
        (
            ReasoningEffort::Medium,
-            include_str!("../test-data/test_simple_convo_medium_effort_no_instruction.txt"),
+            load_test_data("../test-data/test_simple_convo_medium_effort_no_instruction.txt"),
            false,
        ),
        (
            ReasoningEffort::High,
-            include_str!("../test-data/test_simple_convo_high_effort_no_instruction.txt"),
+            load_test_data("../test-data/test_simple_convo_high_effort_no_instruction.txt"),
            false,
        ),
    ];
    for encoding_name in ENCODINGS {
        let encoding = load_harmony_encoding(encoding_name).unwrap();
-        for (effort, expected_text, use_instruction) in test_cases {
+        for &(effort, ref expected_text, use_instruction) in &test_cases {
            let expected_tokens = encoding
                .tokenizer
-                .encode(
+                .encode(expected_text.as_str(), &encoding.tokenizer.special_tokens())
-                    expected_text.trim_end(),
-                    &encoding.tokenizer.special_tokens(),
-                )
                .0;
            let sys = SystemContent::new()
                .with_model_identity("You are ChatGPT, a large language model trained by OpenAI.")
@ -123,8 +135,8 @@ fn test_simple_convo_with_effort() {
 #[test]
 fn test_simple_reasoning_response() {
-    let expected_tokens = parse_tokens(include_str!(
+    let expected_tokens = parse_tokens(load_test_data(
-        "../test-data/test_simple_reasoning_response.txt"
+        "../test-data/test_simple_reasoning_response.txt",
    ));
    for encoding_name in ENCODINGS {
        let encoding = load_harmony_encoding(encoding_name).unwrap();
@ -180,7 +192,7 @@ fn test_reasoning_system_message() {
        let expected = encoding
            .tokenizer
            .encode(
-                include_str!("../test-data/test_reasoning_system_message.txt").trim_end(),
+                load_test_data("../test-data/test_reasoning_system_message.txt").as_str(),
                &encoding.tokenizer.special_tokens(),
            )
            .0;
@ -211,8 +223,8 @@ fn test_reasoning_system_message_no_instruction() {
        let expected = encoding
            .tokenizer
            .encode(
-                include_str!("../test-data/test_reasoning_system_message_no_instruction.txt")
+                load_test_data("../test-data/test_reasoning_system_message_no_instruction.txt")
-                    .trim_end(),
+                    .as_str(),
                &encoding.tokenizer.special_tokens(),
            )
            .0;
@ -245,8 +257,8 @@ fn test_reasoning_system_message_with_dates() {
        let expected = encoding
            .tokenizer
            .encode(
-                include_str!("../test-data/test_reasoning_system_message_with_dates.txt")
+                load_test_data("../test-data/test_reasoning_system_message_with_dates.txt")
-                    .trim_end(),
+                    .as_str(),
                &encoding.tokenizer.special_tokens(),
            )
            .0;
@ -275,8 +287,7 @@ fn test_reasoning_system_message_with_dates() {
 #[test]
 fn test_render_functions_with_parameters() {
    let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let expected_output =
+    let expected_output = load_test_data("../test-data/test_render_functions_with_parameters.txt");
-        include_str!("../test-data/test_render_functions_with_parameters.txt").trim_end();
    let sys = SystemContent::new()
        .with_reasoning_effort(ReasoningEffort::High)
@ -382,7 +393,7 @@ fn test_render_functions_with_parameters() {
 #[test]
 fn test_browser_and_python_tool() {
    let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let expected_output = include_str!("../test-data/test_browser_and_python_tool.txt").trim_end();
+    let expected_output = load_test_data("../test-data/test_browser_and_python_tool.txt");
    let convo = Conversation::from_messages([Message::from_role_and_content(
        Role::System,
@ -403,7 +414,7 @@ fn test_browser_and_python_tool() {
 #[test]
 fn test_dropping_cot_by_default() {
    let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let expected_output = include_str!("../test-data/test_dropping_cot_by_default.txt").trim_end();
+    let expected_output = load_test_data("../test-data/test_dropping_cot_by_default.txt");
    let convo = Conversation::from_messages([
        Message::from_role_and_content(Role::User, "What is 2 + 2?"),
@ -433,8 +444,7 @@ fn test_dropping_cot_by_default() {
 #[test]
 fn test_does_not_drop_if_ongoing_analysis() {
    let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let expected_output =
+    let expected_output = load_test_data("../test-data/test_does_not_drop_if_ongoing_analysis.txt");
-        include_str!("../test-data/test_does_not_drop_if_ongoing_analysis.txt").trim_end();
    let convo = Conversation::from_messages([
        Message::from_role_and_content(Role::User, "What is the weather in SF?"),
@ -470,7 +480,7 @@ fn test_does_not_drop_if_ongoing_analysis() {
 #[test]
 fn test_preserve_cot() {
    let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let expected_output = include_str!("../test-data/test_preserve_cot.txt").trim_end();
+    let expected_output = load_test_data("../test-data/test_preserve_cot.txt");
    let convo = Conversation::from_messages([
        Message::from_role_and_content(Role::User, "What is 2 + 2?"),
@ -534,10 +544,10 @@ fn test_decode_utf8_invalid_token() {
 #[test]
 fn test_tool_response_parsing() {
    let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let text_tokens = include_str!("../test-data/test_tool_response_parsing.txt").trim_end();
+    let text_tokens = load_test_data("../test-data/test_tool_response_parsing.txt");
    let tokens = encoding
        .tokenizer
-        .encode(text_tokens, &encoding.tokenizer.special_tokens())
+        .encode(&text_tokens, &encoding.tokenizer.special_tokens())
        .0;
    let expected_message = Message::from_author_and_content(
@ -616,10 +626,10 @@ fn test_invalid_utf8_decoding() {
 #[test]
 fn test_streamable_parser() {
    let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let text = include_str!("../test-data/test_streamable_parser.txt").trim_end();
+    let text = load_test_data("../test-data/test_streamable_parser.txt");
    let tokens = encoding
        .tokenizer
-        .encode(text, &encoding.tokenizer.special_tokens())
+        .encode(&text, &encoding.tokenizer.special_tokens())
        .0;
    let mut parser =
        crate::encoding::StreamableParser::new(encoding.clone(), Some(Role::Assistant)).unwrap();
--- a/src/wasm_module.rs
+++ b/src/wasm_module.rs
@ -1,5 +1,3 @@
 #![cfg(feature = "wasm-binding")]
 use wasm_bindgen::prelude::*;
 use crate::{
@ -9,8 +7,6 @@ use crate::{
 };
 use serde::Deserialize;
 use serde_json;
 use serde_wasm_bindgen;
 #[wasm_bindgen]
 extern "C" {
@ -335,8 +331,7 @@ pub fn get_tool_namespace_config(tool: &str) -> Result<JsValue, JsValue> {
        "python" => ToolNamespaceConfig::python(),
        _ => {
            return Err(JsValue::from_str(&format!(
-                "Unknown tool namespace: {}",
+                "Unknown tool namespace: {tool}"
-                tool
            )))
        }
    };
--- a/tests/test_harmony.py
+++ b/tests/test_harmony.py
@ -59,7 +59,7 @@ def _assert_tokens_eq(encoding, expected: List[int], actual: List[int]):  # type
 def read_expected_tokens(file_path: Path) -> List[int]:
-    with open(file_path, "r") as f:
+    with open(file_path, "r", encoding="utf-8") as f:
        return [int(x) for x in f.read().split()]
@ -78,7 +78,9 @@ def test_simple_convo(encoding_name):
    encoding = load_harmony_encoding(encoding_name)
    expected_text = (
-        (ROOT_DIR / "test-data" / "test_simple_convo.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_simple_convo.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
    )
    expected_tokens = encoding.encode(expected_text, allowed_special="all")
@ -143,7 +145,7 @@ def test_simple_convo_with_effort(encoding_name):
    ]
    for effort, tokens_path, use_instruction in test_cases:
-        expected_text = tokens_path.read_text().rstrip()
+        expected_text = tokens_path.read_text(encoding="utf-8").rstrip()
        expected_tokens = encoding.encode(expected_text, allowed_special="all")
        sys = (
            SystemContent.new()
@ -299,7 +301,7 @@ def test_reasoning_system_message(encoding_name):
    expected_text = (
        (ROOT_DIR / "test-data" / "test_reasoning_system_message.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
        .rstrip()
    )
    expected = encoding.encode(expected_text, allowed_special="all")
@ -336,7 +338,7 @@ def test_reasoning_system_message_no_instruction(encoding_name):
    expected_text = (
        (ROOT_DIR / "test-data" / "test_reasoning_system_message_no_instruction.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
        .rstrip()
    )
    expected = encoding.encode(expected_text, allowed_special="all")
@ -376,7 +378,7 @@ def test_reasoning_system_message_with_dates(encoding_name):
    expected_text = (
        (ROOT_DIR / "test-data" / "test_reasoning_system_message_with_dates.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
        .rstrip()
    )
    expected = encoding.encode(expected_text, allowed_special="all")
@ -409,7 +411,7 @@ def test_render_functions_with_parameters():
    expected_output = (
        (ROOT_DIR / "test-data" / "test_render_functions_with_parameters.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
        .rstrip()
    )
@ -526,7 +528,9 @@ def test_render_functions_with_parameters():
 def test_no_tools():
    encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    expected_output = (
-        (ROOT_DIR / "test-data" / "test_no_tools.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_no_tools.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
    )
    convo = Conversation.from_messages(
@ -546,7 +550,9 @@ def test_no_tools():
 def test_browser_tool_only():
    encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    expected_output = (
-        (ROOT_DIR / "test-data" / "test_browser_tool_only.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_browser_tool_only.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
    )
    convo = Conversation.from_messages(
@ -569,7 +575,7 @@ def test_browser_and_function_tool():
    encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    expected_output = (
        (ROOT_DIR / "test-data" / "test_browser_and_function_tool.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
        .rstrip()
    )
@ -611,7 +617,7 @@ def test_browser_and_python_tool():
    encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    expected_output = (
        (ROOT_DIR / "test-data" / "test_browser_and_python_tool.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
        .rstrip()
    )
@ -637,7 +643,7 @@ def test_dropping_cot_by_default():
    expected_output = (
        (ROOT_DIR / "test-data" / "test_dropping_cot_by_default.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
        .rstrip()
    )
@ -667,7 +673,7 @@ def test_does_not_drop_if_ongoing_analysis():
    expected_output = (
        (ROOT_DIR / "test-data" / "test_does_not_drop_if_ongoing_analysis.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
        .rstrip()
    )
@ -702,7 +708,9 @@ def test_preserve_cot():
    encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    expected_output = (
-        (ROOT_DIR / "test-data" / "test_preserve_cot.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_preserve_cot.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
    )
    convo = Conversation.from_messages(
@ -738,7 +746,7 @@ def test_keep_analysis_between_final_messages():
    expected_output = (
        (ROOT_DIR / "test-data" / "test_keep_analysis_between_finals.txt")
-        .read_text()
+        .read_text(encoding="utf-8")
        .rstrip()
    )
@ -880,7 +888,9 @@ def test_tool_response_parsing():
    encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    text_tokens = (
-        (ROOT_DIR / "test-data" / "test_tool_response_parsing.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_tool_response_parsing.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
    )
    expected_message = (
@ -904,7 +914,9 @@ def test_streamable_parser():
    encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    text_tokens = (
-        (ROOT_DIR / "test-data" / "test_streamable_parser.txt").read_text().rstrip()
+        (ROOT_DIR / "test-data" / "test_streamable_parser.txt")
+        .read_text(encoding="utf-8")
+        .rstrip()
    )
    tokens = encoding.encode(text_tokens, allowed_special="all")