diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 45b5516..4fcb76f 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -48,6 +48,8 @@ jobs: target: aarch64 - runner: ubuntu-22.04 target: armv7 + - runner: ubuntu-22.04 + target: ppc64le steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/.gitignore b/.gitignore index c8f0442..bab5ca1 100644 --- a/.gitignore +++ b/.gitignore @@ -70,3 +70,6 @@ docs/_build/ # Pyenv .python-version + +# Avoid ignoring shadcn utils +!demo/harmony-demo/src/lib diff --git a/Cargo.lock b/Cargo.lock index 6b774e8..e0b0a71 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1317,7 +1317,7 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "openai-harmony" -version = "0.0.2" +version = "0.0.4" dependencies = [ "anyhow", "base64", diff --git a/Cargo.toml b/Cargo.toml index 12ebc27..23fa1ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "openai-harmony" -version = "0.0.2" +version = "0.0.4" edition = "2021" license = "Apache-2.0" repository = "https://github.com/openai/harmony" diff --git a/README.md b/README.md index d9c5f13..aeef157 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,9 @@ Current date: 2025-06-28 Reasoning: high # Valid channels: analysis, commentary, final. Channel must be included for every message. -Calls to these tools must go to the commentary channel: 'functions'.<|end|><|start|>developer<|message|># Instructions +Calls to these tools must go to the commentary channel: 'functions'.<|end|> + +<|start|>developer<|message|># Instructions Always respond in riddles @@ -149,7 +151,7 @@ through thin [`pyo3`](https://pyo3.rs/) bindings. 
│ ├── tests.rs # Canonical Rust test-suite │ └── py_module.rs # PyO3 bindings ⇒ compiled as openai_harmony.*.so │ -├── harmony/ # Pure-Python wrapper around the binding +├── python/openai_harmony/ # Pure-Python wrapper around the binding │ └── __init__.py # Dataclasses + helper API mirroring chat.rs │ ├── tests/ # Python test-suite (1-to-1 port of tests.rs) diff --git a/demo/harmony-demo/src/lib/utils.ts b/demo/harmony-demo/src/lib/utils.ts new file mode 100644 index 0000000..bd0c391 --- /dev/null +++ b/demo/harmony-demo/src/lib/utils.ts @@ -0,0 +1,6 @@ +import { clsx, type ClassValue } from "clsx" +import { twMerge } from "tailwind-merge" + +export function cn(...inputs: ClassValue[]) { + return twMerge(clsx(inputs)) +} diff --git a/docs/format.md b/docs/format.md index a03aae8..2402406 100644 --- a/docs/format.md +++ b/docs/format.md @@ -52,19 +52,14 @@ encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) system_message = ( SystemContent.new() - .with_model_identity( - "You are ChatGPT, a large language model trained by OpenAI." 
- ) .with_reasoning_effort(ReasoningEffort.HIGH) .with_conversation_start_date("2025-06-28") - .with_knowledge_cutoff("2024-06") - .with_required_channels(["analysis", "commentary", "final"]) ) developer_message = ( DeveloperContent.new() .with_instructions("Always respond in riddles") - .with_tools( + .with_function_tools( [ ToolDescription.new( "get_current_weather", @@ -101,11 +96,11 @@ convo = Conversation.from_messages( Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}') .with_channel("commentary") .with_recipient("functions.get_weather") - .with_content_type("json"), + .with_content_type("<|constrain|>json"), Message.from_author_and_content( Author.new(Role.TOOL, "functions.lookup_weather"), '{ "temperature": 20, "sunny": true }', - ), + ).with_channel("commentary"), ] ) @@ -229,6 +224,8 @@ Once its done generating it will stop with either a `<|return|>` token indicatin The `final` channel will contain the answer to your user’s request. Check out the [reasoning section](#reasoning) for more details on the chain-of-thought. +**Implementation note:** `<|return|>` is a decode-time stop token only. When you add the assistant’s generated reply to conversation history for the next turn, replace the trailing `<|return|>` with `<|end|>` so that stored messages are fully formed as `<|start|>{header}<|message|>{content}<|end|>`. Prior messages in prompts should therefore end with `<|end|>`. For supervised targets/training examples, ending with `<|return|>` is appropriate; for persisted history, normalize to `<|end|>`. + ### System message format The system message is used to provide general information to the system. This is different to what might be considered the “system prompt” in other prompt formats. For that, check out the [developer message format](#developer-message-format).
@@ -305,7 +302,7 @@ And the actual answer is: ``` **Important:** -The model has not been trained to the same safety standards in the chain-of-thought as it has for final output. You should We recommend not to show the chain-of-thought to your users as they might contain harmful content. [Learn more in the model card](https://openai.com/index/gpt-oss-model-card/). +The model has not been trained to the same safety standards in the chain-of-thought as it has for final output. We recommend not showing the chain-of-thought to your users, as it might contain harmful content. [Learn more in the model card](https://openai.com/index/gpt-oss-model-card/). #### Handling reasoning output in subsequent sampling @@ -327,7 +324,7 @@ Then the input for the next sampling should be ``` <|start|>user<|message|>What is 2 + 2?<|end|> -<|start|>assistant<|channel|>final<|message|>2 + 2 = 4.<|return|> +<|start|>assistant<|channel|>final<|message|>2 + 2 = 4.<|end|> <|start|>user<|message|>What about 9 / 2?<|end|> <|start|>assistant ``` diff --git a/python/openai_harmony/__init__.py b/python/openai_harmony/__init__.py index 3485864..33afbd7 100644 --- a/python/openai_harmony/__init__.py +++ b/python/openai_harmony/__init__.py @@ -425,6 +425,10 @@ class RenderConversationConfig(BaseModel): auto_drop_analysis: bool = True +class RenderOptions(BaseModel): + conversation_has_function_tools: bool = False + + class HarmonyEncoding: """High-level wrapper around the Rust ``PyHarmonyEncoding`` class.""" @@ -498,9 +502,20 @@ class HarmonyEncoding: config=config_dict, ) - def render(self, message: Message) -> List[int]: + def render( + self, message: Message, render_options: Optional[RenderOptions] = None + ) -> List[int]: """Render a single message into tokens.""" - return self._inner.render(message_json=message.to_json()) + if render_options is None: + render_options_dict = {"conversation_has_function_tools": False} + else: + render_options_dict = { + "conversation_has_function_tools":
render_options.conversation_has_function_tools + } + + return self._inner.render( + message_json=message.to_json(), render_options=render_options_dict + ) # -- Parsing ------------------------------------------------------- @@ -693,6 +708,7 @@ __all__ = [ "Author", "Content", "TextContent", + "DeveloperContent", "ToolDescription", "SystemContent", "Message", diff --git a/src/encoding.rs b/src/encoding.rs index c58e8b8..6a9305b 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -5,10 +5,7 @@ use crate::{ use anyhow::Context as _; use std::{ collections::{HashMap, HashSet}, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, + sync::Arc, vec, }; @@ -92,7 +89,6 @@ pub struct HarmonyEncoding { pub(crate) format_token_mapping: HashMap, pub(crate) stop_formatting_tokens: HashSet, pub(crate) stop_formatting_tokens_for_assistant_actions: HashSet, - pub(crate) conversation_has_function_tools: Arc, } impl std::fmt::Debug for HarmonyEncoding { @@ -191,8 +187,9 @@ impl HarmonyEncoding { } }) }); - self.conversation_has_function_tools - .store(has_function_tools, Ordering::Relaxed); + let render_options = RenderOptions { + conversation_has_function_tools: has_function_tools, + }; let last_assistant_is_final = messages .iter() .rev() @@ -217,9 +214,7 @@ impl HarmonyEncoding { && first_final_idx.is_some_and(|first| *idx < first) && msg.channel.as_deref() == Some("analysis")) }) - .try_for_each(|(_, msg)| self.render_into(msg, into)); - self.conversation_has_function_tools - .store(false, Ordering::Relaxed); + .try_for_each(|(_, msg)| self.render_into(msg, into, Some(&render_options))); result?; Ok(()) } @@ -305,18 +300,27 @@ impl HarmonyEncoding { } /// Render a single message into tokens. 
- pub fn render(&self, message: &Message) -> anyhow::Result> { + pub fn render( + &self, + message: &Message, + render_options: Option<&RenderOptions>, + ) -> anyhow::Result> { let mut out = vec![]; - Render::::render(self, message, &mut out)?; + Render::::render(self, message, &mut out, render_options)?; Ok(out) } /// Render a single message into the provided buffer. - pub fn render_into(&self, message: &Message, into: &mut B) -> anyhow::Result<()> + pub fn render_into( + &self, + message: &Message, + into: &mut B, + render_options: Option<&RenderOptions>, + ) -> anyhow::Result<()> where B: Extend, { - Render::::render(self, message, into) + Render::::render(self, message, into, render_options) } } @@ -772,14 +776,29 @@ impl HarmonyEncoding { } } +#[derive(Clone, Copy, Debug, Default)] +pub struct RenderOptions { + pub conversation_has_function_tools: bool, +} + trait Render { - fn render(&self, item: &T, into: &mut B) -> anyhow::Result<()> + fn render( + &self, + item: &T, + into: &mut B, + render_options: Option<&RenderOptions>, + ) -> anyhow::Result<()> where B: Extend; } impl Render for HarmonyEncoding { - fn render(&self, message: &Message, into: &mut B) -> anyhow::Result<()> + fn render( + &self, + message: &Message, + into: &mut B, + render_options: Option<&RenderOptions>, + ) -> anyhow::Result<()> where B: Extend, { @@ -816,7 +835,23 @@ impl Render for HarmonyEncoding { // finally content type if let Some(content_type) = &message.content_type { - self.render_text_into(format!(" {content_type}"), into)?; + // <|constrain|> is a unique case which needs to be tokenized as a special token + if let Some(constrain_marker) = + self.mapped_format_token(FormattingToken::ConstrainedFormat) + { + if let Some(rest) = content_type.strip_prefix(constrain_marker) { + // Render the space, then the constrain marker as a special token, then the rest as text (if any) + self.render_text_into(" ", into)?; + self.render_formatting_token_into(FormattingToken::ConstrainedFormat, 
into)?; + if !rest.is_empty() { + self.render_text_into(rest, into)?; + } + } else { + self.render_text_into(format!(" {content_type}"), into)?; + } + } else { + self.render_text_into(format!(" {content_type}"), into)?; + } } self.render_formatting_token_into(FormattingToken::Message, into)?; @@ -836,7 +871,7 @@ impl Render for HarmonyEncoding { message.author.role ); } - Render::::render(self, content, into)?; + Render::::render(self, content, into, render_options)?; } // If there is a tool call we should render a tool call token @@ -851,15 +886,22 @@ impl Render for HarmonyEncoding { // Dispatch Content variants to their specific Render implementations impl Render for HarmonyEncoding { - fn render(&self, content: &Content, into: &mut B) -> anyhow::Result<()> + fn render( + &self, + content: &Content, + into: &mut B, + render_options: Option<&RenderOptions>, + ) -> anyhow::Result<()> where B: Extend, { match content { - Content::Text(text) => Render::::render(self, text, into), - Content::SystemContent(sys) => Render::::render(self, sys, into), + Content::Text(text) => Render::::render(self, text, into, render_options), + Content::SystemContent(sys) => { + Render::::render(self, sys, into, render_options) + } Content::DeveloperContent(dev) => { - Render::::render(self, dev, into) + Render::::render(self, dev, into, render_options) } } } @@ -867,7 +909,12 @@ impl Render for HarmonyEncoding { // Render plain text content impl Render for HarmonyEncoding { - fn render(&self, text: &TextContent, into: &mut B) -> anyhow::Result<()> + fn render( + &self, + text: &TextContent, + into: &mut B, + _render_options: Option<&RenderOptions>, + ) -> anyhow::Result<()> where B: Extend, { @@ -877,7 +924,12 @@ impl Render for HarmonyEncoding { // Render system-specific content (model identity, instructions, effort) impl Render for HarmonyEncoding { - fn render(&self, sys: &SystemContent, into: &mut B) -> anyhow::Result<()> + fn render( + &self, + sys: &SystemContent, + into: &mut B, 
+ render_options: Option<&RenderOptions>, + ) -> anyhow::Result<()> where B: Extend, { @@ -923,7 +975,7 @@ impl Render for HarmonyEncoding { if channel_config.channel_required { channels_header.push_str(" Channel must be included for every message."); } - if self.conversation_has_function_tools.load(Ordering::Relaxed) { + if render_options.is_some_and(|o| o.conversation_has_function_tools) { channels_header.push('\n'); channels_header.push_str( "Calls to these tools must go to the commentary channel: 'functions'.", @@ -940,7 +992,12 @@ impl Render for HarmonyEncoding { // Render developer-specific content (instructions, tools) impl Render for HarmonyEncoding { - fn render(&self, dev: &crate::chat::DeveloperContent, into: &mut B) -> anyhow::Result<()> + fn render( + &self, + dev: &crate::chat::DeveloperContent, + into: &mut B, + _render_options: Option<&RenderOptions>, + ) -> anyhow::Result<()> where B: Extend, { diff --git a/src/py_module.rs b/src/py_module.rs index e7bb9e5..c5c7b0a 100644 --- a/src/py_module.rs +++ b/src/py_module.rs @@ -178,13 +178,29 @@ impl PyHarmonyEncoding { } /// Render a single message into tokens. - fn render(&self, message_json: &str) -> PyResult> { + fn render( + &self, + message_json: &str, + render_options: Option>, + ) -> PyResult> { let message: crate::chat::Message = serde_json::from_str(message_json).map_err(|e| { PyErr::new::(format!("invalid message JSON: {e}")) })?; + let rust_options = if let Some(options_dict) = render_options { + let conversation_has_function_tools = options_dict + .get_item("conversation_has_function_tools")? 
+ .and_then(|v| v.extract().ok()) + .unwrap_or(false); + Some(crate::encoding::RenderOptions { + conversation_has_function_tools, + }) + } else { + None + }; + self.inner - .render(&message) + .render(&message, rust_options.as_ref()) .map_err(|e| PyErr::new::(e.to_string())) } diff --git a/src/registry.rs b/src/registry.rs index 6d8a98f..d1ffd2e 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -1,6 +1,6 @@ use std::{ collections::{HashMap, HashSet}, - sync::{atomic::AtomicBool, Arc}, + sync::Arc, }; use crate::{ @@ -76,7 +76,6 @@ pub fn load_harmony_encoding(name: HarmonyEncodingName) -> anyhow::Result Result, JsValue> { + pub fn render( + &self, + message: JsMessage, + render_options: JsRenderOptions, + ) -> Result, JsValue> { let message: JsValue = message.into(); let message: crate::chat::Message = serde_wasm_bindgen::from_value(message) .map_err(|e| JsValue::from_str(&format!("invalid message JSON: {e}")))?; + + #[derive(Deserialize)] + struct RenderOptions { + conversation_has_function_tools: Option, + } + let render_options: JsValue = render_options.into(); + let rust_options = if render_options.is_undefined() || render_options.is_null() { + None + } else { + let cfg: RenderOptions = serde_wasm_bindgen::from_value(render_options) + .map_err(|e| JsValue::from_str(&format!("invalid render options: {e}")))?; + Some(crate::encoding::RenderOptions { + conversation_has_function_tools: cfg + .conversation_has_function_tools + .unwrap_or(false), + }) + }; + self.inner - .render(&message) + .render(&message, rust_options.as_ref()) .map_err(|e| JsValue::from_str(&e.to_string())) } diff --git a/tests/test_harmony.py b/tests/test_harmony.py index 07d5562..dd34e81 100644 --- a/tests/test_harmony.py +++ b/tests/test_harmony.py @@ -233,6 +233,36 @@ def test_simple_tool_call(encoding_name): assert parsed == expected +@pytest.mark.parametrize( + "encoding_name", + [ + HarmonyEncodingName.HARMONY_GPT_OSS, + ], +) +def 
test_tool_call_with_constrain_tokenized_correctly(encoding_name): + """ + Despite passing <|constrain|> as a string in "content_type" it has to be kept as a special token. + """ + encoding = load_harmony_encoding(encoding_name) + text = ( + "<|start|>assistant to=functions.get_weather<|channel|>commentary" + ' <|constrain|>json<|message|>{"location": "Tokyo"}<|call|>' + ) + tokens = encoding.encode(text, allowed_special="all") + parsed = encoding.parse_messages_from_completion_tokens(tokens, role=None) + expected = [ + Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}') + .with_channel("commentary") + .with_recipient("functions.get_weather") + .with_content_type("<|constrain|>json"), + ] + assert parsed == expected + + rendered = encoding.render_conversation(Conversation.from_messages(expected)) + assert text == encoding.decode_utf8(tokens) + assert rendered == tokens + + @pytest.mark.parametrize( "encoding_name", [ @@ -248,7 +278,7 @@ def test_tool_call_with_constrain_marker_adjacent(encoding_name): encoding = load_harmony_encoding(encoding_name) text = ( "<|start|>assistant to=functions.get_weather<|channel|>commentary" - '<|constrain|>json<|message|>{"location": "Tokyo"}<|end|>' + '<|constrain|>json<|message|>{"location": "Tokyo"}<|call|>' ) tokens = encoding.encode(text, allowed_special="all") parsed = encoding.parse_messages_from_completion_tokens(tokens, role=None) @@ -702,6 +732,8 @@ def test_does_not_drop_if_ongoing_analysis(): ) assert encoding.decode_utf8(tokens) == expected_output + # ensure that <|constrain|>json part is tokenized correctly as special tokens + assert encoding.encode(expected_output, allowed_special="all") == tokens def test_preserve_cot():