mirror of
https://github.com/openai/harmony.git
synced 2025-08-23 01:17:09 -04:00
951 lines
31 KiB
Python
951 lines
31 KiB
Python
"""Port of the original Rust test-suite to Python.
|
|
|
|
The tests mirror the scenarios from ``src/tests.rs`` and exercise the public
|
|
Python API. They ensure that the bindings give byte-for-byte identical output
|
|
to the canonical Rust implementation.
|
|
"""
|
|
|
|
# ruff: noqa: E402 # postpone imports until path manipulation is done
|
|
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import List
|
|
|
|
# Ensure that the project root is on *sys.path* so that ``import harmony``
|
|
# picks up the local Python package during test execution (pytest changes the
|
|
# working directory which would otherwise hide the module).
|
|
ROOT_DIR = Path(__file__).resolve().parent.parent
|
|
if str(ROOT_DIR) not in sys.path:
|
|
sys.path.insert(0, str(ROOT_DIR))
|
|
|
|
import pytest # noqa: E402
|
|
from openai_harmony import ( # noqa: E402
|
|
Author,
|
|
Conversation,
|
|
DeveloperContent,
|
|
HarmonyEncodingName,
|
|
HarmonyError,
|
|
Message,
|
|
ReasoningEffort,
|
|
RenderConversationConfig,
|
|
Role,
|
|
StreamableParser,
|
|
SystemContent,
|
|
ToolDescription,
|
|
load_harmony_encoding,
|
|
)
|
|
from pydantic import ValidationError
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helper
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _assert_tokens_eq(encoding, expected: List[int], actual: List[int]): # type: ignore[arg-type]
|
|
"""Mimic the pretty-assertions output from the Rust test-suite."""
|
|
|
|
if expected != actual:
|
|
exp_decoded = encoding.decode_utf8(expected)
|
|
act_decoded = encoding.decode_utf8(actual)
|
|
raise AssertionError(
|
|
"tokens are not equal.\n\n"
|
|
"Tokens (< expected / actual >):\n"
|
|
f"{expected}\n{actual}\n\n"
|
|
"Decoded (< expected / actual >):\n"
|
|
f"{exp_decoded!r}\n{act_decoded!r}"
|
|
)
|
|
|
|
|
|
def read_expected_tokens(file_path: Path) -> List[int]:
|
|
with open(file_path, "r", encoding="utf-8") as f:
|
|
return [int(x) for x in f.read().split()]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests (1-1 port from the Rust side)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"encoding_name",
|
|
[
|
|
HarmonyEncodingName.HARMONY_GPT_OSS,
|
|
],
|
|
)
|
|
def test_simple_convo(encoding_name):
|
|
encoding = load_harmony_encoding(encoding_name)
|
|
|
|
expected_text = (
|
|
(ROOT_DIR / "test-data" / "test_simple_convo.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
expected_tokens = encoding.encode(expected_text, allowed_special="all")
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(
|
|
Role.SYSTEM,
|
|
SystemContent.new().with_model_identity(
|
|
"You are ChatGPT, a large language model trained by OpenAI."
|
|
),
|
|
),
|
|
Message.from_role_and_content(Role.USER, "What is 2 + 2?"),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(convo, Role.ASSISTANT)
|
|
|
|
_assert_tokens_eq(encoding, expected_tokens, tokens)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"encoding_name",
|
|
[
|
|
HarmonyEncodingName.HARMONY_GPT_OSS,
|
|
],
|
|
)
|
|
def test_simple_convo_with_effort(encoding_name):
|
|
encoding = load_harmony_encoding(encoding_name)
|
|
test_cases = [
|
|
(
|
|
ReasoningEffort.LOW,
|
|
ROOT_DIR / "test-data" / "test_simple_convo_low_effort.txt",
|
|
True,
|
|
),
|
|
(
|
|
ReasoningEffort.MEDIUM,
|
|
ROOT_DIR / "test-data" / "test_simple_convo_medium_effort.txt",
|
|
True,
|
|
),
|
|
(
|
|
ReasoningEffort.HIGH,
|
|
ROOT_DIR / "test-data" / "test_simple_convo_high_effort.txt",
|
|
True,
|
|
),
|
|
(
|
|
ReasoningEffort.LOW,
|
|
ROOT_DIR / "test-data" / "test_simple_convo_low_effort_no_instruction.txt",
|
|
False,
|
|
),
|
|
(
|
|
ReasoningEffort.MEDIUM,
|
|
ROOT_DIR
|
|
/ "test-data"
|
|
/ "test_simple_convo_medium_effort_no_instruction.txt",
|
|
False,
|
|
),
|
|
(
|
|
ReasoningEffort.HIGH,
|
|
ROOT_DIR / "test-data" / "test_simple_convo_high_effort_no_instruction.txt",
|
|
False,
|
|
),
|
|
]
|
|
|
|
for effort, tokens_path, use_instruction in test_cases:
|
|
expected_text = tokens_path.read_text(encoding="utf-8").rstrip()
|
|
expected_tokens = encoding.encode(expected_text, allowed_special="all")
|
|
sys = (
|
|
SystemContent.new()
|
|
.with_model_identity(
|
|
"You are ChatGPT, a large language model trained by OpenAI."
|
|
)
|
|
.with_reasoning_effort(effort)
|
|
)
|
|
messages = [Message.from_role_and_content(Role.SYSTEM, sys)]
|
|
if use_instruction:
|
|
dev = DeveloperContent.new().with_instructions(
|
|
"Answer the user's questions like a robot."
|
|
)
|
|
messages.append(Message.from_role_and_content(Role.DEVELOPER, dev))
|
|
messages.append(
|
|
Message.from_role_and_content(
|
|
Role.USER,
|
|
"What is the capital of the largest country in the world?",
|
|
)
|
|
)
|
|
convo = Conversation.from_messages(messages)
|
|
tokens = encoding.render_conversation_for_completion(convo, Role.ASSISTANT)
|
|
_assert_tokens_eq(encoding, expected_tokens, tokens)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"encoding_name",
|
|
[
|
|
HarmonyEncodingName.HARMONY_GPT_OSS,
|
|
],
|
|
)
|
|
def test_simple_reasoning_response(encoding_name):
|
|
encoding = load_harmony_encoding(encoding_name)
|
|
|
|
expected_tokens = read_expected_tokens(
|
|
ROOT_DIR / "test-data" / "test_simple_reasoning_response.txt"
|
|
)
|
|
|
|
messages = encoding.parse_messages_from_completion_tokens(
|
|
expected_tokens, role=Role.ASSISTANT
|
|
)
|
|
|
|
expected = [
|
|
Message.from_role_and_content(
|
|
Role.ASSISTANT,
|
|
'User asks: "What is 2 + 2?" Simple arithmetic. Provide answer.',
|
|
).with_channel("analysis"),
|
|
Message.from_role_and_content(Role.ASSISTANT, "2 + 2 = 4.").with_channel(
|
|
"final"
|
|
),
|
|
]
|
|
|
|
assert messages == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"encoding_name",
|
|
[
|
|
HarmonyEncodingName.HARMONY_GPT_OSS,
|
|
],
|
|
)
|
|
def test_simple_tool_call(encoding_name):
|
|
encoding = load_harmony_encoding(encoding_name)
|
|
|
|
response = read_expected_tokens(
|
|
ROOT_DIR / "test-data" / "test_simple_tool_call.txt"
|
|
)
|
|
|
|
parsed = encoding.parse_messages_from_completion_tokens(
|
|
response,
|
|
role=Role.ASSISTANT,
|
|
)
|
|
|
|
expected = [
|
|
Message.from_role_and_content(
|
|
Role.ASSISTANT,
|
|
'User asks: "What is the weather in Tokyo?" We need to use lookup_weather tool.',
|
|
).with_channel("analysis"),
|
|
Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
|
|
.with_channel("analysis")
|
|
.with_recipient("lookup_weather")
|
|
.with_content_type("code"),
|
|
]
|
|
|
|
assert parsed == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"encoding_name",
|
|
[
|
|
HarmonyEncodingName.HARMONY_GPT_OSS,
|
|
],
|
|
)
|
|
def test_tool_call_with_constrain_marker_adjacent(encoding_name):
|
|
"""
|
|
There are moments where the model might not output a space before constrain resulting in the
|
|
content type being parsed as part of the recipient. This test ensures that we handle this case
|
|
correctly and instead handle it as a separate content type.
|
|
"""
|
|
encoding = load_harmony_encoding(encoding_name)
|
|
text = (
|
|
"<|start|>assistant to=functions.get_weather<|channel|>commentary"
|
|
'<|constrain|>json<|message|>{"location": "Tokyo"}<|end|>'
|
|
)
|
|
tokens = encoding.encode(text, allowed_special="all")
|
|
parsed = encoding.parse_messages_from_completion_tokens(tokens, role=None)
|
|
expected = [
|
|
Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
|
|
.with_channel("commentary")
|
|
.with_recipient("functions.get_weather")
|
|
.with_content_type("<|constrain|>json"),
|
|
]
|
|
assert parsed == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"encoding_name",
|
|
[
|
|
HarmonyEncodingName.HARMONY_GPT_OSS,
|
|
],
|
|
)
|
|
def test_tool_call_with_channel_before_recipient_and_constrain_adjacent(
|
|
encoding_name,
|
|
):
|
|
encoding = load_harmony_encoding(encoding_name)
|
|
|
|
text = (
|
|
"<|start|>assistant<|channel|>commentary to=functions.get_weather"
|
|
'<|constrain|>json<|message|>{"latitude":48.8566,"longitude":2.3522}<|call|>'
|
|
)
|
|
tokens = encoding.encode(text, allowed_special="all")
|
|
parsed = encoding.parse_messages_from_completion_tokens(tokens, role=None)
|
|
expected = [
|
|
Message.from_role_and_content(
|
|
Role.ASSISTANT, '{"latitude":48.8566,"longitude":2.3522}'
|
|
)
|
|
.with_channel("commentary")
|
|
.with_recipient("functions.get_weather")
|
|
.with_content_type("<|constrain|>json"),
|
|
]
|
|
|
|
assert parsed == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"encoding_name",
|
|
[
|
|
HarmonyEncodingName.HARMONY_GPT_OSS,
|
|
],
|
|
)
|
|
def test_reasoning_system_message(encoding_name):
|
|
encoding = load_harmony_encoding(encoding_name)
|
|
|
|
expected_text = (
|
|
(ROOT_DIR / "test-data" / "test_reasoning_system_message.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
expected = encoding.encode(expected_text, allowed_special="all")
|
|
|
|
sys = (
|
|
SystemContent.new()
|
|
.with_model_identity(
|
|
"You are ChatGPT, a large language model trained by OpenAI."
|
|
)
|
|
.with_reasoning_effort(ReasoningEffort.MEDIUM)
|
|
.with_required_channels(["analysis", "final"])
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(Role.SYSTEM, sys),
|
|
Message.from_role_and_content(Role.USER, "What is 2 + 2?"),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(convo, Role.ASSISTANT)
|
|
|
|
_assert_tokens_eq(encoding, expected, tokens)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"encoding_name",
|
|
[
|
|
HarmonyEncodingName.HARMONY_GPT_OSS,
|
|
],
|
|
)
|
|
def test_reasoning_system_message_no_instruction(encoding_name):
|
|
encoding = load_harmony_encoding(encoding_name)
|
|
|
|
expected_text = (
|
|
(ROOT_DIR / "test-data" / "test_reasoning_system_message_no_instruction.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
expected = encoding.encode(expected_text, allowed_special="all")
|
|
|
|
sys = (
|
|
SystemContent.new()
|
|
.with_model_identity(
|
|
"You are ChatGPT, a large language model trained by OpenAI."
|
|
)
|
|
.with_reasoning_effort(ReasoningEffort.HIGH)
|
|
.with_required_channels(["analysis", "final"])
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(Role.SYSTEM, sys),
|
|
Message.from_role_and_content(
|
|
Role.USER,
|
|
"What is the best place to eat candy in the world?",
|
|
),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(convo, Role.ASSISTANT)
|
|
|
|
_assert_tokens_eq(encoding, expected, tokens)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"encoding_name",
|
|
[
|
|
HarmonyEncodingName.HARMONY_GPT_OSS,
|
|
],
|
|
)
|
|
def test_reasoning_system_message_with_dates(encoding_name):
|
|
encoding = load_harmony_encoding(encoding_name)
|
|
|
|
expected_text = (
|
|
(ROOT_DIR / "test-data" / "test_reasoning_system_message_with_dates.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
expected = encoding.encode(expected_text, allowed_special="all")
|
|
|
|
sys = (
|
|
SystemContent.new()
|
|
.with_model_identity(
|
|
"You are ChatGPT, a large language model trained by OpenAI."
|
|
)
|
|
.with_reasoning_effort(ReasoningEffort.MEDIUM)
|
|
.with_conversation_start_date("2021-01-01")
|
|
.with_knowledge_cutoff("2021-01")
|
|
.with_required_channels(["analysis", "final"])
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(Role.SYSTEM, sys),
|
|
Message.from_role_and_content(Role.USER, "What is 42 * pi?"),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(convo, Role.ASSISTANT)
|
|
|
|
_assert_tokens_eq(encoding, expected, tokens)
|
|
|
|
|
|
def test_render_functions_with_parameters():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
expected_output = (
|
|
(ROOT_DIR / "test-data" / "test_render_functions_with_parameters.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
sys = (
|
|
SystemContent.new()
|
|
.with_reasoning_effort(ReasoningEffort.HIGH)
|
|
.with_conversation_start_date("2025-06-28")
|
|
)
|
|
|
|
dev = (
|
|
DeveloperContent.new()
|
|
.with_instructions("Always respond in riddles")
|
|
.with_function_tools(
|
|
[
|
|
ToolDescription.new(
|
|
"get_location",
|
|
"Gets the location of the user.",
|
|
),
|
|
ToolDescription.new(
|
|
"get_current_weather",
|
|
"Gets the current weather in the provided location.",
|
|
parameters={
|
|
"type": "object",
|
|
"properties": {
|
|
"location": {
|
|
"type": "string",
|
|
"description": "The city and state, e.g. San Francisco, CA",
|
|
},
|
|
"format": {
|
|
"type": "string",
|
|
"enum": ["celsius", "fahrenheit"],
|
|
"default": "celsius",
|
|
},
|
|
},
|
|
"required": ["location"],
|
|
},
|
|
),
|
|
ToolDescription.new(
|
|
"get_multiple_weathers",
|
|
"Gets the current weather in the provided list of locations.",
|
|
parameters={
|
|
"type": "object",
|
|
"properties": {
|
|
"locations": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string",
|
|
},
|
|
"description": 'List of city and state, e.g. ["San Francisco, CA", "New York, NY"]',
|
|
},
|
|
"format": {
|
|
"type": "string",
|
|
"enum": ["celsius", "fahrenheit"],
|
|
"default": "celsius",
|
|
},
|
|
},
|
|
"required": ["locations"],
|
|
},
|
|
),
|
|
ToolDescription.new(
|
|
"kitchensink",
|
|
"A function with various complex schemas.",
|
|
parameters={
|
|
"description": "params object",
|
|
"type": "object",
|
|
"properties": {
|
|
"string": {
|
|
"type": "string",
|
|
"title": "STRING",
|
|
"description": "A string",
|
|
"examples": ["hello", "world"],
|
|
},
|
|
"string_nullable": {
|
|
"type": "string",
|
|
"nullable": True,
|
|
"description": "A nullable string",
|
|
"default": "the default",
|
|
},
|
|
"string_enum": {"type": "string", "enum": ["a", "b", "c"]},
|
|
"oneof_string_or_number": {
|
|
"oneOf": [
|
|
{
|
|
"type": "string",
|
|
"default": "default_string_in_oneof",
|
|
},
|
|
{
|
|
"type": "number",
|
|
"description": "numbers can happen too",
|
|
},
|
|
],
|
|
"description": "a oneof",
|
|
"default": 20,
|
|
},
|
|
},
|
|
},
|
|
),
|
|
]
|
|
)
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(Role.SYSTEM, sys),
|
|
Message.from_role_and_content(Role.DEVELOPER, dev),
|
|
Message.from_role_and_content(Role.USER, "What is the weather like in SF?"),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(convo, Role.ASSISTANT)
|
|
|
|
assert encoding.decode_utf8(tokens) == expected_output
|
|
|
|
|
|
def test_no_tools():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
expected_output = (
|
|
(ROOT_DIR / "test-data" / "test_no_tools.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(
|
|
Role.SYSTEM,
|
|
SystemContent.new().with_conversation_start_date("2025-06-28"),
|
|
),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(convo, Role.ASSISTANT)
|
|
|
|
assert encoding.decode_utf8(tokens) == expected_output
|
|
|
|
|
|
def test_browser_tool_only():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
expected_output = (
|
|
(ROOT_DIR / "test-data" / "test_browser_tool_only.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(
|
|
Role.SYSTEM,
|
|
SystemContent.new()
|
|
.with_conversation_start_date("2025-06-28")
|
|
.with_browser_tool(),
|
|
),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(convo, Role.ASSISTANT)
|
|
|
|
assert encoding.decode_utf8(tokens) == expected_output
|
|
|
|
|
|
def test_browser_and_function_tool():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
expected_output = (
|
|
(ROOT_DIR / "test-data" / "test_browser_and_function_tool.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(
|
|
Role.SYSTEM,
|
|
SystemContent.new()
|
|
.with_conversation_start_date("2025-06-28")
|
|
.with_browser_tool(),
|
|
),
|
|
Message.from_role_and_content(
|
|
Role.DEVELOPER,
|
|
DeveloperContent.new().with_function_tools(
|
|
[
|
|
ToolDescription.new(
|
|
"lookup_weather",
|
|
"Use this tool to lookup the weather in a given location. Call it with the parameter 'location', can be any textual description of a location.",
|
|
parameters={
|
|
"type": "object",
|
|
"properties": {
|
|
"location": {"type": "string"},
|
|
},
|
|
"required": ["location"],
|
|
},
|
|
)
|
|
]
|
|
),
|
|
),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(convo, Role.ASSISTANT)
|
|
|
|
assert encoding.decode_utf8(tokens) == expected_output
|
|
|
|
|
|
def test_browser_and_python_tool():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
expected_output = (
|
|
(ROOT_DIR / "test-data" / "test_browser_and_python_tool.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(
|
|
Role.SYSTEM,
|
|
SystemContent.new()
|
|
.with_conversation_start_date("2025-06-28")
|
|
.with_browser_tool()
|
|
.with_python_tool(),
|
|
),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(convo, Role.ASSISTANT)
|
|
|
|
assert encoding.decode_utf8(tokens) == expected_output
|
|
|
|
|
|
def test_dropping_cot_by_default():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
expected_output = (
|
|
(ROOT_DIR / "test-data" / "test_dropping_cot_by_default.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(Role.USER, "What is 2 + 2?"),
|
|
Message.from_role_and_content(
|
|
Role.ASSISTANT,
|
|
"User asks: “What is 2 + 2?” Simple arithmetic. Provide answer.",
|
|
).with_channel("analysis"),
|
|
Message.from_role_and_content(
|
|
Role.ASSISTANT, "2 + 2 equals 4."
|
|
).with_channel("final"),
|
|
Message.from_role_and_content(Role.USER, "What about 9 / 2?"),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(
|
|
convo, Role.ASSISTANT, RenderConversationConfig(auto_drop_analysis=True)
|
|
)
|
|
|
|
assert encoding.decode_utf8(tokens) == expected_output
|
|
|
|
|
|
def test_does_not_drop_if_ongoing_analysis():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
expected_output = (
|
|
(ROOT_DIR / "test-data" / "test_does_not_drop_if_ongoing_analysis.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(Role.USER, "What is the weather in SF?"),
|
|
Message.from_role_and_content(
|
|
Role.ASSISTANT,
|
|
"User asks: “What is the weather in SF?” We need to use lookup_weather tool.",
|
|
).with_channel("analysis"),
|
|
Message.from_role_and_content(
|
|
Role.ASSISTANT, '{"location": "San Francisco"}'
|
|
)
|
|
.with_channel("commentary")
|
|
.with_recipient("functions.lookup_weather")
|
|
.with_content_type("<|constrain|>json"),
|
|
Message.from_author_and_content(
|
|
Author.new(Role.TOOL, "functions.lookup_weather"),
|
|
'{"temperature": 20, "description": "sunny"}',
|
|
),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(
|
|
convo, Role.ASSISTANT, RenderConversationConfig(auto_drop_analysis=True)
|
|
)
|
|
|
|
assert encoding.decode_utf8(tokens) == expected_output
|
|
|
|
|
|
def test_preserve_cot():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
expected_output = (
|
|
(ROOT_DIR / "test-data" / "test_preserve_cot.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(Role.USER, "What is 2 + 2?"),
|
|
Message.from_role_and_content(
|
|
Role.ASSISTANT,
|
|
'User asks a simple question: "What is 2 + 2?" The answer: 4.',
|
|
).with_channel("analysis"),
|
|
Message.from_role_and_content(
|
|
Role.ASSISTANT, "2 + 2 equals 4."
|
|
).with_channel("final"),
|
|
Message.from_role_and_content(Role.USER, "What about 9 / 2?"),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation_for_completion(
|
|
convo, Role.ASSISTANT, RenderConversationConfig(auto_drop_analysis=False)
|
|
)
|
|
|
|
assert encoding.decode_utf8(tokens) == expected_output
|
|
|
|
|
|
def test_reserved_token_decoding():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
assert encoding.decode_utf8([200014]) == "<|reserved_200014|>"
|
|
assert encoding.decode_utf8([201088]) == "<|reserved_201088|>"
|
|
|
|
|
|
def test_keep_analysis_between_final_messages():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
expected_output = (
|
|
(ROOT_DIR / "test-data" / "test_keep_analysis_between_finals.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(Role.USER, "What is 2 + 2?"),
|
|
Message.from_role_and_content(Role.ASSISTANT, "thinking 2+2").with_channel(
|
|
"analysis"
|
|
),
|
|
Message.from_role_and_content(Role.ASSISTANT, "4").with_channel("final"),
|
|
Message.from_role_and_content(Role.USER, "What is 3 + 5?"),
|
|
Message.from_role_and_content(Role.ASSISTANT, "thinking 3+5").with_channel(
|
|
"analysis"
|
|
),
|
|
Message.from_role_and_content(Role.ASSISTANT, "8").with_channel("final"),
|
|
]
|
|
)
|
|
|
|
tokens = encoding.render_conversation(convo)
|
|
|
|
assert encoding.decode_utf8(tokens) == expected_output
|
|
|
|
|
|
def test_render_and_render_conversation_roundtrip():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
msg = Message.from_role_and_content(Role.USER, "Hello")
|
|
convo = Conversation.from_messages([msg])
|
|
|
|
tokens_msg = encoding.render(msg)
|
|
tokens_convo = encoding.render_conversation(convo)
|
|
assert tokens_msg == tokens_convo
|
|
|
|
tokens_completion = encoding.render_conversation_for_completion(
|
|
convo, Role.ASSISTANT
|
|
)
|
|
assert tokens_completion[: len(tokens_convo)] == tokens_convo
|
|
|
|
|
|
def test_render_conversation_for_training_final_channel():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
convo = Conversation.from_messages(
|
|
[
|
|
Message.from_role_and_content(Role.USER, "hi"),
|
|
Message.from_role_and_content(Role.ASSISTANT, "hello").with_channel(
|
|
"final"
|
|
),
|
|
]
|
|
)
|
|
|
|
tokens_training = encoding.render_conversation_for_training(convo)
|
|
tokens_regular = encoding.render_conversation(convo)
|
|
token_return = encoding.encode("<|return|>", allowed_special={"<|return|>"})[0]
|
|
token_end = encoding.encode("<|end|>", allowed_special={"<|end|>"})[0]
|
|
|
|
assert tokens_regular[:-1] == tokens_training[:-1]
|
|
assert tokens_regular[-1] == token_end
|
|
assert tokens_training[-1] == token_return
|
|
|
|
|
|
def test_render_conversation_for_training_non_final():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
convo = Conversation.from_messages([Message.from_role_and_content(Role.USER, "hi")])
|
|
|
|
tokens_training = encoding.render_conversation_for_training(convo)
|
|
tokens_regular = encoding.render_conversation(convo)
|
|
|
|
assert tokens_training == tokens_regular
|
|
|
|
|
|
def test_decode_utf8_invalid_token():
|
|
"""Invalid tokens should raise an exception (type doesn't matter)."""
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
with pytest.raises(HarmonyError, match="Invalid token for decoding: 99999999"):
|
|
encoding.decode_utf8([99999999])
|
|
|
|
with pytest.raises(
|
|
ValidationError,
|
|
match="Input should be a valid dictionary or instance of Message",
|
|
):
|
|
encoding.render_conversation_for_completion(
|
|
Conversation.from_messages([SystemContent.new()]),
|
|
Role.ASSISTANT,
|
|
)
|
|
|
|
|
|
def test_encode_decode_roundtrip():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
assert encoding.decode_utf8(encoding.encode("hello world")) == "hello world"
|
|
assert encoding.decode(encoding.encode("hello world")) == "hello world"
|
|
|
|
|
|
def test_encode_allowed_special():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
assert encoding.encode("hello world") == [24912, 2375]
|
|
assert encoding.encode("<|start|>", allowed_special={"<|start|>"}) == [200006]
|
|
assert encoding.encode("<|start|>", allowed_special="all") == [200006]
|
|
|
|
with pytest.raises(
|
|
HarmonyError, match="Encountered text corresponding to disallowed special token"
|
|
):
|
|
encoding.encode("<|start|>")
|
|
|
|
assert encoding.encode("<|start|>", disallowed_special=()) == [
|
|
27,
|
|
91,
|
|
5236,
|
|
91,
|
|
29,
|
|
]
|
|
|
|
|
|
def test_is_special_token():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
assert encoding.is_special_token(200006) # <|start|>
|
|
assert not encoding.is_special_token(24912) # hello
|
|
|
|
|
|
def test_invalid_utf8_decoding():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
tokens = [132990, 9552]
|
|
|
|
with pytest.raises(HarmonyError, match="Invalid utf-8"):
|
|
# This will raise an error because the tokens are invalid utf-8
|
|
encoding.decode_utf8(tokens)
|
|
|
|
# This will not raise an error because it will replace the invalid utf-8 characters to not raise an error
|
|
# to match the behavior of tiktoken
|
|
assert "Chicken" in encoding.decode(tokens)
|
|
|
|
|
|
def test_tool_response_parsing():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
text_tokens = (
|
|
(ROOT_DIR / "test-data" / "test_tool_response_parsing.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
expected_message = (
|
|
Message.from_author_and_content(
|
|
Author.new(Role.TOOL, "browser.search"),
|
|
'{"result": "https://openai.com/"}',
|
|
)
|
|
.with_channel("commentary")
|
|
.with_recipient("assistant")
|
|
)
|
|
|
|
output_tokens = encoding.render(expected_message)
|
|
output_tokens = output_tokens[:-1] # remove the <|end|> token
|
|
|
|
messages = encoding.parse_messages_from_completion_tokens(output_tokens, None)
|
|
assert len(messages) == 1
|
|
assert encoding.decode_utf8(output_tokens) == text_tokens
|
|
|
|
|
|
def test_streamable_parser():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
text_tokens = (
|
|
(ROOT_DIR / "test-data" / "test_streamable_parser.txt")
|
|
.read_text(encoding="utf-8")
|
|
.rstrip()
|
|
)
|
|
|
|
tokens = encoding.encode(text_tokens, allowed_special="all")
|
|
parser = StreamableParser(encoding, Role.ASSISTANT)
|
|
for token in tokens:
|
|
parser.process(token)
|
|
assert len(parser.messages) == 3
|
|
|
|
|
|
def test_streamable_parser_tool_call_with_constrain_adjacent():
|
|
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
text = (
|
|
"<|start|>assistant<|channel|>commentary to=functions.get_weather"
|
|
'<|constrain|>json<|message|>{"latitude":48.8566,"longitude":2.3522}<|call|>'
|
|
)
|
|
|
|
tokens = encoding.encode(text, allowed_special="all")
|
|
parser = StreamableParser(encoding, None)
|
|
for token in tokens:
|
|
parser.process(token)
|
|
|
|
expected = [
|
|
Message.from_role_and_content(
|
|
Role.ASSISTANT, '{"latitude":48.8566,"longitude":2.3522}'
|
|
)
|
|
.with_channel("commentary")
|
|
.with_recipient("functions.get_weather")
|
|
.with_content_type("<|constrain|>json"),
|
|
]
|
|
|
|
assert parser.messages == expected
|