Compare commits


No commits in common. "52176bfbec8f1d5e876b9793626a4153c71b3d4a" and "2387e4ae4f5343d4428ad8f6b005e97f9c804426" have entirely different histories.

6 changed files with 4 additions and 61 deletions

.gitignore

@@ -70,6 +70,3 @@ docs/_build/
 # Pyenv
 .python-version
-# Avoid ignoring shadcn utils
-!demo/harmony-demo/src/lib

Cargo.lock

@@ -1317,7 +1317,7 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
 [[package]]
 name = "openai-harmony"
-version = "0.0.4"
+version = "0.0.3"
 dependencies = [
  "anyhow",
  "base64",

Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "openai-harmony"
-version = "0.0.4"
+version = "0.0.3"
 edition = "2021"
 license = "Apache-2.0"
 repository = "https://github.com/openai/harmony"

demo/harmony-demo/src/lib/utils.ts

@@ -1,6 +0,0 @@
-import { clsx, type ClassValue } from "clsx"
-import { twMerge } from "tailwind-merge"
-
-export function cn(...inputs: ClassValue[]) {
-  return twMerge(clsx(inputs))
-}

src/encoding.rs

@@ -835,23 +835,7 @@ impl Render<Message> for HarmonyEncoding {
         // finally content type
         if let Some(content_type) = &message.content_type {
-            // <|constrain|> is a unique case which needs to be tokenized as a special token
-            if let Some(constrain_marker) =
-                self.mapped_format_token(FormattingToken::ConstrainedFormat)
-            {
-                if let Some(rest) = content_type.strip_prefix(constrain_marker) {
-                    // Render the space, then the constrain marker as a special token, then the rest as text (if any)
-                    self.render_text_into(" ", into)?;
-                    self.render_formatting_token_into(FormattingToken::ConstrainedFormat, into)?;
-                    if !rest.is_empty() {
-                        self.render_text_into(rest, into)?;
-                    }
-                } else {
-                    self.render_text_into(format!(" {content_type}"), into)?;
-                }
-            } else {
-                self.render_text_into(format!(" {content_type}"), into)?;
-            }
+            self.render_text_into(format!(" {content_type}"), into)?;
         }
         self.render_formatting_token_into(FormattingToken::Message, into)?;
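
For context on the hunk above: the branch being removed special-cases a content_type that starts with the <|constrain|> marker, emitting the marker as a single special token, while the fallback that remains renders the whole content_type as plain text. Below is a minimal sketch of the observable difference, written only against the Python binding names that already appear in the tests in this diff; which result you see depends on which side of this comparison is installed.

from openai_harmony import (
    Conversation,
    HarmonyEncodingName,
    Message,
    Role,
    load_harmony_encoding,
)

encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)

# Assistant tool call whose content_type carries the constrain marker.
msg = (
    Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
    .with_channel("commentary")
    .with_recipient("functions.get_weather")
    .with_content_type("<|constrain|>json")
)

# Render, decode back to text, then re-encode with special tokens allowed.
rendered = encoding.render_conversation(Conversation.from_messages([msg]))
text = encoding.decode_utf8(rendered)
round_trip = encoding.encode(text, allowed_special="all")

# With the removed branch in place, <|constrain|> is rendered as one special
# token, so the round trip reproduces the rendered tokens exactly; with only
# the plain-text fallback, the marker is rendered as ordinary text and the two
# token sequences are not guaranteed to match.
print(round_trip == rendered)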

tests/test_harmony.py

@@ -233,36 +233,6 @@ def test_simple_tool_call(encoding_name):
     assert parsed == expected
-@pytest.mark.parametrize(
-    "encoding_name",
-    [
-        HarmonyEncodingName.HARMONY_GPT_OSS,
-    ],
-)
-def test_tool_call_with_constrain_tokenized_correctly(encoding_name):
-    """
-    Despite passing <|constrain|> as a string in "content_type" it has to be kept as a special token.
-    """
-    encoding = load_harmony_encoding(encoding_name)
-    text = (
-        "<|start|>assistant to=functions.get_weather<|channel|>commentary"
-        ' <|constrain|>json<|message|>{"location": "Tokyo"}<|call|>'
-    )
-    tokens = encoding.encode(text, allowed_special="all")
-    parsed = encoding.parse_messages_from_completion_tokens(tokens, role=None)
-    expected = [
-        Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
-        .with_channel("commentary")
-        .with_recipient("functions.get_weather")
-        .with_content_type("<|constrain|>json"),
-    ]
-    assert parsed == expected
-    rendered = encoding.render_conversation(Conversation.from_messages(expected))
-    assert text == encoding.decode_utf8(tokens)
-    assert rendered == tokens
 @pytest.mark.parametrize(
     "encoding_name",
     [
@@ -278,7 +248,7 @@ def test_tool_call_with_constrain_marker_adjacent(encoding_name):
     encoding = load_harmony_encoding(encoding_name)
     text = (
         "<|start|>assistant to=functions.get_weather<|channel|>commentary"
-        '<|constrain|>json<|message|>{"location": "Tokyo"}<|call|>'
+        '<|constrain|>json<|message|>{"location": "Tokyo"}<|end|>'
     )
     tokens = encoding.encode(text, allowed_special="all")
     parsed = encoding.parse_messages_from_completion_tokens(tokens, role=None)
@@ -732,8 +702,6 @@ def test_does_not_drop_if_ongoing_analysis():
     )
     assert encoding.decode_utf8(tokens) == expected_output
-    # ensure that <|constrain|>json part is tokenized correctly as special tokens
-    assert encoding.encode(expected_output, allowed_special="all") == tokens
 def test_preserve_cot():