mirror of
https://github.com/openai/harmony.git
synced 2025-08-22 07:17:08 -04:00
Compare commits
3 commits
2387e4ae4f
...
52176bfbec
Author | SHA1 | Date | |
---|---|---|---|
|
52176bfbec | ||
|
72079ca497 | ||
|
8a4645f0f9 |
6 changed files with 61 additions and 4 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -70,3 +70,6 @@ docs/_build/
|
|||
|
||||
# Pyenv
|
||||
.python-version
|
||||
|
||||
# Avoid ignoring shadcn utils
|
||||
!demo/harmony-demo/src/lib
|
||||
|
|
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -1317,7 +1317,7 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|||
|
||||
[[package]]
|
||||
name = "openai-harmony"
|
||||
version = "0.0.3"
|
||||
version = "0.0.4"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "openai-harmony"
|
||||
version = "0.0.3"
|
||||
version = "0.0.4"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/openai/harmony"
|
||||
|
|
6
demo/harmony-demo/src/lib/utils.ts
Normal file
6
demo/harmony-demo/src/lib/utils.ts
Normal file
|
@ -0,0 +1,6 @@
|
|||
import { clsx, type ClassValue } from "clsx"
|
||||
import { twMerge } from "tailwind-merge"
|
||||
|
||||
export function cn(...inputs: ClassValue[]) {
|
||||
return twMerge(clsx(inputs))
|
||||
}
|
|
@ -835,7 +835,23 @@ impl Render<Message> for HarmonyEncoding {
|
|||
|
||||
// finally content type
|
||||
if let Some(content_type) = &message.content_type {
|
||||
self.render_text_into(format!(" {content_type}"), into)?;
|
||||
// <|constrain|> is a unique case which needs to be tokenized as a special token
|
||||
if let Some(constrain_marker) =
|
||||
self.mapped_format_token(FormattingToken::ConstrainedFormat)
|
||||
{
|
||||
if let Some(rest) = content_type.strip_prefix(constrain_marker) {
|
||||
// Render the space, then the constrain marker as a special token, then the rest as text (if any)
|
||||
self.render_text_into(" ", into)?;
|
||||
self.render_formatting_token_into(FormattingToken::ConstrainedFormat, into)?;
|
||||
if !rest.is_empty() {
|
||||
self.render_text_into(rest, into)?;
|
||||
}
|
||||
} else {
|
||||
self.render_text_into(format!(" {content_type}"), into)?;
|
||||
}
|
||||
} else {
|
||||
self.render_text_into(format!(" {content_type}"), into)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.render_formatting_token_into(FormattingToken::Message, into)?;
|
||||
|
|
|
@ -233,6 +233,36 @@ def test_simple_tool_call(encoding_name):
|
|||
assert parsed == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"encoding_name",
|
||||
[
|
||||
HarmonyEncodingName.HARMONY_GPT_OSS,
|
||||
],
|
||||
)
|
||||
def test_tool_call_with_constrain_tokenized_correctly(encoding_name):
|
||||
"""
|
||||
Despite passing <|constrain|> as a string in "content_type" it has to be kept as a special token.
|
||||
"""
|
||||
encoding = load_harmony_encoding(encoding_name)
|
||||
text = (
|
||||
"<|start|>assistant to=functions.get_weather<|channel|>commentary"
|
||||
' <|constrain|>json<|message|>{"location": "Tokyo"}<|call|>'
|
||||
)
|
||||
tokens = encoding.encode(text, allowed_special="all")
|
||||
parsed = encoding.parse_messages_from_completion_tokens(tokens, role=None)
|
||||
expected = [
|
||||
Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
|
||||
.with_channel("commentary")
|
||||
.with_recipient("functions.get_weather")
|
||||
.with_content_type("<|constrain|>json"),
|
||||
]
|
||||
assert parsed == expected
|
||||
|
||||
rendered = encoding.render_conversation(Conversation.from_messages(expected))
|
||||
assert text == encoding.decode_utf8(tokens)
|
||||
assert rendered == tokens
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"encoding_name",
|
||||
[
|
||||
|
@ -248,7 +278,7 @@ def test_tool_call_with_constrain_marker_adjacent(encoding_name):
|
|||
encoding = load_harmony_encoding(encoding_name)
|
||||
text = (
|
||||
"<|start|>assistant to=functions.get_weather<|channel|>commentary"
|
||||
'<|constrain|>json<|message|>{"location": "Tokyo"}<|end|>'
|
||||
'<|constrain|>json<|message|>{"location": "Tokyo"}<|call|>'
|
||||
)
|
||||
tokens = encoding.encode(text, allowed_special="all")
|
||||
parsed = encoding.parse_messages_from_completion_tokens(tokens, role=None)
|
||||
|
@ -702,6 +732,8 @@ def test_does_not_drop_if_ongoing_analysis():
|
|||
)
|
||||
|
||||
assert encoding.decode_utf8(tokens) == expected_output
|
||||
# ensure that <|constrain|>json part is tokenized correctly as special tokens
|
||||
assert encoding.encode(expected_output, allowed_special="all") == tokens
|
||||
|
||||
|
||||
def test_preserve_cot():
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue