mirror of
https://github.com/openai/harmony.git
synced 2025-08-24 04:17:09 -04:00
Fix tokenization of <|constrain|> content type in rendering (#47)
This commit is contained in:
parent
2387e4ae4f
commit
8a4645f0f9
2 changed files with 49 additions and 2 deletions
|
@@ -835,7 +835,22 @@ impl Render<Message> for HarmonyEncoding {
|
|||
|
||||
// finally content type
|
||||
if let Some(content_type) = &message.content_type {
|
||||
self.render_text_into(format!(" {content_type}"), into)?;
|
||||
// <|constrain|> is a unique case which needs to be tokenized as a special token
|
||||
if let Some(constrain_marker) = self.mapped_format_token(FormattingToken::ConstrainedFormat) {
|
||||
if content_type.starts_with(constrain_marker) {
|
||||
// Render the space, then the constrain marker as a special token, then the rest as text (if any)
|
||||
self.render_text_into(" ", into)?;
|
||||
self.render_formatting_token_into(FormattingToken::ConstrainedFormat, into)?;
|
||||
let rest = &content_type[constrain_marker.len()..];
|
||||
if !rest.is_empty() {
|
||||
self.render_text_into(rest, into)?;
|
||||
}
|
||||
} else {
|
||||
self.render_text_into(format!(" {content_type}"), into)?;
|
||||
}
|
||||
} else {
|
||||
self.render_text_into(format!(" {content_type}"), into)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.render_formatting_token_into(FormattingToken::Message, into)?;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue