pebble/tools/font/fontgen.py
#!/usr/bin/env python
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import freetype
import os
import re
import struct
import sys
import itertools
import json
from math import ceil
sys.path.append(os.path.join(os.path.dirname(__file__), '../'))
import generate_c_byte_array
# Font v3 -- https://pebbletechnology.atlassian.net/wiki/display/DEV/Pebble+Resource+Pack+Format
# FontInfo
# (uint8_t) version - v1
# (uint8_t) max_height - v1
# (uint16_t) number_of_glyphs - v1
# (uint16_t) wildcard_codepoint - v1
# (uint8_t) hash_table_size - v2
# (uint8_t) codepoint_bytes - v2
# (uint8_t) size - v3 # Save the size of FontInfo for sanity
# (uint8_t) features - v3
#
# 'size' is the size of the FontInfo structure itself. Storing it makes extending this
# structure in the future far simpler.
#
# 'features' is a bitmap defined as follows:
# 0: offset_table offsets are uint32 if 0, uint16 if 1
# 1: glyphs are plain bitmaps if 0, RLE4 encoded if 1
# 2-7: reserved
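# e.g. a 'features' value of 0x03 means 16-bit offset_table offsets and RLE4-encoded glyphs.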
#
# (uint32_t) hash_table[]
# Glyphs are found in the resource image by converting a codepoint into an offset from
# the start of the resource. This conversion is implemented as a hash table where collisions
# are resolved by separate chaining. Each entry in the hash table is as follows:
# (uint8_t) hash value
# (uint8_t) offset_table_size
# (uint16_t) offset
# A codepoint is converted into a hash value by the hash function -- this value is a direct
# index into the hash table array. 'offset' is the location of the matching offset_table list
# from the start of offset_tables, and offset_table_size gives the number of entries in that
# list (i.e., the number of codepoints that hash to the same value).
#
# (uint32_t) offset_tables[][]
# this list of tables contains offsets into the glyph_table for the codepoint.
# each offset is counted in 32-bit blocks from the start of glyph_table.
# packed: (codepoint_bytes [uint16_t | uint32_t]) codepoint
# (features[0] [uint16_t | uint32_t]) offset
#
# (uint32_t) glyph_table[]
# [0]: the 32-bit block for offset 0 is used to indicate that a glyph is not supported
# then for each glyph:
# [offset + 0] packed: (int_8) offset_top
# (int_8) offset_left,
# (uint_8) bitmap_height, NB: in v3, if RLE4 compressed, this
# field contains the number of
# RLE4 units.
# (uint_8) bitmap_width (LSB)
#
# [offset + 1] (int_8) horizontal_advance
# (24 bits) zero padding
# [offset + 2] bitmap data (unaligned rows of bits), padded with 0's at
# the end to make the bitmap data as a whole use multiples of 32-bit
# blocks
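#
# Lookup sketch (informal, derived from the layout above): to resolve codepoint c, compute
# hash = c % hash_table_size, read hash_table[hash] to find the size and location of c's
# offset_table bucket, scan that bucket for the entry whose codepoint equals c, and follow
# its offset into glyph_table. An offset of 0 marks the codepoint as unsupported, in which
# case the wildcard glyph is drawn instead.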
MIN_CODEPOINT = 0x20
MAX_2_BYTES_CODEPOINT = 0xffff
MAX_EXTENDED_CODEPOINT = 0x10ffff
FONT_VERSION_1 = 1
FONT_VERSION_2 = 2
FONT_VERSION_3 = 3
# Codepoint reserved for the wildcard glyph. Pick one that the font doesn't already map to a
# glyph; the watch falls back to this glyph for any codepoint the font cannot display.
WILDCARD_CODEPOINT = 0x25AF # White vertical rectangle
ELLIPSIS_CODEPOINT = 0x2026
# Features
FEATURE_OFFSET_16 = 0x01
FEATURE_RLE4 = 0x02
HASH_TABLE_SIZE = 255
OFFSET_TABLE_MAX_SIZE = 128
MAX_GLYPHS_EXTENDED = HASH_TABLE_SIZE * OFFSET_TABLE_MAX_SIZE
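# (255 buckets * 128 entries per bucket = 32640 glyphs at most.)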
MAX_GLYPHS = 256
def grouper(n, iterable, fillvalue=None):
"""grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"""
args = [iter(iterable)] * n
return itertools.izip_longest(fillvalue=fillvalue, *args)
def hasher(codepoint, num_glyphs):
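    # e.g. hasher(0x61, HASH_TABLE_SIZE) == 0x61 % 255 == 97, so 'a' lands in bucket 97.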
return (codepoint % num_glyphs)
def bits(x):
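    # Expand a byte into its 8 bits, most significant bit first,
    # e.g. bits(0xB0) == [1, 0, 1, 1, 0, 0, 0, 0].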
data = []
for i in range(8):
data.insert(0, int((x & 1) == 1))
x = x >> 1
return data
class Font:
def __init__(self, ttf_path, height, max_glyphs, max_glyph_size, legacy):
self.version = FONT_VERSION_3
self.ttf_path = ttf_path
self.max_height = int(height)
self.legacy = legacy
self.face = freetype.Face(self.ttf_path)
self.face.set_pixel_sizes(0, self.max_height)
self.name = self.face.family_name + "_" + self.face.style_name
self.wildcard_codepoint = WILDCARD_CODEPOINT
self.number_of_glyphs = 0
self.table_size = HASH_TABLE_SIZE
self.tracking_adjust = 0
self.regex = None
self.codepoints = range(MIN_CODEPOINT, MAX_EXTENDED_CODEPOINT)
self.codepoint_bytes = 2
self.max_glyphs = max_glyphs
self.max_glyph_size = max_glyph_size
self.glyph_table = []
self.hash_table = [0] * self.table_size
self.offset_tables = [[] for i in range(self.table_size)]
self.offset_size_bytes = 4
self.features = 0
self.glyph_header = ''.join((
'<', # little_endian
'B', # bitmap_width
'B', # bitmap_height
'b', # offset_left
'b', # offset_top
'b' # horizontal_advance
))
def set_compression(self, engine):
if self.version != FONT_VERSION_3:
raise Exception("Compression being set but version != 3 ({})". format(self.version))
if engine == 'RLE4':
self.features |= FEATURE_RLE4
else:
raise Exception("Unsupported compression engine: '{}'. Font {}".format(engine,
self.ttf_path))
def set_version(self, version):
self.version = version
def set_tracking_adjust(self, adjust):
self.tracking_adjust = adjust
def set_regex_filter(self, regex_string):
if regex_string != ".*":
try:
self.regex = re.compile(unicode(regex_string, 'utf8'), re.UNICODE)
            except Exception:
                raise Exception("Supplied filter argument was not a valid regular expression. "
                                "Font: {}".format(self.ttf_path))
else:
self.regex = None
def set_codepoint_list(self, list_path):
codepoints_file = open(list_path)
codepoints_json = json.load(codepoints_file)
self.codepoints = [int(cp) for cp in codepoints_json["codepoints"]]
def is_supported_glyph(self, codepoint):
return (self.face.get_char_index(codepoint) > 0 or
                codepoint == self.wildcard_codepoint)
def compress_glyph_RLE4(self, bitmap):
# This Run Length Compression scheme works by converting runs of identical symbols to the
# symbol and the length of the run. The length of each run of symbols is limited to
# [1..2**(RLElen-1)]. For RLE4, the length is 3 bits (0-7), or 1-8 consecutive symbols.
# For example: 11110111 is compressed to 1*4, 0*1, 1*3. or [(1, 4), (0, 1), (1, 3)]
RLE_LEN = 2**(4-1) # TODO possibly make this a parameter.
# It would likely be a good idea to look into the 'bitstream' package for lengths that won't
# easily fit into a byte/short/int.
# First, generate a list of tuples (bit, count).
unit_list = [(name, len(list(group))) for name, group in itertools.groupby(bitmap)]
# Second, generate a list of RLE tuples where count <= RLE_LEN. This intermediate step will
# make it much easier to implement the binary stream packer below.
rle_unit_list = []
for name, length in unit_list:
while length > 0:
unit_len = min(length, RLE_LEN)
rle_unit_list.append((name, unit_len))
length -= unit_len
# Note that num_units does not include the padding added below.
num_units = len(rle_unit_list)
# If the list is odd, add a padding unit
if (num_units % 2) == 1:
rle_unit_list.append((0, 1))
        # Now pack the tuples into a binary stream. struct can't pack nibbles, so join two
        # units per output byte.
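        # Each output byte holds the first unit in its low nibble and the second in its high
        # nibble, each nibble laid out as symbol << 3 | (run_length - 1). For the example
        # above, [(1, 4), (0, 1), (1, 3)] plus one padding unit packs to 0x0B, 0x0A, which is
        # then zero-padded to a 4-byte boundary.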
glyph_packed = []
it = iter(rle_unit_list)
for name, length in it:
name2, length2 = next(it)
packed_byte = name << 3 | (length - 1) | name2 << 7 | (length2 - 1) << 4
glyph_packed.append(struct.pack('<B', packed_byte))
# Pad out to the nearest 4 bytes
while (len(glyph_packed) % 4) > 0:
glyph_packed.append(struct.pack('<B', 0))
return (glyph_packed, num_units)
# Make sure that we will be able to decompress the glyph in-place
def check_decompress_glyph_RLE4(self, glyph_packed, width, rle_units):
# The glyph buffer before decoding is arranged as follows:
# [ <header> | <free space> | <encoded glyph> ]
# Make sure that we can decode the encoded glyph to end up with the following arrangement:
# [ <header> | <decoded glyph> ]
# without overwriting the unprocessed encoded glyph in the process
header_size = struct.calcsize(self.glyph_header)
dst_ptr = header_size
src_ptr = self.max_glyph_size - len(glyph_packed)
def glyph_packed_iterator(tbl, num):
for i in xrange(0, num):
yield struct.unpack('<B', tbl[i])[0]
# Generate glyph buffer. Ignore the header
bitmap = [0] * self.max_glyph_size
bitmap[-len(glyph_packed):] = glyph_packed_iterator(glyph_packed, len(glyph_packed))
out_num_bits = 0
out = 0
total_length = 0
while rle_units > 0:
if src_ptr >= self.max_glyph_size:
raise Exception("Error: input stream too large for buffer. Font {}".
format(self.ttf_path))
unit_pair = bitmap[src_ptr]
src_ptr += 1
for i in range(min(rle_units, 2)):
colour = (unit_pair >> 3) & 1
length = (unit_pair & 0x07) + 1
total_length += length
if colour:
# Generate the bitpattern 111...
add = (1 << length) - 1
out |= (add << out_num_bits)
out_num_bits += length
if out_num_bits >= 8:
if dst_ptr >= src_ptr:
raise Exception("Error: unable to RLE4 decode in place! Overrun. Font {}".
format(self.ttf_path))
if dst_ptr >= self.max_glyph_size:
raise Exception("Error: output bitmap too large for buffer. Font {}".
format(self.ttf_path))
bitmap[dst_ptr] = (out & 0xFF)
dst_ptr += 1
out >>= 8
out_num_bits -= 8
unit_pair >>= 4
rle_units -= 1
while out_num_bits > 0:
bitmap[dst_ptr] = (out & 0xFF)
dst_ptr += 1
out >>= 8
out_num_bits -= 8
# Success! We can in-place decode this glyph
return True
def glyph_bits(self, codepoint, gindex):
flags = (freetype.FT_LOAD_RENDER if self.legacy else
freetype.FT_LOAD_RENDER | freetype.FT_LOAD_MONOCHROME | freetype.FT_LOAD_TARGET_MONO)
self.face.load_glyph(gindex, flags)
# Font metrics
bitmap = self.face.glyph.bitmap
        advance = self.face.glyph.advance.x / 64  # Convert 26.6 fixed-point format to px
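        # (26.6 means 6 fractional bits, so e.g. an advance.x of 576 corresponds to
        # 576 / 64 = 9 px.)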
advance += self.tracking_adjust
width = bitmap.width
height = bitmap.rows
left = self.face.glyph.bitmap_left
bottom = self.max_height - self.face.glyph.bitmap_top
pixel_mode = self.face.glyph.bitmap.pixel_mode
glyph_packed = []
if height and width:
glyph_bitmap = []
if pixel_mode == 1: # monochrome font, 1 bit per pixel
for i in range(bitmap.rows):
row = []
for j in range(bitmap.pitch):
row.extend(bits(bitmap.buffer[i*bitmap.pitch+j]))
glyph_bitmap.extend(row[:bitmap.width])
            elif pixel_mode == 2:  # grayscale font, 8 bits per pixel
for val in bitmap.buffer:
glyph_bitmap.extend([1 if val > 127 else 0])
else:
# freetype-py should never give us a value not in (1,2)
raise Exception("Unsupported pixel mode: {}. Font {}".
format(pixel_mode, self.ttf_path))
if (self.features & FEATURE_RLE4):
# HACK WARNING: override the height with the number of RLE4 units.
glyph_packed, height = self.compress_glyph_RLE4(glyph_bitmap)
if height > 255:
raise Exception("Unable to RLE4 compress -- more than 255 units required"
"({}). Font {}".format(height, self.ttf_path))
# Check that we can in-place decompress. Will raise an exception if not.
self.check_decompress_glyph_RLE4(glyph_packed, width, height)
else:
for word in grouper(32, glyph_bitmap, 0):
w = 0
for index, bit in enumerate(word):
w |= bit << index
glyph_packed.append(struct.pack('<I', w))
# Confirm that we're smaller than the cache size
size = ((width * height) + (8 - 1)) / 8
if size > self.max_glyph_size:
raise Exception("Glyph too large! codepoint {}: {} > {}. Font {}".
format(codepoint, size, self.max_glyph_size, self.ttf_path))
glyph_header = struct.pack(self.glyph_header, width, height, left, bottom, advance)
return glyph_header + ''.join(glyph_packed)
def fontinfo_bits(self):
if self.version == FONT_VERSION_2:
s = struct.Struct('<BBHHBB')
return s.pack(self.version,
self.max_height,
self.number_of_glyphs,
self.wildcard_codepoint,
self.table_size,
self.codepoint_bytes)
else:
s = struct.Struct('<BBHHBBBB')
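            # '<BBHHBBBB' packs to 10 bytes; s.size is written into the v3 'size' field below
            # so readers can sanity-check the FontInfo header.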
return s.pack(self.version,
self.max_height,
self.number_of_glyphs,
self.wildcard_codepoint,
self.table_size,
self.codepoint_bytes,
s.size,
self.features)
def build_tables(self):
def build_hash_table(bucket_sizes):
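            # Each hash_table entry packs to 4 bytes: the bucket's hash value, its entry count,
            # and the byte offset of that bucket within the packed offset tables ('acc'
            # advances by entry_count * bytes_per_entry as the buckets are laid out in order).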
acc = 0
for i in range(self.table_size):
bucket_size = bucket_sizes[i]
self.hash_table[i] = (struct.pack('<BBH', i, bucket_size, acc))
acc += bucket_size * (self.offset_size_bytes + self.codepoint_bytes)
def build_offset_tables(glyph_entries):
offset_table_format = '<'
offset_table_format += 'L' if self.codepoint_bytes == 4 else 'H'
offset_table_format += 'L' if self.offset_size_bytes == 4 else 'H'
bucket_sizes = [0] * self.table_size
for entry in glyph_entries:
codepoint, offset = entry
glyph_hash = hasher(codepoint, self.table_size)
self.offset_tables[glyph_hash].append(
struct.pack(offset_table_format, codepoint, offset))
bucket_sizes[glyph_hash] = bucket_sizes[glyph_hash] + 1
if bucket_sizes[glyph_hash] > OFFSET_TABLE_MAX_SIZE:
print "error: %d > 127" % bucket_sizes[glyph_hash]
return bucket_sizes
def add_glyph(codepoint, next_offset, gindex, glyph_indices_lookup):
offset = next_offset
if gindex not in glyph_indices_lookup:
glyph_bits = self.glyph_bits(codepoint, gindex)
glyph_indices_lookup[gindex] = offset
self.glyph_table.append(glyph_bits)
next_offset += len(glyph_bits)
else:
offset = glyph_indices_lookup[gindex]
if (codepoint > MAX_2_BYTES_CODEPOINT):
self.codepoint_bytes = 4
self.number_of_glyphs += 1
return offset, next_offset, glyph_indices_lookup
def codepoint_is_in_subset(codepoint):
if (codepoint not in (WILDCARD_CODEPOINT, ELLIPSIS_CODEPOINT)):
if self.regex is not None:
if self.regex.match(unichr(codepoint)) is None:
return False
if codepoint not in self.codepoints:
return False
return True
glyph_entries = []
# MJZ: The 0th offset of the glyph table is 32-bits of
# padding, no idea why.
self.glyph_table.append(struct.pack('<I', 0))
self.number_of_glyphs = 0
glyph_indices_lookup = dict()
next_offset = 4
codepoint, gindex = self.face.get_first_char()
# add wildcard_glyph
offset, next_offset, glyph_indices_lookup = add_glyph(WILDCARD_CODEPOINT, next_offset, 0,
glyph_indices_lookup)
glyph_entries.append((WILDCARD_CODEPOINT, offset))
while gindex:
# Hard limit on the number of glyphs in a font
if (self.number_of_glyphs > self.max_glyphs):
break
            if (codepoint == WILDCARD_CODEPOINT):
                raise Exception('Wildcard codepoint is used for something else in this font. '
                                'Font {}'.format(self.ttf_path))
            if (gindex == 0):
                raise Exception('0 index is reused by a non-wildcard glyph. Font {}'.
                                format(self.ttf_path))
if (codepoint_is_in_subset(codepoint)):
offset, next_offset, glyph_indices_lookup = add_glyph(codepoint, next_offset,
gindex, glyph_indices_lookup)
glyph_entries.append((codepoint, offset))
codepoint, gindex = self.face.get_next_char(codepoint, gindex)
# Decide if we need 2 byte or 4 byte offsets
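        # (A 16-bit offset can only address the first 64 KiB of glyph data, hence the 65536
        # check below.)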
glyph_data_bytes = sum(len(glyph) for glyph in self.glyph_table)
if self.version == FONT_VERSION_3 and glyph_data_bytes < 65536:
self.features |= FEATURE_OFFSET_16
self.offset_size_bytes = 2
# Make sure the entries are sorted by codepoint
sorted_entries = sorted(glyph_entries, key=lambda entry: entry[0])
hash_bucket_sizes = build_offset_tables(sorted_entries)
build_hash_table(hash_bucket_sizes)
def bitstring(self):
btstr = self.fontinfo_bits()
btstr += ''.join(self.hash_table)
for table in self.offset_tables:
btstr += ''.join(table)
btstr += ''.join(self.glyph_table)
return btstr