blob: cc277cb694594ad9b0a35878396dda240515d167 [file] [log] [blame]
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sfnt
// Compact Font Format (CFF) fonts are written in PostScript, a stack-based
// programming language.
//
// A fundamental concept is a DICT, or a key-value map, expressed in reverse
// Polish notation. For example, this sequence of operations:
// - push the number 379
// - version operator
// - push the number 392
// - Notice operator
// - etc
// - push the number 100
// - push the number 0
// - push the number 500
// - push the number 800
// - FontBBox operator
// - etc
// defines a DICT that maps "version" to the String ID (SID) 379, "Notice" to
// the SID 392, "FontBBox" to the four numbers [100, 0, 500, 800], etc.
//
// The first 391 String IDs (starting at 0) are predefined as per the CFF spec
// Appendix A, in 5176.CFF.pdf referenced below. For example, 379 means
// "001.000". String ID 392 is not predefined, and is mapped by a separate
// structure, the "String INDEX", inside the CFF data. (String ID 391 is also
// not predefined. Specifically for ../testdata/CFFTest.otf, 391 means
// "uni4E2D", as this font contains a glyph for U+4E2D).
//
// The actual glyph vectors are similarly encoded (in PostScript), in a format
// called Type 2 Charstrings. The wire encoding is similar to but not exactly
// the same as CFF's. For example, the byte 0x05 means FontBBox for CFF DICTs,
// but means rlineto (relative line-to) for Type 2 Charstrings. See
// 5176.CFF.pdf Appendix H and 5177.Type2.pdf Appendix A in the PDF files
// referenced below.
//
// CFF is a stand-alone format, but CFF as used in SFNT fonts has further
// restrictions. For example, a stand-alone CFF can contain multiple fonts, but
// https://www.microsoft.com/typography/OTSPEC/cff.htm says that "The Name
// INDEX in the CFF must contain only one entry; that is, there must be only
// one font in the CFF FontSet".
//
// The relevant specifications are:
// - http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
// - http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
import (
"fmt"
"math"
"strconv"
)
// psStackSize is the number of operand slots available to the PostScript
// interpreter's stack.
//
// Both relevant specifications agree on the limit: 5176.CFF.pdf section 4
// "DICT Data" says that "An operator may be preceded by up to a maximum of 48
// operands", and 5177.Type2.pdf Appendix B "Type 2 Charstring Implementation
// Limits" says that "Argument stack 48".
const psStackSize = 48
// bigEndian decodes b, which must be between 1 and 4 bytes long, as a
// big-endian unsigned integer. It panics for any other length, since callers
// are expected to have validated the length already.
func bigEndian(b []byte) uint32 {
	if n := len(b); n < 1 || n > 4 {
		panic("unreachable")
	}
	var v uint32
	for _, x := range b {
		// Shift the accumulated value up one byte and append the next one.
		v = v<<8 | uint32(x)
	}
	return v
}
// cffParser parses the CFF table from an SFNT font.
//
// It combines a cursor over the underlying source data (src, offset, end)
// with the state of a small PostScript-style interpreter (instructions,
// stack, saved) that is used to execute the Top DICT.
type cffParser struct {
// src is the underlying font data; read views into it via src.view.
src *source
// base is the position that Top DICT offsets are relative to: parse seeks
// to base plus the saved CharStrings operand.
base int
// offset is the current read position. read advances it, and parse
// repositions it when jumping between structures.
offset int
// end bounds the readable data: read fails when fewer than the requested
// number of bytes lie between offset and end.
end int
// err holds the error, if any, set by the most recent failing operation.
// Methods that report ok == false leave the cause here.
err error
// buf is the view returned by the most recent read call.
buf []byte
// locBuf is scratch space for the two locations of a single-entry INDEX
// (parse uses it for the Name INDEX and the Top DICT INDEX).
locBuf [2]uint32
// parseNumberBuf is scratch space in which parseNumber builds the decimal
// text of a real number operand.
parseNumberBuf [maxRealNumberStrLen]byte
// instructions is the not-yet-executed remainder of the DICT data being
// interpreted; parseNumber and step consume bytes from its front.
instructions []byte
// stack is the operand stack: a[:top] holds the operands pushed so far.
stack struct {
a [psStackSize]int32
top int32
}
// saved holds operand values captured by operators while executing the
// Top DICT.
saved struct {
// charStrings is the operand of the CharStrings operator, which parse
// uses as the offset (from base) of the CharStrings INDEX.
charStrings int32
}
}
// parse walks the CFF table in order: the header, the Name INDEX, the Top
// DICT INDEX (executing its single DICT), and finally the CharStrings INDEX
// that the Top DICT points to.
//
// On success it returns the CharStrings INDEX locations: count+1 strictly
// increasing positions, as filled in by parseIndexLocations. On failure it
// returns nil and a non-nil error.
func (p *cffParser) parse() (locations []uint32, err error) {
	// Header: four bytes, of which the first three must be 1, 0 and 4.
	if !p.read(4) {
		return nil, p.err
	}
	if hdr := p.buf; !(hdr[0] == 1 && hdr[1] == 0 && hdr[2] == 4) {
		return nil, errUnsupportedCFFVersion
	}

	// Name INDEX.
	//
	// https://www.microsoft.com/typography/OTSPEC/cff.htm says that "The
	// Name INDEX in the CFF must contain only one entry".
	nameCount, nameOffSize, ok := p.parseIndexHeader()
	if !ok {
		return nil, p.err
	}
	if nameCount != 1 {
		return nil, errInvalidCFFTable
	}
	if !p.parseIndexLocations(p.locBuf[:2], nameCount, nameOffSize) {
		return nil, p.err
	}
	// Skip over the single name: continue from where it ends.
	p.offset = int(p.locBuf[1])

	// Top DICT INDEX.
	//
	// 5176.CFF.pdf section 8 "Top DICT INDEX" says that the count here
	// should match the count of the Name INDEX, which is 1.
	dictCount, dictOffSize, ok := p.parseIndexHeader()
	if !ok {
		return nil, p.err
	}
	if dictCount != 1 {
		return nil, errInvalidCFFTable
	}
	if !p.parseIndexLocations(p.locBuf[:2], dictCount, dictOffSize) {
		return nil, p.err
	}
	dictLen := int(p.locBuf[1] - p.locBuf[0])
	if !p.read(dictLen) {
		return nil, p.err
	}
	// Execute the Top DICT one operand or operator at a time until its bytes
	// are exhausted or an error occurs.
	p.instructions = p.buf
	for len(p.instructions) > 0 {
		if p.step(); p.err != nil {
			return nil, p.err
		}
	}

	// CharStrings INDEX, whose location was found in the Top DICT. The
	// offset must be positive and must not point past the end of the table.
	cs := p.saved.charStrings
	if cs <= 0 || int32(p.end-p.base) < cs {
		return nil, errInvalidCFFTable
	}
	p.offset = p.base + int(cs)
	count, offSize, ok := p.parseIndexHeader()
	if !ok {
		return nil, p.err
	}
	if count == 0 {
		return nil, errInvalidCFFTable
	}
	locations = make([]uint32, count+1)
	if !p.parseIndexLocations(locations, count, offSize) {
		return nil, p.err
	}
	return locations, nil
}
// read sets p.buf to view the n bytes from p.offset to p.offset+n. It also
// advances p.offset by n.
//
// It reports whether the read succeeded. On failure p.err is set: to
// errInvalidCFFTable if n is negative or if fewer than n bytes remain before
// p.end (in which case p.offset is left unchanged), or to the error returned
// by the underlying source.view call.
//
// As per the source.view method, the caller should not modify the contents of
// p.buf after read returns, other than by calling read again.
//
// The caller should also avoid modifying the pointer / length / capacity of
// the p.buf slice, not just avoid modifying the slice's contents, in order to
// maximize the opportunity to re-use p.buf's allocated memory when viewing the
// underlying source data for subsequent read calls.
func (p *cffParser) read(n int) (ok bool) {
	// Callers derive n from untrusted font data via integer conversions.
	// Reject a negative n explicitly: it would otherwise satisfy the bounds
	// check below, be passed on to view, and move p.offset backwards.
	if n < 0 || p.end-p.offset < n {
		p.err = errInvalidCFFTable
		return false
	}
	p.buf, p.err = p.src.view(p.buf, p.offset, n)
	p.offset += n
	return p.err == nil
}
// parseIndexHeader reads the leading fields of an INDEX at p.offset: a 2-byte
// count and, unless the count is zero, a 1-byte offset size that must be
// between 1 and 4 inclusive.
//
// It reports ok == false, with p.err set, if the data cannot be read or the
// offset size is out of range. For an empty INDEX it returns a zero count and
// a zero offSize with ok == true.
func (p *cffParser) parseIndexHeader() (count, offSize int32, ok bool) {
	if !p.read(2) {
		return 0, 0, false
	}
	n := int32(u16(p.buf[:2]))
	if n == 0 {
		// 5176.CFF.pdf section 5 "INDEX Data" says that "An empty INDEX is
		// represented by a count field with a 0 value and no additional
		// fields. Thus, the total size of an empty INDEX is 2 bytes".
		return 0, 0, true
	}

	if !p.read(1) {
		return 0, 0, false
	}
	switch size := int32(p.buf[0]); {
	case 1 <= size && size <= 4:
		return n, size, true
	default:
		p.err = errInvalidCFFTable
		return 0, 0, false
	}
}
// parseIndexLocations reads the offset array of an INDEX whose header has
// already been parsed, and stores count+1 locations in dst. Each stored
// location is the read position just past the offset array plus the decoded
// offset, after correcting for the spec's off-by-one.
//
// A zero count is a no-op that reports success. Otherwise len(dst) must be
// count+1 (the method panics if not). It reports ok == false, with p.err set,
// if the offsets cannot be read, do not start at 1, are not strictly
// increasing, or point beyond p.end.
func (p *cffParser) parseIndexLocations(dst []uint32, count, offSize int32) (ok bool) {
	if count == 0 {
		return true
	}
	if int(count+1) != len(dst) {
		panic("unreachable")
	}
	if !p.read(int(offSize) * len(dst)) {
		return false
	}

	remaining := p.buf
	last := uint32(0)
	for i := range dst {
		raw := bigEndian(remaining[:offSize])
		remaining = remaining[offSize:]

		// Locations are off by 1 byte. 5176.CFF.pdf section 5 "INDEX Data"
		// says that "Offsets in the offset array are relative to the byte that
		// precedes the object data... This ensures that every object has a
		// corresponding offset which is always nonzero".
		if raw == 0 {
			p.err = errInvalidCFFTable
			return false
		}
		loc := raw - 1

		invalid := false
		switch {
		case i == 0:
			// In the same paragraph, "Therefore the first element of the
			// offset array is always 1" before correcting for the off-by-1.
			invalid = loc != 0
		default:
			// Subsequent locations must be strictly increasing.
			invalid = loc <= last
		}
		// Every location must also stay within the table.
		if invalid || uint32(p.end-p.offset) < loc {
			p.err = errInvalidCFFTable
			return false
		}

		dst[i] = uint32(p.offset) + loc
		last = loc
	}
	return true
}
// step executes a single operation from the front of p.instructions: either
// it pushes one numeric operand onto the stack, or it executes one operator
// (looked up in topDictOperators) and pops that operator's operands.
//
// Errors are reported by setting p.err. The caller must ensure that
// p.instructions is non-empty.
func (p *cffParser) step() {
	number, res := p.parseNumber()
	switch {
	case res == prNone:
		// Not a number: handled as an operator below.
	case res == prUnsupportedRNE:
		p.err = errUnsupportedRealNumberEncoding
		return
	case res < 0 || p.stack.top == psStackSize:
		// Malformed number, or no room left on the operand stack.
		p.err = errInvalidCFFTable
		return
	default:
		p.stack.a[p.stack.top] = number
		p.stack.top++
		return
	}

	opcode := p.instructions[0]
	p.instructions = p.instructions[1:]

	// A leading escape byte selects the 2-byte operator table; the operator
	// itself is then the following byte.
	table, escaped := topDictOperators[0], false
	if opcode == escapeByte {
		if len(p.instructions) == 0 {
			p.err = errInvalidCFFTable
			return
		}
		opcode = p.instructions[0]
		p.instructions = p.instructions[1:]
		table, escaped = topDictOperators[1], true
	}

	// Out-of-range opcodes and gaps in the table both yield the zero
	// cffOperator, whose empty name marks it as unrecognized.
	var op cffOperator
	if int(opcode) < len(table) {
		op = table[opcode]
	}
	if op.name == "" {
		if escaped {
			p.err = fmt.Errorf("sfnt: unrecognized CFF 2-byte operator (12 %d)", opcode)
		} else {
			p.err = fmt.Errorf("sfnt: unrecognized CFF 1-byte operator (%d)", opcode)
		}
		return
	}

	if p.stack.top < op.numPop {
		p.err = errInvalidCFFTable
		return
	}
	if op.run != nil {
		op.run(p)
	}
	// A negative numPop ("array" or "delta") consumes the whole stack.
	if op.numPop < 0 {
		p.stack.top = 0
	} else {
		p.stack.top -= op.numPop
	}
}
// parseResult is the outcome of trying to parse a number from the front of
// the instruction stream. Negative values are errors.
type parseResult int32

const (
	// prUnsupportedRNE means a real number was encountered whose encoding is
	// too long for this implementation's buffer.
	prUnsupportedRNE parseResult = iota - 2 // -2
	// prInvalid means the number's encoding was malformed or truncated.
	prInvalid // -1
	// prNone means the next instruction is not a number (or there are no
	// instructions left).
	prNone // 0
	// prGood means a number was parsed and consumed.
	prGood // +1
)
// parseNumber tries to decode one numeric operand from the front of
// p.instructions, consuming its bytes on success.
//
// It returns prNone (consuming nothing) if the instructions are empty or the
// next byte does not start a number, prGood with the decoded value on
// success, prInvalid for a truncated or malformed encoding, and
// prUnsupportedRNE for a real number too long for p.parseNumberBuf. Real
// numbers are returned as the bit pattern of their float32 value.
//
// See 5176.CFF.pdf section 4 "DICT Data".
func (p *cffParser) parseNumber() (number int32, res parseResult) {
	if len(p.instructions) == 0 {
		return 0, prNone
	}
	first := p.instructions[0]

	// Encodings introduced by a dedicated prefix byte.
	switch first {
	case 28:
		// Prefix byte followed by a 2-byte signed integer.
		if len(p.instructions) < 3 {
			return 0, prInvalid
		}
		v := int32(int16(u16(p.instructions[1:])))
		p.instructions = p.instructions[3:]
		return v, prGood

	case 29:
		// Prefix byte followed by a 4-byte signed integer.
		if len(p.instructions) < 5 {
			return 0, prInvalid
		}
		v := int32(u32(p.instructions[1:]))
		p.instructions = p.instructions[5:]
		return v, prGood

	case 30:
		// Parse a real number. This isn't listed in 5176.CFF.pdf Table 3
		// "Operand Encoding" but that table lists integer encodings. Further
		// down the page it says "A real number operand is provided in addition
		// to integer operands. This operand begins with a byte value of 30
		// followed by a variable-length sequence of bytes."
		//
		// The nibbles are translated to text in p.parseNumberBuf and handed
		// to strconv once the terminating nibble is seen.
		digits := p.parseNumberBuf[:0]
		p.instructions = p.instructions[1:]
		for {
			if len(p.instructions) == 0 {
				return 0, prInvalid
			}
			packed := p.instructions[0]
			p.instructions = p.instructions[1:]

			// Each byte packs two nibbles: high first, then low.
			for _, nib := range [2]byte{packed >> 4, packed & 0x0f} {
				switch nib {
				case 0x0f:
					// End-of-number marker.
					f, err := strconv.ParseFloat(string(digits), 32)
					if err != nil {
						return 0, prInvalid
					}
					return int32(math.Float32bits(float32(f))), prGood
				case 0x0d:
					return 0, prInvalid
				}
				// Make sure even the longest nibble expansion would fit.
				if len(digits)+maxNibbleDefsLength > len(p.parseNumberBuf) {
					return 0, prUnsupportedRNE
				}
				digits = append(digits, nibbleDefs[nib]...)
			}
		}
	}

	// Remaining encodings are selected by the range the first byte falls in.
	switch {
	case first < 32:
		// Not a number: this byte is an operator.
		return 0, prNone

	case first < 247:
		// Single-byte integer.
		p.instructions = p.instructions[1:]
		return int32(first) - 139, prGood

	case first < 251:
		// Two-byte positive integer.
		if len(p.instructions) < 2 {
			return 0, prInvalid
		}
		second := p.instructions[1]
		p.instructions = p.instructions[2:]
		return int32(first-247)*256 + int32(second) + 108, prGood

	case first < 255:
		// Two-byte negative integer.
		if len(p.instructions) < 2 {
			return 0, prInvalid
		}
		second := p.instructions[1]
		p.instructions = p.instructions[2:]
		return -(int32(first-251)*256 + int32(second) + 108), prGood
	}

	// first == 255: not a number encoding handled here.
	return 0, prNone
}
// maxNibbleDefsLength is the length of the longest string in nibbleDefs, i.e.
// the most text that a single nibble can contribute to a real number.
const maxNibbleDefsLength = len("E-")

// nibbleDefs encodes 5176.CFF.pdf Table 5 "Nibble Definitions". It is indexed
// by nibble value and gives the text that the nibble stands for.
var nibbleDefs = [16]string{
	// 0x00 - 0x09: the decimal digits.
	"0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
	// 0x0a: decimal point.
	".",
	// 0x0b: positive exponent.
	"E",
	// 0x0c: negative exponent.
	"E-",
	// 0x0d: no text; parseNumber rejects this nibble as invalid.
	"",
	// 0x0e: minus sign.
	"-",
	// 0x0f: no text; parseNumber treats this nibble as the end of the number.
	"",
}
// cffOperator describes a single DICT operator: how many operands it
// consumes, its name, and an optional implementation.
type cffOperator struct {
// numPop is the number of stack values to pop. -1 means "array" and -2
// means "delta" as per 5176.CFF.pdf Table 6 "Operand Types". For either
// negative value, step empties the whole stack after running the operator.
numPop int32
// name is the operator name. An empty name (i.e. the zero value for the
// struct overall) means an unrecognized operator.
name string
// run is the function that implements the operator. Nil means that we
// ignore the operator, other than popping its arguments off the stack.
// step calls run before the operands are popped, so run can still read
// them from the stack.
run func(*cffParser)
}
// topDictOperators encodes the subset of 5176.CFF.pdf Table 9 "Top DICT
// Operator Entries" and Table 10 "CIDFont Operator Extensions" used by this
// implementation.
//
// Element 0 holds the 1-byte operators and element 1 holds the 2-byte
// (escaped) operators, each indexed by operator byte. Indexes that are not
// listed are zero-valued entries, which step reports as unrecognized
// operators.
var topDictOperators = [2][]cffOperator{{
// 1-byte operators.
0: {+1, "version", nil},
1: {+1, "Notice", nil},
2: {+1, "FullName", nil},
3: {+1, "FamilyName", nil},
4: {+1, "Weight", nil},
5: {-1, "FontBBox", nil},
13: {+1, "UniqueID", nil},
14: {-1, "XUID", nil},
15: {+1, "charset", nil},
16: {+1, "Encoding", nil},
17: {+1, "CharStrings", func(p *cffParser) {
// Remember the operand on top of the stack. step has already checked that
// at least numPop (one) operand is present, and has not yet popped it.
p.saved.charStrings = p.stack.a[p.stack.top-1]
}},
18: {+2, "Private", nil},
}, {
// 2-byte operators. The first byte is the escape byte.
0: {+1, "Copyright", nil},
1: {+1, "isFixedPitch", nil},
2: {+1, "ItalicAngle", nil},
3: {+1, "UnderlinePosition", nil},
4: {+1, "UnderlineThickness", nil},
5: {+1, "PaintType", nil},
6: {+1, "CharstringType", nil},
7: {-1, "FontMatrix", nil},
8: {+1, "StrokeWidth", nil},
20: {+1, "SyntheticBase", nil},
21: {+1, "PostScript", nil},
22: {+1, "BaseFontName", nil},
23: {-2, "BaseFontBlend", nil},
30: {+3, "ROS", nil},
31: {+1, "CIDFontVersion", nil},
32: {+1, "CIDFontRevision", nil},
33: {+1, "CIDFontType", nil},
34: {+1, "CIDCount", nil},
35: {+1, "UIDBase", nil},
36: {+1, "FDArray", nil},
37: {+1, "FDSelect", nil},
38: {+1, "FontName", nil},
}}
// escapeByte introduces a 2-byte operator. 5176.CFF.pdf section 4 "DICT Data"
// says that "Two-byte operators have an initial escape byte of 12".
//
// On seeing this byte, step looks up the following byte in
// topDictOperators[1] instead of topDictOperators[0].
const escapeByte = 12