blob: 7b11aaad3fa4d56568ec166288b13aa8d73c5a97 [file] [log] [blame]
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"encoding/xml"
"fmt"
"io"
"sort"
"strconv"
"strings"
"unicode/utf16"
)
// binaryXML converts XML into Android's undocumented binary XML format.
//
// The best source of information on this format seems to be the source code
// in AOSP frameworks-base. Android "resource" types seem to describe the
// encoded bytes, in particular:
//
// ResChunk_header
// ResStringPool_header
// ResXMLTree_node
//
// These are defined in:
//
// https://android.googlesource.com/platform/frameworks/base/+/master/include/androidfw/ResourceTypes.h
//
// The rough format of the file is a resource chunk containing a sequence of
// chunks. Each chunk is made up of a header and a body. The header begins with
// the contents of the ResChunk_header struct, which includes the size of both
// the header and the body.
//
// Both the header and body are 4-byte aligned.
//
// Values are encoded as little-endian.
//
// The android source code for encoding is done in the aapt tool. Its source
// code lives in AOSP:
//
// https://android.googlesource.com/platform/frameworks/base.git/+/master/tools/aapt
//
// A sample layout:
//
// File Header (ResChunk_header, type XML)
// Chunk: String Pool (type STRING_POOL)
// Sequence of strings, each with the format:
// uint16 length
// uint16 extended_length -- only if top bit set on length
// UTF-16LE string
// two zero bytes
// Resource Map
// The [i]th 4-byte entry in the resource map corresponds with
// the [i]th string from the string pool. The 4-bytes are a
// Resource ID constant defined:
// http://developer.android.com/reference/android/R.attr.html
// This appears to be a way to map strings onto enum values.
// Chunk: Namespace Start (ResXMLTree_node; ResXMLTree_namespaceExt)
// Chunk: Element Start
// ResXMLTree_node
// ResXMLTree_attrExt
// ResXMLTree_attribute (repeated attributeCount times)
// Chunk: Element End
// (ResXMLTree_node; ResXMLTree_endElementExt)
// ...
// Chunk: Namespace End
func binaryXML(r io.Reader) ([]byte, error) {
lr := &lineReader{r: r}
d := xml.NewDecoder(lr)
pool := new(binStringPool)
depth := 0
elements := []chunk{}
namespaceEnds := make(map[int][]binEndNamespace)
for {
line := lr.line(d.InputOffset())
tok, err := d.Token()
if err != nil {
if err == io.EOF {
break
}
return nil, err
}
switch tok := tok.(type) {
case xml.StartElement:
// Intercept namespace definitions.
var attr []*binAttr
for _, a := range tok.Attr {
if a.Name.Space == "xmlns" {
elements = append(elements, binStartNamespace{
line: line,
prefix: pool.get(a.Name.Local),
url: pool.get(a.Value),
})
namespaceEnds[depth] = append([]binEndNamespace{{
line: line,
prefix: pool.get(a.Name.Local),
url: pool.get(a.Value),
}}, namespaceEnds[depth]...)
continue
}
ba, err := pool.getAttr(a)
if err != nil {
return nil, fmt.Errorf("%d: %s: %v", line, a.Name.Local, err)
}
attr = append(attr, ba)
}
depth++
elements = append(elements, &binStartElement{
line: line,
ns: pool.getNS(tok.Name.Space),
name: pool.get(tok.Name.Local),
attr: attr,
})
case xml.EndElement:
elements = append(elements, &binEndElement{
line: line,
ns: pool.getNS(tok.Name.Space),
name: pool.get(tok.Name.Local),
})
depth--
if nsEnds := namespaceEnds[depth]; len(nsEnds) > 0 {
delete(namespaceEnds, depth)
for _, nsEnd := range nsEnds {
elements = append(elements, nsEnd)
}
}
case xml.CharData:
// The aapt tool appears to "compact" leading and
// trailing whitepsace. See XMLNode::removeWhitespace in
// https://android.googlesource.com/platform/frameworks/base.git/+/master/tools/aapt/XMLNode.cpp
if len(tok) == 0 {
continue
}
start, end := 0, len(tok)
for start < len(tok) && isSpace(tok[start]) {
start++
}
for end > start && isSpace(tok[end-1]) {
end--
}
if start == end {
continue // all whitespace, skip it
}
// Preserve one character of whitespace.
if start > 0 {
start--
}
if end < len(tok) {
end++
}
elements = append(elements, &binCharData{
line: line,
data: pool.get(string(tok[start:end])),
})
case xml.Comment:
// Ignored by Anroid Binary XML format.
case xml.ProcInst:
// Ignored by Anroid Binary XML format?
case xml.Directive:
// Ignored by Anroid Binary XML format.
default:
return nil, fmt.Errorf("apk: unexpected token: %v (%T)", tok, tok)
}
}
sortPool(pool)
for _, e := range elements {
if e, ok := e.(*binStartElement); ok {
sortAttr(e, pool)
}
}
resMap := &binResMap{pool}
size := 8 + pool.size() + resMap.size()
for _, e := range elements {
size += e.size()
}
b := make([]byte, 0, size)
b = appendHeader(b, headerXML, size)
b = pool.append(b)
b = resMap.append(b)
for _, e := range elements {
b = e.append(b)
}
return b, nil
}
func isSpace(b byte) bool {
switch b {
case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
return true
}
return false
}
type headerType uint16
const (
headerXML headerType = 0x0003
headerStringPool = 0x0001
headerResourceMap = 0x0180
headerStartNamespace = 0x0100
headerEndNamespace = 0x0101
headerStartElement = 0x0102
headerEndElement = 0x0103
headerCharData = 0x0104
)
func appendU16(b []byte, v uint16) []byte {
return append(b, byte(v), byte(v>>8))
}
func appendU32(b []byte, v uint32) []byte {
return append(b, byte(v), byte(v>>8), byte(v>>16), byte(v>>24))
}
func appendHeader(b []byte, typ headerType, size int) []byte {
b = appendU16(b, uint16(typ))
b = appendU16(b, 8)
b = appendU16(b, uint16(size))
b = appendU16(b, 0)
return b
}
// Attributes of the form android:key are mapped to resource IDs, which are
// embedded into the Binary XML format.
//
// http://developer.android.com/reference/android/R.attr.html
var resourceCodes = map[string]uint32{
"versionCode": 0x0101021b,
"versionName": 0x0101021c,
"minSdkVersion": 0x0101020c,
"windowFullscreen": 0x0101020d,
"label": 0x01010001,
"hasCode": 0x0101000c,
"debuggable": 0x0101000f,
"name": 0x01010003,
"configChanges": 0x0101001f,
"value": 0x01010024,
}
// http://developer.android.com/reference/android/R.attr.html#configChanges
var configChanges = map[string]uint32{
"mcc": 0x0001,
"mnc": 0x0002,
"locale": 0x0004,
"touchscreen": 0x0008,
"keyboard": 0x0010,
"keyboardHidden": 0x0020,
"navigation": 0x0040,
"orientation": 0x0080,
"screenLayout": 0x0100,
"uiMode": 0x0200,
"screenSize": 0x0400,
"smallestScreenSize": 0x0800,
"layoutDirection": 0x2000,
"fontScale": 0x40000000,
}
type lineReader struct {
off int64
lines []int64
r io.Reader
}
func (r *lineReader) Read(p []byte) (n int, err error) {
n, err = r.r.Read(p)
for i := 0; i < n; i++ {
if p[i] == '\n' {
r.lines = append(r.lines, r.off+int64(i))
}
}
r.off += int64(n)
return n, err
}
func (r *lineReader) line(pos int64) int {
return sort.Search(len(r.lines), func(i int) bool {
return pos < r.lines[i]
}) + 1
}
type bstring struct {
ind uint32
str string
enc []byte // 2-byte length, utf16le, 2-byte zero
}
type chunk interface {
size() int
append([]byte) []byte
}
type binResMap struct {
pool *binStringPool
}
func (p *binResMap) append(b []byte) []byte {
b = appendHeader(b, headerResourceMap, p.size())
for _, bstr := range p.pool.s {
c, ok := resourceCodes[bstr.str]
if !ok {
break
}
b = appendU32(b, c)
}
return b
}
func (p *binResMap) size() int {
count := 0
for _, bstr := range p.pool.s {
if _, ok := resourceCodes[bstr.str]; !ok {
break
}
count++
}
return 8 + 4*count
}
type binStringPool struct {
s []*bstring
m map[string]*bstring
}
func (p *binStringPool) get(str string) *bstring {
if p.m == nil {
p.m = make(map[string]*bstring)
}
res := p.m[str]
if res != nil {
return res
}
res = &bstring{
ind: uint32(len(p.s)),
str: str,
}
p.s = append(p.s, res)
p.m[str] = res
if len(str)>>16 > 0 {
panic(fmt.Sprintf("string lengths over 1<<15 not yet supported, got len %d for string that starts %q", len(str), str[:100]))
}
strUTF16 := utf16.Encode([]rune(str))
res.enc = appendU16(nil, uint16(len(strUTF16)))
for _, w := range strUTF16 {
res.enc = appendU16(res.enc, w)
}
res.enc = appendU16(res.enc, 0)
return res
}
func (p *binStringPool) getNS(ns string) *bstring {
if ns == "" {
// Register empty string for inclusion in output (like aapt),
// but do not reference it from namespace elements.
p.get("")
return nil
}
return p.get(ns)
}
func (p *binStringPool) getAttr(attr xml.Attr) (*binAttr, error) {
a := &binAttr{
ns: p.getNS(attr.Name.Space),
name: p.get(attr.Name.Local),
}
if attr.Name.Space != "http://schemas.android.com/apk/res/android" {
a.data = p.get(attr.Value)
return a, nil
}
// Some android attributes have interesting values.
switch attr.Name.Local {
case "versionCode", "minSdkVersion":
v, err := strconv.Atoi(attr.Value)
if err != nil {
return nil, err
}
a.data = int(v)
case "hasCode", "debuggable":
v, err := strconv.ParseBool(attr.Value)
if err != nil {
return nil, err
}
a.data = v
case "configChanges":
v := uint32(0)
for _, c := range strings.Split(attr.Value, "|") {
v |= configChanges[c]
}
a.data = v
default:
a.data = p.get(attr.Value)
}
return a, nil
}
const stringPoolPreamble = 0 +
8 + // chunk header
4 + // string count
4 + // style count
4 + // flags
4 + // strings start
4 + // styles start
0
func (p *binStringPool) unpaddedSize() int {
strLens := 0
for _, s := range p.s {
strLens += len(s.enc)
}
return stringPoolPreamble + 4*len(p.s) + strLens
}
func (p *binStringPool) size() int {
size := p.unpaddedSize()
size += size % 0x04
return size
}
// overloaded for testing.
var (
sortPool = func(p *binStringPool) {
sort.Sort(p)
// Move resourceCodes to the front.
s := make([]*bstring, 0)
m := make(map[string]*bstring)
for str := range resourceCodes {
bstr := p.m[str]
if bstr == nil {
continue
}
bstr.ind = uint32(len(s))
s = append(s, bstr)
m[str] = bstr
delete(p.m, str)
}
for _, bstr := range p.m {
bstr.ind = uint32(len(s))
s = append(s, bstr)
}
p.s = s
p.m = m
}
sortAttr = func(e *binStartElement, p *binStringPool) {}
)
func (b *binStringPool) Len() int { return len(b.s) }
func (b *binStringPool) Less(i, j int) bool { return b.s[i].str < b.s[j].str }
func (b *binStringPool) Swap(i, j int) {
b.s[i], b.s[j] = b.s[j], b.s[i]
b.s[i].ind, b.s[j].ind = b.s[j].ind, b.s[i].ind
}
func (p *binStringPool) append(b []byte) []byte {
stringsStart := uint32(stringPoolPreamble + 4*len(p.s))
b = appendU16(b, uint16(headerStringPool))
b = appendU16(b, 0x1c) // chunk header size
b = appendU16(b, uint16(p.size()))
b = appendU16(b, 0)
b = appendU32(b, uint32(len(p.s)))
b = appendU32(b, 0) // style count
b = appendU32(b, 0) // flags
b = appendU32(b, stringsStart)
b = appendU32(b, 0) // styles start
off := 0
for _, bstr := range p.s {
b = appendU32(b, uint32(off))
off += len(bstr.enc)
}
for _, bstr := range p.s {
b = append(b, bstr.enc...)
}
for i := p.unpaddedSize() % 0x04; i > 0; i-- {
b = append(b, 0)
}
return b
}
type binStartElement struct {
line int
ns *bstring
name *bstring
attr []*binAttr
}
func (e *binStartElement) size() int {
return 8 + // chunk header
4 + // line number
4 + // comment
4 + // ns
4 + // name
2 + 2 + 2 + // attribute start, size, count
2 + 2 + 2 + // id/class/style index
len(e.attr)*(4+4+4+4+4)
}
func (e *binStartElement) append(b []byte) []byte {
b = appendU16(b, uint16(headerStartElement))
b = appendU16(b, 0x10) // chunk header size
b = appendU16(b, uint16(e.size()))
b = appendU16(b, 0)
b = appendU32(b, uint32(e.line))
b = appendU32(b, 0xffffffff) // comment
if e.ns == nil {
b = appendU32(b, 0xffffffff)
} else {
b = appendU32(b, e.ns.ind)
}
b = appendU32(b, e.name.ind)
b = appendU16(b, 0x14) // attribute start
b = appendU16(b, 0x14) // attribute size
b = appendU16(b, uint16(len(e.attr)))
b = appendU16(b, 0) // ID index (none)
b = appendU16(b, 0) // class index (none)
b = appendU16(b, 0) // style index (none)
for _, a := range e.attr {
b = a.append(b)
}
return b
}
type binAttr struct {
ns *bstring
name *bstring
data interface{} // either int (INT_DEC) or *bstring (STRING)
}
func (a *binAttr) append(b []byte) []byte {
if a.ns != nil {
b = appendU32(b, a.ns.ind)
} else {
b = appendU32(b, 0xffffffff)
}
b = appendU32(b, a.name.ind)
switch v := a.data.(type) {
case int:
b = appendU32(b, 0xffffffff) // raw value
b = appendU16(b, 8) // size
b = append(b, 0) // unused padding
b = append(b, 0x10) // INT_DEC
b = appendU32(b, uint32(v))
case bool:
b = appendU32(b, 0xffffffff) // raw value
b = appendU16(b, 8) // size
b = append(b, 0) // unused padding
b = append(b, 0x12) // INT_BOOLEAN
if v {
b = appendU32(b, 0xffffffff)
} else {
b = appendU32(b, 0)
}
case uint32:
b = appendU32(b, 0xffffffff) // raw value
b = appendU16(b, 8) // size
b = append(b, 0) // unused padding
b = append(b, 0x11) // INT_HEX
b = appendU32(b, uint32(v))
case *bstring:
b = appendU32(b, v.ind) // raw value
b = appendU16(b, 8) // size
b = append(b, 0) // unused padding
b = append(b, 0x03) // STRING
b = appendU32(b, v.ind)
default:
panic(fmt.Sprintf("unexpected attr type: %T (%v)", v, v))
}
return b
}
type binEndElement struct {
line int
ns *bstring
name *bstring
attr []*binAttr
}
func (*binEndElement) size() int {
return 8 + // chunk header
4 + // line number
4 + // comment
4 + // ns
4 // name
}
func (e *binEndElement) append(b []byte) []byte {
b = appendU16(b, uint16(headerEndElement))
b = appendU16(b, 0x10) // chunk header size
b = appendU16(b, uint16(e.size()))
b = appendU16(b, 0)
b = appendU32(b, uint32(e.line))
b = appendU32(b, 0xffffffff) // comment
if e.ns == nil {
b = appendU32(b, 0xffffffff)
} else {
b = appendU32(b, e.ns.ind)
}
b = appendU32(b, e.name.ind)
return b
}
type binStartNamespace struct {
line int
prefix *bstring
url *bstring
}
func (binStartNamespace) size() int {
return 8 + // chunk header
4 + // line number
4 + // comment
4 + // prefix
4 // url
}
func (e binStartNamespace) append(b []byte) []byte {
b = appendU16(b, uint16(headerStartNamespace))
b = appendU16(b, 0x10) // chunk header size
b = appendU16(b, uint16(e.size()))
b = appendU16(b, 0)
b = appendU32(b, uint32(e.line))
b = appendU32(b, 0xffffffff) // comment
b = appendU32(b, e.prefix.ind)
b = appendU32(b, e.url.ind)
return b
}
type binEndNamespace struct {
line int
prefix *bstring
url *bstring
}
func (binEndNamespace) size() int {
return 8 + // chunk header
4 + // line number
4 + // comment
4 + // prefix
4 // url
}
func (e binEndNamespace) append(b []byte) []byte {
b = appendU16(b, uint16(headerEndNamespace))
b = appendU16(b, 0x10) // chunk header size
b = appendU16(b, uint16(e.size()))
b = appendU16(b, 0)
b = appendU32(b, uint32(e.line))
b = appendU32(b, 0xffffffff) // comment
b = appendU32(b, e.prefix.ind)
b = appendU32(b, e.url.ind)
return b
}
type binCharData struct {
line int
data *bstring
}
func (*binCharData) size() int {
return 8 + // chunk header
4 + // line number
4 + // comment
4 + // data
8 // junk
}
func (e *binCharData) append(b []byte) []byte {
b = appendU16(b, uint16(headerCharData))
b = appendU16(b, 0x10) // chunk header size
b = appendU16(b, 0x1c) // size
b = appendU16(b, 0)
b = appendU32(b, uint32(e.line))
b = appendU32(b, 0xffffffff) // comment
b = appendU32(b, e.data.ind)
b = appendU16(b, 0x08)
b = appendU16(b, 0)
b = appendU16(b, 0)
b = appendU16(b, 0)
return b
}