blob: 35c61c369776089b02c272bec51c71331aedaa2a [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package wasm
import (
"bytes"
"cmd/internal/dwarf"
"cmd/internal/objabi"
"cmd/link/internal/ld"
"cmd/link/internal/loader"
"cmd/link/internal/sym"
"fmt"
"io"
"log"
"regexp"
)
const (
I32 = 0x7F
I64 = 0x7E
F32 = 0x7D
F64 = 0x7C
)
const (
sectionCustom = 0
sectionType = 1
sectionImport = 2
sectionFunction = 3
sectionTable = 4
sectionMemory = 5
sectionGlobal = 6
sectionExport = 7
sectionStart = 8
sectionElement = 9
sectionCode = 10
sectionData = 11
)
// funcValueOffset is the offset between the PC_F value of a function and the index of the function in WebAssembly
const funcValueOffset = 0x1000 // TODO(neelance): make function addresses play nice with heap addresses
func gentext(ctxt *ld.Link, ldr *loader.Loader) {
}
type wasmFunc struct {
Name string
Type uint32
Code []byte
}
type wasmFuncType struct {
Params []byte
Results []byte
}
var wasmFuncTypes = map[string]*wasmFuncType{
"_rt0_wasm_js": {Params: []byte{}}, //
"wasm_export_run": {Params: []byte{I32, I32}}, // argc, argv
"wasm_export_resume": {Params: []byte{}}, //
"wasm_export_getsp": {Results: []byte{I32}}, // sp
"wasm_pc_f_loop": {Params: []byte{}}, //
"runtime.wasmMove": {Params: []byte{I32, I32, I32}}, // dst, src, len
"runtime.wasmZero": {Params: []byte{I32, I32}}, // ptr, len
"runtime.wasmDiv": {Params: []byte{I64, I64}, Results: []byte{I64}}, // x, y -> x/y
"runtime.wasmTruncS": {Params: []byte{F64}, Results: []byte{I64}}, // x -> int(x)
"runtime.wasmTruncU": {Params: []byte{F64}, Results: []byte{I64}}, // x -> uint(x)
"runtime.gcWriteBarrier": {Params: []byte{I64, I64}}, // ptr, val
"cmpbody": {Params: []byte{I64, I64, I64, I64}, Results: []byte{I64}}, // a, alen, b, blen -> -1/0/1
"memeqbody": {Params: []byte{I64, I64, I64}, Results: []byte{I64}}, // a, b, len -> 0/1
"memcmp": {Params: []byte{I32, I32, I32}, Results: []byte{I32}}, // a, b, len -> <0/0/>0
"memchr": {Params: []byte{I32, I32, I32}, Results: []byte{I32}}, // s, c, len -> index
}
func assignAddress(ldr *loader.Loader, sect *sym.Section, n int, s loader.Sym, va uint64, isTramp bool) (*sym.Section, int, uint64) {
// WebAssembly functions do not live in the same address space as the linear memory.
// Instead, WebAssembly automatically assigns indices. Imported functions (section "import")
// have indices 0 to n. They are followed by native functions (sections "function" and "code")
// with indices n+1 and following.
//
// The following rules describe how wasm handles function indices and addresses:
// PC_F = funcValueOffset + WebAssembly function index (not including the imports)
// s.Value = PC = PC_F<<16 + PC_B
//
// The funcValueOffset is necessary to avoid conflicts with expectations
// that the Go runtime has about function addresses.
// The field "s.Value" corresponds to the concept of PC at runtime.
// However, there is no PC register, only PC_F and PC_B. PC_F denotes the function,
// PC_B the resume point inside of that function. The entry of the function has PC_B = 0.
ldr.SetSymSect(s, sect)
ldr.SetSymValue(s, int64(funcValueOffset+va/ld.MINFUNC)<<16) // va starts at zero
va += uint64(ld.MINFUNC)
return sect, n, va
}
type wasmDataSect struct {
sect *sym.Section
data []byte
}
var dataSects []wasmDataSect
func asmb(ctxt *ld.Link, ldr *loader.Loader) {
sections := []*sym.Section{
ldr.SymSect(ldr.Lookup("runtime.rodata", 0)),
ldr.SymSect(ldr.Lookup("runtime.typelink", 0)),
ldr.SymSect(ldr.Lookup("runtime.itablink", 0)),
ldr.SymSect(ldr.Lookup("runtime.symtab", 0)),
ldr.SymSect(ldr.Lookup("runtime.pclntab", 0)),
ldr.SymSect(ldr.Lookup("runtime.noptrdata", 0)),
ldr.SymSect(ldr.Lookup("runtime.data", 0)),
}
dataSects = make([]wasmDataSect, len(sections))
for i, sect := range sections {
data := ld.DatblkBytes(ctxt, int64(sect.Vaddr), int64(sect.Length))
dataSects[i] = wasmDataSect{sect, data}
}
}
// asmb writes the final WebAssembly module binary.
// Spec: https://webassembly.github.io/spec/core/binary/modules.html
func asmb2(ctxt *ld.Link, ldr *loader.Loader) {
types := []*wasmFuncType{
// For normal Go functions, the single parameter is PC_B,
// the return value is
// 0 if the function returned normally or
// 1 if the stack needs to be unwound.
{Params: []byte{I32}, Results: []byte{I32}},
}
// collect host imports (functions that get imported from the WebAssembly host, usually JavaScript)
hostImports := []*wasmFunc{
{
Name: "debug",
Type: lookupType(&wasmFuncType{Params: []byte{I32}}, &types),
},
}
hostImportMap := make(map[loader.Sym]int64)
for _, fn := range ctxt.Textp {
relocs := ldr.Relocs(fn)
for ri := 0; ri < relocs.Count(); ri++ {
r := relocs.At(ri)
if r.Type() == objabi.R_WASMIMPORT {
hostImportMap[r.Sym()] = int64(len(hostImports))
hostImports = append(hostImports, &wasmFunc{
Name: ldr.SymName(r.Sym()),
Type: lookupType(&wasmFuncType{Params: []byte{I32}}, &types),
})
}
}
}
// collect functions with WebAssembly body
var buildid []byte
fns := make([]*wasmFunc, len(ctxt.Textp))
for i, fn := range ctxt.Textp {
wfn := new(bytes.Buffer)
if ldr.SymName(fn) == "go.buildid" {
writeUleb128(wfn, 0) // number of sets of locals
writeI32Const(wfn, 0)
wfn.WriteByte(0x0b) // end
buildid = ldr.Data(fn)
} else {
// Relocations have variable length, handle them here.
relocs := ldr.Relocs(fn)
P := ldr.Data(fn)
off := int32(0)
for ri := 0; ri < relocs.Count(); ri++ {
r := relocs.At(ri)
if r.Siz() == 0 {
continue // skip marker relocations
}
wfn.Write(P[off:r.Off()])
off = r.Off()
rs := ldr.ResolveABIAlias(r.Sym())
switch r.Type() {
case objabi.R_ADDR:
writeSleb128(wfn, ldr.SymValue(rs)+r.Add())
case objabi.R_CALL:
writeSleb128(wfn, int64(len(hostImports))+ldr.SymValue(rs)>>16-funcValueOffset)
case objabi.R_WASMIMPORT:
writeSleb128(wfn, hostImportMap[rs])
default:
ldr.Errorf(fn, "bad reloc type %d (%s)", r.Type(), sym.RelocName(ctxt.Arch, r.Type()))
continue
}
}
wfn.Write(P[off:])
}
typ := uint32(0)
if sig, ok := wasmFuncTypes[ldr.SymName(fn)]; ok {
typ = lookupType(sig, &types)
}
name := nameRegexp.ReplaceAllString(ldr.SymName(fn), "_")
fns[i] = &wasmFunc{Name: name, Type: typ, Code: wfn.Bytes()}
}
ctxt.Out.Write([]byte{0x00, 0x61, 0x73, 0x6d}) // magic
ctxt.Out.Write([]byte{0x01, 0x00, 0x00, 0x00}) // version
// Add any buildid early in the binary:
if len(buildid) != 0 {
writeBuildID(ctxt, buildid)
}
writeTypeSec(ctxt, types)
writeImportSec(ctxt, hostImports)
writeFunctionSec(ctxt, fns)
writeTableSec(ctxt, fns)
writeMemorySec(ctxt, ldr)
writeGlobalSec(ctxt)
writeExportSec(ctxt, ldr, len(hostImports))
writeElementSec(ctxt, uint64(len(hostImports)), uint64(len(fns)))
writeCodeSec(ctxt, fns)
writeDataSec(ctxt)
writeProducerSec(ctxt)
writeDwarfSec(ctxt, ldr)
if !*ld.FlagS {
writeNameSec(ctxt, len(hostImports), fns)
}
}
func lookupType(sig *wasmFuncType, types *[]*wasmFuncType) uint32 {
for i, t := range *types {
if bytes.Equal(sig.Params, t.Params) && bytes.Equal(sig.Results, t.Results) {
return uint32(i)
}
}
*types = append(*types, sig)
return uint32(len(*types) - 1)
}
func writeSecHeader(ctxt *ld.Link, id uint8) int64 {
ctxt.Out.WriteByte(id)
sizeOffset := ctxt.Out.Offset()
ctxt.Out.Write(make([]byte, 5)) // placeholder for length
return sizeOffset
}
func writeSecSize(ctxt *ld.Link, sizeOffset int64) {
endOffset := ctxt.Out.Offset()
ctxt.Out.SeekSet(sizeOffset)
writeUleb128FixedLength(ctxt.Out, uint64(endOffset-sizeOffset-5), 5)
ctxt.Out.SeekSet(endOffset)
}
func writeBuildID(ctxt *ld.Link, buildid []byte) {
sizeOffset := writeSecHeader(ctxt, sectionCustom)
writeName(ctxt.Out, "go.buildid")
ctxt.Out.Write(buildid)
writeSecSize(ctxt, sizeOffset)
}
// writeTypeSec writes the section that declares all function types
// so they can be referenced by index.
func writeTypeSec(ctxt *ld.Link, types []*wasmFuncType) {
sizeOffset := writeSecHeader(ctxt, sectionType)
writeUleb128(ctxt.Out, uint64(len(types)))
for _, t := range types {
ctxt.Out.WriteByte(0x60) // functype
writeUleb128(ctxt.Out, uint64(len(t.Params)))
for _, v := range t.Params {
ctxt.Out.WriteByte(byte(v))
}
writeUleb128(ctxt.Out, uint64(len(t.Results)))
for _, v := range t.Results {
ctxt.Out.WriteByte(byte(v))
}
}
writeSecSize(ctxt, sizeOffset)
}
// writeImportSec writes the section that lists the functions that get
// imported from the WebAssembly host, usually JavaScript.
func writeImportSec(ctxt *ld.Link, hostImports []*wasmFunc) {
sizeOffset := writeSecHeader(ctxt, sectionImport)
writeUleb128(ctxt.Out, uint64(len(hostImports))) // number of imports
for _, fn := range hostImports {
writeName(ctxt.Out, "go") // provided by the import object in wasm_exec.js
writeName(ctxt.Out, fn.Name)
ctxt.Out.WriteByte(0x00) // func import
writeUleb128(ctxt.Out, uint64(fn.Type))
}
writeSecSize(ctxt, sizeOffset)
}
// writeFunctionSec writes the section that declares the types of functions.
// The bodies of these functions will later be provided in the "code" section.
func writeFunctionSec(ctxt *ld.Link, fns []*wasmFunc) {
sizeOffset := writeSecHeader(ctxt, sectionFunction)
writeUleb128(ctxt.Out, uint64(len(fns)))
for _, fn := range fns {
writeUleb128(ctxt.Out, uint64(fn.Type))
}
writeSecSize(ctxt, sizeOffset)
}
// writeTableSec writes the section that declares tables. Currently there is only a single table
// that is used by the CallIndirect operation to dynamically call any function.
// The contents of the table get initialized by the "element" section.
func writeTableSec(ctxt *ld.Link, fns []*wasmFunc) {
sizeOffset := writeSecHeader(ctxt, sectionTable)
numElements := uint64(funcValueOffset + len(fns))
writeUleb128(ctxt.Out, 1) // number of tables
ctxt.Out.WriteByte(0x70) // type: anyfunc
ctxt.Out.WriteByte(0x00) // no max
writeUleb128(ctxt.Out, numElements) // min
writeSecSize(ctxt, sizeOffset)
}
// writeMemorySec writes the section that declares linear memories. Currently one linear memory is being used.
// Linear memory always starts at address zero. More memory can be requested with the GrowMemory instruction.
func writeMemorySec(ctxt *ld.Link, ldr *loader.Loader) {
sizeOffset := writeSecHeader(ctxt, sectionMemory)
dataSection := ldr.SymSect(ldr.Lookup("runtime.data", 0))
dataEnd := dataSection.Vaddr + dataSection.Length
var initialSize = dataEnd + 16<<20 // 16MB, enough for runtime init without growing
const wasmPageSize = 64 << 10 // 64KB
writeUleb128(ctxt.Out, 1) // number of memories
ctxt.Out.WriteByte(0x00) // no maximum memory size
writeUleb128(ctxt.Out, initialSize/wasmPageSize) // minimum (initial) memory size
writeSecSize(ctxt, sizeOffset)
}
// writeGlobalSec writes the section that declares global variables.
func writeGlobalSec(ctxt *ld.Link) {
sizeOffset := writeSecHeader(ctxt, sectionGlobal)
globalRegs := []byte{
I32, // 0: SP
I64, // 1: CTXT
I64, // 2: g
I64, // 3: RET0
I64, // 4: RET1
I64, // 5: RET2
I64, // 6: RET3
I32, // 7: PAUSE
}
writeUleb128(ctxt.Out, uint64(len(globalRegs))) // number of globals
for _, typ := range globalRegs {
ctxt.Out.WriteByte(typ)
ctxt.Out.WriteByte(0x01) // var
switch typ {
case I32:
writeI32Const(ctxt.Out, 0)
case I64:
writeI64Const(ctxt.Out, 0)
}
ctxt.Out.WriteByte(0x0b) // end
}
writeSecSize(ctxt, sizeOffset)
}
// writeExportSec writes the section that declares exports.
// Exports can be accessed by the WebAssembly host, usually JavaScript.
// The wasm_export_* functions and the linear memory get exported.
func writeExportSec(ctxt *ld.Link, ldr *loader.Loader, lenHostImports int) {
sizeOffset := writeSecHeader(ctxt, sectionExport)
writeUleb128(ctxt.Out, 4) // number of exports
for _, name := range []string{"run", "resume", "getsp"} {
s := ldr.Lookup("wasm_export_"+name, 0)
idx := uint32(lenHostImports) + uint32(ldr.SymValue(s)>>16) - funcValueOffset
writeName(ctxt.Out, name) // inst.exports.run/resume/getsp in wasm_exec.js
ctxt.Out.WriteByte(0x00) // func export
writeUleb128(ctxt.Out, uint64(idx)) // funcidx
}
writeName(ctxt.Out, "mem") // inst.exports.mem in wasm_exec.js
ctxt.Out.WriteByte(0x02) // mem export
writeUleb128(ctxt.Out, 0) // memidx
writeSecSize(ctxt, sizeOffset)
}
// writeElementSec writes the section that initializes the tables declared by the "table" section.
// The table for CallIndirect gets initialized in a very simple way so that each table index (PC_F value)
// maps linearly to the function index (numImports + PC_F).
func writeElementSec(ctxt *ld.Link, numImports, numFns uint64) {
sizeOffset := writeSecHeader(ctxt, sectionElement)
writeUleb128(ctxt.Out, 1) // number of element segments
writeUleb128(ctxt.Out, 0) // tableidx
writeI32Const(ctxt.Out, funcValueOffset)
ctxt.Out.WriteByte(0x0b) // end
writeUleb128(ctxt.Out, numFns) // number of entries
for i := uint64(0); i < numFns; i++ {
writeUleb128(ctxt.Out, numImports+i)
}
writeSecSize(ctxt, sizeOffset)
}
// writeElementSec writes the section that provides the function bodies for the functions
// declared by the "func" section.
func writeCodeSec(ctxt *ld.Link, fns []*wasmFunc) {
sizeOffset := writeSecHeader(ctxt, sectionCode)
writeUleb128(ctxt.Out, uint64(len(fns))) // number of code entries
for _, fn := range fns {
writeUleb128(ctxt.Out, uint64(len(fn.Code)))
ctxt.Out.Write(fn.Code)
}
writeSecSize(ctxt, sizeOffset)
}
// writeDataSec writes the section that provides data that will be used to initialize the linear memory.
func writeDataSec(ctxt *ld.Link) {
sizeOffset := writeSecHeader(ctxt, sectionData)
type dataSegment struct {
offset int32
data []byte
}
// Omit blocks of zeroes and instead emit data segments with offsets skipping the zeroes.
// This reduces the size of the WebAssembly binary. We use 8 bytes as an estimate for the
// overhead of adding a new segment (same as wasm-opt's memory-packing optimization uses).
const segmentOverhead = 8
// Generate at most this many segments. A higher number of segments gets rejected by some WebAssembly runtimes.
const maxNumSegments = 100000
var segments []*dataSegment
for secIndex, ds := range dataSects {
data := ds.data
offset := int32(ds.sect.Vaddr)
// skip leading zeroes
for len(data) > 0 && data[0] == 0 {
data = data[1:]
offset++
}
for len(data) > 0 {
dataLen := int32(len(data))
var segmentEnd, zeroEnd int32
if len(segments)+(len(dataSects)-secIndex) == maxNumSegments {
segmentEnd = dataLen
zeroEnd = dataLen
} else {
for {
// look for beginning of zeroes
for segmentEnd < dataLen && data[segmentEnd] != 0 {
segmentEnd++
}
// look for end of zeroes
zeroEnd = segmentEnd
for zeroEnd < dataLen && data[zeroEnd] == 0 {
zeroEnd++
}
// emit segment if omitting zeroes reduces the output size
if zeroEnd-segmentEnd >= segmentOverhead || zeroEnd == dataLen {
break
}
segmentEnd = zeroEnd
}
}
segments = append(segments, &dataSegment{
offset: offset,
data: data[:segmentEnd],
})
data = data[zeroEnd:]
offset += zeroEnd
}
}
writeUleb128(ctxt.Out, uint64(len(segments))) // number of data entries
for _, seg := range segments {
writeUleb128(ctxt.Out, 0) // memidx
writeI32Const(ctxt.Out, seg.offset)
ctxt.Out.WriteByte(0x0b) // end
writeUleb128(ctxt.Out, uint64(len(seg.data)))
ctxt.Out.Write(seg.data)
}
writeSecSize(ctxt, sizeOffset)
}
// writeProducerSec writes an optional section that reports the source language and compiler version.
func writeProducerSec(ctxt *ld.Link) {
sizeOffset := writeSecHeader(ctxt, sectionCustom)
writeName(ctxt.Out, "producers")
writeUleb128(ctxt.Out, 2) // number of fields
writeName(ctxt.Out, "language") // field name
writeUleb128(ctxt.Out, 1) // number of values
writeName(ctxt.Out, "Go") // value: name
writeName(ctxt.Out, objabi.Version) // value: version
writeName(ctxt.Out, "processed-by") // field name
writeUleb128(ctxt.Out, 1) // number of values
writeName(ctxt.Out, "Go cmd/compile") // value: name
writeName(ctxt.Out, objabi.Version) // value: version
writeSecSize(ctxt, sizeOffset)
}
func writeDwarfSec(ctxt *ld.Link, ldr *loader.Loader) {
dwarfp := ld.GetDwarfSections(ctxt, ldr)
for _, sec := range dwarfp {
if len(sec) == 0 {
continue
}
secSym := sec[0]
var dataSyms []loader.Sym
if len(sec) > 1 {
dataSyms = sec[1:]
}
secName := ldr.SymName(secSym)
secOffset := writeSecHeader(ctxt, sectionCustom)
writeName(ctxt.Out, secName)
if secName == ".debug_abbrev" {
// why is debug_abbrev so special?
abbrev := dwarf.GetAbbrev()
if _, err := ctxt.Out.Write(abbrev); err != nil {
fmt.Println(err)
}
} else {
for _, dataSym := range dataSyms {
data := ldr.Data(dataSym)
if _, err := ctxt.Out.Write(data); err != nil {
log.Println(err)
}
}
}
writeSecSize(ctxt, secOffset)
}
}
var nameRegexp = regexp.MustCompile(`[^\w\.]`)
// writeNameSec writes an optional section that assigns names to the functions declared by the "func" section.
// The names are only used by WebAssembly stack traces, debuggers and decompilers.
// TODO(neelance): add symbol table of DATA symbols
func writeNameSec(ctxt *ld.Link, firstFnIndex int, fns []*wasmFunc) {
sizeOffset := writeSecHeader(ctxt, sectionCustom)
writeName(ctxt.Out, "name")
sizeOffset2 := writeSecHeader(ctxt, 0x01) // function names
writeUleb128(ctxt.Out, uint64(len(fns)))
for i, fn := range fns {
writeUleb128(ctxt.Out, uint64(firstFnIndex+i))
writeName(ctxt.Out, fn.Name)
}
writeSecSize(ctxt, sizeOffset2)
writeSecSize(ctxt, sizeOffset)
}
type nameWriter interface {
io.ByteWriter
io.Writer
}
func writeI32Const(w io.ByteWriter, v int32) {
w.WriteByte(0x41) // i32.const
writeSleb128(w, int64(v))
}
func writeI64Const(w io.ByteWriter, v int64) {
w.WriteByte(0x42) // i64.const
writeSleb128(w, v)
}
func writeName(w nameWriter, name string) {
writeUleb128(w, uint64(len(name)))
w.Write([]byte(name))
}
func writeUleb128(w io.ByteWriter, v uint64) {
if v < 128 {
w.WriteByte(uint8(v))
return
}
more := true
for more {
c := uint8(v & 0x7f)
v >>= 7
more = v != 0
if more {
c |= 0x80
}
w.WriteByte(c)
}
}
func writeUleb128FixedLength(w io.ByteWriter, v uint64, length int) {
for i := 0; i < length; i++ {
c := uint8(v & 0x7f)
v >>= 7
if i < length-1 {
c |= 0x80
}
w.WriteByte(c)
}
if v != 0 {
panic("writeUleb128FixedLength: length too small")
}
}
func writeSleb128(w io.ByteWriter, v int64) {
more := true
for more {
c := uint8(v & 0x7f)
s := uint8(v & 0x40)
v >>= 7
more = !((v == 0 && s == 0) || (v == -1 && s != 0))
if more {
c |= 0x80
}
w.WriteByte(c)
}
}