| // Copyright 2021 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package pkgbits |
| |
| import ( |
| "bytes" |
| "crypto/md5" |
| "encoding/binary" |
| "go/constant" |
| "io" |
| "math/big" |
| "runtime" |
| ) |
| |
// currentVersion is the current version number of the encoded
// format. DumpTo writes it as the first 32-bit word of its output.
//
//   - v0: initial prototype
//
//   - v1: adds the flags uint32 word
const currentVersion uint32 = 1
| |
// A PkgEncoder provides methods for encoding a package's Unified IR
// export data.
type PkgEncoder struct {
	// elems holds the bitstream for previously encoded elements.
	// It is indexed first by section (RelocKind) and then by the
	// element's Index within that section.
	elems [numRelocs][]string

	// stringsIdx maps previously encoded strings to their index within
	// the RelocString section, to allow deduplication. That is,
	// elems[RelocString][stringsIdx[s]] == s (if present).
	stringsIdx map[string]Index

	// syncFrames is the number of frames to write at each sync
	// marker. A negative value means sync markers are omitted.
	syncFrames int
}
| |
| // SyncMarkers reports whether pw uses sync markers. |
| func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 } |
| |
| // NewPkgEncoder returns an initialized PkgEncoder. |
| // |
| // syncFrames is the number of caller frames that should be serialized |
| // at Sync points. Serializing additional frames results in larger |
| // export data files, but can help diagnosing desync errors in |
| // higher-level Unified IR reader/writer code. If syncFrames is |
| // negative, then sync markers are omitted entirely. |
| func NewPkgEncoder(syncFrames int) PkgEncoder { |
| return PkgEncoder{ |
| stringsIdx: make(map[string]Index), |
| syncFrames: syncFrames, |
| } |
| } |
| |
| // DumpTo writes the package's encoded data to out0 and returns the |
| // package fingerprint. |
| func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) { |
| h := md5.New() |
| out := io.MultiWriter(out0, h) |
| |
| writeUint32 := func(x uint32) { |
| assert(binary.Write(out, binary.LittleEndian, x) == nil) |
| } |
| |
| writeUint32(currentVersion) |
| |
| var flags uint32 |
| if pw.SyncMarkers() { |
| flags |= flagSyncMarkers |
| } |
| writeUint32(flags) |
| |
| // Write elemEndsEnds. |
| var sum uint32 |
| for _, elems := range &pw.elems { |
| sum += uint32(len(elems)) |
| writeUint32(sum) |
| } |
| |
| // Write elemEnds. |
| sum = 0 |
| for _, elems := range &pw.elems { |
| for _, elem := range elems { |
| sum += uint32(len(elem)) |
| writeUint32(sum) |
| } |
| } |
| |
| // Write elemData. |
| for _, elems := range &pw.elems { |
| for _, elem := range elems { |
| _, err := io.WriteString(out, elem) |
| assert(err == nil) |
| } |
| } |
| |
| // Write fingerprint. |
| copy(fingerprint[:], h.Sum(nil)) |
| _, err := out0.Write(fingerprint[:]) |
| assert(err == nil) |
| |
| return |
| } |
| |
| // StringIdx adds a string value to the strings section, if not |
| // already present, and returns its index. |
| func (pw *PkgEncoder) StringIdx(s string) Index { |
| if idx, ok := pw.stringsIdx[s]; ok { |
| assert(pw.elems[RelocString][idx] == s) |
| return idx |
| } |
| |
| idx := Index(len(pw.elems[RelocString])) |
| pw.elems[RelocString] = append(pw.elems[RelocString], s) |
| pw.stringsIdx[s] = idx |
| return idx |
| } |
| |
| // NewEncoder returns an Encoder for a new element within the given |
| // section, and encodes the given SyncMarker as the start of the |
| // element bitstream. |
| func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder { |
| e := pw.NewEncoderRaw(k) |
| e.Sync(marker) |
| return e |
| } |
| |
| // NewEncoderRaw returns an Encoder for a new element within the given |
| // section. |
| // |
| // Most callers should use NewEncoder instead. |
| func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder { |
| idx := Index(len(pw.elems[k])) |
| pw.elems[k] = append(pw.elems[k], "") // placeholder |
| |
| return Encoder{ |
| p: pw, |
| k: k, |
| Idx: idx, |
| } |
| } |
| |
// An Encoder provides methods for encoding an individual element's
// bitstream data.
type Encoder struct {
	// p is the PkgEncoder that this element belongs to.
	p *PkgEncoder

	// Relocs lists the (section, index) pairs referenced by this
	// element, in order of first use.
	Relocs []RelocEnt
	// RelocMap maps each entry of Relocs to its position in Relocs.
	// It is allocated lazily on the first relocation.
	RelocMap map[RelocEnt]uint32
	Data     bytes.Buffer // accumulated element bitstream data

	// encodingRelocHeader is set by Flush while the relocation
	// header is being written; Sync omits stack frames while it is
	// set.
	encodingRelocHeader bool

	k   RelocKind // section that this element is written to
	Idx Index     // index within relocation section
}
| |
| // Flush finalizes the element's bitstream and returns its Index. |
| func (w *Encoder) Flush() Index { |
| var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved |
| |
| // Backup the data so we write the relocations at the front. |
| var tmp bytes.Buffer |
| io.Copy(&tmp, &w.Data) |
| |
| // TODO(mdempsky): Consider writing these out separately so they're |
| // easier to strip, along with function bodies, so that we can prune |
| // down to just the data that's relevant to go/types. |
| if w.encodingRelocHeader { |
| panic("encodingRelocHeader already true; recursive flush?") |
| } |
| w.encodingRelocHeader = true |
| w.Sync(SyncRelocs) |
| w.Len(len(w.Relocs)) |
| for _, rEnt := range w.Relocs { |
| w.Sync(SyncReloc) |
| w.Len(int(rEnt.Kind)) |
| w.Len(int(rEnt.Idx)) |
| } |
| |
| io.Copy(&sb, &w.Data) |
| io.Copy(&sb, &tmp) |
| w.p.elems[w.k][w.Idx] = sb.String() |
| |
| return w.Idx |
| } |
| |
| func (w *Encoder) checkErr(err error) { |
| if err != nil { |
| errorf("unexpected encoding error: %v", err) |
| } |
| } |
| |
| func (w *Encoder) rawUvarint(x uint64) { |
| var buf [binary.MaxVarintLen64]byte |
| n := binary.PutUvarint(buf[:], x) |
| _, err := w.Data.Write(buf[:n]) |
| w.checkErr(err) |
| } |
| |
| func (w *Encoder) rawVarint(x int64) { |
| // Zig-zag encode. |
| ux := uint64(x) << 1 |
| if x < 0 { |
| ux = ^ux |
| } |
| |
| w.rawUvarint(ux) |
| } |
| |
| func (w *Encoder) rawReloc(r RelocKind, idx Index) int { |
| e := RelocEnt{r, idx} |
| if w.RelocMap != nil { |
| if i, ok := w.RelocMap[e]; ok { |
| return int(i) |
| } |
| } else { |
| w.RelocMap = make(map[RelocEnt]uint32) |
| } |
| |
| i := len(w.Relocs) |
| w.RelocMap[e] = uint32(i) |
| w.Relocs = append(w.Relocs, e) |
| return i |
| } |
| |
| func (w *Encoder) Sync(m SyncMarker) { |
| if !w.p.SyncMarkers() { |
| return |
| } |
| |
| // Writing out stack frame string references requires working |
| // relocations, but writing out the relocations themselves involves |
| // sync markers. To prevent infinite recursion, we simply trim the |
| // stack frame for sync markers within the relocation header. |
| var frames []string |
| if !w.encodingRelocHeader && w.p.syncFrames > 0 { |
| pcs := make([]uintptr, w.p.syncFrames) |
| n := runtime.Callers(2, pcs) |
| frames = fmtFrames(pcs[:n]...) |
| } |
| |
| // TODO(mdempsky): Save space by writing out stack frames as a |
| // linked list so we can share common stack frames. |
| w.rawUvarint(uint64(m)) |
| w.rawUvarint(uint64(len(frames))) |
| for _, frame := range frames { |
| w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame)))) |
| } |
| } |
| |
| // Bool encodes and writes a bool value into the element bitstream, |
| // and then returns the bool value. |
| // |
| // For simple, 2-alternative encodings, the idiomatic way to call Bool |
| // is something like: |
| // |
| // if w.Bool(x != 0) { |
| // // alternative #1 |
| // } else { |
| // // alternative #2 |
| // } |
| // |
| // For multi-alternative encodings, use Code instead. |
| func (w *Encoder) Bool(b bool) bool { |
| w.Sync(SyncBool) |
| var x byte |
| if b { |
| x = 1 |
| } |
| err := w.Data.WriteByte(x) |
| w.checkErr(err) |
| return b |
| } |
| |
| // Int64 encodes and writes an int64 value into the element bitstream. |
| func (w *Encoder) Int64(x int64) { |
| w.Sync(SyncInt64) |
| w.rawVarint(x) |
| } |
| |
| // Uint64 encodes and writes a uint64 value into the element bitstream. |
| func (w *Encoder) Uint64(x uint64) { |
| w.Sync(SyncUint64) |
| w.rawUvarint(x) |
| } |
| |
| // Len encodes and writes a non-negative int value into the element bitstream. |
| func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) } |
| |
| // Int encodes and writes an int value into the element bitstream. |
| func (w *Encoder) Int(x int) { w.Int64(int64(x)) } |
| |
| // Len encodes and writes a uint value into the element bitstream. |
| func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) } |
| |
| // Reloc encodes and writes a relocation for the given (section, |
| // index) pair into the element bitstream. |
| // |
| // Note: Only the index is formally written into the element |
| // bitstream, so bitstream decoders must know from context which |
| // section an encoded relocation refers to. |
| func (w *Encoder) Reloc(r RelocKind, idx Index) { |
| w.Sync(SyncUseReloc) |
| w.Len(w.rawReloc(r, idx)) |
| } |
| |
| // Code encodes and writes a Code value into the element bitstream. |
| func (w *Encoder) Code(c Code) { |
| w.Sync(c.Marker()) |
| w.Len(c.Value()) |
| } |
| |
| // String encodes and writes a string value into the element |
| // bitstream. |
| // |
| // Internally, strings are deduplicated by adding them to the strings |
| // section (if not already present), and then writing a relocation |
| // into the element bitstream. |
| func (w *Encoder) String(s string) { |
| w.Sync(SyncString) |
| w.Reloc(RelocString, w.p.StringIdx(s)) |
| } |
| |
| // Strings encodes and writes a variable-length slice of strings into |
| // the element bitstream. |
| func (w *Encoder) Strings(ss []string) { |
| w.Len(len(ss)) |
| for _, s := range ss { |
| w.String(s) |
| } |
| } |
| |
| // Value encodes and writes a constant.Value into the element |
| // bitstream. |
| func (w *Encoder) Value(val constant.Value) { |
| w.Sync(SyncValue) |
| if w.Bool(val.Kind() == constant.Complex) { |
| w.scalar(constant.Real(val)) |
| w.scalar(constant.Imag(val)) |
| } else { |
| w.scalar(val) |
| } |
| } |
| |
| func (w *Encoder) scalar(val constant.Value) { |
| switch v := constant.Val(val).(type) { |
| default: |
| errorf("unhandled %v (%v)", val, val.Kind()) |
| case bool: |
| w.Code(ValBool) |
| w.Bool(v) |
| case string: |
| w.Code(ValString) |
| w.String(v) |
| case int64: |
| w.Code(ValInt64) |
| w.Int64(v) |
| case *big.Int: |
| w.Code(ValBigInt) |
| w.bigInt(v) |
| case *big.Rat: |
| w.Code(ValBigRat) |
| w.bigInt(v.Num()) |
| w.bigInt(v.Denom()) |
| case *big.Float: |
| w.Code(ValBigFloat) |
| w.bigFloat(v) |
| } |
| } |
| |
| func (w *Encoder) bigInt(v *big.Int) { |
| b := v.Bytes() |
| w.String(string(b)) // TODO: More efficient encoding. |
| w.Bool(v.Sign() < 0) |
| } |
| |
| func (w *Encoder) bigFloat(v *big.Float) { |
| b := v.Append(nil, 'p', -1) |
| w.String(string(b)) // TODO: More efficient encoding. |
| } |