ccitt: implement bitWriter
Change-Id: I4002e054ef8a167d5b254bd02bdbeb7932d47efc
Reviewed-on: https://go-review.googlesource.com/c/image/+/184958
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/ccitt/gen.go b/ccitt/gen.go
index 0245f66..39f81c2 100644
--- a/ccitt/gen.go
+++ b/ccitt/gen.go
@@ -25,14 +25,26 @@
{
w := &bytes.Buffer{}
w.WriteString(header)
- w.WriteString(headerComment)
- write(w, build(modeCodes[:], 0), "modeDecodeTable",
+ w.WriteString(decodeHeaderComment)
+ writeDecodeTable(w, build(modeCodes[:], 0), "modeDecodeTable",
"// modeDecodeTable represents Table 1 and the End-of-Line code.\n")
- write(w, build(whiteCodes[:], 0), "whiteDecodeTable",
+ writeDecodeTable(w, build(whiteCodes[:], 0), "whiteDecodeTable",
"// whiteDecodeTable represents Tables 2 and 3 for a white run.\n")
- write(w, build(blackCodes[:], 0), "blackDecodeTable",
+ writeDecodeTable(w, build(blackCodes[:], 0), "blackDecodeTable",
"// blackDecodeTable represents Tables 2 and 3 for a black run.\n")
writeMaxCodeLength(w, modeCodes[:], whiteCodes[:], blackCodes[:])
+ w.WriteString(encodeHeaderComment)
+ w.WriteString(bitStringTypeDef)
+ writeEncodeTable(w, modeCodes[:], "modeEncodeTable",
+ "// modeEncodeTable represents Table 1 and the End-of-Line code.\n")
+ writeEncodeTable(w, whiteCodes[:64], "whiteEncodeTable2",
+ "// whiteEncodeTable2 represents Table 2 for a white run.\n")
+ writeEncodeTable(w, whiteCodes[64:], "whiteEncodeTable3",
+ "// whiteEncodeTable3 represents Table 3 for a white run.\n")
+ writeEncodeTable(w, blackCodes[:64], "blackEncodeTable2",
+ "// blackEncodeTable2 represents Table 2 for a black run.\n")
+ writeEncodeTable(w, blackCodes[64:], "blackEncodeTable3",
+ "// blackEncodeTable3 represents Table 3 for a black run.\n")
finish(w, "table.go")
}
@@ -50,7 +62,7 @@
`
-const headerComment = `
+const decodeHeaderComment = `
// Each decodeTable is represented by an array of [2]int16's: a binary tree.
// Each array element (other than element 0, which means invalid) is a branch
// node in that tree. The root node is always element 1 (the second element).
@@ -81,6 +93,12 @@
`
+const encodeHeaderComment = `
+// Each encodeTable is represented by an array of bitStrings.
+
+
+`
+
type node struct {
children [2]*node
val uint32
@@ -128,7 +146,7 @@
}
}
-func write(w *bytes.Buffer, root *node, varName string, comment string) {
+func writeDecodeTable(w *bytes.Buffer, root *node, varName, comment string) {
assignBranchIndexes(root)
w.WriteString(comment)
@@ -151,6 +169,32 @@
fmt.Fprintf(w, "}\n\n")
}
+const bitStringTypeDef = `
+// bitString is a pair of uint32 values representing a bit code.
+// The nBits low bits of bits make up the actual bit code.
+// Eg. bitString{0x0004, 8} represents the bitcode "00000100".
+type bitString struct {
+ bits uint32
+ nBits uint32
+}
+
+`
+
+func writeEncodeTable(w *bytes.Buffer, codes []code, varName, comment string) {
+ w.WriteString(comment)
+ fmt.Fprintf(w, "var %s = [...]bitString{\n", varName)
+ for i, code := range codes {
+ s := code.str
+ n := uint32(len(s))
+ c := uint32(0)
+ for j := uint32(0); j < n; j++ {
+ c |= uint32(s[j]&1) << (n - j - 1)
+ }
+ fmt.Fprintf(w, "%d: {0x%04x, %v}, // %q \n", i, c, n, s)
+ }
+ fmt.Fprintf(w, "}\n\n")
+}
+
func assignBranchIndexes(root *node) {
// 0 is reserved for an invalid value.
branchIndex := int32(1)
diff --git a/ccitt/reader.go b/ccitt/reader.go
index 04d0302..62986f9 100644
--- a/ccitt/reader.go
+++ b/ccitt/reader.go
@@ -67,6 +67,12 @@
}
}
+func reverseBitsWithinBytes(b []byte) {
+ for i, c := range b {
+ b[i] = bits.Reverse8(c)
+ }
+}
+
type bitReader struct {
r io.Reader
@@ -139,10 +145,7 @@
b.readErr = err
if b.order != LSB {
- written := b.bytes[:b.bw]
- for i, x := range written {
- written[i] = bits.Reverse8(x)
- }
+ reverseBitsWithinBytes(b.bytes[:b.bw])
}
}
}
diff --git a/ccitt/table.go b/ccitt/table.go
index 29d0ea4..f01cc12 100644
--- a/ccitt/table.go
+++ b/ccitt/table.go
@@ -721,6 +721,255 @@
const maxCodeLength = 13
+// Each encodeTable is represented by an array of bitStrings.
+
+// bitString is a pair of uint32 values representing a bit code.
+// The nBits low bits of bits make up the actual bit code.
+// Eg. bitString{0x0004, 8} represents the bitcode "00000100".
+type bitString struct {
+ bits uint32
+ nBits uint32
+}
+
+// modeEncodeTable represents Table 1 and the End-of-Line code.
+var modeEncodeTable = [...]bitString{
+ 0: {0x0001, 4}, // "0001"
+ 1: {0x0001, 3}, // "001"
+ 2: {0x0001, 1}, // "1"
+ 3: {0x0003, 3}, // "011"
+ 4: {0x0003, 6}, // "000011"
+ 5: {0x0003, 7}, // "0000011"
+ 6: {0x0002, 3}, // "010"
+ 7: {0x0002, 6}, // "000010"
+ 8: {0x0002, 7}, // "0000010"
+ 9: {0x0001, 7}, // "0000001"
+ 10: {0x0001, 12}, // "000000000001"
+}
+
+// whiteEncodeTable2 represents Table 2 for a white run.
+var whiteEncodeTable2 = [...]bitString{
+ 0: {0x0035, 8}, // "00110101"
+ 1: {0x0007, 6}, // "000111"
+ 2: {0x0007, 4}, // "0111"
+ 3: {0x0008, 4}, // "1000"
+ 4: {0x000b, 4}, // "1011"
+ 5: {0x000c, 4}, // "1100"
+ 6: {0x000e, 4}, // "1110"
+ 7: {0x000f, 4}, // "1111"
+ 8: {0x0013, 5}, // "10011"
+ 9: {0x0014, 5}, // "10100"
+ 10: {0x0007, 5}, // "00111"
+ 11: {0x0008, 5}, // "01000"
+ 12: {0x0008, 6}, // "001000"
+ 13: {0x0003, 6}, // "000011"
+ 14: {0x0034, 6}, // "110100"
+ 15: {0x0035, 6}, // "110101"
+ 16: {0x002a, 6}, // "101010"
+ 17: {0x002b, 6}, // "101011"
+ 18: {0x0027, 7}, // "0100111"
+ 19: {0x000c, 7}, // "0001100"
+ 20: {0x0008, 7}, // "0001000"
+ 21: {0x0017, 7}, // "0010111"
+ 22: {0x0003, 7}, // "0000011"
+ 23: {0x0004, 7}, // "0000100"
+ 24: {0x0028, 7}, // "0101000"
+ 25: {0x002b, 7}, // "0101011"
+ 26: {0x0013, 7}, // "0010011"
+ 27: {0x0024, 7}, // "0100100"
+ 28: {0x0018, 7}, // "0011000"
+ 29: {0x0002, 8}, // "00000010"
+ 30: {0x0003, 8}, // "00000011"
+ 31: {0x001a, 8}, // "00011010"
+ 32: {0x001b, 8}, // "00011011"
+ 33: {0x0012, 8}, // "00010010"
+ 34: {0x0013, 8}, // "00010011"
+ 35: {0x0014, 8}, // "00010100"
+ 36: {0x0015, 8}, // "00010101"
+ 37: {0x0016, 8}, // "00010110"
+ 38: {0x0017, 8}, // "00010111"
+ 39: {0x0028, 8}, // "00101000"
+ 40: {0x0029, 8}, // "00101001"
+ 41: {0x002a, 8}, // "00101010"
+ 42: {0x002b, 8}, // "00101011"
+ 43: {0x002c, 8}, // "00101100"
+ 44: {0x002d, 8}, // "00101101"
+ 45: {0x0004, 8}, // "00000100"
+ 46: {0x0005, 8}, // "00000101"
+ 47: {0x000a, 8}, // "00001010"
+ 48: {0x000b, 8}, // "00001011"
+ 49: {0x0052, 8}, // "01010010"
+ 50: {0x0053, 8}, // "01010011"
+ 51: {0x0054, 8}, // "01010100"
+ 52: {0x0055, 8}, // "01010101"
+ 53: {0x0024, 8}, // "00100100"
+ 54: {0x0025, 8}, // "00100101"
+ 55: {0x0058, 8}, // "01011000"
+ 56: {0x0059, 8}, // "01011001"
+ 57: {0x005a, 8}, // "01011010"
+ 58: {0x005b, 8}, // "01011011"
+ 59: {0x004a, 8}, // "01001010"
+ 60: {0x004b, 8}, // "01001011"
+ 61: {0x0032, 8}, // "00110010"
+ 62: {0x0033, 8}, // "00110011"
+ 63: {0x0034, 8}, // "00110100"
+}
+
+// whiteEncodeTable3 represents Table 3 for a white run.
+var whiteEncodeTable3 = [...]bitString{
+ 0: {0x001b, 5}, // "11011"
+ 1: {0x0012, 5}, // "10010"
+ 2: {0x0017, 6}, // "010111"
+ 3: {0x0037, 7}, // "0110111"
+ 4: {0x0036, 8}, // "00110110"
+ 5: {0x0037, 8}, // "00110111"
+ 6: {0x0064, 8}, // "01100100"
+ 7: {0x0065, 8}, // "01100101"
+ 8: {0x0068, 8}, // "01101000"
+ 9: {0x0067, 8}, // "01100111"
+ 10: {0x00cc, 9}, // "011001100"
+ 11: {0x00cd, 9}, // "011001101"
+ 12: {0x00d2, 9}, // "011010010"
+ 13: {0x00d3, 9}, // "011010011"
+ 14: {0x00d4, 9}, // "011010100"
+ 15: {0x00d5, 9}, // "011010101"
+ 16: {0x00d6, 9}, // "011010110"
+ 17: {0x00d7, 9}, // "011010111"
+ 18: {0x00d8, 9}, // "011011000"
+ 19: {0x00d9, 9}, // "011011001"
+ 20: {0x00da, 9}, // "011011010"
+ 21: {0x00db, 9}, // "011011011"
+ 22: {0x0098, 9}, // "010011000"
+ 23: {0x0099, 9}, // "010011001"
+ 24: {0x009a, 9}, // "010011010"
+ 25: {0x0018, 6}, // "011000"
+ 26: {0x009b, 9}, // "010011011"
+ 27: {0x0008, 11}, // "00000001000"
+ 28: {0x000c, 11}, // "00000001100"
+ 29: {0x000d, 11}, // "00000001101"
+ 30: {0x0012, 12}, // "000000010010"
+ 31: {0x0013, 12}, // "000000010011"
+ 32: {0x0014, 12}, // "000000010100"
+ 33: {0x0015, 12}, // "000000010101"
+ 34: {0x0016, 12}, // "000000010110"
+ 35: {0x0017, 12}, // "000000010111"
+ 36: {0x001c, 12}, // "000000011100"
+ 37: {0x001d, 12}, // "000000011101"
+ 38: {0x001e, 12}, // "000000011110"
+ 39: {0x001f, 12}, // "000000011111"
+}
+
+// blackEncodeTable2 represents Table 2 for a black run.
+var blackEncodeTable2 = [...]bitString{
+ 0: {0x0037, 10}, // "0000110111"
+ 1: {0x0002, 3}, // "010"
+ 2: {0x0003, 2}, // "11"
+ 3: {0x0002, 2}, // "10"
+ 4: {0x0003, 3}, // "011"
+ 5: {0x0003, 4}, // "0011"
+ 6: {0x0002, 4}, // "0010"
+ 7: {0x0003, 5}, // "00011"
+ 8: {0x0005, 6}, // "000101"
+ 9: {0x0004, 6}, // "000100"
+ 10: {0x0004, 7}, // "0000100"
+ 11: {0x0005, 7}, // "0000101"
+ 12: {0x0007, 7}, // "0000111"
+ 13: {0x0004, 8}, // "00000100"
+ 14: {0x0007, 8}, // "00000111"
+ 15: {0x0018, 9}, // "000011000"
+ 16: {0x0017, 10}, // "0000010111"
+ 17: {0x0018, 10}, // "0000011000"
+ 18: {0x0008, 10}, // "0000001000"
+ 19: {0x0067, 11}, // "00001100111"
+ 20: {0x0068, 11}, // "00001101000"
+ 21: {0x006c, 11}, // "00001101100"
+ 22: {0x0037, 11}, // "00000110111"
+ 23: {0x0028, 11}, // "00000101000"
+ 24: {0x0017, 11}, // "00000010111"
+ 25: {0x0018, 11}, // "00000011000"
+ 26: {0x00ca, 12}, // "000011001010"
+ 27: {0x00cb, 12}, // "000011001011"
+ 28: {0x00cc, 12}, // "000011001100"
+ 29: {0x00cd, 12}, // "000011001101"
+ 30: {0x0068, 12}, // "000001101000"
+ 31: {0x0069, 12}, // "000001101001"
+ 32: {0x006a, 12}, // "000001101010"
+ 33: {0x006b, 12}, // "000001101011"
+ 34: {0x00d2, 12}, // "000011010010"
+ 35: {0x00d3, 12}, // "000011010011"
+ 36: {0x00d4, 12}, // "000011010100"
+ 37: {0x00d5, 12}, // "000011010101"
+ 38: {0x00d6, 12}, // "000011010110"
+ 39: {0x00d7, 12}, // "000011010111"
+ 40: {0x006c, 12}, // "000001101100"
+ 41: {0x006d, 12}, // "000001101101"
+ 42: {0x00da, 12}, // "000011011010"
+ 43: {0x00db, 12}, // "000011011011"
+ 44: {0x0054, 12}, // "000001010100"
+ 45: {0x0055, 12}, // "000001010101"
+ 46: {0x0056, 12}, // "000001010110"
+ 47: {0x0057, 12}, // "000001010111"
+ 48: {0x0064, 12}, // "000001100100"
+ 49: {0x0065, 12}, // "000001100101"
+ 50: {0x0052, 12}, // "000001010010"
+ 51: {0x0053, 12}, // "000001010011"
+ 52: {0x0024, 12}, // "000000100100"
+ 53: {0x0037, 12}, // "000000110111"
+ 54: {0x0038, 12}, // "000000111000"
+ 55: {0x0027, 12}, // "000000100111"
+ 56: {0x0028, 12}, // "000000101000"
+ 57: {0x0058, 12}, // "000001011000"
+ 58: {0x0059, 12}, // "000001011001"
+ 59: {0x002b, 12}, // "000000101011"
+ 60: {0x002c, 12}, // "000000101100"
+ 61: {0x005a, 12}, // "000001011010"
+ 62: {0x0066, 12}, // "000001100110"
+ 63: {0x0067, 12}, // "000001100111"
+}
+
+// blackEncodeTable3 represents Table 3 for a black run.
+var blackEncodeTable3 = [...]bitString{
+ 0: {0x000f, 10}, // "0000001111"
+ 1: {0x00c8, 12}, // "000011001000"
+ 2: {0x00c9, 12}, // "000011001001"
+ 3: {0x005b, 12}, // "000001011011"
+ 4: {0x0033, 12}, // "000000110011"
+ 5: {0x0034, 12}, // "000000110100"
+ 6: {0x0035, 12}, // "000000110101"
+ 7: {0x006c, 13}, // "0000001101100"
+ 8: {0x006d, 13}, // "0000001101101"
+ 9: {0x004a, 13}, // "0000001001010"
+ 10: {0x004b, 13}, // "0000001001011"
+ 11: {0x004c, 13}, // "0000001001100"
+ 12: {0x004d, 13}, // "0000001001101"
+ 13: {0x0072, 13}, // "0000001110010"
+ 14: {0x0073, 13}, // "0000001110011"
+ 15: {0x0074, 13}, // "0000001110100"
+ 16: {0x0075, 13}, // "0000001110101"
+ 17: {0x0076, 13}, // "0000001110110"
+ 18: {0x0077, 13}, // "0000001110111"
+ 19: {0x0052, 13}, // "0000001010010"
+ 20: {0x0053, 13}, // "0000001010011"
+ 21: {0x0054, 13}, // "0000001010100"
+ 22: {0x0055, 13}, // "0000001010101"
+ 23: {0x005a, 13}, // "0000001011010"
+ 24: {0x005b, 13}, // "0000001011011"
+ 25: {0x0064, 13}, // "0000001100100"
+ 26: {0x0065, 13}, // "0000001100101"
+ 27: {0x0008, 11}, // "00000001000"
+ 28: {0x000c, 11}, // "00000001100"
+ 29: {0x000d, 11}, // "00000001101"
+ 30: {0x0012, 12}, // "000000010010"
+ 31: {0x0013, 12}, // "000000010011"
+ 32: {0x0014, 12}, // "000000010100"
+ 33: {0x0015, 12}, // "000000010101"
+ 34: {0x0016, 12}, // "000000010110"
+ 35: {0x0017, 12}, // "000000010111"
+ 36: {0x001c, 12}, // "000000011100"
+ 37: {0x001d, 12}, // "000000011101"
+ 38: {0x001e, 12}, // "000000011110"
+ 39: {0x001f, 12}, // "000000011111"
+}
+
// COPY PASTE table.go BEGIN
const (
diff --git a/ccitt/writer.go b/ccitt/writer.go
new file mode 100644
index 0000000..87130ab
--- /dev/null
+++ b/ccitt/writer.go
@@ -0,0 +1,102 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ccitt
+
+import (
+ "encoding/binary"
+ "io"
+)
+
+type bitWriter struct {
+ w io.Writer
+
+ // order is whether to process w's bytes LSB first or MSB first.
+ order Order
+
+ // The high nBits bits of the bits field hold encoded bits to be written to w.
+ bits uint64
+ nBits uint32
+
+ // bytes[:bw] holds encoded bytes not yet written to w.
+ // Overflow protection is ensured by using a multiple of 8 as bytes length.
+ bw uint32
+ bytes [1024]uint8
+}
+
+// flushBits copies 64 bits from b.bits to b.bytes. If b.bytes is then full, it
+// is written to b.w.
+func (b *bitWriter) flushBits() error {
+ binary.BigEndian.PutUint64(b.bytes[b.bw:], b.bits)
+ b.bits = 0
+ b.nBits = 0
+ b.bw += 8
+ if b.bw < uint32(len(b.bytes)) {
+ return nil
+ }
+ b.bw = 0
+ if b.order != MSB {
+ reverseBitsWithinBytes(b.bytes[:])
+ }
+ _, err := b.w.Write(b.bytes[:])
+ return err
+}
+
+// close finalizes a bitcode stream by writing any
+// pending bits to bitWriter's underlying io.Writer.
+func (b *bitWriter) close() error {
+ // Write any encoded bits to bytes.
+ if b.nBits > 0 {
+ binary.BigEndian.PutUint64(b.bytes[b.bw:], b.bits)
+ b.bw += (b.nBits + 7) >> 3
+ }
+
+ if b.order != MSB {
+ reverseBitsWithinBytes(b.bytes[:b.bw])
+ }
+
+ // Write b.bw bytes to b.w.
+ _, err := b.w.Write(b.bytes[:b.bw])
+ return err
+}
+
+// alignToByteBoundary rounds b.nBits up to a multiple of 8.
+// If all 64 bits are used, flush them to bitWriter's bytes.
+func (b *bitWriter) alignToByteBoundary() error {
+ if b.nBits = (b.nBits + 7) &^ 7; b.nBits == 64 {
+ return b.flushBits()
+ }
+ return nil
+}
+
+// writeCode writes a variable length bitcode to b's underlying io.Writer.
+func (b *bitWriter) writeCode(bs bitString) error {
+ bits := bs.bits
+ nBits := bs.nBits
+ if 64-b.nBits >= nBits {
+ // b.bits has sufficient room for storing nBits bits.
+ b.bits |= uint64(bits) << (64 - nBits - b.nBits)
+ b.nBits += nBits
+ if b.nBits == 64 {
+ return b.flushBits()
+ }
+ return nil
+ }
+
+ // Number of leading bits that fill b.bits.
+ i := 64 - b.nBits
+
+ // Fill b.bits then flush and write remaining bits.
+ b.bits |= uint64(bits) >> (nBits - i)
+ b.nBits = 64
+
+ if err := b.flushBits(); err != nil {
+ return err
+ }
+
+ nBits -= i
+ b.bits = uint64(bits) << (64 - nBits)
+ b.nBits = nBits
+ return nil
+}
diff --git a/ccitt/writer_test.go b/ccitt/writer_test.go
new file mode 100644
index 0000000..afa5097
--- /dev/null
+++ b/ccitt/writer_test.go
@@ -0,0 +1,67 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ccitt
+
+import (
+ "bytes"
+ "reflect"
+ "testing"
+)
+
+func testEncode(t *testing.T, o Order) {
+ t.Helper()
+ values := []uint32{0, 1, 256, 7, 128, 3, 2560, 2240, 2368, 2048}
+
+ decTable := whiteDecodeTable[:]
+ encTableSmall := whiteEncodeTable2[:]
+ encTableBig := whiteEncodeTable3[:]
+
+ // Encode values to bit stream.
+ var bb bytes.Buffer
+ w := &bitWriter{w: &bb, order: o}
+ for _, v := range values {
+ encTable := encTableSmall
+ if v < 64 {
+ // No-op.
+ } else if v&63 != 0 {
+ t.Fatalf("writeCode: cannot encode %d: large but not a multiple of 64", v)
+ } else {
+ encTable = encTableBig
+ v = v/64 - 1
+ }
+ if err := w.writeCode(encTable[v]); err != nil {
+ t.Fatalf("writeCode: %v", err)
+ }
+ }
+ if err := w.close(); err != nil {
+ t.Fatalf("close: %v", err)
+ }
+
+ // Decode bit stream to values.
+ got := []uint32(nil)
+ r := &bitReader{
+ r: bytes.NewReader(bb.Bytes()),
+ order: o,
+ }
+ finalValue := values[len(values)-1]
+ for {
+ v, err := decode(r, decTable)
+ if err != nil {
+ t.Fatalf("after got=%d: %v", got, err)
+ }
+ got = append(got, v)
+ if v == finalValue {
+ break
+ }
+ }
+
+ // Check that the round-tripped values were unchanged.
+ if !reflect.DeepEqual(got, values) {
+ t.Fatalf("\ngot: %v\nwant: %v", got, values)
+ }
+}
+
+func TestEncodeLSB(t *testing.T) { testEncode(t, LSB) }
+func TestEncodeMSB(t *testing.T) { testEncode(t, MSB) }