ccitt: implement bitWriter

Change-Id: I4002e054ef8a167d5b254bd02bdbeb7932d47efc
Reviewed-on: https://go-review.googlesource.com/c/image/+/184958
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/ccitt/gen.go b/ccitt/gen.go
index 0245f66..39f81c2 100644
--- a/ccitt/gen.go
+++ b/ccitt/gen.go
@@ -25,14 +25,26 @@
 	{
 		w := &bytes.Buffer{}
 		w.WriteString(header)
-		w.WriteString(headerComment)
-		write(w, build(modeCodes[:], 0), "modeDecodeTable",
+		w.WriteString(decodeHeaderComment)
+		writeDecodeTable(w, build(modeCodes[:], 0), "modeDecodeTable",
 			"// modeDecodeTable represents Table 1 and the End-of-Line code.\n")
-		write(w, build(whiteCodes[:], 0), "whiteDecodeTable",
+		writeDecodeTable(w, build(whiteCodes[:], 0), "whiteDecodeTable",
 			"// whiteDecodeTable represents Tables 2 and 3 for a white run.\n")
-		write(w, build(blackCodes[:], 0), "blackDecodeTable",
+		writeDecodeTable(w, build(blackCodes[:], 0), "blackDecodeTable",
 			"// blackDecodeTable represents Tables 2 and 3 for a black run.\n")
 		writeMaxCodeLength(w, modeCodes[:], whiteCodes[:], blackCodes[:])
+		w.WriteString(encodeHeaderComment)
+		w.WriteString(bitStringTypeDef)
+		writeEncodeTable(w, modeCodes[:], "modeEncodeTable",
+			"// modeEncodeTable represents Table 1 and the End-of-Line code.\n")
+		writeEncodeTable(w, whiteCodes[:64], "whiteEncodeTable2",
+			"// whiteEncodeTable2 represents Table 2 for a white run.\n")
+		writeEncodeTable(w, whiteCodes[64:], "whiteEncodeTable3",
+			"// whiteEncodeTable3 represents Table 3 for a white run.\n")
+		writeEncodeTable(w, blackCodes[:64], "blackEncodeTable2",
+			"// blackEncodeTable2 represents Table 2 for a black run.\n")
+		writeEncodeTable(w, blackCodes[64:], "blackEncodeTable3",
+			"// blackEncodeTable3 represents Table 3 for a black run.\n")
 		finish(w, "table.go")
 	}
 
@@ -50,7 +62,7 @@
 
 `
 
-const headerComment = `
+const decodeHeaderComment = `
 // Each decodeTable is represented by an array of [2]int16's: a binary tree.
 // Each array element (other than element 0, which means invalid) is a branch
 // node in that tree. The root node is always element 1 (the second element).
@@ -81,6 +93,12 @@
 
 `
 
+const encodeHeaderComment = `
+// Each encodeTable is represented by an array of bitStrings.
+
+
+`
+
 type node struct {
 	children    [2]*node
 	val         uint32
@@ -128,7 +146,7 @@
 	}
 }
 
-func write(w *bytes.Buffer, root *node, varName string, comment string) {
+func writeDecodeTable(w *bytes.Buffer, root *node, varName, comment string) {
 	assignBranchIndexes(root)
 
 	w.WriteString(comment)
@@ -151,6 +169,32 @@
 	fmt.Fprintf(w, "}\n\n")
 }
 
+const bitStringTypeDef = `
+// bitString is a pair of uint32 values representing a bit code.
+// The nBits low bits of bits make up the actual bit code.
+// Eg. bitString{0x0004, 8} represents the bitcode "00000100".
+type bitString struct {
+	bits  uint32
+	nBits uint32
+}
+
+`
+
+func writeEncodeTable(w *bytes.Buffer, codes []code, varName, comment string) {
+	w.WriteString(comment)
+	fmt.Fprintf(w, "var %s = [...]bitString{\n", varName)
+	for i, code := range codes {
+		s := code.str
+		n := uint32(len(s))
+		c := uint32(0)
+		for j := uint32(0); j < n; j++ {
+			c |= uint32(s[j]&1) << (n - j - 1)
+		}
+		fmt.Fprintf(w, "%d: {0x%04x, %v}, // %q \n", i, c, n, s)
+	}
+	fmt.Fprintf(w, "}\n\n")
+}
+
 func assignBranchIndexes(root *node) {
 	// 0 is reserved for an invalid value.
 	branchIndex := int32(1)
diff --git a/ccitt/reader.go b/ccitt/reader.go
index 04d0302..62986f9 100644
--- a/ccitt/reader.go
+++ b/ccitt/reader.go
@@ -67,6 +67,12 @@
 	}
 }
 
+func reverseBitsWithinBytes(b []byte) {
+	for i, c := range b {
+		b[i] = bits.Reverse8(c)
+	}
+}
+
 type bitReader struct {
 	r io.Reader
 
@@ -139,10 +145,7 @@
 		b.readErr = err
 
 		if b.order != LSB {
-			written := b.bytes[:b.bw]
-			for i, x := range written {
-				written[i] = bits.Reverse8(x)
-			}
+			reverseBitsWithinBytes(b.bytes[:b.bw])
 		}
 	}
 }
diff --git a/ccitt/table.go b/ccitt/table.go
index 29d0ea4..f01cc12 100644
--- a/ccitt/table.go
+++ b/ccitt/table.go
@@ -721,6 +721,255 @@
 
 const maxCodeLength = 13
 
+// Each encodeTable is represented by an array of bitStrings.
+
+// bitString is a pair of uint32 values representing a bit code.
+// The nBits low bits of bits make up the actual bit code.
+// Eg. bitString{0x0004, 8} represents the bitcode "00000100".
+type bitString struct {
+	bits  uint32
+	nBits uint32
+}
+
+// modeEncodeTable represents Table 1 and the End-of-Line code.
+var modeEncodeTable = [...]bitString{
+	0:  {0x0001, 4},  // "0001"
+	1:  {0x0001, 3},  // "001"
+	2:  {0x0001, 1},  // "1"
+	3:  {0x0003, 3},  // "011"
+	4:  {0x0003, 6},  // "000011"
+	5:  {0x0003, 7},  // "0000011"
+	6:  {0x0002, 3},  // "010"
+	7:  {0x0002, 6},  // "000010"
+	8:  {0x0002, 7},  // "0000010"
+	9:  {0x0001, 7},  // "0000001"
+	10: {0x0001, 12}, // "000000000001"
+}
+
+// whiteEncodeTable2 represents Table 2 for a white run.
+var whiteEncodeTable2 = [...]bitString{
+	0:  {0x0035, 8}, // "00110101"
+	1:  {0x0007, 6}, // "000111"
+	2:  {0x0007, 4}, // "0111"
+	3:  {0x0008, 4}, // "1000"
+	4:  {0x000b, 4}, // "1011"
+	5:  {0x000c, 4}, // "1100"
+	6:  {0x000e, 4}, // "1110"
+	7:  {0x000f, 4}, // "1111"
+	8:  {0x0013, 5}, // "10011"
+	9:  {0x0014, 5}, // "10100"
+	10: {0x0007, 5}, // "00111"
+	11: {0x0008, 5}, // "01000"
+	12: {0x0008, 6}, // "001000"
+	13: {0x0003, 6}, // "000011"
+	14: {0x0034, 6}, // "110100"
+	15: {0x0035, 6}, // "110101"
+	16: {0x002a, 6}, // "101010"
+	17: {0x002b, 6}, // "101011"
+	18: {0x0027, 7}, // "0100111"
+	19: {0x000c, 7}, // "0001100"
+	20: {0x0008, 7}, // "0001000"
+	21: {0x0017, 7}, // "0010111"
+	22: {0x0003, 7}, // "0000011"
+	23: {0x0004, 7}, // "0000100"
+	24: {0x0028, 7}, // "0101000"
+	25: {0x002b, 7}, // "0101011"
+	26: {0x0013, 7}, // "0010011"
+	27: {0x0024, 7}, // "0100100"
+	28: {0x0018, 7}, // "0011000"
+	29: {0x0002, 8}, // "00000010"
+	30: {0x0003, 8}, // "00000011"
+	31: {0x001a, 8}, // "00011010"
+	32: {0x001b, 8}, // "00011011"
+	33: {0x0012, 8}, // "00010010"
+	34: {0x0013, 8}, // "00010011"
+	35: {0x0014, 8}, // "00010100"
+	36: {0x0015, 8}, // "00010101"
+	37: {0x0016, 8}, // "00010110"
+	38: {0x0017, 8}, // "00010111"
+	39: {0x0028, 8}, // "00101000"
+	40: {0x0029, 8}, // "00101001"
+	41: {0x002a, 8}, // "00101010"
+	42: {0x002b, 8}, // "00101011"
+	43: {0x002c, 8}, // "00101100"
+	44: {0x002d, 8}, // "00101101"
+	45: {0x0004, 8}, // "00000100"
+	46: {0x0005, 8}, // "00000101"
+	47: {0x000a, 8}, // "00001010"
+	48: {0x000b, 8}, // "00001011"
+	49: {0x0052, 8}, // "01010010"
+	50: {0x0053, 8}, // "01010011"
+	51: {0x0054, 8}, // "01010100"
+	52: {0x0055, 8}, // "01010101"
+	53: {0x0024, 8}, // "00100100"
+	54: {0x0025, 8}, // "00100101"
+	55: {0x0058, 8}, // "01011000"
+	56: {0x0059, 8}, // "01011001"
+	57: {0x005a, 8}, // "01011010"
+	58: {0x005b, 8}, // "01011011"
+	59: {0x004a, 8}, // "01001010"
+	60: {0x004b, 8}, // "01001011"
+	61: {0x0032, 8}, // "00110010"
+	62: {0x0033, 8}, // "00110011"
+	63: {0x0034, 8}, // "00110100"
+}
+
+// whiteEncodeTable3 represents Table 3 for a white run.
+var whiteEncodeTable3 = [...]bitString{
+	0:  {0x001b, 5},  // "11011"
+	1:  {0x0012, 5},  // "10010"
+	2:  {0x0017, 6},  // "010111"
+	3:  {0x0037, 7},  // "0110111"
+	4:  {0x0036, 8},  // "00110110"
+	5:  {0x0037, 8},  // "00110111"
+	6:  {0x0064, 8},  // "01100100"
+	7:  {0x0065, 8},  // "01100101"
+	8:  {0x0068, 8},  // "01101000"
+	9:  {0x0067, 8},  // "01100111"
+	10: {0x00cc, 9},  // "011001100"
+	11: {0x00cd, 9},  // "011001101"
+	12: {0x00d2, 9},  // "011010010"
+	13: {0x00d3, 9},  // "011010011"
+	14: {0x00d4, 9},  // "011010100"
+	15: {0x00d5, 9},  // "011010101"
+	16: {0x00d6, 9},  // "011010110"
+	17: {0x00d7, 9},  // "011010111"
+	18: {0x00d8, 9},  // "011011000"
+	19: {0x00d9, 9},  // "011011001"
+	20: {0x00da, 9},  // "011011010"
+	21: {0x00db, 9},  // "011011011"
+	22: {0x0098, 9},  // "010011000"
+	23: {0x0099, 9},  // "010011001"
+	24: {0x009a, 9},  // "010011010"
+	25: {0x0018, 6},  // "011000"
+	26: {0x009b, 9},  // "010011011"
+	27: {0x0008, 11}, // "00000001000"
+	28: {0x000c, 11}, // "00000001100"
+	29: {0x000d, 11}, // "00000001101"
+	30: {0x0012, 12}, // "000000010010"
+	31: {0x0013, 12}, // "000000010011"
+	32: {0x0014, 12}, // "000000010100"
+	33: {0x0015, 12}, // "000000010101"
+	34: {0x0016, 12}, // "000000010110"
+	35: {0x0017, 12}, // "000000010111"
+	36: {0x001c, 12}, // "000000011100"
+	37: {0x001d, 12}, // "000000011101"
+	38: {0x001e, 12}, // "000000011110"
+	39: {0x001f, 12}, // "000000011111"
+}
+
+// blackEncodeTable2 represents Table 2 for a black run.
+var blackEncodeTable2 = [...]bitString{
+	0:  {0x0037, 10}, // "0000110111"
+	1:  {0x0002, 3},  // "010"
+	2:  {0x0003, 2},  // "11"
+	3:  {0x0002, 2},  // "10"
+	4:  {0x0003, 3},  // "011"
+	5:  {0x0003, 4},  // "0011"
+	6:  {0x0002, 4},  // "0010"
+	7:  {0x0003, 5},  // "00011"
+	8:  {0x0005, 6},  // "000101"
+	9:  {0x0004, 6},  // "000100"
+	10: {0x0004, 7},  // "0000100"
+	11: {0x0005, 7},  // "0000101"
+	12: {0x0007, 7},  // "0000111"
+	13: {0x0004, 8},  // "00000100"
+	14: {0x0007, 8},  // "00000111"
+	15: {0x0018, 9},  // "000011000"
+	16: {0x0017, 10}, // "0000010111"
+	17: {0x0018, 10}, // "0000011000"
+	18: {0x0008, 10}, // "0000001000"
+	19: {0x0067, 11}, // "00001100111"
+	20: {0x0068, 11}, // "00001101000"
+	21: {0x006c, 11}, // "00001101100"
+	22: {0x0037, 11}, // "00000110111"
+	23: {0x0028, 11}, // "00000101000"
+	24: {0x0017, 11}, // "00000010111"
+	25: {0x0018, 11}, // "00000011000"
+	26: {0x00ca, 12}, // "000011001010"
+	27: {0x00cb, 12}, // "000011001011"
+	28: {0x00cc, 12}, // "000011001100"
+	29: {0x00cd, 12}, // "000011001101"
+	30: {0x0068, 12}, // "000001101000"
+	31: {0x0069, 12}, // "000001101001"
+	32: {0x006a, 12}, // "000001101010"
+	33: {0x006b, 12}, // "000001101011"
+	34: {0x00d2, 12}, // "000011010010"
+	35: {0x00d3, 12}, // "000011010011"
+	36: {0x00d4, 12}, // "000011010100"
+	37: {0x00d5, 12}, // "000011010101"
+	38: {0x00d6, 12}, // "000011010110"
+	39: {0x00d7, 12}, // "000011010111"
+	40: {0x006c, 12}, // "000001101100"
+	41: {0x006d, 12}, // "000001101101"
+	42: {0x00da, 12}, // "000011011010"
+	43: {0x00db, 12}, // "000011011011"
+	44: {0x0054, 12}, // "000001010100"
+	45: {0x0055, 12}, // "000001010101"
+	46: {0x0056, 12}, // "000001010110"
+	47: {0x0057, 12}, // "000001010111"
+	48: {0x0064, 12}, // "000001100100"
+	49: {0x0065, 12}, // "000001100101"
+	50: {0x0052, 12}, // "000001010010"
+	51: {0x0053, 12}, // "000001010011"
+	52: {0x0024, 12}, // "000000100100"
+	53: {0x0037, 12}, // "000000110111"
+	54: {0x0038, 12}, // "000000111000"
+	55: {0x0027, 12}, // "000000100111"
+	56: {0x0028, 12}, // "000000101000"
+	57: {0x0058, 12}, // "000001011000"
+	58: {0x0059, 12}, // "000001011001"
+	59: {0x002b, 12}, // "000000101011"
+	60: {0x002c, 12}, // "000000101100"
+	61: {0x005a, 12}, // "000001011010"
+	62: {0x0066, 12}, // "000001100110"
+	63: {0x0067, 12}, // "000001100111"
+}
+
+// blackEncodeTable3 represents Table 3 for a black run.
+var blackEncodeTable3 = [...]bitString{
+	0:  {0x000f, 10}, // "0000001111"
+	1:  {0x00c8, 12}, // "000011001000"
+	2:  {0x00c9, 12}, // "000011001001"
+	3:  {0x005b, 12}, // "000001011011"
+	4:  {0x0033, 12}, // "000000110011"
+	5:  {0x0034, 12}, // "000000110100"
+	6:  {0x0035, 12}, // "000000110101"
+	7:  {0x006c, 13}, // "0000001101100"
+	8:  {0x006d, 13}, // "0000001101101"
+	9:  {0x004a, 13}, // "0000001001010"
+	10: {0x004b, 13}, // "0000001001011"
+	11: {0x004c, 13}, // "0000001001100"
+	12: {0x004d, 13}, // "0000001001101"
+	13: {0x0072, 13}, // "0000001110010"
+	14: {0x0073, 13}, // "0000001110011"
+	15: {0x0074, 13}, // "0000001110100"
+	16: {0x0075, 13}, // "0000001110101"
+	17: {0x0076, 13}, // "0000001110110"
+	18: {0x0077, 13}, // "0000001110111"
+	19: {0x0052, 13}, // "0000001010010"
+	20: {0x0053, 13}, // "0000001010011"
+	21: {0x0054, 13}, // "0000001010100"
+	22: {0x0055, 13}, // "0000001010101"
+	23: {0x005a, 13}, // "0000001011010"
+	24: {0x005b, 13}, // "0000001011011"
+	25: {0x0064, 13}, // "0000001100100"
+	26: {0x0065, 13}, // "0000001100101"
+	27: {0x0008, 11}, // "00000001000"
+	28: {0x000c, 11}, // "00000001100"
+	29: {0x000d, 11}, // "00000001101"
+	30: {0x0012, 12}, // "000000010010"
+	31: {0x0013, 12}, // "000000010011"
+	32: {0x0014, 12}, // "000000010100"
+	33: {0x0015, 12}, // "000000010101"
+	34: {0x0016, 12}, // "000000010110"
+	35: {0x0017, 12}, // "000000010111"
+	36: {0x001c, 12}, // "000000011100"
+	37: {0x001d, 12}, // "000000011101"
+	38: {0x001e, 12}, // "000000011110"
+	39: {0x001f, 12}, // "000000011111"
+}
+
 // COPY PASTE table.go BEGIN
 
 const (
diff --git a/ccitt/writer.go b/ccitt/writer.go
new file mode 100644
index 0000000..87130ab
--- /dev/null
+++ b/ccitt/writer.go
@@ -0,0 +1,102 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ccitt
+
+import (
+	"encoding/binary"
+	"io"
+)
+
+type bitWriter struct {
+	w io.Writer
+
+	// order is whether to process w's bytes LSB first or MSB first.
+	order Order
+
+	// The high nBits bits of the bits field hold encoded bits to be written to w.
+	bits  uint64
+	nBits uint32
+
+	// bytes[:bw] holds encoded bytes not yet written to w.
+	// Overflow protection is ensured by using a multiple of 8 as bytes length.
+	bw    uint32
+	bytes [1024]uint8
+}
+
+// flushBits copies 64 bits from b.bits to b.bytes. If b.bytes is then full, it
+// is written to b.w.
+func (b *bitWriter) flushBits() error {
+	binary.BigEndian.PutUint64(b.bytes[b.bw:], b.bits)
+	b.bits = 0
+	b.nBits = 0
+	b.bw += 8
+	if b.bw < uint32(len(b.bytes)) {
+		return nil
+	}
+	b.bw = 0
+	if b.order != MSB {
+		reverseBitsWithinBytes(b.bytes[:])
+	}
+	_, err := b.w.Write(b.bytes[:])
+	return err
+}
+
+// close finalizes a bitcode stream by writing any
+// pending bits to bitWriter's underlying io.Writer.
+func (b *bitWriter) close() error {
+	// Write any encoded bits to bytes.
+	if b.nBits > 0 {
+		binary.BigEndian.PutUint64(b.bytes[b.bw:], b.bits)
+		b.bw += (b.nBits + 7) >> 3
+	}
+
+	if b.order != MSB {
+		reverseBitsWithinBytes(b.bytes[:b.bw])
+	}
+
+	// Write b.bw bytes to b.w.
+	_, err := b.w.Write(b.bytes[:b.bw])
+	return err
+}
+
+// alignToByteBoundary rounds b.nBits up to a multiple of 8.
+// If all 64 bits are used, flush them to bitWriter's bytes.
+func (b *bitWriter) alignToByteBoundary() error {
+	if b.nBits = (b.nBits + 7) &^ 7; b.nBits == 64 {
+		return b.flushBits()
+	}
+	return nil
+}
+
+// writeCode writes a variable length bitcode to b's underlying io.Writer.
+func (b *bitWriter) writeCode(bs bitString) error {
+	bits := bs.bits
+	nBits := bs.nBits
+	if 64-b.nBits >= nBits {
+		// b.bits has sufficient room for storing nBits bits.
+		b.bits |= uint64(bits) << (64 - nBits - b.nBits)
+		b.nBits += nBits
+		if b.nBits == 64 {
+			return b.flushBits()
+		}
+		return nil
+	}
+
+	// Number of leading bits that fill b.bits.
+	i := 64 - b.nBits
+
+	// Fill b.bits then flush and write remaining bits.
+	b.bits |= uint64(bits) >> (nBits - i)
+	b.nBits = 64
+
+	if err := b.flushBits(); err != nil {
+		return err
+	}
+
+	nBits -= i
+	b.bits = uint64(bits) << (64 - nBits)
+	b.nBits = nBits
+	return nil
+}
diff --git a/ccitt/writer_test.go b/ccitt/writer_test.go
new file mode 100644
index 0000000..afa5097
--- /dev/null
+++ b/ccitt/writer_test.go
@@ -0,0 +1,67 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ccitt
+
+import (
+	"bytes"
+	"reflect"
+	"testing"
+)
+
+func testEncode(t *testing.T, o Order) {
+	t.Helper()
+	values := []uint32{0, 1, 256, 7, 128, 3, 2560, 2240, 2368, 2048}
+
+	decTable := whiteDecodeTable[:]
+	encTableSmall := whiteEncodeTable2[:]
+	encTableBig := whiteEncodeTable3[:]
+
+	// Encode values to bit stream.
+	var bb bytes.Buffer
+	w := &bitWriter{w: &bb, order: o}
+	for _, v := range values {
+		encTable := encTableSmall
+		if v < 64 {
+			// No-op.
+		} else if v&63 != 0 {
+			t.Fatalf("writeCode: cannot encode %d: large but not a multiple of 64", v)
+		} else {
+			encTable = encTableBig
+			v = v/64 - 1
+		}
+		if err := w.writeCode(encTable[v]); err != nil {
+			t.Fatalf("writeCode: %v", err)
+		}
+	}
+	if err := w.close(); err != nil {
+		t.Fatalf("close: %v", err)
+	}
+
+	// Decode bit stream to values.
+	got := []uint32(nil)
+	r := &bitReader{
+		r:     bytes.NewReader(bb.Bytes()),
+		order: o,
+	}
+	finalValue := values[len(values)-1]
+	for {
+		v, err := decode(r, decTable)
+		if err != nil {
+			t.Fatalf("after got=%d: %v", got, err)
+		}
+		got = append(got, v)
+		if v == finalValue {
+			break
+		}
+	}
+
+	// Check that the round-tripped values were unchanged.
+	if !reflect.DeepEqual(got, values) {
+		t.Fatalf("\ngot:  %v\nwant: %v", got, values)
+	}
+}
+
+func TestEncodeLSB(t *testing.T) { testEncode(t, LSB) }
+func TestEncodeMSB(t *testing.T) { testEncode(t, MSB) }