compress/lzw: output a Clear code first, per GIF spec The TestStartsWithClearCode test is new, but if it existed beforehand, the want strings would be "\x81" and "Hi\x81" without a starting "\x80". Fixes #26108 Fixes #33748 Updates makeworld-the-better-one/didder#7 Updates nothings/stb#1222 Change-Id: I35ac0ed862ba6ee921ba9aee257bc19828abaa82 Reviewed-on: https://go-review.googlesource.com/c/go/+/354710 Trust: Nigel Tao <nigeltao@golang.org> Run-TryBot: Nigel Tao <nigeltao@golang.org> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Andrew Gerrand <adg@golang.org>

commit: 9c1dbdf60edbffeff10f58af21fa055eb0fdd29f [log] [tgz]
author: Nigel Tao <nigeltao@golang.org> Sat Oct 09 16:52:19 2021 +1100
committer: Nigel Tao <nigeltao@golang.org> Tue Oct 12 11:00:47 2021 +0000
tree: 428cc9cf958129b4d03a5f5491d74d9a116433a5
parent: d887d3be5f8f143f2475ddb2ea6c48ceb32def17 [diff]
diff --git a/src/compress/lzw/writer.go b/src/compress/lzw/writer.go
index 552bdc2..cf06ea8 100644
--- a/src/compress/lzw/writer.go
+++ b/src/compress/lzw/writer.go

@@ -137,7 +137,19 @@
 	n = len(p)
 	code := w.savedCode
 	if code == invalidCode {
-		// The first code sent is always a literal code.
+		// This is the first write; send a clear code.
+		// https://www.w3.org/Graphics/GIF/spec-gif89a.txt Appendix F
+		// "Variable-Length-Code LZW Compression" says that "Encoders should
+		// output a Clear code as the first code of each image data stream".
+		//
+		// LZW compression isn't only used by GIF, but it's cheap to follow
+		// that directive unconditionally.
+		clear := uint32(1) << w.litWidth
+		if err := w.write(w, clear); err != nil {
+			return 0, err
+		}
+		// After the starting clear code, the next code sent (for non-empty
+		// input) is always a literal code.
 		code, p = uint32(p[0]), p[1:]
 	}
 loop:
@@ -202,6 +214,12 @@
 		if err := w.incHi(); err != nil && err != errOutOfCodes {
 			return err
 		}
+	} else {
+		// Write the starting clear code, as w.Write did not.
+		clear := uint32(1) << w.litWidth
+		if err := w.write(w, clear); err != nil {
+			return err
+		}
 	}
 	// Write the eof code.
 	eof := uint32(1)<<w.litWidth + 1

diff --git a/src/compress/lzw/writer_test.go b/src/compress/lzw/writer_test.go
index 9f59c8b..edf683a 100644
--- a/src/compress/lzw/writer_test.go
+++ b/src/compress/lzw/writer_test.go

@@ -168,6 +168,34 @@
 	}
 }
 
+func TestStartsWithClearCode(t *testing.T) {
+	// A literal width of 7 bits means that the code width starts at 8 bits,
+	// which makes it easier to visually inspect the output (provided that the
+	// output is short so codes don't get longer). Each byte is a code:
+	//  - ASCII bytes are literal codes,
+	//  - 0x80 is the clear code,
+	//  - 0x81 is the end code.
+	//  - 0x82 and above are copy codes (unused in this test case).
+	for _, empty := range []bool{false, true} {
+		var buf bytes.Buffer
+		w := NewWriter(&buf, LSB, 7)
+		if !empty {
+			w.Write([]byte("Hi"))
+		}
+		w.Close()
+		got := buf.String()
+
+		want := "\x80\x81"
+		if !empty {
+			want = "\x80Hi\x81"
+		}
+
+		if got != want {
+			t.Errorf("empty=%t: got %q, want %q", empty, got, want)
+		}
+	}
+}
+
 func BenchmarkEncoder(b *testing.B) {
 	buf, err := os.ReadFile("../testdata/e.txt")
 	if err != nil {
commit	9c1dbdf60edbffeff10f58af21fa055eb0fdd29f	[log] [tgz]
author	Nigel Tao <nigeltao@golang.org>	Sat Oct 09 16:52:19 2021 +1100
committer	Nigel Tao <nigeltao@golang.org>	Tue Oct 12 11:00:47 2021 +0000
tree	428cc9cf958129b4d03a5f5491d74d9a116433a5
parent	d887d3be5f8f143f2475ddb2ea6c48ceb32def17 [diff]