| // Copyright 2023 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package syscall_test |
| |
| import ( |
| "fmt" |
| "slices" |
| "syscall" |
| "testing" |
| "unicode/utf16" |
| "unicode/utf8" |
| "unsafe" |
| ) |
| |
// wtf8tests pairs WTF-8 byte strings with the UTF-16 code units they are
// expected to correspond to. Rows are grouped by the byte length of each
// encoded code point; lone (unpaired) surrogates sit with the 3-byte rows.
var wtf8tests = []struct {
	str  string   // WTF-8 encoded form
	wstr []uint16 // matching UTF-16 code units
}{
	// 1-byte
	{str: "\x00", wstr: []uint16{0x00}},
	{str: "\x5C", wstr: []uint16{0x5C}},
	{str: "\x7F", wstr: []uint16{0x7F}},

	// 2-byte
	{str: "\xC2\x80", wstr: []uint16{0x80}},
	{str: "\xD7\x8A", wstr: []uint16{0x05CA}},
	{str: "\xDF\xBF", wstr: []uint16{0x07FF}},

	// 3-byte
	{str: "\xE0\xA0\x80", wstr: []uint16{0x0800}},
	{str: "\xE2\xB0\xBC", wstr: []uint16{0x2C3C}},
	{str: "\xEF\xBF\xBF", wstr: []uint16{0xFFFF}},

	// Unmatched surrogate halves.
	// High surrogates span 0xD800 through 0xDBFF.
	{str: "\xED\xA0\x80", wstr: []uint16{0xD800}},
	// A high surrogate followed by a second high surrogate.
	{str: "\xED\xA0\x80\xED\xA0\x80", wstr: []uint16{0xD800, 0xD800}},
	// A high surrogate followed by a non-surrogate symbol.
	{str: "\xED\xA0\x80\x0A", wstr: []uint16{0xD800, 0xA}},
	// A lone high surrogate, then a proper surrogate pair, then another
	// lone high surrogate.
	{str: "\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80", wstr: []uint16{0xD800, 0xD834, 0xDF06, 0xD800}},
	{str: "\xED\xA6\xAF", wstr: []uint16{0xD9AF}},
	{str: "\xED\xAF\xBF", wstr: []uint16{0xDBFF}},

	// Low surrogates span 0xDC00 through 0xDFFF.
	{str: "\xED\xB0\x80", wstr: []uint16{0xDC00}},
	// A low surrogate followed by a second low surrogate.
	{str: "\xED\xB0\x80\xED\xB0\x80", wstr: []uint16{0xDC00, 0xDC00}},
	// A low surrogate followed by a non-surrogate symbol.
	{str: "\xED\xB0\x80\x0A", wstr: []uint16{0xDC00, 0xA}},
	// A lone low surrogate, then a proper surrogate pair, then another
	// lone low surrogate.
	{str: "\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80", wstr: []uint16{0xDC00, 0xD834, 0xDF06, 0xDC00}},
	{str: "\xED\xBB\xAE", wstr: []uint16{0xDEEE}},
	{str: "\xED\xBF\xBF", wstr: []uint16{0xDFFF}},

	// 4-byte
	{str: "\xF0\x90\x80\x80", wstr: []uint16{0xD800, 0xDC00}},
	{str: "\xF0\x9D\x8C\x86", wstr: []uint16{0xD834, 0xDF06}},
	{str: "\xF4\x8F\xBF\xBF", wstr: []uint16{0xDBFF, 0xDFFF}},
}
| |
| func TestWTF16Rountrip(t *testing.T) { |
| for _, tt := range wtf8tests { |
| t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) { |
| got := syscall.EncodeWTF16(tt.str, nil) |
| got2 := string(syscall.DecodeWTF16(got, nil)) |
| if got2 != tt.str { |
| t.Errorf("got:\n%s\nwant:\n%s", got2, tt.str) |
| } |
| }) |
| } |
| } |
| |
| func TestWTF16Golden(t *testing.T) { |
| for _, tt := range wtf8tests { |
| t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) { |
| got := syscall.EncodeWTF16(tt.str, nil) |
| if !slices.Equal(got, tt.wstr) { |
| t.Errorf("got:\n%v\nwant:\n%v", got, tt.wstr) |
| } |
| }) |
| } |
| } |
| |
| func FuzzEncodeWTF16(f *testing.F) { |
| for _, tt := range wtf8tests { |
| f.Add(tt.str) |
| } |
| f.Fuzz(func(t *testing.T, b string) { |
| // test that there are no panics |
| got := syscall.EncodeWTF16(b, nil) |
| syscall.DecodeWTF16(got, nil) |
| if utf8.ValidString(b) { |
| // if the input is a valid UTF-8 string, then |
| // test that syscall.EncodeWTF16 behaves as |
| // utf16.Encode |
| want := utf16.Encode([]rune(b)) |
| if !slices.Equal(got, want) { |
| t.Errorf("got:\n%v\nwant:\n%v", got, want) |
| } |
| } |
| }) |
| } |
| |
| func FuzzDecodeWTF16(f *testing.F) { |
| for _, tt := range wtf8tests { |
| b := unsafe.Slice((*uint8)(unsafe.Pointer(unsafe.SliceData(tt.wstr))), len(tt.wstr)*2) |
| f.Add(b) |
| } |
| f.Fuzz(func(t *testing.T, b []byte) { |
| u16 := unsafe.Slice((*uint16)(unsafe.Pointer(unsafe.SliceData(b))), len(b)/2) |
| got := syscall.DecodeWTF16(u16, nil) |
| if utf8.Valid(got) { |
| // if the input is a valid UTF-8 string, then |
| // test that syscall.DecodeWTF16 behaves as |
| // utf16.Decode |
| want := utf16.Decode(u16) |
| if string(got) != string(want) { |
| t.Errorf("got:\n%s\nwant:\n%s", string(got), string(want)) |
| } |
| } |
| // WTF-8 should always roundtrip |
| got2 := syscall.EncodeWTF16(string(got), nil) |
| if !slices.Equal(got2, u16) { |
| t.Errorf("got:\n%v\nwant:\n%v", got2, u16) |
| } |
| }) |
| } |