replay CL 19916 and CL 19913 now that the build can handle them
TBR=r
OCL=19924
CL=19934
diff --git a/src/lib/utf8.go b/src/lib/utf8.go
index 7b0f15d..9ece25f 100644
--- a/src/lib/utf8.go
+++ b/src/lib/utf8.go
@@ -9,7 +9,8 @@
export const (
RuneError = 0xFFFD;
RuneSelf = 0x80;
- RuneMax = 1<<21 - 1;
+ RuneMax = 0x10FFFF; // replaces 1<<21 - 1; presumably the highest valid code point (U+10FFFF) — confirm
+ UTFMax = 4; // presumably the longest encoding of one rune in bytes; the decoders in this file return sizes up to 4
)
const (
@@ -32,7 +33,8 @@
)
func DecodeRuneInternal(p *[]byte) (rune, size int, short bool) {
- if len(p) < 1 {
+ n := len(p);
+ if n < 1 {
return RuneError, 0, true;
}
c0 := p[0];
@@ -48,7 +50,7 @@
}
// need first continuation byte
- if len(p) < 2 {
+ if n < 2 {
return RuneError, 1, true
}
c1 := p[1];
@@ -66,7 +68,7 @@
}
// need second continuation byte
- if len(p) < 3 {
+ if n < 3 {
return RuneError, 1, true
}
c2 := p[2];
@@ -84,7 +86,7 @@
}
// need third continuation byte
- if len(p) < 4 {
+ if n < 4 {
return RuneError, 1, true
}
c3 := p[3];
@@ -105,17 +107,103 @@
return RuneError, 1, false
}
+// DecodeRuneInStringInternal decodes the UTF-8 sequence that starts at byte
+// offset i of s. It follows the same series of length and range checks as
+// DecodeRuneInternal, but indexes the string at an offset instead of reading
+// from a byte slice.
+//
+// Results:
+//   rune  - the decoded value, or RuneError on any failure.
+//   size  - bytes consumed: 1 to 4 on success, 0 when nothing is available at
+//           offset i, and 1 for every other failure.
+//   short - true when the function stopped because a byte it needed lies
+//           beyond the end of s; false for success and for malformed input.
+//
+// NOTE(review): a negative i is not rejected; it passes the n < 1 test below
+// and reaches s[i].
+func DecodeRuneInStringInternal(s string, i int) (rune, size int, short bool) {
+ // n is the number of bytes of s available from offset i onward.
+ n := len(s) - i;
+ if n < 1 {
+ // nothing at or past offset i: short input, no bytes consumed
+ return RuneError, 0, true;
+ }
+ c0 := s[i];
+
+ // 1-byte, 7-bit sequence?
+ if c0 < Tx {
+ return int(c0), 1, false
+ }
+
+ // unexpected continuation byte?
+ // (c0 is in [Tx, T2), the same range the continuation checks below accept)
+ if c0 < T2 {
+ return RuneError, 1, false
+ }
+
+ // need first continuation byte
+ if n < 2 {
+ return RuneError, 1, true
+ }
+ c1 := s[i+1];
+ // a continuation byte must lie in [Tx, T2)
+ if c1 < Tx || T2 <= c1 {
+ return RuneError, 1, false
+ }
+
+ // 2-byte, 11-bit sequence?
+ if c0 < T3 {
+ rune = int(c0&Mask2)<<6 | int(c1&Maskx);
+ // reject results no larger than Rune1Max
+ // (presumably the overlong-encoding check: the value would fit in one byte)
+ if rune <= Rune1Max {
+ return RuneError, 1, false
+ }
+ return rune, 2, false
+ }
+
+ // need second continuation byte
+ if n < 3 {
+ return RuneError, 1, true
+ }
+ c2 := s[i+2];
+ if c2 < Tx || T2 <= c2 {
+ return RuneError, 1, false
+ }
+
+ // 3-byte, 16-bit sequence?
+ if c0 < T4 {
+ rune = int(c0&Mask3)<<12 | int(c1&Maskx)<<6 | int(c2&Maskx);
+ // reject results no larger than Rune2Max (presumably overlong, as above)
+ if rune <= Rune2Max {
+ return RuneError, 1, false
+ }
+ return rune, 3, false
+ }
+
+ // need third continuation byte
+ if n < 4 {
+ return RuneError, 1, true
+ }
+ c3 := s[i+3];
+ if c3 < Tx || T2 <= c3 {
+ return RuneError, 1, false
+ }
+
+ // 4-byte, 21-bit sequence?
+ if c0 < T5 {
+ rune = int(c0&Mask4)<<18 | int(c1&Maskx)<<12 | int(c2&Maskx)<<6 | int(c3&Maskx);
+ // reject results no larger than Rune3Max (presumably overlong, as above)
+ // NOTE(review): this branch does not compare the result with RuneMax —
+ // confirm whether values above it should also yield RuneError.
+ if rune <= Rune3Max {
+ return RuneError, 1, false
+ }
+ return rune, 4, false
+ }
+
+ // error: lead byte at or above T5 matches none of the forms handled above
+ return RuneError, 1, false
+}
+
+// FullRune reports whether p holds enough bytes for DecodeRuneInternal to
+// finish without running short. It returns the negation of that function's
+// short result; the decoded rune and size are ignored.
export func FullRune(p *[]byte) bool {
rune, size, short := DecodeRuneInternal(p);
return !short
}
+// FullRuneInString reports whether the bytes of s from offset i onward are
+// enough to decode a rune without running off the end of s. It returns the
+// negation of the short result of DecodeRuneInStringInternal, so a complete
+// but malformed sequence still yields true. The decoded rune and size are
+// ignored.
+export func FullRuneInString(s string, i int) bool {
+ rune, size, short := DecodeRuneInStringInternal(s, i);
+ return !short
+}
+
+// DecodeRune decodes the rune at the start of p and returns it together with
+// the number of bytes consumed. It forwards to DecodeRuneInternal and
+// discards the short flag, so a truncated input is visible to the caller only
+// through the rune and size values that function returns.
export func DecodeRune(p *[]byte) (rune, size int) {
var short bool;
rune, size, short = DecodeRuneInternal(p);
return;
}
+// DecodeRuneInString decodes the UTF-8 sequence beginning at byte offset i of
+// s and returns the rune and the number of bytes it occupies. It forwards to
+// DecodeRuneInStringInternal and discards the short flag; failures come back
+// as RuneError with size 0 (nothing available at offset i) or size 1.
+export func DecodeRuneInString(s string, i int) (rune, size int) {
+ // short is received only to satisfy the three-value call; it is not returned.
+ var short bool;
+ rune, size, short = DecodeRuneInStringInternal(s, i);
+ return;
+}
+
export func RuneLen(rune int) int {
switch {
case rune <= Rune1Max: