| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package precis |
| |
| import ( |
| "unicode" |
| "unicode/utf8" |
| |
| "golang.org/x/text/transform" |
| ) |
| |
// nickAdditionalMapping holds the state that Transform carries from one call
// to the next while it trims and collapses spaces.
//
// notStart is set once a non-space rune has been written, so that spaces seen
// before it can be dropped. prevSpace is set while one or more space (Zs)
// runes have been consumed since the last written rune and no separator has
// been emitted for them yet.
type nickAdditionalMapping struct {
	// TODO: This transformer needs to be stateless somehow…
	notStart, prevSpace bool
}
| |
| func (t *nickAdditionalMapping) Reset() { |
| t.prevSpace = false |
| t.notStart = false |
| } |
| |
| func (t *nickAdditionalMapping) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| // RFC 7700 §2.1. Rules |
| // |
| // 2. Additional Mapping Rule: The additional mapping rule consists of |
| // the following sub-rules. |
| // |
| // 1. Any instances of non-ASCII space MUST be mapped to ASCII |
| // space (U+0020); a non-ASCII space is any Unicode code point |
| // having a general category of "Zs", naturally with the |
| // exception of U+0020. |
| // |
| // 2. Any instances of the ASCII space character at the beginning |
| // or end of a nickname MUST be removed (e.g., "stpeter " is |
| // mapped to "stpeter"). |
| // |
| // 3. Interior sequences of more than one ASCII space character |
| // MUST be mapped to a single ASCII space character (e.g., |
| // "St Peter" is mapped to "St Peter"). |
| |
| for nSrc < len(src) { |
| r, size := utf8.DecodeRune(src[nSrc:]) |
| if size == 0 { // Incomplete UTF-8 encoding |
| if !atEOF { |
| return nDst, nSrc, transform.ErrShortSrc |
| } |
| size = 1 |
| } |
| if unicode.Is(unicode.Zs, r) { |
| t.prevSpace = true |
| } else { |
| if t.prevSpace && t.notStart { |
| dst[nDst] = ' ' |
| nDst += 1 |
| } |
| if size != copy(dst[nDst:], src[nSrc:nSrc+size]) { |
| nDst += size |
| return nDst, nSrc, transform.ErrShortDst |
| } |
| nDst += size |
| t.prevSpace = false |
| t.notStart = true |
| } |
| nSrc += size |
| } |
| return nDst, nSrc, nil |
| } |