Blame - internal/lsp/fuzzy/input.go - tools

blob: c1038163f1a929cd19efd25915e522bad03d0964 [file] [log] [blame]

Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	1	// Copyright 2019 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	package fuzzy
				6
				7	import (
				8	"unicode"
				9	)
				10
Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	11	// RuneRole specifies the role of a rune in the context of an input.
				12	type RuneRole byte
				13
				14	const (
				15	// RNone specifies a rune without any role in the input (i.e., whitespace/non-ASCII).
				16	RNone RuneRole = iota
				17	// RSep specifies a rune with the role of segment separator.
				18	RSep
				19	// RTail specifies a rune which is a lower-case tail in a word in the input.
				20	RTail
				21	// RUCTail specifies a rune which is an upper-case tail in a word in the input.
				22	RUCTail
				23	// RHead specifies a rune which is the first character in a word in the input.
				24	RHead
				25	)
				26
				27	// RuneRoles detects the roles of each byte rune in an input string and stores it in the output
				28	// slice. The rune role depends on the input type. Stops when it parsed all the runes in the string
				29	// or when it filled the output. If output is nil, then it gets created.
Rob Findley	15eebf7	2021-07-30 15:11:53 -0400	[diff] [blame]	30	func RuneRoles(candidate []byte, reuse []RuneRole) []RuneRole {
Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	31	var output []RuneRole
Rob Findley	15eebf7	2021-07-30 15:11:53 -0400	[diff] [blame]	32	if cap(reuse) < len(candidate) {
				33	output = make([]RuneRole, 0, len(candidate))
Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	34	} else {
				35	output = reuse[:0]
				36	}
				37
				38	prev, prev2 := rtNone, rtNone
Rob Findley	15eebf7	2021-07-30 15:11:53 -0400	[diff] [blame]	39	for i := 0; i < len(candidate); i++ {
				40	r := rune(candidate[i])
Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	41
				42	role := RNone
				43
				44	curr := rtLower
Rob Findley	15eebf7	2021-07-30 15:11:53 -0400	[diff] [blame]	45	if candidate[i] <= unicode.MaxASCII {
				46	curr = runeType(rt[candidate[i]] - '0')
Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	47	}
				48
				49	if curr == rtLower {
				50	if prev == rtNone \|\| prev == rtPunct {
				51	role = RHead
				52	} else {
				53	role = RTail
				54	}
				55	} else if curr == rtUpper {
				56	role = RHead
				57
				58	if prev == rtUpper {
				59	// This and previous characters are both upper case.
				60
Rob Findley	15eebf7	2021-07-30 15:11:53 -0400	[diff] [blame]	61	if i+1 == len(candidate) {
Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	62	// This is last character, previous was also uppercase -> this is UCTail
				63	// i.e., (current char is C): aBC / BC / ABC
				64	role = RUCTail
				65	}
				66	}
				67	} else if curr == rtPunct {
Muir Manders	a12cc76	2019-10-21 21:07:21 -0700	[diff] [blame]	68	switch r {
				69	case '.', ':':
Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	70	role = RSep
				71	}
				72	}
				73	if curr != rtLower {
				74	if i > 1 && output[i-1] == RHead && prev2 == rtUpper && (output[i-2] == RHead \|\| output[i-2] == RUCTail) {
				75	// The previous two characters were uppercase. The current one is not a lower case, so the
				76	// previous one can't be a HEAD. Make it a UCTail.
				77	// i.e., (last char is current char - B must be a UCTail): ABC / ZABC / AB.
				78	output[i-1] = RUCTail
				79	}
				80	}
				81
				82	output = append(output, role)
				83	prev2 = prev
				84	prev = curr
				85	}
				86	return output
				87	}
				88
				89	type runeType byte
				90
				91	const (
				92	rtNone runeType = iota
				93	rtPunct
				94	rtLower
				95	rtUpper
				96	)
				97
				98	const rt = "00000000000000000000000000000000000000000000001122222222221000000333333333333333333333333330000002222222222222222222222222200000"
				99
				100	// LastSegment returns the substring representing the last segment from the input, where each
				101	// byte has an associated RuneRole in the roles slice. This makes sense only for inputs of Symbol
				102	// or Filename type.
				103	func LastSegment(input string, roles []RuneRole) string {
				104	// Exclude ending separators.
				105	end := len(input) - 1
				106	for end >= 0 && roles[end] == RSep {
				107	end--
				108	}
				109	if end < 0 {
				110	return ""
				111	}
				112
				113	start := end - 1
				114	for start >= 0 && roles[start] != RSep {
				115	start--
				116	}
				117
				118	return input[start+1 : end+1]
				119	}
				120
Rob Findley	15eebf7	2021-07-30 15:11:53 -0400	[diff] [blame]	121	// fromChunks copies string chunks into the given buffer.
				122	func fromChunks(chunks []string, buffer []byte) []byte {
				123	ii := 0
				124	for _, chunk := range chunks {
				125	for i := 0; i < len(chunk); i++ {
				126	if ii >= cap(buffer) {
				127	break
				128	}
				129	buffer[ii] = chunk[i]
				130	ii++
				131	}
				132	}
				133	return buffer[:ii]
				134	}
				135
				136	// toLower transforms the input string to lower case, which is stored in the output byte slice.
Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	137	// The lower casing considers only ASCII values - non ASCII values are left unmodified.
				138	// Stops when parsed all input or when it filled the output slice. If output is nil, then it gets
				139	// created.
Rob Findley	15eebf7	2021-07-30 15:11:53 -0400	[diff] [blame]	140	func toLower(input []byte, reuse []byte) []byte {
Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	141	output := reuse
				142	if cap(reuse) < len(input) {
				143	output = make([]byte, len(input))
				144	}
				145
				146	for i := 0; i < len(input); i++ {
				147	r := rune(input[i])
Rob Findley	15eebf7	2021-07-30 15:11:53 -0400	[diff] [blame]	148	if input[i] <= unicode.MaxASCII {
Rebecca Stambler	2214986	2019-07-01 17:08:29 -0400	[diff] [blame]	149	if 'A' <= r && r <= 'Z' {
				150	r += 'a' - 'A'
				151	}
				152	}
				153	output[i] = byte(r)
				154	}
				155	return output[:len(input)]
				156	}
				157
				158	// WordConsumer defines a consumer for a word delimited by the [start,end) byte offsets in an input
				159	// (start is inclusive, end is exclusive).
				160	type WordConsumer func(start, end int)
				161
				162	// Words find word delimiters in an input based on its bytes' mappings to rune roles. The offset
				163	// delimiters for each word are fed to the provided consumer function.
				164	func Words(roles []RuneRole, consume WordConsumer) {
				165	var wordStart int
				166	for i, r := range roles {
				167	switch r {
				168	case RUCTail, RTail:
				169	case RHead, RNone, RSep:
				170	if i != wordStart {
				171	consume(wordStart, i)
				172	}
				173	wordStart = i
				174	if r != RHead {
				175	// Skip this character.
				176	wordStart = i + 1
				177	}
				178	}
				179	}
				180	if wordStart != len(roles) {
				181	consume(wordStart, len(roles))
				182	}
				183	}