-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnormalize.go
More file actions
40 lines (36 loc) · 1.01 KB
/
normalize.go
File metadata and controls
40 lines (36 loc) · 1.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
package main
import (
"strings"
"unicode"
"unicode/utf8"
"github.com/rivo/uniseg"
"golang.org/x/text/cases"
)
// folder is the shared case folder that foldString applies to each grapheme
// cluster. It is built once at package initialization and reused on every call.
var folder = cases.Fold()
// foldString returns a case-folded copy of text suitable for case-insensitive
// matching, together with a table that maps positions in the folded string
// back to byte offsets in the original text.
//
// The input is walked one grapheme cluster at a time. A cluster consisting of
// a single rune that is '-' or Unicode whitespace is dropped from the output;
// multi-rune clusters are always kept. Every kept cluster is case-folded with
// the package-level folder and appended to the result.
//
// The returned offsets slice has len(folded)+1 entries. For each byte index i
// of the folded string, offsets[i] is the byte offset in the original text at
// which the cluster that produced that byte begins. The final entry is a
// sentinel equal to len(text), so a match folded[a:b] can be mapped back to
// the original range offsets[a]:offsets[b].
func foldString(text string) (string, []int) {
	var folded strings.Builder
	folded.Grow(len(text))
	// Folding can expand a cluster, so reserve some headroom up front.
	offsets := make([]int, 0, len(text)*2)

	state := -1 // -1 tells uniseg to start a fresh segmentation.
	var cluster string
	cursor := 0 // Byte offset of the current cluster in the original text.

	for len(text) > 0 {
		cluster, text, _, state = uniseg.StepString(text, state)

		if utf8.RuneCountInString(cluster) == 1 {
			r, _ := utf8.DecodeRuneInString(cluster)
			if r == '-' || unicode.IsSpace(r) {
				// The separator is omitted from the folded output, but it still
				// occupies bytes in the original text. The cursor must advance
				// past it or every later offset would be too small.
				cursor += len(cluster)
				continue
			}
		}

		rep := folder.String(cluster)
		// Every byte of the folded cluster points at the start of its source
		// cluster, even when folding changes the byte length.
		for j := 0; j < len(rep); j++ {
			offsets = append(offsets, cursor)
		}
		folded.WriteString(rep)
		cursor += len(cluster)
	}

	// Sentinel: end of the original text, used as the exclusive upper bound
	// for matches that reach the end of the folded string.
	offsets = append(offsets, cursor)
	return folded.String(), offsets
}