chore: optimize string search with Aho–Corasick algorithm (#1476)

* chore: optimize string search with Aho–Corasick algorithm

* chore: optimize keywords replacer

* fix: replacer bugs

* chore: reorder members
This commit is contained in:
Kevin Wan
2022-01-23 23:37:02 +08:00
committed by GitHub
parent 09d1fad6e0
commit f1102fb262
8 changed files with 316 additions and 109 deletions

View File

@@ -2,6 +2,8 @@ package stringx
// node is a single state of the keyword trie that find walks over.
type node struct {
// children maps the next rune to the node reached by consuming it; it stays
// nil until the first child is added.
children map[rune]*node
// fail is the node find falls back to when this node has no child for the
// rune being consumed.
fail *node
// depth is the number of runes on the path from the root to this node; find
// subtracts it from the end offset to obtain a match's start offset.
depth int
// end reports whether a word added to the trie stops at this node.
end bool
}
@@ -12,17 +14,19 @@ func (n *node) add(word string) {
}
nd := n
for _, char := range chars {
var depth int
for i, char := range chars {
if nd.children == nil {
child := new(node)
nd.children = map[rune]*node{
char: child,
}
child.depth = i + 1
nd.children = map[rune]*node{char: child}
nd = child
} else if child, ok := nd.children[char]; ok {
nd = child
depth++
} else {
child := new(node)
child.depth = i + 1
nd.children[char] = child
nd = child
}
@@ -30,3 +34,68 @@ func (n *node) add(word string) {
nd.end = true
}
// build assigns the failure link of every node in the trie rooted at n.
//
// The failure link of a node is the deepest other node whose path from the
// root is a proper suffix of this node's path; the root is used when no such
// node exists. The root's own link points at itself so that fallback walks
// always terminate there.
//
// Links are assigned breadth-first: computing a node's link walks the failure
// chain of its parent, and every node on that chain is strictly shallower, so
// level order guarantees those links are already final. A depth-first
// construction that only looks one link up misses suffixes that are two or
// more hops away (for the words "xaab", "aac" and "ab" it links "xaab" to the
// root instead of "ab").
func (n *node) build() {
	n.fail = n

	// Depth-1 nodes only have the empty string as a proper suffix.
	queue := make([]*node, 0, len(n.children))
	for _, child := range n.children {
		child.fail = n
		queue = append(queue, child)
	}

	for len(queue) > 0 {
		nd := queue[0]
		queue = queue[1:]

		for key, child := range nd.children {
			// Default to the root, then look for the longest suffix state
			// that can be extended with key.
			child.fail = n
			for f := nd.fail; ; f = f.fail {
				if next, ok := f.children[key]; ok {
					child.fail = next
					break
				}
				if f == n {
					break
				}
			}
			queue = append(queue, child)
		}
	}
}
// buildNode assigns failure links to every node below nd, using n as the
// trie root. nd itself must already carry its failure link.
//
// A child reached from its parent by key is linked to the node that the
// parent's failure target reaches by the same key, or to the root n when the
// failure target has no such child.
//
// NOTE(review): only the parent's direct failure target is consulted; the
// lookup does not continue further up the failure chain.
func (n *node) buildNode(nd *node) {
	pending := []*node{nd}

	for len(pending) > 0 {
		parent := pending[0]
		pending = pending[1:]

		for key, child := range parent.children {
			target, found := parent.fail.children[key]
			if !found {
				target = n
			}
			child.fail = target
			pending = append(pending, child)
		}
	}
}
// find scans chars once and returns the scope of every trie word occurring in
// it, including overlapping occurrences. n must be the trie root and the
// failure links must already be assigned (see build): every non-root node
// needs a non-nil fail whose chain ends at n.
//
// Each scope covers chars[start:stop]. Scopes are emitted in order of their
// stop offset; for a shared stop offset the longest word comes first.
func (n *node) find(chars []rune) []scope {
	var scopes []scope
	cur := n

	for i, char := range chars {
		// Fall back along the failure chain until some state accepts char.
		// Stopping after a single hop would drop char even when a shallower
		// state, or the root itself, can consume it.
		next, ok := cur.children[char]
		for !ok && cur != n {
			cur = cur.fail
			next, ok = cur.children[char]
		}
		if !ok {
			// Not even the root accepts char; cur is the root here.
			continue
		}
		cur = next

		// Every node on the failure chain is a suffix of the text consumed so
		// far, so each one flagged as a word end is a match stopping at i+1.
		for hit := cur; hit != n; hit = hit.fail {
			if hit.end {
				scopes = append(scopes, scope{
					start: i + 1 - hit.depth,
					stop:  i + 1,
				})
			}
		}
	}

	return scopes
}