chore: optimize string search with Aho–Corasick algorithm (#1476)
* chore: optimize string search with Aho–Corasick algorithm
* chore: optimize keywords replacer
* fix: replacer bugs
* chore: reorder members
This commit is contained in:
@@ -39,6 +39,8 @@ func NewTrie(words []string, opts ...TrieOption) Trie {
|
||||
n.add(word)
|
||||
}
|
||||
|
||||
n.build()
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
@@ -48,7 +50,7 @@ func (n *trieNode) Filter(text string) (sentence string, keywords []string, foun
|
||||
return text, nil, false
|
||||
}
|
||||
|
||||
scopes := n.findKeywordScopes(chars)
|
||||
scopes := n.find(chars)
|
||||
keywords = n.collectKeywords(chars, scopes)
|
||||
|
||||
for _, match := range scopes {
|
||||
@@ -65,7 +67,7 @@ func (n *trieNode) FindKeywords(text string) []string {
|
||||
return nil
|
||||
}
|
||||
|
||||
scopes := n.findKeywordScopes(chars)
|
||||
scopes := n.find(chars)
|
||||
return n.collectKeywords(chars, scopes)
|
||||
}
|
||||
|
||||
@@ -85,48 +87,6 @@ func (n *trieNode) collectKeywords(chars []rune, scopes []scope) []string {
|
||||
return keywords
|
||||
}
|
||||
|
||||
func (n *trieNode) findKeywordScopes(chars []rune) []scope {
|
||||
var scopes []scope
|
||||
size := len(chars)
|
||||
start := -1
|
||||
|
||||
for i := 0; i < size; i++ {
|
||||
child, ok := n.children[chars[i]]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if start < 0 {
|
||||
start = i
|
||||
}
|
||||
if child.end {
|
||||
scopes = append(scopes, scope{
|
||||
start: start,
|
||||
stop: i + 1,
|
||||
})
|
||||
}
|
||||
|
||||
for j := i + 1; j < size; j++ {
|
||||
grandchild, ok := child.children[chars[j]]
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
|
||||
child = grandchild
|
||||
if child.end {
|
||||
scopes = append(scopes, scope{
|
||||
start: start,
|
||||
stop: j + 1,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
start = -1
|
||||
}
|
||||
|
||||
return scopes
|
||||
}
|
||||
|
||||
func (n *trieNode) replaceWithAsterisk(chars []rune, start, stop int) {
|
||||
for i := start; i < stop; i++ {
|
||||
chars[i] = n.mask
|
||||
|
||||
Reference in New Issue
Block a user