fix Replacer suffix match, and add test case (#2867)

* fix: replace should replace the longest match

* feat: revert bytes.Buffer to strings.Builder

* fix: loop reset nextStart

* feat: add node longest match test

* feat: add replacer suffix match test case

* feat: multiple match

* fix: partial match ends

* fix: replace look back upon error

* feat: rm unnecessary branch

---------

Co-authored-by: hudahai <hscxrzs@gmail.com>
Co-authored-by: hushichang <hushichang@sensetime.com>
This commit is contained in:
dahaihu
2023-02-12 21:04:35 +08:00
committed by GitHub
parent 3736dacf1e
commit cacd5dc91a
4 changed files with 349 additions and 49 deletions

View File

@@ -9,10 +9,10 @@ import (
// TestLongestMatchGuardedCondition calls longestMatch with an empty input on a
// lone node flagged as an end node and expects every result to be a zero
// value: no useless prefix, no match length and a nil jump.
//
// Only the current two-argument / three-result form of longestMatch is
// exercised; the earlier (start int) -> (used, jump, matched) call has been
// dropped because both forms cannot coexist in one compilable body.
func TestLongestMatchGuardedCondition(t *testing.T) {
	n := new(node)
	n.end = true

	uselessLen, matchLen, jump := n.longestMatch([]rune(""), nil)
	assert.Equal(t, 0, uselessLen)
	assert.Nil(t, jump)
	assert.Equal(t, 0, matchLen)
}
func TestFuzzNodeCase1(t *testing.T) {
@@ -202,3 +202,228 @@ func BenchmarkNodeFind(b *testing.B) {
trie.find([]rune("日本AV演员兼电视、电影演员。无名氏AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演"))
}
}
// TestNode_longestMatchCase0 matches the longest word: with the keywords "a",
// "ab", "abc" and "abcd" loaded, scanning "abcef" must yield uselessLen 0,
// matchLen 3 and a nil jump.
func TestNode_longestMatchCase0(t *testing.T) {
	root := new(node)
	for _, kw := range []string{"a", "ab", "abc", "abcd"} {
		root.add(kw)
	}
	root.build()

	input := []rune("abcef")
	gotUseless, gotMatch, gotJump := root.longestMatch(input, nil)

	assert.Equal(t, 0, gotUseless)
	assert.Equal(t, 3, gotMatch)
	assert.Nil(t, gotJump)
}
// TestNode_longestMatchCase1 loads "abcde", "bcde", "cde", "de", "b" and "bc",
// scans "abcdf", and expects uselessLen 1, matchLen 2 and a nil jump.
func TestNode_longestMatchCase1(t *testing.T) {
	root := new(node)
	for _, kw := range []string{"abcde", "bcde", "cde", "de", "b", "bc"} {
		root.add(kw)
	}
	root.build()

	input := []rune("abcdf")
	gotUseless, gotMatch, gotJump := root.longestMatch(input, nil)

	assert.Equal(t, 1, gotUseless)
	assert.Equal(t, 2, gotMatch)
	assert.Nil(t, gotJump)
}
// TestNode_longestMatchCase2 loads "abcde", "bcde", "cde", "de", "c" and "cd",
// scans "abcdf", and expects uselessLen 2, matchLen 2 and a nil jump.
func TestNode_longestMatchCase2(t *testing.T) {
	root := new(node)
	for _, kw := range []string{"abcde", "bcde", "cde", "de", "c", "cd"} {
		root.add(kw)
	}
	root.build()

	input := []rune("abcdf")
	gotUseless, gotMatch, gotJump := root.longestMatch(input, nil)

	assert.Equal(t, 2, gotUseless)
	assert.Equal(t, 2, gotMatch)
	assert.Nil(t, gotJump)
}
// TestNode_longestMatchCase3 loads "abcde", "bcde", "cde", "de", "b", "bc",
// "c" and "cd", scans "abcdf", and expects uselessLen 1, matchLen 2 and a nil
// jump.
func TestNode_longestMatchCase3(t *testing.T) {
	root := new(node)
	for _, kw := range []string{
		"abcde", "bcde", "cde", "de",
		"b", "bc",
		"c", "cd",
	} {
		root.add(kw)
	}
	root.build()

	input := []rune("abcdf")
	gotUseless, gotMatch, gotJump := root.longestMatch(input, nil)

	assert.Equal(t, 1, gotUseless)
	assert.Equal(t, 2, gotMatch)
	assert.Nil(t, gotJump)
}
// TestNode_longestMatchCase4 loads "abcde", "bcdf" and "bcd", scans "abcdf",
// and expects uselessLen 1, matchLen 0 and a returned paths slice of length 4.
func TestNode_longestMatchCase4(t *testing.T) {
	root := new(node)
	for _, kw := range []string{"abcde", "bcdf", "bcd"} {
		root.add(kw)
	}
	root.build()

	input := []rune("abcdf")
	gotUseless, gotMatch, gotPaths := root.longestMatch(input, nil)

	assert.Equal(t, 1, gotUseless)
	assert.Equal(t, 0, gotMatch)
	assert.Equal(t, 4, len(gotPaths))
}
// TestNode_longestMatchCase5 loads "abcdef" and "bcde", scans "abcde", and
// expects uselessLen 1, matchLen 4 and nil paths.
func TestNode_longestMatchCase5(t *testing.T) {
	root := new(node)
	for _, kw := range []string{"abcdef", "bcde"} {
		root.add(kw)
	}
	root.build()

	input := []rune("abcde")
	gotUseless, gotMatch, gotPaths := root.longestMatch(input, nil)

	assert.Equal(t, 1, gotUseless)
	assert.Equal(t, 4, gotMatch)
	assert.Nil(t, gotPaths)
}
// TestNode_longestMatchCase6 loads "abcde", "bc" and "d", scans "abcd", and
// expects uselessLen 1, matchLen 2 and a nil jump.
func TestNode_longestMatchCase6(t *testing.T) {
	root := new(node)
	for _, kw := range []string{"abcde", "bc", "d"} {
		root.add(kw)
	}
	root.build()

	input := []rune("abcd")
	gotUseless, gotMatch, gotJump := root.longestMatch(input, nil)

	assert.Equal(t, 1, gotUseless)
	assert.Equal(t, 2, gotMatch)
	assert.Nil(t, gotJump)
}
// TestNode_longestMatchCase7 loads "abcdeg", "cdef" and "bcde" and scans
// "abcdef" in two passes. The first pass expects uselessLen 1, matchLen 4 and
// nil paths; the second pass runs on the remainder after those runes and
// expects uselessLen 1, matchLen 0 and nil paths.
func TestNode_longestMatchCase7(t *testing.T) {
	root := new(node)
	for _, kw := range []string{"abcdeg", "cdef", "bcde"} {
		root.add(kw)
	}
	root.build()

	input := []rune("abcdef")

	// First pass over the whole input.
	gotUseless, gotMatch, gotPaths := root.longestMatch(input, nil)
	assert.Equal(t, 1, gotUseless)
	assert.Equal(t, 4, gotMatch)
	assert.Nil(t, gotPaths)

	// Second pass over whatever follows the runes reported by the first pass,
	// handing back the paths it returned.
	remainder := input[gotUseless+gotMatch:]
	gotUseless, gotMatch, gotPaths = root.longestMatch(remainder, gotPaths)
	assert.Equal(t, 1, gotUseless)
	assert.Equal(t, 0, gotMatch)
	assert.Nil(t, gotPaths)
}
// TestNode_longestMatchCase8 loads "abcdeg", "cdef" and "cde", scans "abcdef",
// and expects uselessLen 2, matchLen 0 and non-nil paths.
func TestNode_longestMatchCase8(t *testing.T) {
	root := new(node)
	for _, kw := range []string{"abcdeg", "cdef", "cde"} {
		root.add(kw)
	}
	root.build()

	input := []rune("abcdef")
	gotUseless, gotMatch, gotPaths := root.longestMatch(input, nil)

	assert.Equal(t, 2, gotUseless)
	assert.Equal(t, 0, gotMatch)
	assert.NotNil(t, gotPaths)
}
// TestNode_longestMatchCase9 loads "abcdeg", "cdef", "cde" and "cd", scans
// "abcde", and expects uselessLen 2, matchLen 3 and nil paths.
func TestNode_longestMatchCase9(t *testing.T) {
	root := new(node)
	for _, kw := range []string{"abcdeg", "cdef", "cde", "cd"} {
		root.add(kw)
	}
	root.build()

	input := []rune("abcde")
	gotUseless, gotMatch, gotPaths := root.longestMatch(input, nil)

	assert.Equal(t, 2, gotUseless)
	assert.Equal(t, 3, gotMatch)
	assert.Nil(t, gotPaths)
}
// TestNode_jump loads "de" and "fe" and scans "dfe" in two steps. The first
// call, made on the root, expects uselessLen 1, matchLen 0 and non-nil paths.
// The second call is made on the last node of those paths with the remaining
// input and expects uselessLen 0, matchLen 2 and nil paths.
func TestNode_jump(t *testing.T) {
	keywords := []string{
		"de",
		"fe",
	}
	trie := new(node)
	for _, keyword := range keywords {
		trie.add(keyword)
	}
	trie.build()

	target := []rune("dfe")
	uselessLen, matchLen, paths := trie.longestMatch(target, nil)
	assert.Equal(t, 1, uselessLen)
	assert.Equal(t, 0, matchLen)
	assert.NotNil(t, paths)
	// assert.NotNil does not stop the test, and indexing the last element of a
	// nil or empty slice would panic; fail this test cleanly instead.
	if len(paths) == 0 {
		t.Fatal("longestMatch returned no paths to resume from")
	}

	uselessLen, matchLen, paths = paths[len(paths)-1].longestMatch(target[uselessLen+matchLen:], paths)
	assert.Equal(t, 0, uselessLen)
	assert.Equal(t, 2, matchLen)
	assert.Nil(t, paths)
}