diff --git a/core/stringx/node.go b/core/stringx/node.go index 867afd94..4b6fb6e1 100644 --- a/core/stringx/node.go +++ b/core/stringx/node.go @@ -98,3 +98,45 @@ func (n *node) find(chars []rune) []scope { return scopes } + +func (n *node) longestMatch(chars []rune, start int) (used int, jump *node, matched bool) { + cur := n + var matchedNode *node + for i := start; i < len(chars); i++ { + child, ok := cur.children[chars[i]] + if ok { + cur = child + if cur.end { + matchedNode = cur + } + } else { + if matchedNode != nil { + return matchedNode.depth, nil, true + } + if n.end { + return start, nil, true + } + var jump *node + for cur.fail != nil { + jump, ok = cur.fail.children[chars[i]] + if ok { + break + } + cur = cur.fail + } + if jump != nil { + return i + 1 - jump.depth, jump, false + } + return i + 1, nil, false + } + } + // this longest matched node + if matchedNode != nil { + return matchedNode.depth, nil, true + } + // last mathed node + if n.end { + return start, nil, true + } + return len(chars), nil, false +} diff --git a/core/stringx/replacer.go b/core/stringx/replacer.go index 947ae69a..5912b141 100644 --- a/core/stringx/replacer.go +++ b/core/stringx/replacer.go @@ -1,6 +1,8 @@ package stringx -import "strings" +import ( + "bytes" +) type ( // Replacer interface wraps the Replace method. @@ -30,68 +32,27 @@ func NewReplacer(mapping map[string]string) Replacer { // Replace replaces text with given substitutes. func (r *replacer) Replace(text string) string { - var builder strings.Builder - var start int - chars := []rune(text) - size := len(chars) - - for start < size { - cur := r.node - - if start > 0 { - builder.WriteString(string(chars[:start])) - } - - for i := start; i < size; i++ { - child, ok := cur.children[chars[i]] - if ok { - cur = child - } else if cur == r.node { - builder.WriteRune(chars[i]) - // cur already points to root, set start only - start = i + 1 - continue + var buf bytes.Buffer + target := []rune(text) + cur := r.node + nextStart := 0 + for len(target) != 0 { + used, jump, matched := cur.longestMatch(target, nextStart) + if matched { + replaced := r.mapping[string(target[:used])] + target = append([]rune(replaced), target[used:]...) + cur = r.node + } else { + buf.WriteString(string(target[:used])) + target = target[used:] + if jump != nil { + cur = jump + nextStart = jump.depth } else { - curDepth := cur.depth - cur = cur.fail - child, ok = cur.children[chars[i]] - if !ok { - // write this path - builder.WriteString(string(chars[i-curDepth : i+1])) - // go to root - cur = r.node - start = i + 1 - continue - } - - failDepth := cur.depth - // write path before jump - builder.WriteString(string(chars[start : start+curDepth-failDepth])) - start += curDepth - failDepth - cur = child + cur = r.node + nextStart = 0 } - - if cur.end { - val := string(chars[i+1-cur.depth : i+1]) - builder.WriteString(r.mapping[val]) - builder.WriteString(string(chars[i+1:])) - // only matching this path, all previous paths are done - if start >= i+1-cur.depth && i+1 >= size { - return builder.String() - } - - chars = []rune(builder.String()) - size = len(chars) - builder.Reset() - break - } - } - - if !cur.end { - builder.WriteString(string(chars[start:])) - return builder.String() } } - - return string(chars) + return buf.String() } diff --git a/core/stringx/replacer_test.go b/core/stringx/replacer_test.go index 8cb8851b..0d3c4015 100644 --- a/core/stringx/replacer_test.go +++ b/core/stringx/replacer_test.go @@ -51,6 +51,15 @@ func TestReplacer_ReplaceMultiMatches(t *testing.T) { assert.Equal(t, "零一23四五一23四五", NewReplacer(mapping).Replace("零一二三四五一二三四五")) } +func TestReplacer_ReplaceLongestMatching(t *testing.T) { + keywords := map[string]string{ + "日本": "japan", + "日本的首都": "东京", + } + replacer := NewReplacer(keywords) + assert.Equal(t, "东京在japan", replacer.Replace("日本的首都在日本")) +} + func TestReplacer_ReplaceJumpToFail(t *testing.T) { mapping := map[string]string{ "bcdf": "1235",