initial import
This commit is contained in:
32
core/stringx/node.go
Normal file
32
core/stringx/node.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package stringx
|
||||
|
||||
// node is one vertex of a rune-keyed prefix tree (trie).
type node struct {
	// children maps the next rune to the subtree reached through it.
	// It stays nil until the first child is attached.
	children map[rune]*node
	// end reports whether a word added to the tree terminates at this node.
	end bool
}
|
||||
|
||||
func (n *node) add(word string) {
|
||||
chars := []rune(word)
|
||||
if len(chars) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
nd := n
|
||||
for _, char := range chars {
|
||||
if nd.children == nil {
|
||||
child := new(node)
|
||||
nd.children = map[rune]*node{
|
||||
char: child,
|
||||
}
|
||||
nd = child
|
||||
} else if child, ok := nd.children[char]; ok {
|
||||
nd = child
|
||||
} else {
|
||||
child := new(node)
|
||||
nd.children[char] = child
|
||||
nd = child
|
||||
}
|
||||
}
|
||||
|
||||
nd.end = true
|
||||
}
|
||||
79
core/stringx/random.go
Normal file
79
core/stringx/random.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package stringx
|
||||
|
||||
import (
|
||||
crand "crypto/rand"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
	// letterBytes is the alphabet random strings are drawn from.
	letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
	// letterIdxBits is the number of random bits consumed per candidate letter index.
	letterIdxBits = 6
	// letterIdxMask has the low letterIdxBits bits set; it extracts one candidate index.
	letterIdxMask = 1<<letterIdxBits - 1
	// letterIdxMax is how many candidate indices fit into one 63-bit random value.
	letterIdxMax = 63 / letterIdxBits

	// idLen is the number of random bytes RandId reads.
	idLen = 8
	// defaultRandLen is the length Rand passes to Randn.
	defaultRandLen = 8
)
|
||||
|
||||
// src is the package-wide random source used by Randn and reseeded by Seed.
// It is seeded with the current time in nanoseconds at package initialization.
var src = newLockedSource(time.Now().UnixNano())
|
||||
|
||||
// lockedSource wraps a rand.Source with a mutex; its methods take the lock
// before delegating to the wrapped source.
type lockedSource struct {
	// source is the wrapped generator.
	source rand.Source
	// lock serializes every call made on source.
	lock sync.Mutex
}
|
||||
|
||||
func newLockedSource(seed int64) *lockedSource {
|
||||
return &lockedSource{
|
||||
source: rand.NewSource(seed),
|
||||
}
|
||||
}
|
||||
|
||||
func (ls *lockedSource) Int63() int64 {
|
||||
ls.lock.Lock()
|
||||
defer ls.lock.Unlock()
|
||||
return ls.source.Int63()
|
||||
}
|
||||
|
||||
func (ls *lockedSource) Seed(seed int64) {
|
||||
ls.lock.Lock()
|
||||
defer ls.lock.Unlock()
|
||||
ls.source.Seed(seed)
|
||||
}
|
||||
|
||||
func Rand() string {
|
||||
return Randn(defaultRandLen)
|
||||
}
|
||||
|
||||
func RandId() string {
|
||||
b := make([]byte, idLen)
|
||||
_, err := crand.Read(b)
|
||||
if err != nil {
|
||||
return Randn(idLen)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%x%x%x%x", b[0:2], b[2:4], b[4:6], b[6:8])
|
||||
}
|
||||
|
||||
func Randn(n int) string {
|
||||
b := make([]byte, n)
|
||||
// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
|
||||
for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
|
||||
if remain == 0 {
|
||||
cache, remain = src.Int63(), letterIdxMax
|
||||
}
|
||||
if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
|
||||
b[i] = letterBytes[idx]
|
||||
i--
|
||||
}
|
||||
cache >>= letterIdxBits
|
||||
remain--
|
||||
}
|
||||
|
||||
return string(b)
|
||||
}
|
||||
|
||||
func Seed(seed int64) {
|
||||
src.Seed(seed)
|
||||
}
|
||||
23
core/stringx/random_test.go
Normal file
23
core/stringx/random_test.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package stringx
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestRand(t *testing.T) {
|
||||
Seed(time.Now().UnixNano())
|
||||
assert.True(t, len(Rand()) > 0)
|
||||
assert.True(t, len(RandId()) > 0)
|
||||
|
||||
const size = 10
|
||||
assert.True(t, len(Randn(size)) == size)
|
||||
}
|
||||
|
||||
func BenchmarkRandString(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = Randn(10)
|
||||
}
|
||||
}
|
||||
77
core/stringx/replacer.go
Normal file
77
core/stringx/replacer.go
Normal file
@@ -0,0 +1,77 @@
|
||||
package stringx
|
||||
|
||||
import "strings"
|
||||
|
||||
type (
|
||||
Replacer interface {
|
||||
Replace(text string) string
|
||||
}
|
||||
|
||||
replacer struct {
|
||||
node
|
||||
mapping map[string]string
|
||||
}
|
||||
)
|
||||
|
||||
func NewReplacer(mapping map[string]string) Replacer {
|
||||
var rep = &replacer{
|
||||
mapping: mapping,
|
||||
}
|
||||
for k := range mapping {
|
||||
rep.add(k)
|
||||
}
|
||||
|
||||
return rep
|
||||
}
|
||||
|
||||
func (r *replacer) Replace(text string) string {
|
||||
var builder strings.Builder
|
||||
var chars = []rune(text)
|
||||
var size = len(chars)
|
||||
var start = -1
|
||||
|
||||
for i := 0; i < size; i++ {
|
||||
child, ok := r.children[chars[i]]
|
||||
if !ok {
|
||||
builder.WriteRune(chars[i])
|
||||
continue
|
||||
}
|
||||
|
||||
if start < 0 {
|
||||
start = i
|
||||
}
|
||||
var end = -1
|
||||
if child.end {
|
||||
end = i + 1
|
||||
}
|
||||
|
||||
var j = i + 1
|
||||
for ; j < size; j++ {
|
||||
grandchild, ok := child.children[chars[j]]
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
|
||||
child = grandchild
|
||||
if child.end {
|
||||
end = j + 1
|
||||
i = j
|
||||
}
|
||||
}
|
||||
|
||||
if end > 0 {
|
||||
i = j - 1
|
||||
builder.WriteString(r.mapping[string(chars[start:end])])
|
||||
} else {
|
||||
if j < size {
|
||||
end = j + 1
|
||||
} else {
|
||||
end = size
|
||||
}
|
||||
builder.WriteRune(chars[i])
|
||||
}
|
||||
start = -1
|
||||
}
|
||||
|
||||
return builder.String()
|
||||
}
|
||||
44
core/stringx/replacer_test.go
Normal file
44
core/stringx/replacer_test.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package stringx
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestReplacer_Replace(t *testing.T) {
|
||||
var mapping = map[string]string{
|
||||
"一二三四": "1234",
|
||||
"二三": "23",
|
||||
"二": "2",
|
||||
}
|
||||
assert.Equal(t, "零1234五", NewReplacer(mapping).Replace("零一二三四五"))
|
||||
}
|
||||
|
||||
func TestReplacer_ReplaceSingleChar(t *testing.T) {
|
||||
var mapping = map[string]string{
|
||||
"二": "2",
|
||||
}
|
||||
assert.Equal(t, "零一2三四五", NewReplacer(mapping).Replace("零一二三四五"))
|
||||
}
|
||||
|
||||
func TestReplacer_ReplaceExceedRange(t *testing.T) {
|
||||
var mapping = map[string]string{
|
||||
"二三四五六": "23456",
|
||||
}
|
||||
assert.Equal(t, "零一二三四五", NewReplacer(mapping).Replace("零一二三四五"))
|
||||
}
|
||||
|
||||
func TestReplacer_ReplacePartialMatch(t *testing.T) {
|
||||
var mapping = map[string]string{
|
||||
"二三四七": "2347",
|
||||
}
|
||||
assert.Equal(t, "零一二三四五", NewReplacer(mapping).Replace("零一二三四五"))
|
||||
}
|
||||
|
||||
func TestReplacer_ReplaceMultiMatches(t *testing.T) {
|
||||
var mapping = map[string]string{
|
||||
"二三": "23",
|
||||
}
|
||||
assert.Equal(t, "零一23四五一23四五", NewReplacer(mapping).Replace("零一二三四五一二三四五"))
|
||||
}
|
||||
131
core/stringx/strings.go
Normal file
131
core/stringx/strings.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package stringx
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"zero/core/lang"
|
||||
)
|
||||
|
||||
var (
	// ErrInvalidStartPosition is returned by Substr when start is out of range.
	ErrInvalidStartPosition = errors.New("start position is invalid")
	// ErrInvalidStopPosition is returned by Substr when stop is out of range.
	ErrInvalidStopPosition = errors.New("stop position is invalid")
)
|
||||
|
||||
// Contains reports whether str is an element of list.
func Contains(list []string, str string) bool {
	for i := range list {
		if list[i] == str {
			return true
		}
	}

	return false
}
|
||||
|
||||
// Filter returns s without the runes for which filter returns true.
// filter is called once per rune of s, in order.
func Filter(s string, filter func(r rune) bool) string {
	runes := []rune(s)

	// Compact the kept runes in place, reusing the backing array of runes.
	kept := runes[:0]
	for _, r := range runes {
		if !filter(r) {
			kept = append(kept, r)
		}
	}

	return string(kept)
}
|
||||
|
||||
// HasEmpty reports whether at least one of args is the empty string.
// It returns false when called without arguments.
func HasEmpty(args ...string) bool {
	for i := range args {
		if args[i] == "" {
			return true
		}
	}

	return false
}
|
||||
|
||||
func NotEmpty(args ...string) bool {
|
||||
return !HasEmpty(args...)
|
||||
}
|
||||
|
||||
// Remove returns a copy of strings without the elements equal to any of strs.
// The order of the remaining elements is preserved and the input slice is
// left untouched.
func Remove(strings []string, strs ...string) []string {
	// Work on a copy so the caller's slice is never modified.
	out := append([]string(nil), strings...)

	kept := 0
	for _, candidate := range out {
		unwanted := false
		for _, str := range strs {
			if candidate == str {
				unwanted = true
				break
			}
		}

		if !unwanted {
			out[kept] = candidate
			kept++
		}
	}

	return out[:kept]
}
|
||||
|
||||
// Reverse returns s with its runes in reverse order.
func Reverse(s string) string {
	runes := []rune(s)
	last := len(runes) - 1

	// Swap mirrored pairs; the middle rune of an odd-length string stays put.
	for i := 0; i < len(runes)/2; i++ {
		runes[i], runes[last-i] = runes[last-i], runes[i]
	}

	return string(runes)
}
|
||||
|
||||
// Substr returns runes between start and stop [start, stop) regardless of the chars are ascii or utf8
|
||||
func Substr(str string, start int, stop int) (string, error) {
|
||||
rs := []rune(str)
|
||||
length := len(rs)
|
||||
|
||||
if start < 0 || start > length {
|
||||
return "", ErrInvalidStartPosition
|
||||
}
|
||||
|
||||
if stop < 0 || stop > length {
|
||||
return "", ErrInvalidStopPosition
|
||||
}
|
||||
|
||||
return string(rs[start:stop]), nil
|
||||
}
|
||||
|
||||
// TakeOne returns valid when it is non-empty, and or otherwise.
func TakeOne(valid, or string) string {
	if valid == "" {
		return or
	}

	return valid
}
|
||||
|
||||
// TakeWithPriority calls fns in order and returns the first non-empty result.
// Functions after the one that produced the result are not called. It returns
// the empty string when every function returns an empty string or fns is empty.
func TakeWithPriority(fns ...func() string) string {
	for i := range fns {
		if val := fns[i](); val != "" {
			return val
		}
	}

	return ""
}
|
||||
|
||||
func Union(first, second []string) []string {
|
||||
set := make(map[string]lang.PlaceholderType)
|
||||
|
||||
for _, each := range first {
|
||||
set[each] = lang.Placeholder
|
||||
}
|
||||
for _, each := range second {
|
||||
set[each] = lang.Placeholder
|
||||
}
|
||||
|
||||
merged := make([]string, 0, len(set))
|
||||
for k := range set {
|
||||
merged = append(merged, k)
|
||||
}
|
||||
|
||||
return merged
|
||||
}
|
||||
336
core/stringx/strings_test.go
Normal file
336
core/stringx/strings_test.go
Normal file
@@ -0,0 +1,336 @@
|
||||
package stringx
|
||||
|
||||
import (
|
||||
"path"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNotEmpty(t *testing.T) {
|
||||
cases := []struct {
|
||||
args []string
|
||||
expect bool
|
||||
}{
|
||||
{
|
||||
args: []string{"a", "b", "c"},
|
||||
expect: true,
|
||||
},
|
||||
{
|
||||
args: []string{"a", "", "c"},
|
||||
expect: false,
|
||||
},
|
||||
{
|
||||
args: []string{"a"},
|
||||
expect: true,
|
||||
},
|
||||
{
|
||||
args: []string{""},
|
||||
expect: false,
|
||||
},
|
||||
{
|
||||
args: []string{},
|
||||
expect: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, each := range cases {
|
||||
t.Run(path.Join(each.args...), func(t *testing.T) {
|
||||
assert.Equal(t, each.expect, NotEmpty(each.args...))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestContainsString(t *testing.T) {
|
||||
cases := []struct {
|
||||
slice []string
|
||||
value string
|
||||
expect bool
|
||||
}{
|
||||
{[]string{"1"}, "1", true},
|
||||
{[]string{"1"}, "2", false},
|
||||
{[]string{"1", "2"}, "1", true},
|
||||
{[]string{"1", "2"}, "3", false},
|
||||
{nil, "3", false},
|
||||
{nil, "", false},
|
||||
}
|
||||
|
||||
for _, each := range cases {
|
||||
t.Run(path.Join(each.slice...), func(t *testing.T) {
|
||||
actual := Contains(each.slice, each.value)
|
||||
assert.Equal(t, each.expect, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilter(t *testing.T) {
|
||||
cases := []struct {
|
||||
input string
|
||||
ignores []rune
|
||||
expect string
|
||||
}{
|
||||
{``, nil, ``},
|
||||
{`abcd`, nil, `abcd`},
|
||||
{`ab,cd,ef`, []rune{','}, `abcdef`},
|
||||
{`ab, cd,ef`, []rune{',', ' '}, `abcdef`},
|
||||
{`ab, cd, ef`, []rune{',', ' '}, `abcdef`},
|
||||
{`ab, cd, ef, `, []rune{',', ' '}, `abcdef`},
|
||||
}
|
||||
|
||||
for _, each := range cases {
|
||||
t.Run(each.input, func(t *testing.T) {
|
||||
actual := Filter(each.input, func(r rune) bool {
|
||||
for _, x := range each.ignores {
|
||||
if x == r {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
})
|
||||
assert.Equal(t, each.expect, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemove(t *testing.T) {
|
||||
cases := []struct {
|
||||
input []string
|
||||
remove []string
|
||||
expect []string
|
||||
}{
|
||||
{
|
||||
input: []string{"a", "b", "a", "c"},
|
||||
remove: []string{"a", "b"},
|
||||
expect: []string{"c"},
|
||||
},
|
||||
{
|
||||
input: []string{"b", "c"},
|
||||
remove: []string{"a"},
|
||||
expect: []string{"b", "c"},
|
||||
},
|
||||
{
|
||||
input: []string{"b", "a", "c"},
|
||||
remove: []string{"a"},
|
||||
expect: []string{"b", "c"},
|
||||
},
|
||||
{
|
||||
input: []string{},
|
||||
remove: []string{"a"},
|
||||
expect: []string{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, each := range cases {
|
||||
t.Run(path.Join(each.input...), func(t *testing.T) {
|
||||
assert.ElementsMatch(t, each.expect, Remove(each.input, each.remove...))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestReverse(t *testing.T) {
|
||||
cases := []struct {
|
||||
input string
|
||||
expect string
|
||||
}{
|
||||
{
|
||||
input: "abcd",
|
||||
expect: "dcba",
|
||||
},
|
||||
{
|
||||
input: "",
|
||||
expect: "",
|
||||
},
|
||||
{
|
||||
input: "我爱中国",
|
||||
expect: "国中爱我",
|
||||
},
|
||||
}
|
||||
|
||||
for _, each := range cases {
|
||||
t.Run(each.input, func(t *testing.T) {
|
||||
assert.Equal(t, each.expect, Reverse(each.input))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSubstr(t *testing.T) {
|
||||
cases := []struct {
|
||||
input string
|
||||
start int
|
||||
stop int
|
||||
err error
|
||||
expect string
|
||||
}{
|
||||
{
|
||||
input: "abcdefg",
|
||||
start: 1,
|
||||
stop: 4,
|
||||
expect: "bcd",
|
||||
},
|
||||
{
|
||||
input: "我爱中国3000遍,even more",
|
||||
start: 1,
|
||||
stop: 9,
|
||||
expect: "爱中国3000遍",
|
||||
},
|
||||
{
|
||||
input: "abcdefg",
|
||||
start: -1,
|
||||
stop: 4,
|
||||
err: ErrInvalidStartPosition,
|
||||
expect: "",
|
||||
},
|
||||
{
|
||||
input: "abcdefg",
|
||||
start: 100,
|
||||
stop: 4,
|
||||
err: ErrInvalidStartPosition,
|
||||
expect: "",
|
||||
},
|
||||
{
|
||||
input: "abcdefg",
|
||||
start: 1,
|
||||
stop: -1,
|
||||
err: ErrInvalidStopPosition,
|
||||
expect: "",
|
||||
},
|
||||
{
|
||||
input: "abcdefg",
|
||||
start: 1,
|
||||
stop: 100,
|
||||
err: ErrInvalidStopPosition,
|
||||
expect: "",
|
||||
},
|
||||
}
|
||||
|
||||
for _, each := range cases {
|
||||
t.Run(each.input, func(t *testing.T) {
|
||||
val, err := Substr(each.input, each.start, each.stop)
|
||||
assert.Equal(t, each.err, err)
|
||||
if err == nil {
|
||||
assert.Equal(t, each.expect, val)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTakeOne(t *testing.T) {
|
||||
cases := []struct {
|
||||
valid string
|
||||
or string
|
||||
expect string
|
||||
}{
|
||||
{"", "", ""},
|
||||
{"", "1", "1"},
|
||||
{"1", "", "1"},
|
||||
{"1", "2", "1"},
|
||||
}
|
||||
|
||||
for _, each := range cases {
|
||||
t.Run(each.valid, func(t *testing.T) {
|
||||
actual := TakeOne(each.valid, each.or)
|
||||
assert.Equal(t, each.expect, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTakeWithPriority(t *testing.T) {
|
||||
tests := []struct {
|
||||
fns []func() string
|
||||
expect string
|
||||
}{
|
||||
{
|
||||
fns: []func() string{
|
||||
func() string {
|
||||
return "first"
|
||||
},
|
||||
func() string {
|
||||
return "second"
|
||||
},
|
||||
func() string {
|
||||
return "third"
|
||||
},
|
||||
},
|
||||
expect: "first",
|
||||
},
|
||||
{
|
||||
fns: []func() string{
|
||||
func() string {
|
||||
return ""
|
||||
},
|
||||
func() string {
|
||||
return "second"
|
||||
},
|
||||
func() string {
|
||||
return "third"
|
||||
},
|
||||
},
|
||||
expect: "second",
|
||||
},
|
||||
{
|
||||
fns: []func() string{
|
||||
func() string {
|
||||
return ""
|
||||
},
|
||||
func() string {
|
||||
return ""
|
||||
},
|
||||
func() string {
|
||||
return "third"
|
||||
},
|
||||
},
|
||||
expect: "third",
|
||||
},
|
||||
{
|
||||
fns: []func() string{
|
||||
func() string {
|
||||
return ""
|
||||
},
|
||||
func() string {
|
||||
return ""
|
||||
},
|
||||
func() string {
|
||||
return ""
|
||||
},
|
||||
},
|
||||
expect: "",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(RandId(), func(t *testing.T) {
|
||||
val := TakeWithPriority(test.fns...)
|
||||
assert.Equal(t, test.expect, val)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnion(t *testing.T) {
|
||||
first := []string{
|
||||
"one",
|
||||
"two",
|
||||
"three",
|
||||
}
|
||||
second := []string{
|
||||
"zero",
|
||||
"two",
|
||||
"three",
|
||||
"four",
|
||||
}
|
||||
union := Union(first, second)
|
||||
contains := func(v string) bool {
|
||||
for _, each := range union {
|
||||
if v == each {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
assert.Equal(t, 5, len(union))
|
||||
assert.True(t, contains("zero"))
|
||||
assert.True(t, contains("one"))
|
||||
assert.True(t, contains("two"))
|
||||
assert.True(t, contains("three"))
|
||||
assert.True(t, contains("four"))
|
||||
}
|
||||
119
core/stringx/trie.go
Normal file
119
core/stringx/trie.go
Normal file
@@ -0,0 +1,119 @@
|
||||
package stringx
|
||||
|
||||
import "zero/core/lang"
|
||||
|
||||
type (
|
||||
Trie interface {
|
||||
Filter(text string) (string, []string, bool)
|
||||
FindKeywords(text string) []string
|
||||
}
|
||||
|
||||
trieNode struct {
|
||||
node
|
||||
}
|
||||
|
||||
scope struct {
|
||||
start int
|
||||
stop int
|
||||
}
|
||||
)
|
||||
|
||||
func NewTrie(words []string) Trie {
|
||||
n := new(trieNode)
|
||||
for _, word := range words {
|
||||
n.add(word)
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func (n *trieNode) Filter(text string) (sentence string, keywords []string, found bool) {
|
||||
chars := []rune(text)
|
||||
if len(chars) == 0 {
|
||||
return text, nil, false
|
||||
}
|
||||
|
||||
scopes := n.findKeywordScopes(chars)
|
||||
keywords = n.collectKeywords(chars, scopes)
|
||||
|
||||
for _, match := range scopes {
|
||||
// we don't care about overlaps, not bringing a performance improvement
|
||||
n.replaceWithAsterisk(chars, match.start, match.stop)
|
||||
}
|
||||
|
||||
return string(chars), keywords, len(keywords) > 0
|
||||
}
|
||||
|
||||
func (n *trieNode) FindKeywords(text string) []string {
|
||||
chars := []rune(text)
|
||||
if len(chars) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
scopes := n.findKeywordScopes(chars)
|
||||
return n.collectKeywords(chars, scopes)
|
||||
}
|
||||
|
||||
func (n *trieNode) collectKeywords(chars []rune, scopes []scope) []string {
|
||||
set := make(map[string]lang.PlaceholderType)
|
||||
for _, v := range scopes {
|
||||
set[string(chars[v.start:v.stop])] = lang.Placeholder
|
||||
}
|
||||
|
||||
var i int
|
||||
keywords := make([]string, len(set))
|
||||
for k := range set {
|
||||
keywords[i] = k
|
||||
i++
|
||||
}
|
||||
|
||||
return keywords
|
||||
}
|
||||
|
||||
func (n *trieNode) findKeywordScopes(chars []rune) []scope {
|
||||
var scopes []scope
|
||||
size := len(chars)
|
||||
start := -1
|
||||
|
||||
for i := 0; i < size; i++ {
|
||||
child, ok := n.children[chars[i]]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if start < 0 {
|
||||
start = i
|
||||
}
|
||||
if child.end {
|
||||
scopes = append(scopes, scope{
|
||||
start: start,
|
||||
stop: i + 1,
|
||||
})
|
||||
}
|
||||
|
||||
for j := i + 1; j < size; j++ {
|
||||
grandchild, ok := child.children[chars[j]]
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
|
||||
child = grandchild
|
||||
if child.end {
|
||||
scopes = append(scopes, scope{
|
||||
start: start,
|
||||
stop: j + 1,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
start = -1
|
||||
}
|
||||
|
||||
return scopes
|
||||
}
|
||||
|
||||
func (n *trieNode) replaceWithAsterisk(chars []rune, start, stop int) {
|
||||
for i := start; i < stop; i++ {
|
||||
chars[i] = '*'
|
||||
}
|
||||
}
|
||||
164
core/stringx/trie_test.go
Normal file
164
core/stringx/trie_test.go
Normal file
@@ -0,0 +1,164 @@
|
||||
package stringx
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestTrie(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
output string
|
||||
keywords []string
|
||||
found bool
|
||||
}{
|
||||
{
|
||||
input: "日本AV演员兼电视、电影演员。苍井空AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演",
|
||||
output: "日本****兼电视、电影演员。*****女优是xx出道, ******们最精彩的表演是******表演",
|
||||
keywords: []string{
|
||||
"AV演员",
|
||||
"苍井空",
|
||||
"AV",
|
||||
"日本AV女优",
|
||||
"AV演员色情",
|
||||
},
|
||||
found: true,
|
||||
},
|
||||
{
|
||||
input: "完全和谐的文本完全和谐的文本",
|
||||
output: "完全和谐的文本完全和谐的文本",
|
||||
keywords: nil,
|
||||
found: false,
|
||||
},
|
||||
{
|
||||
input: "就一个字不对",
|
||||
output: "就*个字不对",
|
||||
keywords: []string{
|
||||
"一",
|
||||
},
|
||||
found: true,
|
||||
},
|
||||
{
|
||||
input: "就一对, AV",
|
||||
output: "就*对, **",
|
||||
keywords: []string{
|
||||
"一",
|
||||
"AV",
|
||||
},
|
||||
found: true,
|
||||
},
|
||||
{
|
||||
input: "就一不对, AV",
|
||||
output: "就**对, **",
|
||||
keywords: []string{
|
||||
"一",
|
||||
"一不",
|
||||
"AV",
|
||||
},
|
||||
found: true,
|
||||
},
|
||||
{
|
||||
input: "就对, AV",
|
||||
output: "就对, **",
|
||||
keywords: []string{
|
||||
"AV",
|
||||
},
|
||||
found: true,
|
||||
},
|
||||
{
|
||||
input: "就对, 一不",
|
||||
output: "就对, **",
|
||||
keywords: []string{
|
||||
"一",
|
||||
"一不",
|
||||
},
|
||||
found: true,
|
||||
},
|
||||
{
|
||||
input: "",
|
||||
output: "",
|
||||
keywords: nil,
|
||||
found: false,
|
||||
},
|
||||
}
|
||||
|
||||
trie := NewTrie([]string{
|
||||
"", // no hurts for empty keywords
|
||||
"一",
|
||||
"一不",
|
||||
"AV",
|
||||
"AV演员",
|
||||
"苍井空",
|
||||
"AV演员色情",
|
||||
"日本AV女优",
|
||||
})
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
output, keywords, ok := trie.Filter(test.input)
|
||||
assert.Equal(t, test.found, ok)
|
||||
assert.Equal(t, test.output, output)
|
||||
assert.ElementsMatch(t, test.keywords, keywords)
|
||||
keywords = trie.FindKeywords(test.input)
|
||||
assert.ElementsMatch(t, test.keywords, keywords)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTrieSingleWord(t *testing.T) {
|
||||
trie := NewTrie([]string{
|
||||
"闹",
|
||||
})
|
||||
output, keywords, ok := trie.Filter("今晚真热闹")
|
||||
assert.ElementsMatch(t, []string{"闹"}, keywords)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "今晚真热*", output)
|
||||
}
|
||||
|
||||
func TestTrieOverlap(t *testing.T) {
|
||||
trie := NewTrie([]string{
|
||||
"一二三四五",
|
||||
"二三四五六七八",
|
||||
})
|
||||
output, keywords, ok := trie.Filter("零一二三四五六七八九十")
|
||||
assert.ElementsMatch(t, []string{
|
||||
"一二三四五",
|
||||
"二三四五六七八",
|
||||
}, keywords)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "零********九十", output)
|
||||
}
|
||||
|
||||
func TestTrieNested(t *testing.T) {
|
||||
trie := NewTrie([]string{
|
||||
"一二三",
|
||||
"一二三四五",
|
||||
"一二三四五六七八",
|
||||
})
|
||||
output, keywords, ok := trie.Filter("零一二三四五六七八九十")
|
||||
assert.ElementsMatch(t, []string{
|
||||
"一二三",
|
||||
"一二三四五",
|
||||
"一二三四五六七八",
|
||||
}, keywords)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "零********九十", output)
|
||||
}
|
||||
|
||||
func BenchmarkTrie(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
trie := NewTrie([]string{
|
||||
"A",
|
||||
"AV",
|
||||
"AV演员",
|
||||
"苍井空",
|
||||
"AV演员色情",
|
||||
"日本AV女优",
|
||||
})
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
trie.Filter("日本AV演员兼电视、电影演员。苍井空AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user