initial import

This commit is contained in:
kevin
2020-07-26 17:09:05 +08:00
commit 7e3a369a8f
647 changed files with 54754 additions and 0 deletions

32
core/stringx/node.go Normal file
View File

@@ -0,0 +1,32 @@
package stringx
// node is a single vertex of a rune-keyed prefix tree (trie).
type node struct {
	// children maps the next rune of a word to the subtree continuing it.
	// It stays nil until the first child is attached.
	children map[rune]*node
	// end reports whether some added word terminates exactly at this node.
	end bool
}

// add inserts word into the tree rooted at n, creating missing nodes on the
// way down and flagging the final node as a word end. An empty word is
// ignored, so the root itself is never flagged.
func (n *node) add(word string) {
	if len(word) == 0 {
		return
	}

	cur := n
	for _, r := range word {
		// Looking up a nil map is safe and simply reports a miss.
		next, found := cur.children[r]
		if !found {
			next = new(node)
			if cur.children == nil {
				cur.children = make(map[rune]*node)
			}
			cur.children[r] = next
		}
		cur = next
	}
	cur.end = true
}

79
core/stringx/random.go Normal file
View File

@@ -0,0 +1,79 @@
package stringx
import (
crand "crypto/rand"
"fmt"
"math/rand"
"sync"
"time"
)
// Tuning constants for random string generation.
const (
// letterBytes is the alphabet Randn draws its characters from (62 symbols).
letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
letterIdxBits = 6 // 6 bits to represent a letter index
// idLen is the number of random bytes RandId reads.
idLen = 8
// defaultRandLen is the string length Rand requests from Randn.
defaultRandLen = 8
letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
letterIdxMax = 63 / letterIdxBits // # of letter indices fitting in 63 bits
)
// src is the package-level random source used by Randn and Seed. It is seeded
// from the current wall-clock time when the package is initialized.
var src = newLockedSource(time.Now().UnixNano())
// lockedSource wraps a rand.Source with a mutex so that it can be shared
// between goroutines.
type lockedSource struct {
	source rand.Source
	lock   sync.Mutex
}

// newLockedSource returns a lockedSource whose underlying source is seeded
// with seed.
func newLockedSource(seed int64) *lockedSource {
	ls := new(lockedSource)
	ls.source = rand.NewSource(seed)
	return ls
}

// Int63 returns the next value of the underlying source while holding the lock.
func (s *lockedSource) Int63() int64 {
	s.lock.Lock()
	defer s.lock.Unlock()

	next := s.source.Int63()
	return next
}

// Seed reseeds the underlying source while holding the lock.
func (s *lockedSource) Seed(seed int64) {
	s.lock.Lock()
	defer s.lock.Unlock()

	s.source.Seed(seed)
}
// Rand returns the result of Randn for the default length defaultRandLen.
func Rand() string {
return Randn(defaultRandLen)
}
// RandId returns a random identifier: idLen bytes read from crypto/rand and
// rendered as lowercase hexadecimal (two characters per byte).
//
// If the secure source fails, the result of Randn(idLen) is returned instead;
// note that this fallback is idLen characters long, not 2*idLen.
func RandId() string {
	buf := make([]byte, idLen)
	if _, err := crand.Read(buf); err != nil {
		return Randn(idLen)
	}

	// Formatting the whole slice with %x yields the same digits as formatting
	// its consecutive 2-byte pieces one after another.
	return fmt.Sprintf("%x", buf)
}
// Randn returns a random string of n characters drawn from letterBytes, using
// the package-level source src. A non-positive n yields the empty string.
func Randn(n int) string {
	if n <= 0 {
		// make would panic on a negative length, and there is nothing to
		// generate for zero.
		return ""
	}

	b := make([]byte, n)
	// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
	// The buffer is filled from its last position towards the first.
	for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
		if remain == 0 {
			cache, remain = src.Int63(), letterIdxMax
		}
		// The mask can yield indices beyond the alphabet; those are skipped
		// rather than wrapped so every letter stays equally likely.
		if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
			b[i] = letterBytes[idx]
			i--
		}
		cache >>= letterIdxBits
		remain--
	}

	return string(b)
}
// Seed reseeds the package-level random source src with seed.
func Seed(seed int64) {
src.Seed(seed)
}

View File

@@ -0,0 +1,23 @@
package stringx
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// TestRand checks that the random helpers produce non-empty strings and that
// Randn honours the requested length. Dedicated assertions are used instead of
// assert.True so that a failure reports the offending value.
func TestRand(t *testing.T) {
	Seed(time.Now().UnixNano())

	assert.NotEmpty(t, Rand())
	assert.NotEmpty(t, RandId())

	const size = 10
	assert.Len(t, Randn(size), size)
}
// BenchmarkRandString measures the cost of generating a 10-character string.
func BenchmarkRandString(b *testing.B) {
	const length = 10

	for n := 0; n < b.N; n++ {
		_ = Randn(length)
	}
}

77
core/stringx/replacer.go Normal file
View File

@@ -0,0 +1,77 @@
package stringx
import "strings"
type (
// Replacer rewrites a text according to a fixed set of keyword mappings.
Replacer interface {
// Replace returns text with the configured substitutions applied.
Replace(text string) string
}
// replacer is the Replacer implementation. The embedded node is the root of
// a trie, and mapping associates each keyword with its replacement.
replacer struct {
node
mapping map[string]string
}
)
// NewReplacer builds a Replacer that substitutes every key of mapping with its
// associated value. The map is stored as passed (not copied), and each key is
// added to the replacer's trie.
func NewReplacer(mapping map[string]string) Replacer {
	r := &replacer{mapping: mapping}
	for keyword := range mapping {
		r.add(keyword)
	}

	return r
}
// Replace scans text from left to right and substitutes every keyword with its
// mapped replacement. At each position the longest keyword starting there
// wins; runes that do not start a keyword are copied unchanged.
//
// After a substitution the scan resumes immediately behind the matched
// keyword. Runes that were merely inspected while probing for a longer
// keyword are therefore kept and may start matches of their own.
func (r *replacer) Replace(text string) string {
	chars := []rune(text)
	size := len(chars)

	var builder strings.Builder
	builder.Grow(len(text))

	for i := 0; i < size; {
		// Walk the trie from position i and remember where the longest
		// keyword found so far ends (exclusive); -1 means no keyword.
		end := -1
		cur := &r.node
		for j := i; j < size; j++ {
			next, ok := cur.children[chars[j]]
			if !ok {
				break
			}

			cur = next
			if cur.end {
				end = j + 1
			}
		}

		if end < 0 {
			builder.WriteRune(chars[i])
			i++
			continue
		}

		builder.WriteString(r.mapping[string(chars[i:end])])
		// Continue right after the match, not where the trie walk stopped.
		i = end
	}

	return builder.String()
}

View File

@@ -0,0 +1,44 @@
package stringx
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestReplacer_Replace(t *testing.T) {
var mapping = map[string]string{
"一二三四": "1234",
"二三": "23",
"二": "2",
}
assert.Equal(t, "零1234五", NewReplacer(mapping).Replace("零一二三四五"))
}
func TestReplacer_ReplaceSingleChar(t *testing.T) {
var mapping = map[string]string{
"二": "2",
}
assert.Equal(t, "零一2三四五", NewReplacer(mapping).Replace("零一二三四五"))
}
func TestReplacer_ReplaceExceedRange(t *testing.T) {
var mapping = map[string]string{
"二三四五六": "23456",
}
assert.Equal(t, "零一二三四五", NewReplacer(mapping).Replace("零一二三四五"))
}
func TestReplacer_ReplacePartialMatch(t *testing.T) {
var mapping = map[string]string{
"二三四七": "2347",
}
assert.Equal(t, "零一二三四五", NewReplacer(mapping).Replace("零一二三四五"))
}
func TestReplacer_ReplaceMultiMatches(t *testing.T) {
var mapping = map[string]string{
"二三": "23",
}
assert.Equal(t, "零一23四五一23四五", NewReplacer(mapping).Replace("零一二三四五一二三四五"))
}

131
core/stringx/strings.go Normal file
View File

@@ -0,0 +1,131 @@
package stringx
import (
"errors"
"zero/core/lang"
)
// Errors returned by Substr.
var (
// ErrInvalidStartPosition reports a start index outside the string.
ErrInvalidStartPosition = errors.New("start position is invalid")
// ErrInvalidStopPosition reports a stop index outside the string.
ErrInvalidStopPosition = errors.New("stop position is invalid")
)
// Contains reports whether str is an element of list.
func Contains(list []string, str string) bool {
	for i := range list {
		if list[i] == str {
			return true
		}
	}

	return false
}
// Filter returns s without the runes for which filter reports true. The
// predicate is invoked exactly once per rune, in order.
func Filter(s string, filter func(r rune) bool) string {
	runes := []rune(s)

	// Compact the kept runes into the front of the same backing array.
	kept := runes[:0]
	for _, r := range runes {
		if filter(r) {
			continue
		}
		kept = append(kept, r)
	}

	return string(kept)
}
// HasEmpty reports whether at least one of args is the empty string. With no
// arguments it reports false.
func HasEmpty(args ...string) bool {
	for i := range args {
		if args[i] == "" {
			return true
		}
	}

	return false
}
// NotEmpty reports whether none of args is the empty string. With no
// arguments it reports true.
func NotEmpty(args ...string) bool {
	for i := range args {
		if args[i] == "" {
			return false
		}
	}

	return true
}
// Remove returns a copy of strings without any element equal to one of strs.
// The relative order of the remaining elements is kept and the input slice is
// left unmodified.
func Remove(strings []string, strs ...string) []string {
	out := append([]string(nil), strings...)

	for _, unwanted := range strs {
		// Compact the survivors into the front of the copy.
		kept := out[:0]
		for _, s := range out {
			if s == unwanted {
				continue
			}
			kept = append(kept, s)
		}
		out = kept
	}

	return out
}
// Reverse returns s with the order of its runes reversed.
func Reverse(s string) string {
	runes := []rune(s)
	last := len(runes) - 1

	// Swap mirrored positions until the two indices meet in the middle.
	for i := 0; i < last-i; i++ {
		runes[i], runes[last-i] = runes[last-i], runes[i]
	}

	return string(runes)
}
// Substr returns the runes of str in the half-open range [start, stop).
// Positions count runes, not bytes, so ASCII and multi-byte text behave alike.
//
// It returns ErrInvalidStartPosition if start is negative or greater than the
// rune count, and ErrInvalidStopPosition if stop is smaller than start or
// greater than the rune count.
func Substr(str string, start int, stop int) (string, error) {
	rs := []rune(str)
	length := len(rs)

	if start < 0 || start > length {
		return "", ErrInvalidStartPosition
	}

	// start is non-negative here, so stop < start also rejects a negative stop.
	// A stop before start would otherwise make the slice expression panic.
	if stop < start || stop > length {
		return "", ErrInvalidStopPosition
	}

	return string(rs[start:stop]), nil
}
// TakeOne returns valid unless it is empty, in which case it returns or.
func TakeOne(valid, or string) string {
	if valid != "" {
		return valid
	}

	return or
}
// TakeWithPriority calls fns in order and returns the first non-empty result.
// Functions after the first hit are not called. It returns the empty string
// when every function yields an empty result or no function is given.
func TakeWithPriority(fns ...func() string) string {
	for i := range fns {
		if val := fns[i](); val != "" {
			return val
		}
	}

	return ""
}
// Union returns the distinct strings that occur in first or second. The order
// of the result is unspecified because it is produced by map iteration.
func Union(first, second []string) []string {
	seen := make(map[string]lang.PlaceholderType)
	for _, list := range [][]string{first, second} {
		for _, item := range list {
			seen[item] = lang.Placeholder
		}
	}

	merged := make([]string, 0, len(seen))
	for item := range seen {
		merged = append(merged, item)
	}

	return merged
}

View File

@@ -0,0 +1,336 @@
package stringx
import (
"path"
"testing"
"github.com/stretchr/testify/assert"
)
func TestNotEmpty(t *testing.T) {
cases := []struct {
args []string
expect bool
}{
{
args: []string{"a", "b", "c"},
expect: true,
},
{
args: []string{"a", "", "c"},
expect: false,
},
{
args: []string{"a"},
expect: true,
},
{
args: []string{""},
expect: false,
},
{
args: []string{},
expect: true,
},
}
for _, each := range cases {
t.Run(path.Join(each.args...), func(t *testing.T) {
assert.Equal(t, each.expect, NotEmpty(each.args...))
})
}
}
func TestContainsString(t *testing.T) {
cases := []struct {
slice []string
value string
expect bool
}{
{[]string{"1"}, "1", true},
{[]string{"1"}, "2", false},
{[]string{"1", "2"}, "1", true},
{[]string{"1", "2"}, "3", false},
{nil, "3", false},
{nil, "", false},
}
for _, each := range cases {
t.Run(path.Join(each.slice...), func(t *testing.T) {
actual := Contains(each.slice, each.value)
assert.Equal(t, each.expect, actual)
})
}
}
func TestFilter(t *testing.T) {
cases := []struct {
input string
ignores []rune
expect string
}{
{``, nil, ``},
{`abcd`, nil, `abcd`},
{`ab,cd,ef`, []rune{','}, `abcdef`},
{`ab, cd,ef`, []rune{',', ' '}, `abcdef`},
{`ab, cd, ef`, []rune{',', ' '}, `abcdef`},
{`ab, cd, ef, `, []rune{',', ' '}, `abcdef`},
}
for _, each := range cases {
t.Run(each.input, func(t *testing.T) {
actual := Filter(each.input, func(r rune) bool {
for _, x := range each.ignores {
if x == r {
return true
}
}
return false
})
assert.Equal(t, each.expect, actual)
})
}
}
func TestRemove(t *testing.T) {
cases := []struct {
input []string
remove []string
expect []string
}{
{
input: []string{"a", "b", "a", "c"},
remove: []string{"a", "b"},
expect: []string{"c"},
},
{
input: []string{"b", "c"},
remove: []string{"a"},
expect: []string{"b", "c"},
},
{
input: []string{"b", "a", "c"},
remove: []string{"a"},
expect: []string{"b", "c"},
},
{
input: []string{},
remove: []string{"a"},
expect: []string{},
},
}
for _, each := range cases {
t.Run(path.Join(each.input...), func(t *testing.T) {
assert.ElementsMatch(t, each.expect, Remove(each.input, each.remove...))
})
}
}
func TestReverse(t *testing.T) {
cases := []struct {
input string
expect string
}{
{
input: "abcd",
expect: "dcba",
},
{
input: "",
expect: "",
},
{
input: "我爱中国",
expect: "国中爱我",
},
}
for _, each := range cases {
t.Run(each.input, func(t *testing.T) {
assert.Equal(t, each.expect, Reverse(each.input))
})
}
}
// TestSubstr covers Substr with valid ranges and with start/stop positions
// outside the string.
func TestSubstr(t *testing.T) {
	tests := []struct {
		input  string
		start  int
		stop   int
		err    error
		expect string
	}{
		{input: "abcdefg", start: 1, stop: 4, expect: "bcd"},
		{input: "我爱中国3000遍even more", start: 1, stop: 9, expect: "爱中国3000遍"},
		{input: "abcdefg", start: -1, stop: 4, err: ErrInvalidStartPosition, expect: ""},
		{input: "abcdefg", start: 100, stop: 4, err: ErrInvalidStartPosition, expect: ""},
		{input: "abcdefg", start: 1, stop: -1, err: ErrInvalidStopPosition, expect: ""},
		{input: "abcdefg", start: 1, stop: 100, err: ErrInvalidStopPosition, expect: ""},
	}

	for _, tc := range tests {
		t.Run(tc.input, func(t *testing.T) {
			val, err := Substr(tc.input, tc.start, tc.stop)
			assert.Equal(t, tc.err, err)
			if err != nil {
				return
			}
			assert.Equal(t, tc.expect, val)
		})
	}
}
func TestTakeOne(t *testing.T) {
cases := []struct {
valid string
or string
expect string
}{
{"", "", ""},
{"", "1", "1"},
{"1", "", "1"},
{"1", "2", "1"},
}
for _, each := range cases {
t.Run(each.valid, func(t *testing.T) {
actual := TakeOne(each.valid, each.or)
assert.Equal(t, each.expect, actual)
})
}
}
func TestTakeWithPriority(t *testing.T) {
tests := []struct {
fns []func() string
expect string
}{
{
fns: []func() string{
func() string {
return "first"
},
func() string {
return "second"
},
func() string {
return "third"
},
},
expect: "first",
},
{
fns: []func() string{
func() string {
return ""
},
func() string {
return "second"
},
func() string {
return "third"
},
},
expect: "second",
},
{
fns: []func() string{
func() string {
return ""
},
func() string {
return ""
},
func() string {
return "third"
},
},
expect: "third",
},
{
fns: []func() string{
func() string {
return ""
},
func() string {
return ""
},
func() string {
return ""
},
},
expect: "",
},
}
for _, test := range tests {
t.Run(RandId(), func(t *testing.T) {
val := TakeWithPriority(test.fns...)
assert.Equal(t, test.expect, val)
})
}
}
// TestUnion checks that Union yields each distinct element exactly once.
func TestUnion(t *testing.T) {
	first := []string{"one", "two", "three"}
	second := []string{"zero", "two", "three", "four"}

	union := Union(first, second)

	// contains reports whether v is part of the computed union.
	contains := func(v string) bool {
		for i := range union {
			if union[i] == v {
				return true
			}
		}
		return false
	}

	assert.Equal(t, 5, len(union))
	for _, want := range []string{"zero", "one", "two", "three", "four"} {
		assert.True(t, contains(want))
	}
}

119
core/stringx/trie.go Normal file
View File

@@ -0,0 +1,119 @@
package stringx
import "zero/core/lang"
type (
// Trie finds a fixed set of keywords inside texts.
Trie interface {
// Filter returns the text with keyword occurrences masked, the distinct
// keywords found, and whether any keyword was found.
Filter(text string) (string, []string, bool)
// FindKeywords returns the distinct keywords found in text.
FindKeywords(text string) []string
}
// trieNode is the Trie implementation; the embedded node is the trie root.
trieNode struct {
node
}
// scope is a half-open rune range [start, stop) covering one keyword
// occurrence.
scope struct {
start int
stop int
}
)
// NewTrie returns a Trie that recognizes the given words.
func NewTrie(words []string) Trie {
	root := &trieNode{}
	for i := range words {
		root.add(words[i])
	}

	return root
}
// Filter masks every keyword occurrence in text with asterisks, one per rune.
// It returns the masked sentence, the distinct keywords that were found and
// whether there was at least one. An empty text is returned as is.
func (n *trieNode) Filter(text string) (sentence string, keywords []string, found bool) {
	chars := []rune(text)
	if len(chars) == 0 {
		return text, nil, false
	}

	scopes := n.findKeywordScopes(chars)
	// The keywords must be collected before chars is masked below.
	keywords = n.collectKeywords(chars, scopes)

	// we don't care about overlaps, not bringing a performance improvement
	for i := range scopes {
		n.replaceWithAsterisk(chars, scopes[i].start, scopes[i].stop)
	}

	sentence = string(chars)
	found = len(keywords) > 0
	return sentence, keywords, found
}
// FindKeywords returns the distinct keywords that occur in text, or nil when
// text is empty.
func (n *trieNode) FindKeywords(text string) []string {
	if chars := []rune(text); len(chars) > 0 {
		return n.collectKeywords(chars, n.findKeywordScopes(chars))
	}

	return nil
}
// collectKeywords returns the distinct substrings of chars covered by scopes.
// The order of the result is unspecified because it is produced by map
// iteration.
func (n *trieNode) collectKeywords(chars []rune, scopes []scope) []string {
	seen := make(map[string]lang.PlaceholderType)
	for i := range scopes {
		keyword := string(chars[scopes[i].start:scopes[i].stop])
		seen[keyword] = lang.Placeholder
	}

	keywords := make([]string, 0, len(seen))
	for keyword := range seen {
		keywords = append(keywords, keyword)
	}

	return keywords
}
// findKeywordScopes returns the rune range of every keyword occurrence in
// chars. Occurrences are reported by ascending start position and, for the
// same start, by ascending length; overlapping occurrences are all reported.
func (n *trieNode) findKeywordScopes(chars []rune) []scope {
	var scopes []scope

	size := len(chars)
	for start := 0; start < size; start++ {
		// Follow the trie from the root for as long as the text matches.
		cur := &n.node
		for pos := start; pos < size; pos++ {
			next, ok := cur.children[chars[pos]]
			if !ok {
				break
			}

			cur = next
			if cur.end {
				scopes = append(scopes, scope{start: start, stop: pos + 1})
			}
		}
	}

	return scopes
}
// replaceWithAsterisk overwrites chars[start:stop] in place with '*'.
func (n *trieNode) replaceWithAsterisk(chars []rune, start, stop int) {
	const mask = '*'

	for pos := start; pos < stop; pos++ {
		chars[pos] = mask
	}
}

164
core/stringx/trie_test.go Normal file
View File

@@ -0,0 +1,164 @@
package stringx
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestTrie runs Filter and FindKeywords over texts with zero, one and several
// keyword occurrences and compares masked output and reported keywords.
func TestTrie(t *testing.T) {
	trie := NewTrie([]string{
		"", // empty keywords are harmless
		"一",
		"一不",
		"AV",
		"AV演员",
		"苍井空",
		"AV演员色情",
		"日本AV女优",
	})

	cases := []struct {
		input    string
		output   string
		keywords []string
		found    bool
	}{
		{
			input:    "日本AV演员兼电视、电影演员。苍井空AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演",
			output:   "日本****兼电视、电影演员。*****女优是xx出道, ******们最精彩的表演是******表演",
			keywords: []string{"AV演员", "苍井空", "AV", "日本AV女优", "AV演员色情"},
			found:    true,
		},
		{
			input:    "完全和谐的文本完全和谐的文本",
			output:   "完全和谐的文本完全和谐的文本",
			keywords: nil,
			found:    false,
		},
		{
			input:    "就一个字不对",
			output:   "就*个字不对",
			keywords: []string{"一"},
			found:    true,
		},
		{
			input:    "就一对, AV",
			output:   "就*对, **",
			keywords: []string{"一", "AV"},
			found:    true,
		},
		{
			input:    "就一不对, AV",
			output:   "就**对, **",
			keywords: []string{"一", "一不", "AV"},
			found:    true,
		},
		{
			input:    "就对, AV",
			output:   "就对, **",
			keywords: []string{"AV"},
			found:    true,
		},
		{
			input:    "就对, 一不",
			output:   "就对, **",
			keywords: []string{"一", "一不"},
			found:    true,
		},
		{
			input:    "",
			output:   "",
			keywords: nil,
			found:    false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			output, keywords, ok := trie.Filter(tc.input)
			assert.Equal(t, tc.found, ok)
			assert.Equal(t, tc.output, output)
			assert.ElementsMatch(t, tc.keywords, keywords)

			keywords = trie.FindKeywords(tc.input)
			assert.ElementsMatch(t, tc.keywords, keywords)
		})
	}
}
// TestTrieSingleWord checks a one-rune keyword at the very end of the text.
func TestTrieSingleWord(t *testing.T) {
	trie := NewTrie([]string{"闹"})

	output, keywords, ok := trie.Filter("今晚真热闹")

	assert.ElementsMatch(t, []string{"闹"}, keywords)
	assert.True(t, ok)
	assert.Equal(t, "今晚真热*", output)
}
// TestTrieOverlap checks that two overlapping keywords are both reported and
// that their combined range is masked.
func TestTrieOverlap(t *testing.T) {
	words := []string{
		"一二三四五",
		"二三四五六七八",
	}
	trie := NewTrie(words)

	output, keywords, ok := trie.Filter("零一二三四五六七八九十")

	assert.ElementsMatch(t, []string{
		"一二三四五",
		"二三四五六七八",
	}, keywords)
	assert.True(t, ok)
	assert.Equal(t, "零********九十", output)
}
// TestTrieNested checks that keywords which are prefixes of one another are
// all reported and that the longest one determines the masked range.
func TestTrieNested(t *testing.T) {
	words := []string{
		"一二三",
		"一二三四五",
		"一二三四五六七八",
	}
	trie := NewTrie(words)

	output, keywords, ok := trie.Filter("零一二三四五六七八九十")

	assert.ElementsMatch(t, []string{
		"一二三",
		"一二三四五",
		"一二三四五六七八",
	}, keywords)
	assert.True(t, ok)
	assert.Equal(t, "零********九十", output)
}
// BenchmarkTrie measures Filter on a text containing several keywords and
// reports allocations.
func BenchmarkTrie(b *testing.B) {
	const text = "日本AV演员兼电视、电影演员。苍井空AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演"

	b.ReportAllocs()

	words := []string{
		"A",
		"AV",
		"AV演员",
		"苍井空",
		"AV演员色情",
		"日本AV女优",
	}
	trie := NewTrie(words)

	for n := 0; n < b.N; n++ {
		trie.Filter(text)
	}
}