initial import
This commit is contained in:
264
core/mapreduce/mapreduce.go
Normal file
264
core/mapreduce/mapreduce.go
Normal file
@@ -0,0 +1,264 @@
|
||||
package mapreduce
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"zero/core/errorx"
|
||||
"zero/core/lang"
|
||||
"zero/core/syncx"
|
||||
"zero/core/threading"
|
||||
)
|
||||
|
||||
const (
	// defaultWorkers is the worker count used when no option overrides it.
	defaultWorkers = 16
	// minWorkers is the lower bound enforced on any requested worker count.
	minWorkers = 1
)
|
||||
|
||||
// ErrCancelWithNil is the error reported by MapReduce when the cancel func
// is called with a nil error.
var ErrCancelWithNil = errors.New("mapreduce cancelled with nil")
|
||||
|
||||
type (
	// GenerateFunc sends the elements to be processed into source.
	GenerateFunc func(source chan<- interface{})
	// MapFunc processes a single item and writes its output to writer.
	MapFunc func(item interface{}, writer Writer)
	// VoidMapFunc processes a single item without producing any output.
	VoidMapFunc func(item interface{})
	// MapperFunc processes a single item and writes its output to writer;
	// calling cancel aborts the whole processing.
	MapperFunc func(item interface{}, writer Writer, cancel func(error))
	// ReducerFunc consumes the mapping output from pipe and writes the final
	// result to writer; calling cancel aborts the whole processing.
	ReducerFunc func(pipe <-chan interface{}, writer Writer, cancel func(error))
	// VoidReducerFunc consumes the mapping output from pipe without producing
	// a result; calling cancel aborts the whole processing.
	VoidReducerFunc func(pipe <-chan interface{}, cancel func(error))
	// Option customizes the options of a map/reduce run.
	Option func(opts *mapReduceOptions)

	// mapReduceOptions holds the tunables of a map/reduce run.
	mapReduceOptions struct {
		// workers is the maximum number of mappers running at the same time.
		workers int
	}

	// Writer wraps the Write method used by mappers and reducers to emit values.
	Writer interface {
		Write(v interface{})
	}
)
|
||||
|
||||
func Finish(fns ...func() error) error {
|
||||
if len(fns) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return MapReduceVoid(func(source chan<- interface{}) {
|
||||
for _, fn := range fns {
|
||||
source <- fn
|
||||
}
|
||||
}, func(item interface{}, writer Writer, cancel func(error)) {
|
||||
fn := item.(func() error)
|
||||
if err := fn(); err != nil {
|
||||
cancel(err)
|
||||
}
|
||||
}, func(pipe <-chan interface{}, cancel func(error)) {
|
||||
drain(pipe)
|
||||
}, WithWorkers(len(fns)))
|
||||
}
|
||||
|
||||
func FinishVoid(fns ...func()) {
|
||||
if len(fns) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
MapVoid(func(source chan<- interface{}) {
|
||||
for _, fn := range fns {
|
||||
source <- fn
|
||||
}
|
||||
}, func(item interface{}) {
|
||||
fn := item.(func())
|
||||
fn()
|
||||
}, WithWorkers(len(fns)))
|
||||
}
|
||||
|
||||
func Map(generate GenerateFunc, mapper MapFunc, opts ...Option) chan interface{} {
|
||||
options := buildOptions(opts...)
|
||||
source := buildSource(generate)
|
||||
collector := make(chan interface{}, options.workers)
|
||||
done := syncx.NewDoneChan()
|
||||
|
||||
go mapDispatcher(mapper, source, collector, done.Done(), options.workers)
|
||||
|
||||
return collector
|
||||
}
|
||||
|
||||
func MapReduce(generate GenerateFunc, mapper MapperFunc, reducer ReducerFunc, opts ...Option) (interface{}, error) {
|
||||
source := buildSource(generate)
|
||||
return MapReduceWithSource(source, mapper, reducer, opts...)
|
||||
}
|
||||
|
||||
func MapReduceWithSource(source <-chan interface{}, mapper MapperFunc, reducer ReducerFunc,
|
||||
opts ...Option) (interface{}, error) {
|
||||
options := buildOptions(opts...)
|
||||
output := make(chan interface{})
|
||||
collector := make(chan interface{}, options.workers)
|
||||
done := syncx.NewDoneChan()
|
||||
writer := newGuardedWriter(output, done.Done())
|
||||
var retErr errorx.AtomicError
|
||||
cancel := once(func(err error) {
|
||||
if err != nil {
|
||||
retErr.Set(err)
|
||||
} else {
|
||||
retErr.Set(ErrCancelWithNil)
|
||||
}
|
||||
|
||||
drain(source)
|
||||
done.Close()
|
||||
close(output)
|
||||
})
|
||||
|
||||
go func() {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
cancel(fmt.Errorf("%v", r))
|
||||
}
|
||||
}()
|
||||
reducer(collector, writer, cancel)
|
||||
}()
|
||||
go mapperDispatcher(mapper, source, collector, done.Done(), cancel, options.workers)
|
||||
|
||||
value, ok := <-output
|
||||
if err := retErr.Load(); err != nil {
|
||||
return nil, err
|
||||
} else if ok {
|
||||
return value, nil
|
||||
} else {
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
func MapReduceVoid(generator GenerateFunc, mapper MapperFunc, reducer VoidReducerFunc, opts ...Option) error {
|
||||
_, err := MapReduce(generator, mapper, func(input <-chan interface{}, writer Writer, cancel func(error)) {
|
||||
reducer(input, cancel)
|
||||
// We need to write a placeholder to let MapReduce to continue on reducer done,
|
||||
// otherwise, all goroutines are waiting. The placeholder will be discarded by MapReduce.
|
||||
writer.Write(lang.Placeholder)
|
||||
}, opts...)
|
||||
return err
|
||||
}
|
||||
|
||||
func MapVoid(generate GenerateFunc, mapper VoidMapFunc, opts ...Option) {
|
||||
drain(Map(generate, func(item interface{}, writer Writer) {
|
||||
mapper(item)
|
||||
}, opts...))
|
||||
}
|
||||
|
||||
func WithWorkers(workers int) Option {
|
||||
return func(opts *mapReduceOptions) {
|
||||
if workers < minWorkers {
|
||||
opts.workers = minWorkers
|
||||
} else {
|
||||
opts.workers = workers
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildOptions(opts ...Option) *mapReduceOptions {
|
||||
options := newOptions()
|
||||
for _, opt := range opts {
|
||||
opt(options)
|
||||
}
|
||||
|
||||
return options
|
||||
}
|
||||
|
||||
func buildSource(generate GenerateFunc) chan interface{} {
|
||||
source := make(chan interface{})
|
||||
threading.GoSafe(func() {
|
||||
defer close(source)
|
||||
generate(source)
|
||||
})
|
||||
|
||||
return source
|
||||
}
|
||||
|
||||
// drain receives from channel and discards every value until the channel is
// closed.
func drain(channel <-chan interface{}) {
	for {
		if _, open := <-channel; !open {
			return
		}
	}
}
|
||||
|
||||
func executeMappers(mapper MapFunc, input <-chan interface{}, collector chan<- interface{},
|
||||
done <-chan lang.PlaceholderType, workers int) {
|
||||
var wg sync.WaitGroup
|
||||
defer func() {
|
||||
wg.Wait()
|
||||
close(collector)
|
||||
}()
|
||||
|
||||
pool := make(chan lang.PlaceholderType, workers)
|
||||
writer := newGuardedWriter(collector, done)
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
return
|
||||
case pool <- lang.Placeholder:
|
||||
item, ok := <-input
|
||||
if !ok {
|
||||
<-pool
|
||||
return
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
// better to safely run caller defined method
|
||||
threading.GoSafe(func() {
|
||||
defer func() {
|
||||
wg.Done()
|
||||
<-pool
|
||||
}()
|
||||
|
||||
mapper(item, writer)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mapDispatcher(mapper MapFunc, input <-chan interface{}, collector chan<- interface{},
|
||||
done <-chan lang.PlaceholderType, workers int) {
|
||||
executeMappers(func(item interface{}, writer Writer) {
|
||||
mapper(item, writer)
|
||||
}, input, collector, done, workers)
|
||||
}
|
||||
|
||||
func mapperDispatcher(mapper MapperFunc, input <-chan interface{}, collector chan<- interface{},
|
||||
done <-chan lang.PlaceholderType, cancel func(error), workers int) {
|
||||
executeMappers(func(item interface{}, writer Writer) {
|
||||
mapper(item, writer, cancel)
|
||||
}, input, collector, done, workers)
|
||||
}
|
||||
|
||||
func newOptions() *mapReduceOptions {
|
||||
return &mapReduceOptions{
|
||||
workers: defaultWorkers,
|
||||
}
|
||||
}
|
||||
|
||||
// once wraps fn so that only the first call of the returned function invokes
// it; later calls are ignored, whatever error they carry.
func once(fn func(error)) func(error) {
	var guard sync.Once

	return func(err error) {
		guard.Do(func() {
			fn(err)
		})
	}
}
|
||||
|
||||
type guardedWriter struct {
|
||||
channel chan<- interface{}
|
||||
done <-chan lang.PlaceholderType
|
||||
}
|
||||
|
||||
func newGuardedWriter(channel chan<- interface{}, done <-chan lang.PlaceholderType) guardedWriter {
|
||||
return guardedWriter{
|
||||
channel: channel,
|
||||
done: done,
|
||||
}
|
||||
}
|
||||
|
||||
func (gw guardedWriter) Write(v interface{}) {
|
||||
select {
|
||||
case <-gw.done:
|
||||
return
|
||||
default:
|
||||
gw.channel <- v
|
||||
}
|
||||
}
|
||||
403
core/mapreduce/mapreduce_test.go
Normal file
403
core/mapreduce/mapreduce_test.go
Normal file
@@ -0,0 +1,403 @@
|
||||
package mapreduce
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"runtime"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"zero/core/stringx"
|
||||
"zero/core/syncx"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// errDummy is the sentinel error the tests return or pass to cancel in order
// to check that it is propagated to the caller.
var errDummy = errors.New("dummy")
|
||||
|
||||
// init redirects the standard logger to a discarding writer for the whole
// test binary.
func init() {
	sink := ioutil.Discard
	log.SetOutput(sink)
}
|
||||
|
||||
func TestFinish(t *testing.T) {
|
||||
var total uint32
|
||||
err := Finish(func() error {
|
||||
atomic.AddUint32(&total, 2)
|
||||
return nil
|
||||
}, func() error {
|
||||
atomic.AddUint32(&total, 3)
|
||||
return nil
|
||||
}, func() error {
|
||||
atomic.AddUint32(&total, 5)
|
||||
return nil
|
||||
})
|
||||
|
||||
assert.Equal(t, uint32(10), atomic.LoadUint32(&total))
|
||||
assert.Nil(t, err)
|
||||
}
|
||||
|
||||
func TestFinishNone(t *testing.T) {
|
||||
assert.Nil(t, Finish())
|
||||
}
|
||||
|
||||
func TestFinishVoidNone(t *testing.T) {
|
||||
FinishVoid()
|
||||
}
|
||||
|
||||
func TestFinishErr(t *testing.T) {
|
||||
var total uint32
|
||||
err := Finish(func() error {
|
||||
atomic.AddUint32(&total, 2)
|
||||
return nil
|
||||
}, func() error {
|
||||
atomic.AddUint32(&total, 3)
|
||||
return errDummy
|
||||
}, func() error {
|
||||
atomic.AddUint32(&total, 5)
|
||||
return nil
|
||||
})
|
||||
|
||||
assert.Equal(t, errDummy, err)
|
||||
}
|
||||
|
||||
func TestFinishVoid(t *testing.T) {
|
||||
var total uint32
|
||||
FinishVoid(func() {
|
||||
atomic.AddUint32(&total, 2)
|
||||
}, func() {
|
||||
atomic.AddUint32(&total, 3)
|
||||
}, func() {
|
||||
atomic.AddUint32(&total, 5)
|
||||
})
|
||||
|
||||
assert.Equal(t, uint32(10), atomic.LoadUint32(&total))
|
||||
}
|
||||
|
||||
func TestMap(t *testing.T) {
|
||||
tests := []struct {
|
||||
mapper MapFunc
|
||||
expect int
|
||||
}{
|
||||
{
|
||||
mapper: func(item interface{}, writer Writer) {
|
||||
v := item.(int)
|
||||
writer.Write(v * v)
|
||||
},
|
||||
expect: 30,
|
||||
},
|
||||
{
|
||||
mapper: func(item interface{}, writer Writer) {
|
||||
v := item.(int)
|
||||
if v%2 == 0 {
|
||||
return
|
||||
}
|
||||
writer.Write(v * v)
|
||||
},
|
||||
expect: 10,
|
||||
},
|
||||
{
|
||||
mapper: func(item interface{}, writer Writer) {
|
||||
v := item.(int)
|
||||
if v%2 == 0 {
|
||||
panic(v)
|
||||
}
|
||||
writer.Write(v * v)
|
||||
},
|
||||
expect: 10,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(stringx.Rand(), func(t *testing.T) {
|
||||
channel := Map(func(source chan<- interface{}) {
|
||||
for i := 1; i < 5; i++ {
|
||||
source <- i
|
||||
}
|
||||
}, test.mapper, WithWorkers(-1))
|
||||
|
||||
var result int
|
||||
for v := range channel {
|
||||
result += v.(int)
|
||||
}
|
||||
|
||||
assert.Equal(t, test.expect, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMapReduce(t *testing.T) {
|
||||
tests := []struct {
|
||||
mapper MapperFunc
|
||||
reducer ReducerFunc
|
||||
expectErr error
|
||||
expectValue interface{}
|
||||
}{
|
||||
{
|
||||
expectErr: nil,
|
||||
expectValue: 30,
|
||||
},
|
||||
{
|
||||
mapper: func(item interface{}, writer Writer, cancel func(error)) {
|
||||
v := item.(int)
|
||||
if v%3 == 0 {
|
||||
cancel(errDummy)
|
||||
}
|
||||
writer.Write(v * v)
|
||||
},
|
||||
expectErr: errDummy,
|
||||
},
|
||||
{
|
||||
mapper: func(item interface{}, writer Writer, cancel func(error)) {
|
||||
v := item.(int)
|
||||
if v%3 == 0 {
|
||||
cancel(nil)
|
||||
}
|
||||
writer.Write(v * v)
|
||||
},
|
||||
expectErr: ErrCancelWithNil,
|
||||
expectValue: nil,
|
||||
},
|
||||
{
|
||||
reducer: func(pipe <-chan interface{}, writer Writer, cancel func(error)) {
|
||||
var result int
|
||||
for item := range pipe {
|
||||
result += item.(int)
|
||||
if result > 10 {
|
||||
cancel(errDummy)
|
||||
}
|
||||
}
|
||||
writer.Write(result)
|
||||
},
|
||||
expectErr: errDummy,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(stringx.Rand(), func(t *testing.T) {
|
||||
if test.mapper == nil {
|
||||
test.mapper = func(item interface{}, writer Writer, cancel func(error)) {
|
||||
v := item.(int)
|
||||
writer.Write(v * v)
|
||||
}
|
||||
}
|
||||
if test.reducer == nil {
|
||||
test.reducer = func(pipe <-chan interface{}, writer Writer, cancel func(error)) {
|
||||
var result int
|
||||
for item := range pipe {
|
||||
result += item.(int)
|
||||
}
|
||||
writer.Write(result)
|
||||
}
|
||||
}
|
||||
value, err := MapReduce(func(source chan<- interface{}) {
|
||||
for i := 1; i < 5; i++ {
|
||||
source <- i
|
||||
}
|
||||
}, test.mapper, test.reducer, WithWorkers(runtime.NumCPU()))
|
||||
|
||||
assert.Equal(t, test.expectErr, err)
|
||||
assert.Equal(t, test.expectValue, value)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMapReduceVoid(t *testing.T) {
|
||||
var value uint32
|
||||
tests := []struct {
|
||||
mapper MapperFunc
|
||||
reducer VoidReducerFunc
|
||||
expectValue uint32
|
||||
expectErr error
|
||||
}{
|
||||
{
|
||||
expectValue: 30,
|
||||
expectErr: nil,
|
||||
},
|
||||
{
|
||||
mapper: func(item interface{}, writer Writer, cancel func(error)) {
|
||||
v := item.(int)
|
||||
if v%3 == 0 {
|
||||
cancel(errDummy)
|
||||
}
|
||||
writer.Write(v * v)
|
||||
},
|
||||
expectErr: errDummy,
|
||||
},
|
||||
{
|
||||
mapper: func(item interface{}, writer Writer, cancel func(error)) {
|
||||
v := item.(int)
|
||||
if v%3 == 0 {
|
||||
cancel(nil)
|
||||
}
|
||||
writer.Write(v * v)
|
||||
},
|
||||
expectErr: ErrCancelWithNil,
|
||||
},
|
||||
{
|
||||
reducer: func(pipe <-chan interface{}, cancel func(error)) {
|
||||
for item := range pipe {
|
||||
result := atomic.AddUint32(&value, uint32(item.(int)))
|
||||
if result > 10 {
|
||||
cancel(errDummy)
|
||||
}
|
||||
}
|
||||
},
|
||||
expectErr: errDummy,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(stringx.Rand(), func(t *testing.T) {
|
||||
atomic.StoreUint32(&value, 0)
|
||||
|
||||
if test.mapper == nil {
|
||||
test.mapper = func(item interface{}, writer Writer, cancel func(error)) {
|
||||
v := item.(int)
|
||||
writer.Write(v * v)
|
||||
}
|
||||
}
|
||||
if test.reducer == nil {
|
||||
test.reducer = func(pipe <-chan interface{}, cancel func(error)) {
|
||||
for item := range pipe {
|
||||
atomic.AddUint32(&value, uint32(item.(int)))
|
||||
}
|
||||
}
|
||||
}
|
||||
err := MapReduceVoid(func(source chan<- interface{}) {
|
||||
for i := 1; i < 5; i++ {
|
||||
source <- i
|
||||
}
|
||||
}, test.mapper, test.reducer)
|
||||
|
||||
assert.Equal(t, test.expectErr, err)
|
||||
if err == nil {
|
||||
assert.Equal(t, test.expectValue, atomic.LoadUint32(&value))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMapReduceVoidWithDelay(t *testing.T) {
|
||||
var result []int
|
||||
err := MapReduceVoid(func(source chan<- interface{}) {
|
||||
source <- 0
|
||||
source <- 1
|
||||
}, func(item interface{}, writer Writer, cancel func(error)) {
|
||||
i := item.(int)
|
||||
if i == 0 {
|
||||
time.Sleep(time.Millisecond * 50)
|
||||
}
|
||||
writer.Write(i)
|
||||
}, func(pipe <-chan interface{}, cancel func(error)) {
|
||||
for item := range pipe {
|
||||
i := item.(int)
|
||||
result = append(result, i)
|
||||
}
|
||||
})
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, 2, len(result))
|
||||
assert.Equal(t, 1, result[0])
|
||||
assert.Equal(t, 0, result[1])
|
||||
}
|
||||
|
||||
func TestMapVoid(t *testing.T) {
|
||||
const tasks = 1000
|
||||
var count uint32
|
||||
MapVoid(func(source chan<- interface{}) {
|
||||
for i := 0; i < tasks; i++ {
|
||||
source <- i
|
||||
}
|
||||
}, func(item interface{}) {
|
||||
atomic.AddUint32(&count, 1)
|
||||
})
|
||||
|
||||
assert.Equal(t, tasks, int(count))
|
||||
}
|
||||
|
||||
func TestMapReducePanic(t *testing.T) {
|
||||
v, err := MapReduce(func(source chan<- interface{}) {
|
||||
source <- 0
|
||||
source <- 1
|
||||
}, func(item interface{}, writer Writer, cancel func(error)) {
|
||||
i := item.(int)
|
||||
writer.Write(i)
|
||||
}, func(pipe <-chan interface{}, writer Writer, cancel func(error)) {
|
||||
for range pipe {
|
||||
panic("panic")
|
||||
}
|
||||
})
|
||||
assert.Nil(t, v)
|
||||
assert.NotNil(t, err)
|
||||
assert.Equal(t, "panic", err.Error())
|
||||
}
|
||||
|
||||
func TestMapReduceVoidCancel(t *testing.T) {
|
||||
var result []int
|
||||
err := MapReduceVoid(func(source chan<- interface{}) {
|
||||
source <- 0
|
||||
source <- 1
|
||||
}, func(item interface{}, writer Writer, cancel func(error)) {
|
||||
i := item.(int)
|
||||
if i == 1 {
|
||||
cancel(errors.New("anything"))
|
||||
}
|
||||
writer.Write(i)
|
||||
}, func(pipe <-chan interface{}, cancel func(error)) {
|
||||
for item := range pipe {
|
||||
i := item.(int)
|
||||
result = append(result, i)
|
||||
}
|
||||
})
|
||||
assert.NotNil(t, err)
|
||||
assert.Equal(t, "anything", err.Error())
|
||||
}
|
||||
|
||||
func TestMapReduceVoidCancelWithRemains(t *testing.T) {
|
||||
var done syncx.AtomicBool
|
||||
var result []int
|
||||
err := MapReduceVoid(func(source chan<- interface{}) {
|
||||
for i := 0; i < defaultWorkers*2; i++ {
|
||||
source <- i
|
||||
}
|
||||
done.Set(true)
|
||||
}, func(item interface{}, writer Writer, cancel func(error)) {
|
||||
i := item.(int)
|
||||
if i == defaultWorkers/2 {
|
||||
cancel(errors.New("anything"))
|
||||
}
|
||||
writer.Write(i)
|
||||
}, func(pipe <-chan interface{}, cancel func(error)) {
|
||||
for item := range pipe {
|
||||
i := item.(int)
|
||||
result = append(result, i)
|
||||
}
|
||||
})
|
||||
assert.NotNil(t, err)
|
||||
assert.Equal(t, "anything", err.Error())
|
||||
assert.True(t, done.True())
|
||||
}
|
||||
|
||||
func BenchmarkMapReduce(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
mapper := func(v interface{}, writer Writer, cancel func(error)) {
|
||||
writer.Write(v.(int64) * v.(int64))
|
||||
}
|
||||
reducer := func(input <-chan interface{}, writer Writer, cancel func(error)) {
|
||||
var result int64
|
||||
for v := range input {
|
||||
result += v.(int64)
|
||||
}
|
||||
writer.Write(result)
|
||||
}
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
MapReduce(func(input chan<- interface{}) {
|
||||
for j := 0; j < 2; j++ {
|
||||
input <- int64(j)
|
||||
}
|
||||
}, mapper, reducer)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user