-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathmain.go
83 lines (72 loc) · 1.89 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// The sharding example shows how to use an output that supports sharding.
// When a shardable output is used, the parallel output processes (their
// number is set by the "procs" configuration value in the TOML) each
// receive a subset of the processed records. The sharding function
// returns a shard index (based on the value of the sharded field), and the
// record is assigned to an output process by taking that index modulo the
// number of available output processes.
// This means that using a procs=1 configuration is the same as disabling
// sharding, while procs=N, where N is the number of possible values of the
// sharded field, means that each output always receives records with the
// same value for that field.
package main
import (
"log"
"strings"
"github.com/AdRoll/baker"
"github.com/AdRoll/baker/input"
)
// Indexes of the example record fields. They are numbered consecutively
// from 0, in the same order as the entries of fieldNames.
const (
	ID baker.FieldIndex = iota
	FirstName
	LastName
	Age
	Street
	City
	Dollar
)
// fieldNames lists the name of every example field; the position of a name
// in the slice is the index of the corresponding field.
var fieldNames = []string{
	0: "id",
	1: "first_name",
	2: "last_name",
	3: "age",
	4: "street",
	5: "city",
	6: "dollar",
}
// fieldByName looks name up in fieldNames and returns its position as a
// baker.FieldIndex together with true. If no entry of fieldNames equals
// name, it returns 0 and false.
func fieldByName(name string) (baker.FieldIndex, bool) {
	for i := range fieldNames {
		if fieldNames[i] != name {
			continue
		}
		return baker.FieldIndex(i), true
	}
	return 0, false
}
// components is the set of Baker components handed to
// baker.NewConfigFromToml in main: all the inputs from the input package,
// the Shardable output, the sharding functions, and the field name/index
// mapping defined in this file.
var components = baker.Components{
	FieldNames:    fieldNames,
	FieldByName:   fieldByName,
	ShardingFuncs: shardingFuncs,
	Inputs:        input.All,
	Outputs:       []baker.OutputDesc{ShardableDesc},
}
// main parses the inline TOML configuration with baker.NewConfigFromToml
// (using the package-level components) and passes the resulting
// configuration to baker.Main. Any error from either step is reported with
// log.Fatal, which terminates the program.
func main() {
	// The configuration selects the "List" input over a sample data file
	// and the "Shardable" output, sharded on the "age" field with procs=10.
	const pipelineTOML = `
[input]
name="List"
[input.config]
files=["./testdata/customers_random.input.csv.zst"]
[output]
name="Shardable"
sharding="age" # "city" can be used as well
procs=10
`
	cfg, err := baker.NewConfigFromToml(strings.NewReader(pipelineTOML), components)
	if err != nil {
		log.Fatal(err)
	}

	err = baker.Main(cfg)
	if err != nil {
		log.Fatal(err)
	}
}