forked from ooni/probe-engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
inputloader.go
209 lines (186 loc) · 6.19 KB
/
inputloader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
package engine
import (
"bufio"
"context"
"errors"
"fmt"
"github.com/ooni/probe-engine/internal/fsx"
"github.com/ooni/probe-engine/model"
)
// The following errors are returned by the InputLoader.
var (
// ErrNoInputExpected is returned when StaticInputs or SourceFiles are
// provided but the input policy does not accept any input.
ErrNoInputExpected = errors.New("we did not expect any input")
// ErrInputRequired is returned when the policy strictly requires
// input and neither StaticInputs nor SourceFiles provide any.
ErrInputRequired = errors.New("no input provided")
// ErrDetectedEmptyFile is the error wrapped (together with the file
// path) when one of the SourceFiles yields no input at all.
ErrDetectedEmptyFile = errors.New("file did not contain any input")
)
// InputLoaderSession is the session according to an InputLoader, i.e.,
// the subset of the session functionality that the loader relies upon
// when it needs to fetch input from the probe services.
type InputLoaderSession interface {
// MaybeLookupLocationContext is invoked before contacting the probe
// services; a non-nil error aborts loading remote input.
// NOTE(review): judging from the name, this looks up the probe
// location only if needed; confirm against the implementation.
MaybeLookupLocationContext(ctx context.Context) error
// NewOrchestraClient returns the client that is subsequently
// used to fetch the list of URLs.
NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error)
// ProbeCC returns the value that is sent as the country code
// when requesting the list of URLs.
ProbeCC() string
}
// InputLoader loads input according to the specified policy
// from the specified sources and OONI services. The behaviour
// depends on the input policy as described below.
//
// InputNone
//
// We fail with ErrNoInputExpected if there is any StaticInputs or
// any SourceFiles. If there's no input, we return a single, empty
// entry that causes experiments that don't require input to run
// once. This is also the behaviour for any policy that is not
// explicitly listed below.
//
// InputOptional
//
// We gather input from StaticInputs and SourceFiles. If there is
// input, we return it. Otherwise we return a single, empty entry
// that causes experiments that don't require input to run once.
//
// InputOrQueryTestLists
//
// We gather input from StaticInputs and SourceFiles. If there is
// input, we return it. Otherwise, we use OONI's probe services
// to gather input using the test lists API.
//
// InputStrictlyRequired
//
// We gather input from StaticInputs and SourceFiles. If there is
// input, we return it. Otherwise, it's a user error and we abort
// with ErrInputRequired without querying the probe services.
//
// Whenever SourceFiles are read, a file that cannot be read or that
// does not contain any input causes loading to fail; in the latter
// case the returned error wraps ErrDetectedEmptyFile.
type InputLoader interface {
// Load attempts to load input using the specified input loader. We will
// return a list of URLs because this is the only input we support.
Load(ctx context.Context) ([]model.URLInfo, error)
}
// InputLoaderConfig contains config for InputLoader.
type InputLoaderConfig struct {
// StaticInputs contains optional input to be added
// to the resulting input list if possible. Each entry
// becomes the URL of one returned element.
StaticInputs []string
// SourceFiles contains optional files to read input
// from. Each file should contain a single input string
// per line; empty lines are skipped. We will fail if any
// file is unreadable or does not contain any input.
SourceFiles []string
// InputPolicy specifies the input policy for the
// current experiment. We will not load any input if
// the policy says we should not.
InputPolicy InputPolicy
// Session is the current measurement session. It is only
// used when input must be fetched from the probe services.
Session InputLoaderSession
// URLLimit is the optional limit on the number of URLs
// that probe services should return to us. It is forwarded
// as the Limit of the URL list request.
URLLimit int64
// URLCategories limits the categories of URLs that
// probe services should return to us. It is forwarded as
// the Categories of the URL list request.
URLCategories []string
}
// NewInputLoader constructs an InputLoader whose behaviour is fully
// determined by the settings carried by config.
func NewInputLoader(config InputLoaderConfig) InputLoader {
	// TODO(bassosimone): this code is the outcome of a straightforward
	// refactoring of an earlier implementation that did not rely on
	// interfaces. Since we now return an interface, we could choose
	// a policy-specific implementation right here instead of
	// dispatching on the policy later, inside Load.
	var loader inputLoader
	loader.InputLoaderConfig = config
	return loader
}
// inputLoader is the InputLoader implementation returned by
// NewInputLoader. It embeds InputLoaderConfig so that its methods
// can access the configuration fields directly.
type inputLoader struct {
InputLoaderConfig
}
// Compile-time check that inputLoader satisfies InputLoader.
var _ InputLoader = inputLoader{}
// Load gathers the input for the experiment by dispatching on the
// configured InputPolicy. The result is a list of URLs, which is the
// only kind of input we support. Any policy that is not handled
// explicitly is treated as a policy that accepts no input.
func (il inputLoader) Load(ctx context.Context) ([]model.URLInfo, error) {
	policy := il.InputPolicy
	if policy == InputOptional {
		return il.loadOptional()
	}
	if policy == InputOrQueryTestLists {
		return il.loadOrQueryTestList(ctx)
	}
	if policy == InputStrictlyRequired {
		return il.loadStrictlyRequired(ctx)
	}
	// Fallback: behave as if no input is expected.
	return il.loadNone()
}
// loadNone implements the policy where no input is accepted. It returns
// ErrNoInputExpected if any static input or source file was configured;
// otherwise it returns a single empty entry so that the experiment
// runs exactly once.
func (il inputLoader) loadNone() ([]model.URLInfo, error) {
	if len(il.StaticInputs) == 0 && len(il.SourceFiles) == 0 {
		// A one-element slice holding the zero value of URLInfo.
		return make([]model.URLInfo, 1), nil
	}
	return nil, ErrNoInputExpected
}
// loadOptional implements the policy where input is optional. It returns
// the locally configured input, if any. When loading succeeds but yields
// nothing, it returns a single empty entry so that the experiment runs
// exactly once. Errors from loading local input are passed through.
func (il inputLoader) loadOptional() ([]model.URLInfo, error) {
	local, err := il.loadLocal()
	if err != nil {
		return local, err
	}
	if len(local) == 0 {
		return []model.URLInfo{{}}, nil
	}
	return local, nil
}
// loadStrictlyRequired implements the policy where the user must provide
// input. It returns the locally configured input or, when there is none,
// ErrInputRequired. Errors from loading local input are passed through.
// The ctx argument is currently not used by this method.
func (il inputLoader) loadStrictlyRequired(ctx context.Context) ([]model.URLInfo, error) {
	local, err := il.loadLocal()
	switch {
	case err != nil:
		return local, err
	case len(local) > 0:
		return local, nil
	default:
		return nil, ErrInputRequired
	}
}
// loadOrQueryTestList implements the policy where locally configured
// input takes precedence and, in its absence, input is fetched from the
// probe services using the configured Session. Errors from loading
// local input are passed through without contacting the probe services.
func (il inputLoader) loadOrQueryTestList(ctx context.Context) ([]model.URLInfo, error) {
	local, err := il.loadLocal()
	if err == nil && len(local) == 0 {
		// Nothing was configured locally: fall back to the remote lists.
		remoteConf := loadRemoteConfig{ctx: ctx, session: il.Session}
		return il.loadRemote(remoteConf)
	}
	return local, err
}
// loadLocal collects the input configured through StaticInputs and
// SourceFiles, in this order. It fails if any source file cannot be
// read, and it fails with an error wrapping ErrDetectedEmptyFile if any
// source file does not contain input. On success the returned slice is
// never nil, though it may be empty.
func (il inputLoader) loadLocal() ([]model.URLInfo, error) {
	collected := make([]model.URLInfo, 0, len(il.StaticInputs))
	for idx := range il.StaticInputs {
		collected = append(collected, model.URLInfo{URL: il.StaticInputs[idx]})
	}
	for _, name := range il.SourceFiles {
		fromFile, err := il.readfile(name, fsx.Open)
		switch {
		case err != nil:
			return nil, err
		case len(fromFile) == 0:
			// See https://github.com/ooni/probe-engine/issues/1123.
			return nil, fmt.Errorf("%w: %s", ErrDetectedEmptyFile, name)
		}
		collected = append(collected, fromFile...)
	}
	return collected, nil
}
// readfile reads the file at filepath, which is opened using the open
// function, and returns one entry for each non-empty line. It returns
// an error if the file cannot be opened or if scanning its content
// fails. On success the returned slice is never nil.
func (il inputLoader) readfile(filepath string, open func(string) (fsx.File, error)) ([]model.URLInfo, error) {
	fp, err := open(filepath)
	if err != nil {
		return nil, err
	}
	defer fp.Close()
	urls := []model.URLInfo{}
	// Implementation note: editors such as vim leave a newline at the end
	// of the file, which must not count as an input line. For the same
	// reason we also skip every other empty line found in the file.
	lines := bufio.NewScanner(fp)
	for lines.Scan() {
		if text := lines.Text(); text != "" {
			urls = append(urls, model.URLInfo{URL: text})
		}
	}
	if err := lines.Err(); err != nil {
		return nil, err
	}
	return urls, nil
}
// loadRemoteConfig contains the arguments for inputLoader.loadRemote.
type loadRemoteConfig struct {
// ctx is the context passed to every session and client
// call performed by loadRemote.
ctx context.Context
// session is the session used to look up the location, to
// create the client, and to obtain the country code.
session InputLoaderSession
}
// loadRemote fetches the list of URLs using the session in conf. It
// first asks the session to look up the location, then creates a client
// and uses it to fetch the URL list for the session's country code,
// honouring the configured URLLimit and URLCategories. The first
// error encountered is returned as is.
func (il inputLoader) loadRemote(conf loadRemoteConfig) ([]model.URLInfo, error) {
	ctx, sess := conf.ctx, conf.session
	err := sess.MaybeLookupLocationContext(ctx)
	if err != nil {
		return nil, err
	}
	client, err := sess.NewOrchestraClient(ctx)
	if err != nil {
		return nil, err
	}
	query := model.URLListConfig{
		CountryCode: sess.ProbeCC(),
		Limit:       il.URLLimit,
		Categories:  il.URLCategories,
	}
	return client.FetchURLList(ctx, query)
}