-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgreat_expectations.yml
174 lines (152 loc) · 5.97 KB
/
great_expectations.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# Welcome to Great Expectations.
# This project configuration file allows you to define datasources,
# generators, integrations, and other configuration artifacts that
# make it easier to use Great Expectations.
# For more help configuring Great Expectations,
# see the documentation at: https://greatexpectations.io/config_file.html
# NOTE: GE uses the names of configured datasources and generators to manage
# how expectations and other configuration artifacts are stored in the
# expectations/ and datasources/ folders. If you need to rename an existing
# datasource or generator, be sure to also update the paths for related artifacts.
# Datasources available to this project, keyed by datasource name.
# Each entry below is a pandas datasource with one generator ("default")
# of type subdir_reader pointed at a directory relative to this file.
datasources:
  # Files under ../data
  data__dir:
    type: pandas
    generators:
      default:
        type: subdir_reader
        base_directory: ../data
        # NOTE(review): these look like pandas read_csv keyword arguments
        # (comma separator, first row as header, python parsing engine) —
        # confirm they are forwarded to the reader unchanged.
        reader_options:
          sep: ','
          header: 0
          engine: python
    data_asset_type:
      class_name: PandasDataset
  # Files under ../output; same reader settings as data__dir
  output__dir:
    type: pandas
    generators:
      default:
        type: subdir_reader
        base_directory: ../output
        reader_options:
          sep: ','
          header: 0
          engine: python
    data_asset_type:
      class_name: PandasDataset
# plugins_directory is where the data_context looks for custom_data_assets.py
# and for any configured evaluation parameter store.
plugins_directory: plugins/
# Configure additional data context options here.
# Uncomment the lines below to enable s3 as a result store. If a result store is enabled,
# validation results will be saved in the store according to run id.
# For S3, ensure that appropriate credentials or assume_role permissions are set where
# validation happens.
validations_store:
  # Filesystem store rooted at uncommitted/validations/
  local:
    type: filesystem
    base_directory: uncommitted/validations/
#   remote:
#     type: s3
#     bucket: <your bucket>
#     key_prefix: <your key prefix>
#
# Uncomment the lines below to enable a result callback.
# result_callback:
#   slack: https://slack.com/replace_with_your_webhook
# Uncomment the lines below to save snapshots of data assets that fail validation.
# data_asset_snapshot_store:
#   filesystem:
#     base_directory: uncommitted/snapshots/
#   s3:
#     bucket:
#     key_prefix:
# Uncomment the lines below to enable a custom evaluation_parameter_store.
# evaluation_parameter_store:
#   type: my_evaluation_parameter_store
#   config:  # optional; this is how kwargs are passed to the object's constructor
#     param1: boo
#     param2: bah
# Data Docs: one entry per rendered documentation site under `sites`.
data_docs:
  sites:
    local_site:  # site name
      # "local_site" renders documentation for all the datasources in the
      # project from GE artifacts in the local repo.
      # The site includes expectation suites and profiling and validation
      # results from the uncommitted directory.
      # Local site provides the convenience of visualizing all the entities
      # stored in JSON files as HTML.
      type: SiteBuilder
      site_store:  # where the HTML will be written to (filesystem/S3)
        type: filesystem
        base_directory: uncommitted/documentation/local_site
      validations_store:  # where to look for validation results (filesystem/S3)
        type: filesystem
        base_directory: uncommitted/validations/
        # Validation and profiling results share one directory; the run id
        # filter separates them (ne: profiling here, eq: profiling below).
        run_id_filter:
          ne: profiling
      profiling_store:  # where to look for profiling results (filesystem/S3)
        type: filesystem
        base_directory: uncommitted/validations/
        run_id_filter:
          eq: profiling
      datasources: '*'  # by default, all datasources
      sections:
        index:
          renderer:
            module: great_expectations.render.renderer
            class: SiteIndexPageRenderer
          view:
            module: great_expectations.render.view
            class: DefaultJinjaIndexPageView
        validations:  # if not present, validation results are not rendered
          renderer:
            module: great_expectations.render.renderer
            class: ValidationResultsPageRenderer
          view:
            module: great_expectations.render.view
            class: DefaultJinjaPageView
        expectations:  # if not present, expectation suites are not rendered
          renderer:
            module: great_expectations.render.renderer
            class: ExpectationSuitePageRenderer
          view:
            module: great_expectations.render.view
            class: DefaultJinjaPageView
        profiling:  # if not present, profiling results are not rendered
          renderer:
            module: great_expectations.render.renderer
            class: ProfilingResultsPageRenderer
          view:
            module: great_expectations.render.view
            class: DefaultJinjaPageView
    team_site:
      # "team_site" is meant to support the "shared source of truth for a
      # team" use case.
      # By default only the expectations section is enabled.
      # Users have to configure the profiling and the validations sections
      # (and the corresponding validations_store and profiling_store
      # attributes) based on the team's decisions about where these are
      # stored (a local filesystem or S3).
      # Reach out on Slack (https://tinyurl.com/great-expectations-slack) if
      # you would like to discuss the best way to configure a team site.
      type: SiteBuilder
      site_store:
        type: filesystem
        base_directory: uncommitted/documentation/team_site
      # validations_store:
      #   type: s3
      #   bucket: ???
      #   path: ???
      # profiling_store:
      #   type: filesystem
      #   base_directory: fixtures/validations/
      #   run_id_filter:
      #     eq: profiling
      datasources: '*'
      sections:
        index:
          renderer:
            module: great_expectations.render.renderer
            class: SiteIndexPageRenderer
          view:
            module: great_expectations.render.view
            class: DefaultJinjaIndexPageView
        expectations:
          renderer:
            module: great_expectations.render.renderer
            class: ExpectationSuitePageRenderer
          view:
            module: great_expectations.render.view
            class: DefaultJinjaPageView