-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsv.rel
190 lines (170 loc) · 7.6 KB
/
csv.rel
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
// *****************************************************************
// Sales Engineering CSV data load Library
//
// Tools for loading CSV data into GNF data models.
//
// INSTALL this source: csv.rel
// *****************************************************************
// with se_util use apply
// NOTE: the import above is commented out; this file uses the fully-qualified
// reference se_util:apply instead.
// See NCDNTS-853, https://relationalai.atlassian.net/browse/NCDNTS-853
// Every meta module can be bound by this:
@inline module meta_csv_config
    // Entity name to use in the KG.
    def entity_name = RelName
    // Unique (key) column name.
    def key = RelName
    // Subtypes used when generalizing to a super type.
    def subtypes = RelName
    // Alternative unique column name.
    def alt_key = RelName
    // Columns whose values are taken as is (no conversion).
    def as_is_attr = RelName
    // Columns converted from String to a value type: (column, input, output).
    def value_type_attr = {RelName, Any, Any}
    // Columns with a dedicated String -> String pre-processing function.
    def string_attr = {RelName, String, String}
    // (column, target attribute, String -> String function); the se_csv parser
    // looks this up as string_transform_attr[column, attribute].
    def string_transform_attr = {RelName, RelName, String, String}
    // Columns parsed as integers.
    def int_attr = RelName
    // Columns parsed as floats.
    def float_attr = RelName
    // Date columns paired with their format string.
    def date_attr = {RelName, String}
    // Datetime columns paired with their format string.
    def datetime_attr = {RelName, String}
    // Column rename map: (column, alias).
    def attr_alias_map = {RelName, RelName}
    // Special string that stands for NULL values.
    def null_string = String
    // (name, description text) -- presumably documentation only; it is not read
    // by the se_csv parsers in this file. TODO confirm against other users.
    def attr_description = {RelName, String}
end
module se_csv
// as_is_attributes - emit (attr, e, val) for columns whose values are loaded unchanged.
//   ID   : relation of (entity, key value) pairs.
//   DATA : source data addressed as (column, row, value).
//   META : meta module; reads key, as_is_attr, null_string and attr_alias_map.
// A column contributes only when it is listed in META:as_is_attr and its value is
// not in META:null_string. `attr` is the alias from META:attr_alias_map when one
// exists, otherwise the column name itself.
@outline
def as_is_attributes[ID, DATA, META](attr in RelName, e in Entity, val) =
    META:as_is_attr(col) and
    DATA(col, rec, val) and
    not META:null_string(val) and
    DATA(META:key, rec, key_val) and
    ID(e, key_val) and
    (META:attr_alias_map[col] <++ col)(attr)
    from col, rec, key_val
// parse_value_type_attributes - emit (attr, e, val) for columns converted to a value type.
//   ID   : relation of (entity, key value) pairs.
//   DATA : source data addressed as (column, row, value).
//   META : meta module; reads key, value_type_attr, null_string and attr_alias_map.
// The raw value is mapped to `val` through META:value_type_attr[column]; values in
// META:null_string are skipped. `attr` is the alias from META:attr_alias_map when
// one exists, otherwise the column name itself.
// TYPE_MISMATCH diagnostics are disabled as a temporary workaround:
// https://relationalai.atlassian.net/browse/RAI-19187
@no_diagnostics(:TYPE_MISMATCH)
@outline
def parse_value_type_attributes[ID, DATA, META](attr in RelName, e in Entity, val) =
    DATA(col, rec, raw) and
    not META:null_string(raw) and
    META:value_type_attr[col][raw](val) and
    DATA(META:key, rec, key_val) and
    ID(e, key_val) and
    (META:attr_alias_map[col] <++ col)(attr)
    from col, rec, key_val, raw
// parse_string_attributes - emit (attr, e, val) for pre-processed string columns.
//   ID   : relation of (entity, key value) pairs.
//   DATA : source data addressed as (column, row, value).
//   META : meta module; reads key, string_attr, null_string and attr_alias_map.
// The raw value is passed through the function registered for the column in
// META:string_attr (via se_util:apply); values in META:null_string are skipped.
// `attr` is the alias from META:attr_alias_map when one exists, otherwise the
// column name itself.
@outline
def parse_string_attributes[ID, DATA, META](attr in RelName, e in Entity, val in String) =
    DATA(col, rec, raw) and
    not META:null_string(raw) and
    se_util:apply[META:string_attr[col], raw](val) and
    DATA(META:key, rec, key_val) and
    ID(e, key_val) and
    (META:attr_alias_map[col] <++ col)(attr)
    from col, rec, key_val, raw
// parse_string_transform_attributes - emit (attr, e, val) for transformed string columns.
//   ID   : relation of (entity, key value) pairs.
//   DATA : source data addressed as (column, row, value).
//   META : meta module; reads key, string_transform_attr and null_string.
// Here `attr` comes from META:string_transform_attr itself (looked up as
// string_transform_attr[column, attr]); META:attr_alias_map is not consulted.
// The function found there is applied to the raw value via se_util:apply, and
// values in META:null_string are skipped.
// ARITY_MISMATCH diagnostics are disabled as a temporary workaround for a system
// bug (recommended by Mary McGrath).
@no_diagnostics(:ARITY_MISMATCH)
@outline
def parse_string_transform_attributes[ID, DATA, META](attr in RelName, e in Entity, val in String) =
    DATA(col, rec, raw) and
    not META:null_string(raw) and
    se_util:apply[META:string_transform_attr[col, attr], raw](val) and
    DATA(META:key, rec, key_val) and
    ID(e, key_val)
    from col, rec, key_val, raw
// parse_int_attributes - emit (attr, e, val) for integer columns.
//   ID   : relation of (entity, key value) pairs.
//   DATA : source data addressed as (column, row, value).
//   META : meta module; reads key, int_attr, null_string and attr_alias_map.
// Columns listed in META:int_attr are converted with parse_int; values in
// META:null_string are skipped. `attr` is the alias from META:attr_alias_map
// when one exists, otherwise the column name itself.
@outline
def parse_int_attributes[ID, DATA, META](attr in RelName, e in Entity, val in Int) =
    META:int_attr(col) and
    DATA(col, rec, raw) and
    not META:null_string(raw) and
    parse_int(raw, val) and
    DATA(META:key, rec, key_val) and
    ID(e, key_val) and
    (META:attr_alias_map[col] <++ col)(attr)
    from col, rec, key_val, raw
// parse_date_attributes - emit (attr, e, val) for date columns.
//   ID   : relation of (entity, key value) pairs.
//   DATA : source data addressed as (column, row, value).
//   META : meta module; reads key, date_attr, null_string and attr_alias_map.
// Each (column, format) pair in META:date_attr is converted with parse_date using
// that format; values in META:null_string are skipped. `attr` is the alias from
// META:attr_alias_map when one exists, otherwise the column name itself.
// TYPE_MISMATCH diagnostics are disabled as a temporary workaround:
// https://relationalai.atlassian.net/browse/RAI-19187
@no_diagnostics(:TYPE_MISMATCH)
@outline
def parse_date_attributes[ID, DATA, META](attr in RelName, e in Entity, val in Date) =
    META:date_attr(col, fmt) and
    DATA(col, rec, raw) and
    not META:null_string(raw) and
    parse_date(raw, fmt, val) and
    DATA(META:key, rec, key_val) and
    ID(e, key_val) and
    (META:attr_alias_map[col] <++ col)(attr)
    from col, fmt, rec, key_val, raw
// parse_datetime_attributes - emit (attr, e, val) for datetime columns.
//   ID   : relation of (entity, key value) pairs.
//   DATA : source data addressed as (column, row, value).
//   META : meta module; reads key, datetime_attr, null_string and attr_alias_map.
// Each (column, format) pair in META:datetime_attr is converted with
// parse_datetime using that format; values in META:null_string are skipped.
// `attr` is the alias from META:attr_alias_map when one exists, otherwise the
// column name itself.
// TYPE_MISMATCH diagnostics are disabled as a temporary workaround for a system
// bug (recommended by Mary McGrath).
@no_diagnostics(:TYPE_MISMATCH)
@outline
def parse_datetime_attributes[ID, DATA, META](attr in RelName, e in Entity, val in DateTime) =
    META:datetime_attr(col, fmt) and
    DATA(col, rec, raw) and
    not META:null_string(raw) and
    parse_datetime(raw, fmt, val) and
    DATA(META:key, rec, key_val) and
    ID(e, key_val) and
    (META:attr_alias_map[col] <++ col)(attr)
    from col in RelName, fmt in String, rec, key_val, raw
// parse_float_attributes - emit (attr, e, val) for float columns.
//   ID   : relation of (entity, key value) pairs.
//   DATA : source data addressed as (column, row, value).
//   META : meta module; reads key, float_attr, null_string and attr_alias_map.
// Columns listed in META:float_attr are converted with parse_float; values in
// META:null_string are skipped. `attr` is the alias from META:attr_alias_map
// when one exists, otherwise the column name itself.
// TYPE_MISMATCH diagnostics are disabled as a temporary workaround:
// https://relationalai.atlassian.net/browse/RAI-19187
@no_diagnostics(:TYPE_MISMATCH)
@outline
def parse_float_attributes[ID, DATA, META](attr in RelName, e in Entity, val in Float) =
    META:float_attr(col) and
    DATA(col, rec, raw) and
    not META:null_string(raw) and
    parse_float(raw, val) and
    DATA(META:key, rec, key_val) and
    ID(e, key_val) and
    (META:attr_alias_map[col] <++ col)(attr)
    from col, rec, key_val, raw
// parse_attributes - applies the column definitions (types, formats, and
// transformations) declared in a meta module to the loaded CSV data, producing
// (attribute, entity, value) facts for the data module.
// Example of a meta module:
// @inline def prepare_string[s in String] = uppercase[string_trim[s]]
//
// module my_meta
// module vehicle
// def entity_name = :Vehicle
// def key = :VEHICLE_ID
// def as_is_attr = {
// :BIDDER_NAME; :WEB_LINK;
// }
// def string_attr = {
// (:MAKE, prepare_string);
// (:MODEL, prepare_string);
// }
// def int_attr = {
// :BIDS;
// }
// def float_attr = {
// :VEHICLE_MARKET_PRICE;
// }
// def date_attr = {
// (:SOLD_DATE, "Y-m-d");
// }
// def datetime_attr = {
// (:CREATE_DATE, "y-m-dTH:M:S.sss");
// (:LAST_ACCESS_DATE, "y-m-dTH:M:S.sss");
// }
// def attr_alias_map = {
// (:MAKE, :make);
// (:MODEL, :model);
// (:VEHICLE_MARKET_PRICE, :marketPrice);
// (:BIDDER_NAME, :bidderName);
// (:BIDS, :bidCount);
// (:WEB_LINK, :webLink);
// (:SOLD_DATE, :soldDate);
// (:CREATE_DATE, :createDate);
// (:LAST_ACCESS_DATE, :lastAccessDate);
// }
// end
// end
// Union of every per-kind parser in this module: a tuple (attr, e, val) is
// produced when any one of them produces it for the same ID, DATA and META.
@inline
def parse_attributes[ID, DATA, META](attr in RelName, e in Entity, val) =
    // values taken unchanged or converted to a value type
    as_is_attributes[ID, DATA, META](attr, e, val);
    parse_value_type_attributes[ID, DATA, META](attr, e, val);
    // numeric columns
    parse_int_attributes[ID, DATA, META](attr, e, val);
    parse_float_attributes[ID, DATA, META](attr, e, val);
    // string columns
    parse_string_attributes[ID, DATA, META](attr, e, val);
    parse_string_transform_attributes[ID, DATA, META](attr, e, val);
    // temporal columns
    parse_date_attributes[ID, DATA, META](attr, e, val);
    parse_datetime_attributes[ID, DATA, META](attr, e, val)
// @outline
// def lookup_relation[relname, attr, ENTITY_GNF, DATA, REL_ENTITY_GNF](r, e, ue) =
// ENTITY_GNF(:id, e, id) and
// DATA(mosaic_meta:account:key, row, id) and
// DATA(attr, row, userId) and
// REL_ENTITY_GNF(:id, ue, userId) and
// r = relname
// from row, id, userId
end