forked from rrayatec/esearch-rag
-
Notifications
You must be signed in to change notification settings - Fork 0
/
esearch.pipeline
73 lines (73 loc) · 1.4 KB
/
esearch.pipeline
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
{
  "description": "Customizable ingest pipeline for the 'RAG en Acción' index",
  "version": 1,
  "processors": [
    {
      "set": {
        "field": "text",
        "copy_from": "body_content",
        "override": false
      }
    },
    {
      "set": {
        "field": "metadata.url",
        "copy_from": "url",
        "ignore_empty_value": true
      }
    },
    {
      "set": {
        "field": "metadata.title",
        "copy_from": "title",
        "ignore_empty_value": true
      }
    },
    {
      "set": {
        "field": "metadata.addedToDB",
        "copy_from": "last_crawled_at",
        "ignore_empty_value": true
      }
    },
    {
      "set": {
        "field": "metadata.meta_description",
        "copy_from": "meta_description",
        "ignore_empty_value": true
      }
    },
    {
      "remove": {
        "field": [
          "additional_urls",
          "domains",
          "links",
          "url_path_dir2",
          "url_path",
          "url_path_dir1",
          "url_path_dir3",
          "body_content",
          "url_port",
          "url_host",
          "headings",
          "url_scheme",
          "title",
          "last_crawled_at",
          "is_truncated",
          "meta_description",
          "main",
          "url"
        ],
        "ignore_missing": true
      }
    },
    {
      "inference": {
        "model_id": ".multilingual-e5-small_linux-x86_64",
        "input_output": [
          {
            "input_field": "text",
            "output_field": "vector"
          }
        ]
      }
    }
  ]
}