-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathINPUT_SCHEMA.json
95 lines (95 loc) · 3.68 KB
/
INPUT_SCHEMA.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
{
"title": "Input schema for Reddit scraper",
"description": "Scrape information about posts, communities and users from https://www.reddit.com/ without login.",
"type": "object",
"schemaVersion": 1,
"properties": {
"startUrls": {
"title": "Start URLs",
"type": "array",
"description": "If you already have URL(s) of page(s) you wish to scrape, you can set them here.",
"editor": "stringList",
"placeholderValue": "URL",
"patternValue": "^$|https:\\/\\/www\\.reddit\\.com\\/*.*",
"uniqueItems": true
},
"useBuiltInSearch":{
"title": "Use built-in search and ignore startUrls",
"type": "boolean",
"description": "Use the fields below to perform a search and scrape the results",
"editor": "checkbox",
"sectionCaption": "Search",
"sectionDescription": "These fields will be used for the search"
},
"searches": {
"title": "Search Term",
"type": "array",
"description": "Here you can provide a search query which will be used to search Reddit's topics.",
"editor": "stringList"
},
"type": {
"title": "Search type",
"type": "string",
"description": "Select the type of content you want to scrape",
"editor": "select",
"enum": [
"posts",
"communities_and_users"
],
"enumTitles": [
"Posts",
"Communities and users"
]
},
"maxItems": {
"title": "Maximum Number of Items",
"type": "integer",
"description": "The maximum number of items that will be saved in the dataset. If you are scraping Communities&Users, remember that each category inside a community is saved as a separate item.",
"editor": "number",
"prefill": 50,
"default": 50
},
"maxPostCount": {
"title": "Maximum Post Count",
"type": "integer",
"description": "The maximum number of posts that will be scraped for each Posts Page or Communities&Users URL",
"editor": "number",
"prefill": 50,
"default": 50
},
"maxComments": {
"title": "Maximum Comments Count",
"type": "integer",
"description": "The maximum number of comments that will be scraped for each Comments Page.",
"editor": "number",
"prefill": 50,
"default": 50
},
"maxCommunitiesAndUsers": {
"title": "Maximum Communities & Users Count",
"type": "integer",
"description": "The maximum number of `Communities & Users` pages that will be scraped if your search or startUrl is of the Communities&Users type.",
"editor": "number",
"prefill": 50,
"default": 50
},
"extendOutputFunction": {
"title": "Extended Output Function",
"type": "string",
"description": "Here you can write your custom javascript code to extract custom data from the page.",
"editor": "javascript",
"prefill": "async () => {\n return $('title').text();\n\n}"
},
"proxy": {
"title": "Proxy configuration",
"type": "object",
"description": "Either use Apify proxy, or provide your own proxy servers.",
"default": {
"useApifyProxy": true
},
"editor": "proxy"
}
},
"required": [
]
}