INPUT_SCHEMA.json

{
    "title": "Input schema for Reddit scraper",
    "description": "Scrape information about posts, communities and users from https://www.reddit.com/ without logging in.",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "If you already have URL(s) of page(s) you wish to scrape, you can set them here.",
            "editor": "stringList",
            "placeholderValue": "URL",
            "patternValue": "^$|https:\\/\\/www\\.reddit\\.com\\/*.*",
            "uniqueItems": true
        },  
        "useBuiltInSearch":{
            "title": "Use built-in search and ignore startUrls",
            "type": "boolean",
            "description": "Use the fields below to perform a search and scrape the results.",
            "editor": "checkbox",
            "sectionCaption": "Search",
            "sectionDescription": "These fields will be used for the search"
        },
        "searches": {
            "title": "Search Term",
            "type": "array",
            "description": "Here you can provide a search query which will be used to search Reddit's topics.",
            "editor": "stringList"
        },
        "type": {
            "title": "Search type",
            "type": "string",
            "description": "Select the type of content you want to scrape",
            "editor": "select",
            "enum": [
                "posts",
                "communities_and_users"
            ],
            "enumTitles": [
                "Posts",
                "Communities and users"
            ]
        },
        "maxItems": {
            "title": "Maximum Number of Items",
            "type": "integer",
            "description": "The maximum number of items that will be saved in the dataset. If you are scraping Communities&Users, remember that each category inside a community is saved as a separate item.",
            "editor": "number",
            "prefill": 50,
            "default": 50
        },
        "maxPostCount": {
            "title": "Maximum Post Count",
            "type": "integer",
            "description": "The maximum number of posts that will be scraped for each Posts Page or Communities&Users URL",
            "editor": "number",
            "prefill": 50,
            "default": 50
        },
        "maxComments": {
            "title": "Maximum Comments Count",
            "type": "integer",
            "description": "The maximum number of comments that will be scraped for each Comments Page.",
            "editor": "number",
            "prefill": 50,
            "default": 50
        },
        "maxCommunitiesAndUsers": {
            "title": "Maximum Communities & Users Count",
            "type": "integer",
            "description": "The maximum number of `Communities & Users` pages that will be scraped if your search or startUrl is of the Communities&Users type.",
            "editor": "number",
            "prefill": 50,
            "default": 50
        },        
        "extendOutputFunction": {
            "title": "Extended Output Function",
            "type": "string",
            "description": "Here you can write your custom JavaScript code to extract custom data from the page.",
            "editor": "javascript",
            "prefill": "async () => {\n  return $('title').text();\n\n}"
        },
        "proxy": {
            "title": "Proxy configuration",
            "type": "object",
            "description": "Either use Apify proxy, or provide your own proxy servers.",
            "default": {
                "useApifyProxy": true
            },
            "editor": "proxy"
        }
    },
    "required": [
    ]
}