-
Notifications
You must be signed in to change notification settings - Fork 1
/
datafabric_settings.py
63 lines (46 loc) · 1.56 KB
/
datafabric_settings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
"""
datafabric_settings.py

Module-level settings: config resource file paths and SEC EDGAR URL templates.

Jonathan Poczatek
4/21/2017
"""
################################
## Data Files and Directories ##
################################
# #Data Locations
# MAIN_DATA_DIR = 'data'
# RAW_DATA_DIR = 'raw_data'
# EXTRACTED_DATA_DIR = 'extracted_data'
# #Path Names
# RAW_DATA_PATH = "{0}/{1}".format(MAIN_DATA_DIR, RAW_DATA_DIR)
# EXTRACTED_DATA_PATH = "{0}/{1}".format(MAIN_DATA_DIR, EXTRACTED_DATA_DIR)
######################################################
## Scrape Lists and Reference Files and Directories ##
######################################################
# Scrape List Locations
# MAIN_SCRAPE_LIST_DIR = 'scrape_lists'
# STOCK_EXCHANGE_LIST_DIR = 'stock_exchanges'
# Directory holding the config resource files; the paths below are built
# relative to it using a forward slash as the separator.
CONFIG_RES_DIR = 'config'
# CSV file named investment_company_series_class.csv inside CONFIG_RES_DIR.
CIK_LIST_CSV = '/'.join((CONFIG_RES_DIR, 'investment_company_series_class.csv'))
# Text file named cik.coleft.c.txt inside CONFIG_RES_DIR.
CIK_LIST_PRASANNA = '/'.join((CONFIG_RES_DIR, 'cik.coleft.c.txt'))
# HDF5 (.h5) file named local_store.h5 inside CONFIG_RES_DIR.
LOCAL_HDF = '/'.join((CONFIG_RES_DIR, 'local_store.h5'))
# OTHER_SCRAPE_LIST_DIR = 'other_scrapes'
#####################
## Scraper Options ##
#####################
# GET_XML = False
# GET_TXT = False
# GET_HTML = True
# GET_XL = False
# #####################
# ## Extract Options ##
# #####################
# OUTPUT_PICKLE = True
# OUTPUT_JSON = False
###########
## URLs ###
###########
# EDGAR "browse-edgar" URL templates with str.format-style placeholders:
# `{0}` fills the `CIK` query parameter and `{1}` fills the `type` parameter.
# Adjacent string literals are concatenated at compile time, so each constant
# is still a single string; one query parameter per line for readability.
LINK_URL = (
    "https://www.sec.gov/cgi-bin/browse-edgar"
    "?action=getcompany"
    "&CIK={0}"
    "&type={1}"
    "&dateb="
    "&owner=exclude"
    "&count=100"
)
# Same query as LINK_URL, with `start=0` and `output=atom` added.
RSS_XML_URL = (
    "https://www.sec.gov/cgi-bin/browse-edgar"
    "?action=getcompany"
    "&CIK={0}"
    "&type={1}"
    "&dateb="
    "&owner=exclude"
    "&start=0"
    "&count=100"
    "&output=atom"
)
# Namespace table with a single entry: the Atom namespace URI mapped to None.
# NOTE(review): presumably handed to an XML parser so that Atom-namespaced
# tags come back without a prefix -- confirm against the caller.
NAMESPACES = {"http://www.w3.org/2005/Atom": None}