-
Notifications
You must be signed in to change notification settings - Fork 0
/
config_sample.ini
125 lines (96 loc) · 3.27 KB
/
config_sample.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#
# filmdata - configuration
#
[DEFAULT]
# base paths; later values refer to them with the %(name)s syntax
# (e.g. %(sources_dir)s is used by the path keys in [netflix] and [imdb])
# root directory for all the data
data_dir = data
# directory for the raw source data (e.g. xml files from netflix), kept under data_dir
sources_dir = %(data_dir)s/sources
[core]
# general settings: which sources, roles, title types and sink are active
# the master source for the data (default is imdb)
# this source is responsible for loading the titles, actors, etc. and linking them
master_source = imdb
# the source used to filter out the films for metrics (default is imdb)
# usually this just means that the number of votes on imdb is used
# to cull the films so that only the ones with good data are considered
master_data = imdb
# space-separated list of title roles to include (actor, actress and/or director)
active_role_types = actor actress director
# space-separated list of title types to include (film, tv, video, game)
active_title_types = film
# the sink to use ('sqlalchemy' only for now)
active_sink = sqlalchemy
# upper end of the scale ratings are normalized to (they range from 1 to X, default is 10)
max_rating = 10
[sqlalchemy]
# the sqlalchemy connection url string (assuming you're using sqlalchemy)
# general form: dialect+driver://<user>:<pass>@<host>:<port>/<dbname>
# replace every <...> placeholder with your own values
# NOTE: this file uses %(name)s references, so if it is read with interpolation
# enabled a literal % in the password has to be written as %%
# see http://www.sqlalchemy.org/docs/05/reference/sqlalchemy/connections.html#creating-engines
url = postgresql+psycopg2://<user>:<pass>@<host>:5432/<dbname>
[netflix]
# settings for the netflix source: api credentials, urls and local paths
# your netflix api key (blank in this sample; fill in your own)
consumer_key =
# your netflix api secret (blank in this sample; fill in your own)
consumer_secret =
# urls to fetch netflix data from
# titles_url is the entire catalog
titles_url = http://api.netflix.com/catalog/titles/index
# title_url_base gets details on an individual movie, which is needed for the ratings
title_url_base = http://api.netflix.com/catalog/titles/movies
# where to download netflix stuff (a netflix directory under sources_dir)
path = %(sources_dir)s/netflix
# presumably the downloaded catalog file and the directory of per-title
# detail files, both under path - TODO confirm against the fetch code
titles_xml_path = %(path)s/titles.xml
titles_dir_path = %(path)s/titles
[imdb]
# settings for the imdb source: remote location of the data files and local paths
# ftp site to fetch imdb plain text data files (see http://www.imdb.com/interfaces)
url = ftp://ftp.fu-berlin.de/pub/misc/movies/database
# extension of those files
ext = list.gz
# root directory for imdb raw data (an imdb directory under sources_dir)
path = %(sources_dir)s/imdb
# where to store each imdb file
# NOTE(review): the local names end in .list while the urls below end in the
# ext value (list.gz), and the aka file is aka_titles locally but aka-titles
# remotely; presumably the files are unpacked after download - confirm
director_path = %(path)s/directors.list
actor_path = %(path)s/actors.list
actress_path = %(path)s/actresses.list
aka_path = %(path)s/aka_titles.list
rating_path = %(path)s/ratings.list
# full url for fetching each imdb file, built from url and ext above
director_url = %(url)s/directors.%(ext)s
actor_url = %(url)s/actors.%(ext)s
actress_url = %(url)s/actresses.%(ext)s
aka_url = %(url)s/aka-titles.%(ext)s
rating_url = %(url)s/ratings.%(ext)s
[test]
# settings that only matter when running the tests
# whether or not to test the fetching/downloading of the source data (true/false)
# you probably want this off, which is the default
fetch = false
# tmp dir to use for storing fetched data when enabled (ONLY USED FOR RUNNING TESTS!)
test_data_dir = data_test
# logging ...
# the sections from here to the end of the file configure logging
# NOTE(review): the layout looks like the python logging.config.fileConfig format - confirm
[loggers]
# comma-separated logger names; each name has its own [logger_<name>] section below
keys = root, filmdata, sqlalchemy
[handlers]
# handler names; each name has its own [handler_<name>] section below
keys = console
[formatters]
# formatter names; each name has its own [formatter_<name>] section below
keys = generic
[logger_root]
# the root logger: minimum level WARN, output goes to the console handler
level = WARN
handlers = console
[logger_filmdata]
# logger for the filmdata code (qualname = filmdata)
level = WARN
# deliberately empty: records propagate to the root logger, which already
# writes to the console handler; listing console here as well would print
# every filmdata record twice
handlers =
qualname = filmdata
[logger_sqlalchemy]
# logger for the sqlalchemy engine (qualname = sqlalchemy.engine)
# "level = INFO" logs SQL queries.
# "level = DEBUG" logs SQL queries and results.
# "level = WARN" logs neither. (Recommended for production systems.)
level = WARN
# deliberately empty: records propagate to the root logger, which already
# writes to the console handler; listing console here as well would print
# every sqlalchemy record twice
handlers =
qualname = sqlalchemy.engine
[handler_console]
# the console handler: a StreamHandler constructed with sys.stderr
class = StreamHandler
# constructor arguments written as a python tuple
args = (sys.stderr,)
# NOTSET here; the WARN thresholds are set in the [logger_*] sections
level = NOTSET
formatter = generic
[formatter_generic]
# layout of a log line: time,milliseconds level [logger name] message
# the level name is padded/truncated to exactly 5 characters (-5.5s)
# NOTE(review): the %(...)s fields below are log record attributes, not keys of
# this file; presumably this section is read without interpolation - confirm
format = %(asctime)s,%(msecs)03d %(levelname)-5.5s [%(name)s] %(message)s
# time of day only (hours:minutes:seconds), no date
datefmt = %H:%M:%S