config_sample.ini

#
# filmdata - configuration
#

[DEFAULT]
# values defined here can be referenced from the other sections as %(name)s
# (the [netflix] and [imdb] sections build their paths from sources_dir)

# root directory for all the data
# NOTE(review): relative path; presumably resolved against the working directory - confirm
data_dir = data

# directory for the sources data (e.g. xml files from netflix)
sources_dir = %(data_dir)s/sources

[core]
# the master source for the data (default is imdb)
# this source is responsible for loading the titles, actors, etc. and linking them
master_source = imdb

# the source used to filter out the films for metrics (default is imdb)
# usually this just means that the number of votes on imdb is used
# to cull the films so that only the ones with good data are considered
master_data = imdb

# space-separated list of title roles to include (actor, actress, and/or director)
active_role_types = actor actress director

# space-separated list of title types to include (film, tv, video, game)
active_title_types = film

# the sink to use ('sqlalchemy' is the only one for now)
active_sink = sqlalchemy

# upper bound the ratings are normalized to (they range from 1 to max_rating, default is 10)
max_rating = 10

[sqlalchemy]
# the sqlalchemy connection url string (assuming you're using sqlalchemy)
# format: dialect+driver://<user>:<pass>@<host>:<port>/<dbname>
# see https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls
# NOTE: if this file is read with configparser interpolation enabled, a literal
# percent sign in the url (e.g. in a url-encoded password) must be doubled
url = postgresql+psycopg2://<user>:<pass>@<host>:5432/<dbname>

[netflix]
# your netflix api key (blank in this sample; fill in your own)
consumer_key =

# your netflix api secret (blank in this sample; fill in your own)
consumer_secret =

# urls to fetch netflix data from
# titles_url is the entire catalog
# title_url_base gets details on an individual movie, which is needed for the ratings
titles_url = http://api.netflix.com/catalog/titles/index
title_url_base = http://api.netflix.com/catalog/titles/movies

# where to download netflix stuff
# path is the root directory; the two entries after it are built from it
path = %(sources_dir)s/netflix
titles_xml_path = %(path)s/titles.xml
titles_dir_path = %(path)s/titles

[imdb]
# ftp site to fetch imdb plain text data files (see http://www.imdb.com/interfaces)
url = ftp://ftp.fu-berlin.de/pub/misc/movies/database

# extension of those files (appended to each file name in the *_url entries)
ext = list.gz

# root directory for imdb raw data
path = %(sources_dir)s/imdb

# where to store each imdb file
# NOTE(review): these names end in .list while the fetched files end in .list.gz;
# presumably the files are decompressed when stored - confirm against the fetch code
# NOTE(review): the aka file is named aka_titles here but aka-titles in aka_url;
# presumably intentional - confirm
director_path = %(path)s/directors.list
actor_path = %(path)s/actors.list
actress_path = %(path)s/actresses.list
aka_path = %(path)s/aka_titles.list
rating_path = %(path)s/ratings.list

# full url for fetching each imdb file (built from url and ext in this section)
director_url = %(url)s/directors.%(ext)s
actor_url = %(url)s/actors.%(ext)s
actress_url = %(url)s/actresses.%(ext)s
aka_url = %(url)s/aka-titles.%(ext)s
rating_url = %(url)s/ratings.%(ext)s

[test]
# true/false
# whether or not the tests should exercise fetching/downloading of the source data
# (probably want this off, which is the default)
fetch = false

# tmp dir to use for storing fetched data when fetch is enabled (ONLY USED FOR RUNNING TESTS!)
test_data_dir = data_test


# logging configuration
# NOTE(review): the remaining sections follow the layout used by Python's
# logging.config.fileConfig - confirm that is how they are loaded
# each name listed under "keys" has a matching [logger_<name>],
# [handler_<name>] or [formatter_<name>] section in this file
[loggers]
keys = root, filmdata, sqlalchemy

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
# root logger: messages at WARN and above go to the console handler
level = WARN
handlers = console

[logger_filmdata]
# logger for the "filmdata" qualified name
# handlers is intentionally empty: records propagate to the root logger, which
# already writes to the console handler; listing console here as well would
# emit every message twice
level = WARN
handlers =
qualname = filmdata

[logger_sqlalchemy]
# logger for the "sqlalchemy.engine" qualified name
# "level = INFO" logs SQL queries.
# "level = DEBUG" logs SQL queries and results.
# "level = WARN" logs neither.  (Recommended for production systems.)
# handlers is intentionally empty: records propagate to the root logger, which
# already writes to the console handler; listing console here as well would
# emit every message twice
level = WARN
handlers =
qualname = sqlalchemy.engine

[handler_console]
# writes log records to stderr using the "generic" formatter
# level NOTSET means the handler itself filters nothing; the logger levels
# decide what gets emitted
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
# the tokens in "format" are logging record attributes filled in per message,
# not references to other keys in this file
# sample output: 12:34:56,789 WARN  [filmdata] message text
format = %(asctime)s,%(msecs)03d %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S