diff --git a/.gitignore b/.gitignore index ace83f0..7a51b5a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,14 @@ downloads *.db +*.db-journal +# IDE +.vscode + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# Distribution / packaging +build/ +dist/ +*.egg-info/ diff --git a/README.md b/README.md index 0c189f8..617d8d9 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,8 @@ The program relies on the following IMDB tab separated files: usage: imdb-sqlite [OPTIONS] - Imports imdb tsv interface files into a new sqlitedatabase. Fetches them from - imdb if not present onthe machine. + Imports imdb tsv interface files into a new sqlite database. Fetches them from + imdb if not present on the machine. optional arguments: -h, --help show this help message and exit @@ -127,7 +127,7 @@ the following: ```sql -- // table aliases: st = show-title, et = episode-title SELECT st.primary_title, st.premiered, st.genres, e.season_number, - e.eposide_number, et.primary_title, r.rating, r.votes + e.episode_number, et.primary_title, r.rating, r.votes FROM titles AS st INNER JOIN episodes e ON ( e.show_title_id = st.title_id ) INNER JOIN titles et ON ( e.episode_title_id = et.title_id ) @@ -137,7 +137,7 @@ AND st.type = 'tvSeries' ORDER BY r.rating DESC ``` -**Find which productions both Robert Deniro and Al Pacino acted together on** +**Find which productions both Robert De Niro and Al Pacino acted together on** ```sql SELECT t.title_id, t.type, t.primary_title, t.premiered, t.genres, c1.characters AS 'Pacino played', c2.characters AS 'Deniro played' diff --git a/imdb_sqlite/__main__.py b/imdb_sqlite/__main__.py index e752993..e41a2f5 100644 --- a/imdb_sqlite/__main__.py +++ b/imdb_sqlite/__main__.py @@ -240,7 +240,7 @@ def count_lines(f): return lines -def import_file(db, filename, table, column_mapping): +def import_file(db, filename, table, column_mapping, rm_tsv): """ Import a imdb file into a given table, using a specific tsv value to column mapping """ @@ -285,21 +285,27 @@ def text_open(fn, encoding='utf-8'): db.rollback() raise + if rm_tsv: + logger.info('Deleting file: {}'.format(filename)) + os.remove(filename) + def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description='Imports imdb tsv interface files into a new sqlite' - 'database. Fetches them from imdb if not present on' - 'the machine.' + description='''Imports imdb tsv interface files into a new sqlite + database. Fetches them from imdb if not present on + the machine.''' ) parser.add_argument('--db', metavar='FILE', default='imdb.db', help='Connection URI for the database to import into') parser.add_argument('--cache-dir', metavar='DIR', default='downloads', help='Download cache dir where the tsv files from imdb will be stored before the import') + parser.add_argument('--rm-tsv', action='store_true', + help='Delete tsv after sql import. Useful if storage is limited') parser.add_argument('--no-index', action='store_true', - help='Do not create any indices. Massively slower joins, but cuts the DB file size ' - 'approximately in half') + help='''Do not create any indices. Massively slower joins, but cuts the DB file size + approximately in half''') parser.add_argument('--verbose', action='store_true', help='Show database interaction') opts = parser.parse_args() @@ -319,7 +325,7 @@ def main(): for filename, table_mapping in TSV_TABLE_MAP.items(): table, column_mapping = table_mapping import_file(db, os.path.join(opts.cache_dir, filename), - table, column_mapping) + table, column_mapping, opts.rm_tsv) if not opts.no_index: logger.info('Creating table indices ...')