From 46acf12e96b20cc1297df9153fd8590e91b0c7a1 Mon Sep 17 00:00:00 2001 From: Jerry Jones <28451941+stratusjerry@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:56:26 +0000 Subject: [PATCH 1/4] update gitignore temp files --- .gitignore | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.gitignore b/.gitignore index ace83f0..7a51b5a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,14 @@ downloads *.db +*.db-journal +# IDE +.vscode + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# Distribution / packaging +build/ +dist/ +*.egg-info/ From 6087b3ca62bce3777ac73aae3169d19ccbef8f9a Mon Sep 17 00:00:00 2001 From: Jerry Jones <28451941+stratusjerry@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:56:36 +0000 Subject: [PATCH 2/4] fix typos --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0c189f8..617d8d9 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,8 @@ The program relies on the following IMDB tab separated files: usage: imdb-sqlite [OPTIONS] - Imports imdb tsv interface files into a new sqlitedatabase. Fetches them from - imdb if not present onthe machine. + Imports imdb tsv interface files into a new sqlite database. Fetches them from + imdb if not present on the machine. optional arguments: -h, --help show this help message and exit @@ -127,7 +127,7 @@ the following: ```sql -- // table aliases: st = show-title, et = episode-title SELECT st.primary_title, st.premiered, st.genres, e.season_number, - e.eposide_number, et.primary_title, r.rating, r.votes + e.episode_number, et.primary_title, r.rating, r.votes FROM titles AS st INNER JOIN episodes e ON ( e.show_title_id = st.title_id ) INNER JOIN titles et ON ( e.episode_title_id = et.title_id ) @@ -137,7 +137,7 @@ AND st.type = 'tvSeries' ORDER BY r.rating DESC ``` -**Find which productions both Robert Deniro and Al Pacino acted together on** +**Find which productions both Robert De Niro and Al Pacino acted together on** ```sql SELECT t.title_id, t.type, t.primary_title, t.premiered, t.genres, c1.characters AS 'Pacino played', c2.characters AS 'Deniro played' From 45aa900a51c83a255c3f977af41089c2b04cf0a5 Mon Sep 17 00:00:00 2001 From: Jerry Jones <28451941+stratusjerry@users.noreply.github.com> Date: Wed, 4 Sep 2024 01:49:39 +0000 Subject: [PATCH 3/4] fix multi-line help output --- imdb_sqlite/__main__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/imdb_sqlite/__main__.py b/imdb_sqlite/__main__.py index e752993..3de5466 100644 --- a/imdb_sqlite/__main__.py +++ b/imdb_sqlite/__main__.py @@ -289,17 +289,17 @@ def text_open(fn, encoding='utf-8'): def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description='Imports imdb tsv interface files into a new sqlite' - 'database. Fetches them from imdb if not present on' - 'the machine.' + description='''Imports imdb tsv interface files into a new sqlite + database. Fetches them from imdb if not present on + the machine.''' ) parser.add_argument('--db', metavar='FILE', default='imdb.db', help='Connection URI for the database to import into') parser.add_argument('--cache-dir', metavar='DIR', default='downloads', help='Download cache dir where the tsv files from imdb will be stored before the import') parser.add_argument('--no-index', action='store_true', - help='Do not create any indices. Massively slower joins, but cuts the DB file size ' - 'approximately in half') + help='''Do not create any indices. Massively slower joins, but cuts the DB file size + approximately in half''') parser.add_argument('--verbose', action='store_true', help='Show database interaction') opts = parser.parse_args() From 970ad6d9d5fa67d10050ac7bc51b077720672fbc Mon Sep 17 00:00:00 2001 From: Jerry Jones <28451941+stratusjerry@users.noreply.github.com> Date: Fri, 6 Sep 2024 18:14:45 +0000 Subject: [PATCH 4/4] add arg to delete tsv after import --- imdb_sqlite/__main__.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/imdb_sqlite/__main__.py b/imdb_sqlite/__main__.py index 3de5466..e41a2f5 100644 --- a/imdb_sqlite/__main__.py +++ b/imdb_sqlite/__main__.py @@ -240,7 +240,7 @@ def count_lines(f): return lines -def import_file(db, filename, table, column_mapping): +def import_file(db, filename, table, column_mapping, rm_tsv): """ Import a imdb file into a given table, using a specific tsv value to column mapping """ @@ -285,6 +285,10 @@ def text_open(fn, encoding='utf-8'): db.rollback() raise + if rm_tsv: + logger.info('Deleting file: {}'.format(filename)) + os.remove(filename) + def main(): parser = argparse.ArgumentParser( @@ -297,6 +301,8 @@ def main(): help='Connection URI for the database to import into') parser.add_argument('--cache-dir', metavar='DIR', default='downloads', help='Download cache dir where the tsv files from imdb will be stored before the import') + parser.add_argument('--rm-tsv', action='store_true', + help='Delete tsv after sql import. Useful if storage is limited') parser.add_argument('--no-index', action='store_true', help='''Do not create any indices. Massively slower joins, but cuts the DB file size approximately in half''') @@ -319,7 +325,7 @@ def main(): for filename, table_mapping in TSV_TABLE_MAP.items(): table, column_mapping = table_mapping import_file(db, os.path.join(opts.cache_dir, filename), - table, column_mapping) + table, column_mapping, opts.rm_tsv) if not opts.no_index: logger.info('Creating table indices ...')