diff --git a/inb4404.py b/inb4404.py index 6d60325..28e9e7f 100755 --- a/inb4404.py +++ b/inb4404.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 import urllib.request, urllib.error, urllib.parse, argparse, logging import os, re, time -import http.client +import http.client import fileinput from multiprocessing import Process @@ -18,13 +18,17 @@ def main(): parser.add_argument('-l', '--less', action='store_true', help='show less information (surpresses checking messages)') parser.add_argument('-n', '--use-names', action='store_true', help='use thread names instead of the thread ids (...4chan.org/board/thread/thread-id/thread-name)') parser.add_argument('-r', '--reload', action='store_true', help='reload the queue file every 5 minutes') + parser.add_argument('-s', '--sleep', action='store', help='sleep `N` seconds between every download', metavar='N') # TODO parser.add_argument('-t', '--title', action='store_true', help='save original filenames') + + parser.add_argument('--single-process', action='store_true', help='disable multiprocessing, download one thread at a time') + args = parser.parse_args() if args.date: logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%Y-%m-%d %I:%M:%S %p') else: - logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%I:%M:%S %p') + logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%I:%M:%S %p') thread = args.thread[0].strip() if thread[:4].lower() == 'http': @@ -73,7 +77,7 @@ def download_thread(thread_link, args): if len(thread_link.split('/')) > 6: thread_tmp = thread_link.split('/')[6].split('#')[0] - if args.use_names or os.path.exists(os.path.join(workpath, 'downloads', board, thread_tmp)): + if args.use_names or os.path.exists(os.path.join(workpath, 'downloads', board, thread_tmp)): thread = thread_tmp while True: @@ -128,7 +132,7 @@ def download_thread(thread_link, args): except urllib.error.HTTPError: time.sleep(10) try: - load(thread_link) + load(thread_link) except urllib.error.HTTPError: log.info('%s 404\'d', thread_link) break @@ -141,22 +145,34 @@ def download_thread(thread_link, args): log.info('Checking ' + board + '/' + thread) time.sleep(20) +def run_single_process(links_list): + for l in links_list: + download_thread(l, args) + +def run_multiple_process(links_list, processes_list): + for l in links_list: + process = Process(target=call_download_thread, args=(l, args, )) + process.start() + processes_list.append([process, l]) + def download_from_file(filename): running_links = [] while True: processes = [] - for link in [_f for _f in [line.strip() for line in open(filename) if line[:4] == 'http'] if _f]: + for link in [line.strip() for line in open(filename) if line[:4] == 'http']: if link not in running_links: running_links.append(link) log.info('Added ' + link) - process = Process(target=call_download_thread, args=(link, args, )) - process.start() - processes.append([process, link]) + if len(running_links) == 0: + log.fatal('`%s` is empty!' % filename) + return + + if args.single_process: + run_single_process(running_links) + else: + run_multiple_process(running_links, processes) - if len(processes) == 0: - log.warning(filename + ' empty') - if args.reload: time.sleep(60 * 5) # 5 minutes links_to_remove = [] @@ -182,4 +198,3 @@ def download_from_file(filename): main() except KeyboardInterrupt: pass -