Small fixes #38

Closed
wants to merge 6 commits
inb4404.py: 39 changes (27 additions & 12 deletions)
@@ -1,7 +1,7 @@
#!/usr/bin/python3
import urllib.request, urllib.error, urllib.parse, argparse, logging
import os, re, time
import http.client
import fileinput
from multiprocessing import Process
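# Process powers the default parallel mode: one worker per thread link (see run_multiple_process below)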

@@ -18,13 +18,17 @@ def main():
    parser.add_argument('-l', '--less', action='store_true', help='show less information (suppresses checking messages)')
parser.add_argument('-n', '--use-names', action='store_true', help='use thread names instead of the thread ids (...4chan.org/board/thread/thread-id/thread-name)')
parser.add_argument('-r', '--reload', action='store_true', help='reload the queue file every 5 minutes')
+    parser.add_argument('-s', '--sleep', action='store', help='sleep `N` seconds between every download', metavar='N') # TODO
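    # parsed but, per the TODO above, not yet applied between downloads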
parser.add_argument('-t', '--title', action='store_true', help='save original filenames')

+    parser.add_argument('--single-process', action='store_true', help='disable multiprocessing, download one thread at a time')

args = parser.parse_args()

if args.date:
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%Y-%m-%d %I:%M:%S %p')
else:
        logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%I:%M:%S %p')

thread = args.thread[0].strip()
if thread[:4].lower() == 'http':
@@ -73,7 +77,7 @@ def download_thread(thread_link, args):
if len(thread_link.split('/')) > 6:
thread_tmp = thread_link.split('/')[6].split('#')[0]

        if args.use_names or os.path.exists(os.path.join(workpath, 'downloads', board, thread_tmp)):
thread = thread_tmp

while True:
@@ -128,7 +132,7 @@ def download_thread(thread_link, args):
except urllib.error.HTTPError:
time.sleep(10)
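            # back off for 10 s, then retry once; a second HTTPError means the thread 404'd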
try:
                load(thread_link)
except urllib.error.HTTPError:
log.info('%s 404\'d', thread_link)
break
@@ -141,22 +145,34 @@ def download_thread(thread_link, args):
log.info('Checking ' + board + '/' + thread)
time.sleep(20)

+def run_single_process(links_list):
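    # sequential mode (--single-process): finish each thread before starting the next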
+    for l in links_list:
+        download_thread(l, args)
+
+def run_multiple_process(links_list, processes_list):
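    # default parallel mode: one worker process per link, recorded so the caller can track it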
+    for l in links_list:
+        process = Process(target=call_download_thread, args=(l, args, ))
+        process.start()
+        processes_list.append([process, l])
+
def download_from_file(filename):
running_links = []
while True:
processes = []
-        for link in [_f for _f in [line.strip() for line in open(filename) if line[:4] == 'http'] if _f]:
+        for link in [line.strip() for line in open(filename) if line[:4] == 'http']:
if link not in running_links:
running_links.append(link)
log.info('Added ' + link)

-                process = Process(target=call_download_thread, args=(link, args, ))
-                process.start()
-                processes.append([process, link])
+        if len(running_links) == 0:
+            log.fatal('`%s` is empty!' % filename)
+            return

+        if args.single_process:
+            run_single_process(running_links)
+        else:
+            run_multiple_process(running_links, processes)

-        if len(processes) == 0:
-            log.warning(filename + ' empty')

if args.reload:
time.sleep(60 * 5) # 5 minutes
links_to_remove = []
@@ -182,4 +198,3 @@ def download_from_file(filename):
main()
except KeyboardInterrupt:
pass
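For reviewers who want to try the new dispatch logic in isolation, here is a minimal, self-contained sketch of the pattern this PR introduces (sequential fallback versus one worker process per link). `download` is a hypothetical stand-in for `download_thread`; only the `--single-process` flag name and the two helper names are taken from the diff.

```python
#!/usr/bin/python3
# Minimal sketch of the dispatch pattern added in this PR.
# `download` is a hypothetical stand-in for download_thread().
import argparse
import time
from multiprocessing import Process

def download(link):
    print('downloading', link)
    time.sleep(1)

def run_single_process(links):
    # sequential fallback: one download at a time, in this process
    for link in links:
        download(link)

def run_multiple_process(links, processes):
    # default path: one worker process per link, tracked for joining later
    for link in links:
        p = Process(target=download, args=(link,))
        p.start()
        processes.append([p, link])

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--single-process', action='store_true')
    args = parser.parse_args()

    links = ['http://example.net/a', 'http://example.net/b']
    processes = []
    if args.single_process:
        run_single_process(links)
    else:
        run_multiple_process(links, processes)
        for p, link in processes:
            p.join()
```

The `if __name__ == '__main__':` guard matters here: on platforms where `multiprocessing` spawns rather than forks, each worker re-imports the module, so unguarded top-level code would be re-executed in every child.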