From 4c4292531c0ca3b5d1cab7c371e03a386a07f318 Mon Sep 17 00:00:00 2001 From: Jonathan Adamczewski Date: Sat, 12 Nov 2022 23:48:45 -0800 Subject: [PATCH 1/4] Output all attached images --- parser.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/parser.py b/parser.py index 2b0dd96..1c1058e 100644 --- a/parser.py +++ b/parser.py @@ -54,31 +54,32 @@ def tweet_json_to_markdown(tweet, username, archive_media_folder, output_media_f if 'url' in url and 'expanded_url' in url: body = body.replace(url['url'], url['expanded_url']) # replace image URLs with markdown image links to local files - if 'entities' in tweet and 'media' in tweet['entities']: - for media in tweet['entities']['media']: + if 'extended_entities' in tweet and 'media' in tweet['extended_entities']: + markdown = '' + for media in tweet['extended_entities']['media']: if 'url' in media and 'media_url' in media: original_url = media['url'] original_expanded_url = media['media_url'] original_filename = os.path.split(original_expanded_url)[1] local_filename = os.path.join(archive_media_folder, tweet_id_str + '-' + original_filename) new_url = output_media_folder_name + tweet_id_str + '-' + original_filename + markdown += '' if not markdown and body.startswith(original_url) else '\n\n' if os.path.isfile(local_filename): # Found a matching image, use this one shutil.copy(local_filename, new_url) - markdown = f'![]({new_url})' + markdown += f'![]({new_url})' else: # Is there any other file that includes the tweet_id in its filename? media_filenames = glob.glob(os.path.join(archive_media_folder, tweet_id_str + '*')) if len(media_filenames) > 0: - markdown = '' for media_filename in media_filenames: media_url = f'{output_media_folder_name}{os.path.split(media_filename)[-1]}' shutil.copy(media_filename, media_url) - markdown += f'\n\n\n{media_url}' + markdown += f'\n{media_url}' else: print(f'Warning: missing local file: {local_filename}. 
Using original link instead: {original_url} (expands to {original_expanded_url})') - markdown = f'![]({original_url})' - body = body.replace(original_url, markdown) + markdown += f'![]({original_url})' + body = body.replace(original_url, markdown) # append the original Twitter URL as a link body += f'\n\n(Originally on Twitter: [{timestamp_str}](https://twitter.com/{username}/status/{tweet_id_str}))' return timestamp, body From fe779957d141fb342cce7bf94881533f39ae9e8c Mon Sep 17 00:00:00 2001 From: Jonathan Adamczewski Date: Sun, 13 Nov 2022 12:57:00 -0800 Subject: [PATCH 2/4] Use equality comparison If body starts with original_url, then - it seems - the URL is the only thing the body contains, so an exact equality check expresses the condition more directly than startswith. --- parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser.py b/parser.py index 1c1058e..cd7f23d 100644 --- a/parser.py +++ b/parser.py @@ -63,7 +63,7 @@ def tweet_json_to_markdown(tweet, username, archive_media_folder, output_media_f original_filename = os.path.split(original_expanded_url)[1] local_filename = os.path.join(archive_media_folder, tweet_id_str + '-' + original_filename) new_url = output_media_folder_name + tweet_id_str + '-' + original_filename - markdown += '' if not markdown and body.startswith(original_url) else '\n\n' + markdown += '' if not markdown and body == original_url else '\n\n' if os.path.isfile(local_filename): # Found a matching image, use this one shutil.copy(local_filename, new_url) From 5f800c545f715e5dea214d1ff6d11bde27fb4ad8 Mon Sep 17 00:00:00 2001 From: Jonathan Adamczewski Date: Sun, 13 Nov 2022 13:17:22 -0800 Subject: [PATCH 3/4] Move evaluation of original_url out of loop --- parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parser.py b/parser.py index cd7f23d..6dbe333 100644 --- a/parser.py +++ b/parser.py @@ -54,11 +54,11 @@ def tweet_json_to_markdown(tweet, username, archive_media_folder, output_media_f if 'url' in url and 'expanded_url' in url: body = body.replace(url['url'], 
url['expanded_url']) # replace image URLs with markdown image links to local files - if 'extended_entities' in tweet and 'media' in tweet['extended_entities']: + if 'entities' in tweet and 'media' in tweet['entities'] and 'extended_entities' in tweet and 'media' in tweet['extended_entities']: + original_url = tweet['entities']['media'][0]['url'] markdown = '' for media in tweet['extended_entities']['media']: if 'url' in media and 'media_url' in media: - original_url = media['url'] original_expanded_url = media['media_url'] original_filename = os.path.split(original_expanded_url)[1] local_filename = os.path.join(archive_media_folder, tweet_id_str + '-' + original_filename) From 53b8ff3b46ac05bb4d3255094f61bd8c515048d2 Mon Sep 17 00:00:00 2001 From: Jonathan Adamczewski Date: Sun, 13 Nov 2022 13:23:25 -0800 Subject: [PATCH 4/4] remove accidental paste --- parser.py | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/parser.py b/parser.py index a77b2a5..84de6ff 100644 --- a/parser.py +++ b/parser.py @@ -5,32 +5,7 @@ This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation, ei if 'entities' in tweet and 'media' in tweet['entities'] and 'extended_entities' in tweet and 'media' in tweet['extended_entities']: - original_url = tweet['entities']['media'][0]['url'] - markdown = '' - for media in tweet['extended_entities']['media']: - if 'url' in media and 'media_url' in media: - original_expanded_url = media['media_url'] - original_filename = os.path.split(original_expanded_url)[1] - local_filename = os.path.join(archive_media_folder, tweet_id_str + '-' + original_filename) - new_url = output_media_folder_name + tweet_id_str + '-' + original_filename - markdown += '' if not markdown and body == original_url else '\n\n' - if os.path.isfile(local_filename): - # Found a matching image, use this one - 
shutil.copy(local_filename, new_url) - markdown += f'![]({new_url})' - else: - # Is there any other file that includes the tweet_id in its filename? - media_filenames = glob.glob(os.path.join(archive_media_folder, tweet_id_str + '*')) - if len(media_filenames) > 0: - for media_filename in media_filenames: - media_url = f'{output_media_folder_name}{os.path.split(media_filename)[-1]}' - shutil.copy(media_filename, media_url) - markdown += f'\n{media_url}' - else: - print(f'Warning: missing local file: {local_filename}. Using original link instead: {original_url} (expands to {original_expanded_url})') - markdown += f'![]({original_url})' - body = body.replace(original_url, markdown)ther version 3 of the License, or + the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful,