Merge pull request #157 from blackjack4494/master

Release 2020.09.30
blackjack4494 · Sep 30, 2020 · f3e826e · f3e826e
2 parents e265ab4 + 3d6a47d
commit f3e826e
Show file tree

Hide file tree

Showing 12 changed files with 101 additions and 33 deletions.
diff --git a/README.md b/README.md
@@ -50,14 +50,17 @@ Using curl:
 **Windows** users can download [youtube-dlc.exe](https://github.com/blackjack4494/youtube-dlc/releases/latest/download/youtube-dlc.exe) (**do not** put in `C:\Windows\System32`!).  
 
 **Compile**
-To build the Windows executable yourself
+To build the Windows executable yourself (without version info!)
 
     python -m pip install --upgrade pyinstaller
     pyinstaller.exe youtube_dlc\__main__.py --onefile --name youtube-dlc
 
 Or simply execute the `make_win.bat` if pyinstaller is installed.
 There will be a `youtube-dlc.exe` in `/dist`  
 
+The new way to build the Windows executable is to run `python pyinst.py` (please use 64-bit Python 3).  
+For the 32-bit version, use a 32-bit Python (Python 3 preferred here as well) and run `python pyinst32.py`.  
+
 For Unix:
 You will need the required build tools  
 python, make (GNU), pandoc, zip, nosetests  

diff --git a/pyinst.py b/pyinst.py
@@ -22,7 +22,9 @@
 if len(_OLD_VERSION) > 1:
     old_rev = _OLD_VERSION[1]
 
-ver = f'{datetime.today():%Y.%m.%d}'
+now = datetime.now()
+# ver = f'{datetime.today():%Y.%m.%d}'
+ver = now.strftime("%Y.%m.%d")
 rev = ''
 
 if old_ver == ver:

diff --git a/pyinst32.py b/pyinst32.py
@@ -22,7 +22,9 @@
 if len(_OLD_VERSION) > 1:
     old_rev = _OLD_VERSION[1]
 
-ver = f'{datetime.today():%Y.%m.%d}'
+now = datetime.now()
+# ver = f'{datetime.today():%Y.%m.%d}'
+ver = now.strftime("%Y.%m.%d")
 rev = ''
 
 if old_ver == ver:

diff --git a/scripts/update-version.py b/scripts/update-version.py
@@ -15,7 +15,9 @@
 if len(_OLD_VERSION) > 1:
     old_rev = _OLD_VERSION[1]
 
-ver = f'{datetime.today():%Y.%m.%d}'
+now = datetime.now()
+# ver = f'{datetime.today():%Y.%m.%d}'
+ver = now.strftime("%Y.%m.%d")
 rev = ''
 
 if old_ver == ver:

diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py
@@ -164,7 +164,8 @@ class YoutubeDL(object):
     simulate:          Do not download the video files.
     format:            Video format code. See options.py for more information.
     outtmpl:           Template for output names.
-    restrictfilenames: Do not allow "&" and spaces in file names
+    restrictfilenames: Do not allow "&" and spaces in file names.
+    trim_file_name:    Limit length of filename (extension excluded).
     ignoreerrors:      Do not stop on download errors.
     force_generic_extractor: Force downloader to use the generic extractor
     nooverwrites:      Prevent overwriting files.
@@ -732,6 +733,16 @@ def prepare_filename(self, info_dict):
             # title "Hello $PATH", we don't want `$PATH` to be expanded.
             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 
+            # https://github.com/blackjack4494/youtube-dlc/issues/85
+            trim_file_name = self.params.get('trim_file_name', False)
+            if trim_file_name:
+                fn_groups = filename.rsplit('.')
+                ext = fn_groups[-1]
+                sub_ext = ''
+                if len(fn_groups) > 2:
+                    sub_ext = fn_groups[-2]
+                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
+
             # Temporary fix for #4787
             # 'Treat' all problem characters by passing filename through preferredencoding
             # to workaround encoding issues with subprocess on python2 @ Windows
@@ -1856,12 +1867,14 @@ def dl(name, info):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
             subtitles = info_dict['requested_subtitles']
+            ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                 else:
+                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                     if sub_info.get('data') is not None:
                         try:
                             # Use newline='' to prevent conversion of newline characters
@@ -1873,11 +1886,14 @@ def dl(name, info):
                             return
                     else:
                         try:
-                            dl(sub_filename, sub_info)
-                        except (ExtractorError, IOError, OSError, ValueError,
-                                compat_urllib_error.URLError,
-                                compat_http_client.HTTPException,
-                                socket.error) as err:
+                            if self.params.get('sleep_interval_subtitles', False):
+                                dl(sub_filename, sub_info)
+                            else:
+                                sub_data = ie._request_webpage(
+                                    sub_info['url'], info_dict['id'], note=False).read()
+                                with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+                                    subfile.write(sub_data)
+                        except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                             self.report_warning('Unable to download subtitle for "%s": %s' %
                                                 (sub_lang, error_to_compat_str(err)))
                             continue

diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py
@@ -390,6 +390,7 @@ def parse_retries(retries):
         'rejecttitle': decodeOption(opts.rejecttitle),
         'max_downloads': opts.max_downloads,
         'prefer_free_formats': opts.prefer_free_formats,
+        'trim_file_name': opts.trim_file_name,
         'verbose': opts.verbose,
         'dump_intermediate_pages': opts.dump_intermediate_pages,
         'write_pages': opts.write_pages,
@@ -427,6 +428,7 @@ def parse_retries(retries):
         'call_home': opts.call_home,
         'sleep_interval': opts.sleep_interval,
         'max_sleep_interval': opts.max_sleep_interval,
+        'sleep_interval_subtitles': opts.sleep_interval_subtitles,
         'external_downloader': opts.external_downloader,
         'list_thumbnails': opts.list_thumbnails,
         'playlist_items': opts.playlist_items,

diff --git a/youtube_dlc/extractor/bandcamp.py b/youtube_dlc/extractor/bandcamp.py
@@ -33,14 +33,17 @@ class BandcampIE(InfoExtractor):
         'info_dict': {
             'id': '1812978515',
             'ext': 'mp3',
-            'title': "youtube-dlc  \"'/\\\u00e4\u21ad - youtube-dlc test song \"'/\\\u00e4\u21ad",
+            'title': "youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
             'duration': 9.8485,
+            'uploader': "youtube-dl  \"'/\\\u00e4\u21ad",
+            'timestamp': 1354224127,
+            'upload_date': '20121129',
         },
         '_skip': 'There is a limit of 200 free downloads / month for the test song'
     }, {
         # free download
         'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
-        'md5': '853e35bf34aa1d6fe2615ae612564b36',
+        'md5': '5d92af55811e47f38962a54c30b07ef0',
         'info_dict': {
             'id': '2650410135',
             'ext': 'aiff',
@@ -91,10 +94,12 @@ def _real_extract(self, url):
         duration = None
 
         formats = []
-        track_info = self._parse_json(
-            self._search_regex(
-                r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
-                webpage, 'track info', default='{}'), title)
+        trackinfo_block = self._html_search_regex(
+            r'trackinfo(?:["\']|&quot;):\[\s*({.+?})\s*\],(?:["\']|&quot;)',
+            webpage, 'track info', default='{}')
+
+        track_info = self._parse_json(trackinfo_block, title)
+
         if track_info:
             file_ = track_info.get('file')
             if isinstance(file_, dict):
@@ -116,9 +121,10 @@ def _real_extract(self, url):
             duration = float_or_none(track_info.get('duration'))
 
         def extract(key):
-            return self._search_regex(
-                r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
+            data = self._html_search_regex(
+                r',(["\']|&quot;)%s\1:\1(?P<value>(?:\\\1|((?!\1).))+)\1' % key,
                 webpage, key, default=None, group='value')
+            return data.replace(r'\"', '"').replace('\\\\', '\\') if data else data
 
         artist = extract('artist')
         album = extract('album_title')
@@ -127,12 +133,12 @@ def extract(key):
         release_date = unified_strdate(extract('album_release_date'))
 
         download_link = self._search_regex(
-            r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            r'freeDownloadPage(?:["\']|&quot;):\s*(["\']|&quot;)(?P<url>(?:(?!\1).)+)\1', webpage,
             'download link', default=None, group='url')
         if download_link:
             track_id = self._search_regex(
-                r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
-                webpage, 'track id')
+                r'\?id=(?P<id>\d+)&',
+                download_link, 'track id')
 
             download_webpage = self._download_webpage(
                 download_link, track_id, 'Downloading free downloads page')
@@ -315,10 +321,12 @@ def _real_extract(self, url):
             if self._html_search_meta('duration', elem_content, default=None)]
 
         title = self._html_search_regex(
-            r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
-            webpage, 'title', fatal=False)
+            r'album_title\s*(?:&quot;|["\']):\s*(&quot;|["\'])(?P<album>(?:\\\1|((?!\1).))+)\1',
+            webpage, 'title', fatal=False, group='album')
+
         if title:
             title = title.replace(r'\"', '"')
+
         return {
             '_type': 'playlist',
             'uploader_id': uploader_id,

diff --git a/youtube_dlc/extractor/go.py b/youtube_dlc/extractor/go.py
@@ -137,7 +137,11 @@ def _real_extract(self, url):
                     # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
                     # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
                     r'data-video-id=["\']*(VDKA\w+)',
-                    # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
+                    # https://github.com/ytdl-org/youtube-dl/pull/25216/files
+                    # The pattern that follows is based on the pull request linked on the line above. The ABC.com example URL was changed to a show that is available now.
+                    # https://abc.com/shows/the-rookie/episode-guide/season-02/19-the-q-word
+                    r'\bvideoIdCode["\']\s*:\s*["\'](vdka\w+)',
+                    # Deprecated  fallback pattern
                     r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
                 ), webpage, 'video id', default=video_id)
             if not site_info:

diff --git a/youtube_dlc/extractor/hotstar.py b/youtube_dlc/extractor/hotstar.py
@@ -7,11 +7,12 @@
 import time
 import uuid
 import json
+import random
 
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
-    compat_str,
+    compat_str
 )
 from ..utils import (
     determine_ext,
@@ -31,21 +32,38 @@ def _call_api_impl(self, path, video_id, query):
         exp = st + 6000
         auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
         auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
+
+        def _generate_device_id():
+            """
+            Reversed from javascript library.
+            JS function is generateUUID
+            """
+            t = int(round(time.time() * 1000))
+            e = "xxxxxxxx-xxxx-4xxx-xxxx-xxxxxxxxxxxx"  # 4 seems to be interchangeable
+
+            def _replacer():
+                n = int((t + 16 * random.random())) % 16 | 0
+                return hex(n if "x" == e else 3 & n | 8)[2:]
+            return "".join([_.replace('x', _replacer()) for _ in e])
+
         token = self._download_json(
-            'https://api.hotstar.com/in/aadhar/v2/web/in/user/guest-signup',
+            'https://api.hotstar.com/um/v3/users',
             video_id, note='Downloading token',
-            data=json.dumps({"idType": "device", "id": compat_str(uuid.uuid4())}).encode('utf-8'),
+            data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'),
             headers={
                 'hotstarauth': auth,
+                'x-hs-platform': 'PCTV',  # or 'web'
                 'Content-Type': 'application/json',
-            })['description']['userIdentity']
+            })['user_identity']
+
         response = self._download_json(
             'https://api.hotstar.com/' + path, video_id, headers={
                 'hotstarauth': auth,
                 'x-hs-appversion': '6.72.2',
                 'x-hs-platform': 'web',
                 'x-hs-usertoken': token,
             }, query=query)
+
         if response['message'] != "Playback URL's fetched successfully":
             raise ExtractorError(
                 response['message'], expected=True)
@@ -60,7 +78,7 @@ def _call_api(self, path, video_id, query_name='contentId'):
     def _call_api_v2(self, path, video_id):
         return self._call_api_impl(
             '%s/content/%s' % (path, video_id), video_id, {
-                'desired-config': 'encryption:plain;ladder:phone,tv;package:hls,dash',
+                'desired-config': 'audio_channel:stereo|dynamic_range:sdr|encryption:plain|ladder:tv|package:dash|resolution:hd|subs-tag:HotstarVIP|video_codec:vp9',
                 'device-id': compat_str(uuid.uuid4()),
                 'os-name': 'Windows',
                 'os-version': '10',
@@ -129,6 +147,7 @@ def _real_extract(self, url):
         headers = {'Referer': url}
         formats = []
         geo_restricted = False
+        # change to v2 in the future
         playback_sets = self._call_api_v2('play/v1/playback', video_id)['playBackSets']
         for playback_set in playback_sets:
             if not isinstance(playback_set, dict):

diff --git a/youtube_dlc/extractor/tiktok.py b/youtube_dlc/extractor/tiktok.py
@@ -12,7 +12,7 @@
 
 
 class TikTokBaseIE(InfoExtractor):
-    def _extract_aweme(self, video_data, webpage):
+    def _extract_aweme(self, video_data, webpage, url):
         video_info = try_get(
             video_data, lambda x: x['videoData']['itemInfos'], dict)
         author_info = try_get(
@@ -60,7 +60,10 @@ def _extract_aweme(self, video_data, webpage):
             'webpage_url': self._og_search_url(webpage),
             'description': str_or_none(video_info.get('text')) or str_or_none(share_info.get('desc')),
             'ext': 'mp4',
-            'formats': formats
+            'formats': formats,
+            'http_headers': {
+                'Referer': url,
+            }
         }
 
 
@@ -131,6 +134,6 @@ def _real_extract(self, url):
 
         # Check statusCode for success
         if video_data.get('statusCode') == 0:
-            return self._extract_aweme(video_data, webpage)
+            return self._extract_aweme(video_data, webpage, url)
 
         raise ExtractorError('Video not available', video_id=video_id)
diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py
@@ -580,6 +580,10 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser):
             'Upper bound of a range for randomized sleep before each download '
             '(maximum possible number of seconds to sleep). Must only be used '
             'along with --min-sleep-interval.'))
+    workarounds.add_option(
+        '--sleep-subtitles',
+        dest='sleep_interval_subtitles', action='store_true', default=False,
+        help='Enforce sleep interval on subtitles as well')
 
     verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
     verbosity.add_option(
@@ -771,6 +775,9 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser):
         '--rm-cache-dir',
         action='store_true', dest='rm_cachedir',
         help='Delete all filesystem cache files')
+    filesystem.add_option(
+        '--trim-file-name', dest='trim_file_name', default=0, type=int,
+        help='Limit the filename length (extension excluded)')
 
     thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
     thumbnail.add_option(

diff --git a/youtube_dlc/version.py b/youtube_dlc/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2020.09.23-1'
+__version__ = '2020.09.29'