Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unlimit search result (x) #11

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 53 additions & 23 deletions itunes_app_scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class AppStoreScraper:
can be found at https://github.com/facundoolano/app-store-scraper.
"""

def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl", timeout=None):
"""
Retrieve suggested app IDs for search query

Expand All @@ -35,6 +35,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
:param str country: Two-letter country code of store to search in,
default 'nl'
:param str lang: Language code to search with, default 'nl'
:param int timeout: Seconds to wait for response before stopping.

:return list: List of App IDs returned for search query
"""
Expand All @@ -43,8 +44,11 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):

url = "https://search.itunes.apple.com/WebObjects/MZStore.woa/wa/search?clientApplication=Software&media=software&term="
url += quote_plus(term)

amount = int(num) * int(page)

if num is None or page is None:
amount = None
else:
amount = int(num) * int(page)

country = self.get_store_id_for_country(country)
headers = {
Expand All @@ -53,7 +57,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
}

try:
result = requests.get(url, headers=headers).json()
result = requests.get(url, headers=headers, timeout=timeout).json()
except ConnectionError as ce:
raise AppStoreException("Cannot connect to store: {0}".format(str(ce)))
except json.JSONDecodeError:
Expand All @@ -64,7 +68,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):

return [app["id"] for app in result["bubbles"][0]["results"][:amount]]

def get_app_ids_for_collection(self, collection="", category="", num=50, country="nl", lang=""):
def get_app_ids_for_collection(self, collection="", category="", num=50, country="nl", lang="", timeout=None):
"""
Retrieve app IDs in given App Store collection

Expand All @@ -78,48 +82,71 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country
:param str country: Two-letter country code for the store to search in.
Defaults to 'nl'.
:param str lang: Dummy argument for compatibility. Unused.
:param int timeout: Seconds to wait for response before stopping.

:return: List of App IDs in collection.
"""
if not collection:
collection = AppStoreCollections.TOP_FREE_IOS

country = self.get_store_id_for_country(country)
params = (collection, category, num, country)
url = "http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/%s/%s/limit=%s/json?s=%s" % params
country = country.lower()
params = (country, category, collection, num)
url = "https://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/charts?cc=%s&g=%s&name=%s&limit=%s" % params


try:
result = requests.get(url).json()
result = requests.get(url, timeout=timeout).json()
except json.JSONDecodeError:
raise AppStoreException("Could not parse app store response")

return [entry["id"]["attributes"]["im:id"] for entry in result["feed"]["entry"]]
return result["resultIds"]

def get_app_ids_for_developer(self, developer_id, country="nl", lang=""):
def get_apps_for_developer(self, developer_id, country="nl", lang="", timeout=None):
"""
Retrieve App IDs linked to given developer
Retrieve Apps linked to given developer

:param int developer_id: Developer ID
:param str country: Two-letter country code for the store to search in.
Defaults to 'nl'.
:param str lang: Dummy argument for compatibility. Unused.
:param int timeout: Seconds to wait for response before stopping.

:return list: List of App IDs linked to developer
:return list[dict]: List of Apps linked to developer
"""
url = "https://itunes.apple.com/lookup?id=%s&country=%s&entity=software" % (developer_id, country)

try:
result = requests.get(url).json()
result = requests.get(url, timeout=timeout).json()
except json.JSONDecodeError:
raise AppStoreException("Could not parse app store response")

if "results" in result:
return [app["trackId"] for app in result["results"] if app["wrapperType"] == "software"]
return [app for app in result["results"] if app["wrapperType"] == "software"]
else:
# probably an invalid developer ID
return []

def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
def get_app_ids_for_developer(self, developer_id, country="nl", lang="", timeout=None):
    """
    Retrieve App IDs linked to given developer

    Thin wrapper around `get_apps_for_developer()` that keeps only the
    `trackId` of each app.

    :param int developer_id:  Developer ID
    :param str country:  Two-letter country code for the store to search in.
                         Defaults to 'nl'.
    :param str lang:  Dummy argument for compatibility. Unused.
    :param int timeout:  Seconds to wait for response before stopping.

    :return list:  List of App IDs linked to developer; empty if the
                   developer has no apps or the developer ID is invalid.
    """
    # get_apps_for_developer() already unwraps the "results" key of the
    # lookup response and filters on wrapperType == "software", so it hands
    # back a plain list of app dicts (or an empty list). Iterate that list
    # directly: indexing it with "results" raises TypeError.
    apps = self.get_apps_for_developer(developer_id, country=country, lang=lang, timeout=timeout)
    return [app["trackId"] for app in apps]


def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl", timeout=None):
"""
Retrieve list of App IDs of apps similar to given app

Expand All @@ -131,6 +158,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
:param str country: Two-letter country code for the store to search in.
Defaults to 'nl'.
:param str lang: Language code to search with, default 'nl'
:param int timeout: Seconds to wait for response before stopping.

:return list: List of similar app IDs
"""
Expand All @@ -142,7 +170,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
"Accept-Language": lang
}

result = requests.get(url, headers=headers).text
result = requests.get(url, headers=headers, timeout=timeout).text
if "customersAlsoBoughtApps" not in result:
return []

Expand All @@ -157,7 +185,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):

return ids

def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flatten=True, sleep=None, force=False):
def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flatten=True, sleep=None, force=False, timeout=None):
"""
Get app details for given app ID

Expand All @@ -176,6 +204,7 @@ def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flat
short time. Defaults to None.
:param bool force: by-passes the server side caching by adding a timestamp
to the request (default is False)
:param int timeout: Seconds to wait for response before stopping.

:return dict: App details, as returned by the app store. The result is
not processed any further, unless `flatten` is True
Expand All @@ -197,13 +226,13 @@ def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flat
try:
if sleep is not None:
time.sleep(sleep)
result = requests.get(url).json()
result = requests.get(url, timeout=timeout).json()
except Exception:
try:
# handle the retry here.
# Take an extra sleep as back off and then retry the URL once.
time.sleep(2)
result = requests.get(url).json()
result = requests.get(url, timeout=timeout).json()
except Exception:
raise AppStoreException("Could not parse app store response for ID %s" % app_id)

Expand Down Expand Up @@ -271,7 +300,7 @@ def get_store_id_for_country(self, country):
else:
raise AppStoreException("Country code not found for {0}".format(country))

def get_app_ratings(self, app_id, countries=None, sleep=1):
def get_app_ratings(self, app_id, countries=None, sleep=1, timeout=None):
"""
Get app ratings for given app ID

Expand All @@ -283,6 +312,7 @@ def get_app_ratings(self, app_id, countries=None, sleep=1):
:param int sleep: Seconds to sleep before request to prevent being
temporary blocked if there are many requests in a
short time. Defaults to 1.
:param int timeout: Seconds to wait for response before stopping.

:return dict: App ratings, as scraped from the app store.
"""
Expand All @@ -302,13 +332,13 @@ def get_app_ratings(self, app_id, countries=None, sleep=1):
try:
if sleep is not None:
time.sleep(sleep)
result = requests.get(url, headers=headers).text
result = requests.get(url, headers=headers, timeout=timeout).text
except Exception:
try:
# handle the retry here.
# Take an extra sleep as back off and then retry the URL once.
time.sleep(2)
result = requests.get(url, headers=headers).text
result = requests.get(url, headers=headers, timeout=timeout).text
except Exception:
raise AppStoreException("Could not parse app store rating response for ID %s" % app_id)

Expand Down
21 changes: 8 additions & 13 deletions itunes_app_scraper/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,18 @@ class AppStoreCollections:
"""
App store collection IDs

Borrowed from https://github.com/facundoolano/app-store-scraper. These are
Based on https://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/genres?id=6000. These are
the various collections displayed in the app store, usually on the front
page.
"""
TOP_MAC = 'topmacapps'
TOP_FREE_MAC = 'topfreemacapps'
TOP_GROSSING_MAC = 'topgrossingmacapps'
TOP_FREE_MAC = 'freeMacAppsV2'
TOP_PAID_MAC = 'toppaidmacapps'
NEW_IOS = 'newapplications'
NEW_FREE_IOS = 'newfreeapplications'
NEW_PAID_IOS = 'newpaidapplications'
TOP_FREE_IOS = 'topfreeapplications'
TOP_FREE_IPAD = 'topfreeipadapplications'
TOP_GROSSING_IOS = 'topgrossingapplications'
TOP_GROSSING_IPAD = 'topgrossingipadapplications'
TOP_PAID_IOS = 'toppaidapplications'
TOP_PAID_IPAD = 'toppaidipadapplications'
TOP_FREE_IOS = 'freeAppsV2'
TOP_FREE_IPAD = 'freeIpadApplications'
TOP_GROSSING_IOS = 'appsByRevenue'
TOP_GROSSING_IPAD = 'ipadAppsByRevenue'
TOP_PAID_IOS = 'paidApplications'
TOP_PAID_IPAD = 'paidIpadApplications'

class AppStoreCategories:
"""
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="itunes-app-scraper-dmi",
version="0.9.5",
version="0.9.6",
author="Digital Methods Initiative",
author_email="[email protected]",
description="A lightweight iTunes App Store scraper",
Expand Down