ee_apps catalog #142
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow that rebuilds the Earth Engine apps catalog.
name: ee_apps catalog | |
on: | |
# Allows the workflow to be started manually.
workflow_dispatch: | |
schedule: | |
# Cron fields: minute hour day-of-month month day-of-week -> 01:00 every Sunday.
# NOTE(review): GitHub evaluates schedules in UTC - confirm that is the intended time.
- cron: '0 1 * * 0' | |
jobs: | |
# Single job; all steps below run in order on one runner.
build: | |
runs-on: ubuntu-latest | |
steps: | |
# Check out the repository so later steps can read and commit its files.
# NOTE(review): v2 is an old major version of this action - consider whether an upgrade is due.
- name: checkout repo content | |
uses: actions/checkout@v2 # checkout the repository content to github runner | |
# Provide the Python interpreter used by the install and script steps that follow.
- name: setup python | |
uses: actions/setup-python@v2 | |
with: | |
python-version: '3.8' # install the python version needed | |
# Install the third-party packages imported by the inline script.
# pip and setuptools are upgraded once (the original upgraded pip three times).
- name: install python packages
  run: |
    python -m pip install --upgrade pip setuptools
    python -m pip install beautifulsoup4 requests lxml
# Run the inline Python that follows: it rewrites app_urls.json and builds ee-appshot.zip.
- name: Script check | |
uses: jannekem/run-python-script-action@v1 | |
with: | |
script: | | |
import requests | |
import os | |
import json | |
import sys | |
import shutil | |
import zipfile | |
from datetime import datetime | |
from bs4 import BeautifulSoup | |
from datetime import date | |
# Date stamp for this run, formatted YYYY-MM-DD.
dt = datetime.today().strftime("%Y-%m-%d")
# Directory that holds this script, with symlinks resolved.
pathway = os.path.dirname(os.path.realpath(__file__))
# Dated folder path beside the script; it is only computed here and is not
# referenced again in the visible part of the script.
path = os.path.join(pathway, f"EE-Apps_{dt}")
# Host suffix appended to a user name to form that user's app gallery URL.
BASE_URL = ".users.earthengine.app"
def zip_directory(folder_path, zip_path):
    """Archive every file under *folder_path* into a new zip at *zip_path*.

    Entries are stored relative to *folder_path*, so the archive unpacks
    without the top-level folder name. Directories that contain no files
    are not recorded.

    Args:
        folder_path: Directory tree to archive.
        zip_path: Destination archive; an existing file is overwritten.
    """
    with zipfile.ZipFile(zip_path, mode="w") as zipf:
        for root, _, files in os.walk(folder_path):
            for filename in files:
                file_path = os.path.join(root, filename)
                # relpath yields the in-archive name directly instead of
                # slicing the joined path by len(folder_path).
                zipf.write(file_path, os.path.relpath(file_path, folder_path))
def _strip_blank_lines(text):
    """Return *text* trimmed, without blank lines, using "\\n" line endings."""
    normalised = text.strip().replace("\r\n", "\n").replace("\r", "\n")
    return "\n".join(line for line in normalised.split("\n") if line.strip())


def jsext(urllist, folder, timeout=30):
    """Download the script source of each app URL into ``folder/<user>/<app>.js``.

    For every URL the app page is fetched and its ``<script>`` tags are
    scanned for one whose text starts with ``init``. The first double-quoted
    string in that text is treated as the URL of a JSON document; the entry
    ``dependencies[path]`` of that document is written to disk with blank
    lines removed. An already existing target file is left untouched.

    Args:
        urllist: App URLs of the form ``https://<user>.users.../<app>``.
        folder: Root output directory; per-user sub-folders are created.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Progress and errors are printed.
    """
    total = len(urllist)
    for index, url in enumerate(urllist, start=1):
        # Best effort per URL: one broken app must not stop the whole run.
        try:
            username = url.split(".user")[0].split("https://")[1]
            app_name = url.split("/")[-1]
            user_dir = os.path.join(folder, username)
            os.makedirs(user_dir, exist_ok=True)
            local_path = os.path.join(user_dir, f"{app_name}.js")
            print(f"Writing {index} of {total} apps")
            page = requests.get(url, timeout=timeout)
            soup = BeautifulSoup(page.text, "html.parser")
            for script in soup.find_all("script"):
                inline = script.string
                if inline is None or not inline.strip().startswith("init"):
                    continue
                init_url = inline.strip().split('"')[1]
                if not init_url.startswith("https"):
                    continue
                # Never overwrite an existing file; checking first also
                # avoids a pointless download.
                if os.path.exists(local_path):
                    continue
                payload = requests.get(init_url, timeout=timeout).json()
                code = str(payload["dependencies"][payload["path"]])
                try:
                    # Clean in memory and write once, so a failure cannot
                    # leave a half-processed file behind.
                    with open(local_path, "w", encoding="utf-8") as out:
                        out.write(_strip_blank_lines(code))
                except (OSError, UnicodeError) as err:
                    print(err)
        except Exception as err:
            print(err)
def merge_dictionary_list(dict_list):
    """Merge a list of dicts into one dict of lists.

    Every key found in any input dict maps to the list of values it had,
    in the order the dicts appear in *dict_list*. Duplicate values are
    kept. Keys appear in first-seen order.

    Args:
        dict_list: Iterable of dicts, e.g. ``[{"a": 1}, {"a": 2}]``.

    Returns:
        dict mapping each key to a list of its values, e.g. ``{"a": [1, 2]}``.
    """
    merged = {}
    # Single pass: each (key, value) pair is visited exactly once.
    for mapping in dict_list:
        for key, value in mapping.items():
            merged.setdefault(key, []).append(value)
    return merged
# Module-level accumulators filled by jurl(); repeated calls keep extending
# the same lists.
app_urls = []
json_app_urls = []


def jurl(folder, user_list, timeout=30):
    """Collect app URLs for the given users plus those in twitter_app_urls.json.

    Each user's gallery page (``https://<user>`` + ``BASE_URL``) is fetched
    and every link inside a ``div.mdl-grid`` element is recorded. The URLs
    listed per user in ``twitter_app_urls.json`` (read from the current
    directory) are then appended as well.

    Args:
        folder: Unused; kept so existing callers keep working.
        user_list: User names whose gallery pages are scraped.
        timeout: Seconds to wait for each gallery request.

    Returns:
        Tuple ``(unique_urls, json_app_urls)``: a de-duplicated list of all
        URLs (arbitrary order) and the module-level list of one-entry
        ``{user: url}`` dicts in collection order.

    Raises:
        OSError / ValueError: if ``twitter_app_urls.json`` is missing or is
            not valid JSON.
    """
    seen = set(app_urls)
    for user in user_list:
        gallery_url = f"https://{user}{BASE_URL}"
        # Best effort per user, but report the failure instead of hiding it.
        try:
            html = requests.get(gallery_url, timeout=timeout).text
            soup = BeautifulSoup(html, "html.parser")
            for grid in soup.find_all("div", class_="mdl-grid"):
                # href=True skips anchors without a link instead of raising
                # KeyError and losing the rest of this user's apps.
                for anchor in grid.find_all("a", href=True):
                    app_url = str(anchor["href"])
                    app_urls.append(app_url)
                    json_app_urls.append({user: app_url})
                    seen.add(app_url)
            print(f"Total unique url : {len(seen)}", end="\r")
        except Exception as err:
            print(f"Could not read apps for {user}: {err}")
    with open("twitter_app_urls.json", encoding="utf-8") as f:
        extra = json.load(f)
    for user, urls in extra.items():
        for app_url in urls:
            app_urls.append(str(app_url))
            json_app_urls.append({user: str(app_url)})
    return list(set(app_urls)), json_app_urls
ulist = [] | |
def eeapps():
    """Refresh ``app_urls.json`` and archive the source of every listed app.

    Steps:
      1. Read the known users (the keys) from ``app_urls.json``.
      2. Collect their app URLs with ``jurl`` and rewrite ``app_urls.json``
         grouped per user.
      3. Download each app's script with ``jsext`` into ``ee-appshot/``.
      4. Zip that folder to ``ee-appshot.zip`` and delete the folder.

    Any error in steps 1-3 is printed and the function continues with the
    summary. Step 4 is skipped when ``ee-appshot/`` was never created, so
    an early failure neither writes an empty archive nor crashes on
    removing a folder that does not exist.

    Returns:
        None.
    """
    folder = "ee-appshot"
    try:
        with open("app_urls.json") as f:
            data = json.load(f)
        user_list = list(data.keys())
        print(f"Processing a total of {len(user_list)} users")
        applist, user_app_list = jurl(folder, user_list)
        json_sorted = merge_dictionary_list(user_app_list)
        with open("app_urls.json", "w") as out:
            json.dump(json_sorted, out, indent=4, sort_keys=True)
        print("")
        jsext(applist, folder)
    except Exception as e:
        print(e)
    except (KeyboardInterrupt, SystemExit):
        print("\n" + "Program escaped by User")
        sys.exit()
    written = sum(
        1
        for _, _, filenames in os.walk(folder)
        for filename in filenames
        if os.path.splitext(filename)[1] == ".js"
    )
    print(f"Written a total source code for {written} apps")
    if not os.path.isdir(folder):
        print(f"No '{folder}' folder was produced; skipping archive step")
        return
    zip_directory(folder, folder.split(".")[0] + ".zip")
    shutil.rmtree(folder)
eeapps() | |
# List the workspace, hidden files included, so the log shows what the script left behind.
- name: file_check | |
run: ls -l -a | |
# Stage and commit everything the script changed, with today's date in the message.
# continue-on-error lets the job proceed when this step fails
# (presumably when there is nothing to commit - TODO confirm).
# NOTE(review): "[email protected]" looks like an e-mail-obfuscation artifact of the
# page this file was copied from, not a real address - restore the intended value.
# `git add -A` already stages every change, so the trailing `-a` on commit is redundant.
- name: commit files | |
continue-on-error: true | |
run: | | |
today=$(date +"%Y-%m-%d") | |
git config --local user.email "[email protected]" | |
git config --local user.name "GitHub Action" | |
git add -A | |
git commit -m "updated datasets ${today}" -a | |
# Push the commit made by the previous step to the main branch using the workflow token.
# NOTE(review): "ad-m/[email protected]" is garbled (e-mail-obfuscation artifact);
# the action name and version must be restored before this step can run.
- name: push changes | |
continue-on-error: true | |
uses: ad-m/[email protected] | |
with: | |
github_token: ${{ secrets.GITHUB_TOKEN }} | |
branch: main | |
# Count every app URL in app_urls.json (sum of the per-user list lengths)
# and expose the total to later steps as JSON_COUNT.
# `add // 0` yields 0 for an empty object instead of an empty value.
- name: Get the Numbers
  run: |
    echo "JSON_COUNT=$(jq '[.[] | length] | add // 0' app_urls.json)" >> "$GITHUB_ENV"
# Send JSON_COUNT as the message of a green "EE Apps" badge stored as eeapps.json in the given gist.
# NOTE(review): "schneegans/[email protected]" is garbled (e-mail-obfuscation artifact);
# presumably schneegans/dynamic-badges-action at some version - TODO confirm and restore.
- name: JSON counter | |
uses: schneegans/[email protected] | |
with: | |
auth: ${{ secrets.GIST_SECRET }} | |
gistID: 6b74344d1e99aec6cce2aacb088aa34b | |
filename: eeapps.json | |
label: EE Apps | |
message: ${{ env.JSON_COUNT }} | |
color: green |