Add demo #3

Merged
merged 1 commit on Sep 3, 2024
118 changes: 118 additions & 0 deletions demo/michigan_mashup.py
@@ -0,0 +1,118 @@
#!/usr/bin/env python3

"""
Make data about Lansing, Michigan from the World Historical Gazetteer and Michigan Memories
available to teachers as they create lesson plans in the OER Commons Editor.

queries:
* whg Lansing, Michigan
* michmemories Lansing river images
* worldbank Health Service Delivery
"""

import asyncio

import httpx
from bs4 import BeautifulSoup


def dbpedia(page: str) -> str:
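    """Return the English dbo:abstract text from a DBpedia place page."""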
soup = BeautifulSoup(page, "html.parser")
return soup.find("span", {"property": "dbo:abstract", "lang": "en"}).text


def decolonialatlas(page: str) -> str:
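    """Keep only the lines of the page that mention Lansing and strip their HTML tags."""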
page = "\n".join(line for line in page.splitlines() if "Lansing" in line)
return BeautifulSoup(page, "html.parser").text


def getty(page: str) -> str:
"""
<TD COLSPAN=5><SPAN CLASS=page><BR><B>Note: </B>Located on Grand river where it joins the Red Cedar river; site was wilderness in 1847 when state capital was moved here; grew with arrival of railroad & development of motor industry in 1880s; now produces automobiles.</SPAN></TD></TR>
"""
page = "\n".join(line for line in page.splitlines() if "<B>Note: </B>" in line)
return BeautifulSoup(page, "html.parser").text


async def fetch_text(url: str) -> str:
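    """GET a URL and return its body text, or an empty string on a non-200 response."""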
async with httpx.AsyncClient() as client:
response = await client.get(url)
if response.status_code == 200:
return response.text
else:
print(f"Failed to fetch {url}: {response.status_code}")
return ""


async def whgazetteer():
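    """Fetch each source page concurrently, run the matching parser, and print the result."""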
urls = (
# "https://michmemories.org/exhibits/default/catalog?q=lansing",
# "https://whgazetteer.org/places/14156749/portal",
"https://www.getty.edu/vow/TGNFullDisplay?find=Michigan&place=&nation=&prev_page=1&english=Y&subjectid=2052433",
"https://decolonialatlas.wordpress.com/turtle-island-decolonized/",
# "https://dbpedia.org/page/Lansing,_Michigan",
)

tasks = [fetch_text(url) for url in urls]
results = await asyncio.gather(*tasks)

funcs = {func.__name__: func for func in (decolonialatlas, dbpedia, getty)}
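    # A parser handles a page when its function name appears in the page's URL
    # (e.g. "getty" matches the getty.edu address).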
for url, result in zip(urls, results):
if result:
for func in funcs:
if func in url:
result = funcs[func](result)
break
print(f"Text from {url}:\n\n{result}\n\n{'='*50}\n")
else:
print(f"Failed to fetch text from {url}")


if __name__ == "__main__":
# from pathlib import Path
from subprocess import run
from time import sleep
from webbrowser import open_new_tab
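
    # The prompts below walk through the three example queries from the module
    # docstring; the typed search text is discarded and only paces the demo.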

# Type: whg Lansing, Michigan
print("whg: World Historical Gazetteer")
print("michmem: Michigan Memories\n")
_ = input("Search: (ex. 'whg Canada') ")
sleep(2)

print(
"3 World Historical Gazetteer datasets found: Getty, Decolonial Atlas, DBpedia\n"
)
open_new_tab("https://whgazetteer.org/places/14156749/portal")
print("Parsing 3 sources...\n")
sleep(2)
asyncio.run(whgazetteer())

# Type: michmem Lansing river images
_ = input("Search: (ex. 'whg Canada') ")
print("4 Michigan Memories selected: Lansing river images\n")
open_new_tab(
"https://michmemories.org/exhibits/default/catalog?q=Lansing+river+images"
)
sleep(2)
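    # "open" is the macOS file opener; the glob matches any .jpg files in the
    # current working directory.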
run("open *.jpg", shell=True)
    # for i, image_file in enumerate(Path(__file__).parent.glob("*.jpg")):
# print(f"Opening {image_file}...")
# sleep(1)
# run(["open", image_file])

# Type: worldbank Health Service Delivery
#
_ = input("Search: (ex. 'whg Canada') ")
print(
"1 Worldbank pdf file: The Power of Data Collection on Health Service Delivery\n"
)
open_new_tab(
"https://openknowledge.worldbank.org/bitstreams/a34428a8-81c0-4c98-88b8-a5637bc5fde8/download"
)
# sleep(2)
# run("open *.jpg", shell=True)
    # for i, image_file in enumerate(Path(__file__).parent.glob("*.jpg")):
# print(f"Opening {image_file}...")
# sleep(1)
# run(["open", image_file])
62 changes: 62 additions & 0 deletions demo/ome_demo.py
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""
Import FastAPI and use it to retrieve three different resources:
1. An image from https://michmemories.org
2. A dataset from https://github.com/WorldHistoricalGazetteer
3. A pdf file from https://openknowledge.worldbank.org/pages/sustainable-development-goals
"""

from typing import Annotated

from fastapi import FastAPI, File, Form, HTTPException, Response, UploadFile
import httpx

app = FastAPI()


@app.get("/")
async def root():
return {"message": "Hello World"}


@app.post("/login/")
async def login(username: Annotated[str, Form()], password: Annotated[str, Form()]):
return {"username": username}


@app.get("/image")
async def image() -> bytes:
"""
Retrieve an image from https://michmemories.org
"""
image_url = "https://digitalcollections.detroitpubliclibrary.org/islandora/object/islandora%3A236607/datastream/IMAGE/view"
image = await httpx.get(image_url)
if image.status_code == 200:
return image.content
raise HTTPException(status_code=404, detail="Image not found")
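
# Example request once the server is running (assuming uvicorn's default address):
#   curl http://127.0.0.1:8000/image --output image.jpg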


@app.get("/dataset")
async def dataset():
"""
Retrieve a dataset from https://github.com/WorldHistoricalGazetteer
"""
return {"message": "This is a dataset"}


@app.get("/pdf")
async def pdf():
"""
Retrieve a pdf file from https://openknowledge.worldbank.org/pages/sustainable-development-goals
"""
return {"message": "This is a pdf"}


@app.post("/files/")
async def create_file(file: Annotated[bytes, File()]):
return {"file_size": len(file)}


@app.post("/uploadfile/")
async def create_upload_file(file: UploadFile):
return {"filename": file.filename}