diff --git a/pyproject.toml b/pyproject.toml index 8214bda..a39ee54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "reasonify-headless" description = "headless implementation for the Reasonify Agent" license = { text = "MIT" } dynamic = [ "version" ] -requires-python = ">=3.12" +requires-python = "~=3.12" authors = [ { name = "Muspi Merol", email = "me@promplate.dev" } ] readme = "README.md" dependencies = [ @@ -16,6 +16,7 @@ dependencies = [ "html-text~=0.6.2", "html2text2~=1.0.0", "isomorphic-fetch~=0.0.0.0.dev2", + "python-readability~=0.0.2", ] [build-system] diff --git a/reasonify-headless/reasonify/tools/fetch.py b/reasonify-headless/reasonify/tools/fetch.py index c1986aa..de076a6 100644 --- a/reasonify-headless/reasonify/tools/fetch.py +++ b/reasonify-headless/reasonify/tools/fetch.py @@ -32,4 +32,8 @@ async def _fetch(url: str, strategy: Strategy): except OSError: res = await fetch(f"/api/proxy?url={url}") - return res.status, post_process(pre_process(res.text, strategy)) + from readability import parse + + text = parse(res.text, keep_classes=True).content or res.text + + return res.status, post_process(pre_process(text, strategy))