Skip to content

Commit

Permalink
Merge pull request #47 from ddkasa/atom-support
Browse files Browse the repository at this point in the history
Atom Support
  • Loading branch information
dhvcc authored Feb 16, 2024
2 parents e5c7bb2 + 24ad9cf commit 1143adb
Show file tree
Hide file tree
Showing 22 changed files with 451 additions and 49 deletions.
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ select = [
"**/__init__.py" = [
"F401"
]
"rss_parser/models/atom/**" = [
"A003"
]


[build-system]
Expand Down
4 changes: 3 additions & 1 deletion rss_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from ._parser import Parser
from ._parser import AtomParser, BaseParser, Parser, RSSParser

__all__ = ("BaseParser", "Parser", "AtomParser", "RSSParser")
62 changes: 46 additions & 16 deletions rss_parser/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,72 @@

from xmltodict import parse

from rss_parser.custom_decorators import abstract_class_attributes
from rss_parser.models import XMLBaseModel
from rss_parser.models.atom import Atom
from rss_parser.models.rss import RSS

# >>> FUTURE
# TODO: Maybe support a generator-based approach for big RSS feeds
# TODO: Add cli to parse to json
# TODO: Possibly bundle as deb/rpm/exe
# TODO: Atom support
# TODO: Older RSS versions?
# TODO: Older Atom versions
# TODO: Older RSS versions


class Parser:
"""Parser for rss files."""
@abstract_class_attributes("schema")
class BaseParser:
"""Parser for rss/atom files."""

schema: ClassVar[Type[XMLBaseModel]] = RSS

@staticmethod
def _check_atom(root: dict):
if "feed" in root:
raise NotImplementedError("ATOM feed is not currently supported")
schema: ClassVar[Type[XMLBaseModel]]
root_key: Optional[str] = None

@staticmethod
def to_xml(data: str, *args, **kwargs):
return parse(str(data), *args, **kwargs)

@classmethod
def parse(cls, data: str, *, schema: Optional[Type[XMLBaseModel]] = None) -> XMLBaseModel:
def parse(
cls,
data: str,
*,
schema: Optional[Type[XMLBaseModel]] = None,
root_key: Optional[str] = None,
) -> XMLBaseModel:
"""
Parse XML data into schema (default: RSS 2.0).
Parse XML data into schema.
:param data: string of XML data that needs to be parsed
:return: "schema" object
"""
root = cls.to_xml(data)
cls._check_atom(root)

schema = schema or cls.schema
schema = schema if schema else cls.schema

root_key = root_key if root_key else cls.root_key

if root_key:
root = root.get(root_key, root)

return schema.parse_obj(root)


class AtomParser(BaseParser):
    """
    Parser that validates XML data against the Atom schema.

    No root_key is set here, so the BaseParser default (None) applies and the
    whole parsed document is handed to the schema.
    """

    schema = Atom


class RSSParser(BaseParser):
    """Parser that validates the content of the "rss" root element against the RSS schema."""

    # BaseParser.parse extracts this key from the parsed document (falling back
    # to the whole document when the key is absent) before validation.
    root_key = "rss"
    schema = RSS


class Parser(RSSParser):
    """Deprecated alias of RSSParser, kept for backward compatibility."""

    @classmethod
    def parse(cls, data: str, *, schema: Optional[Type[XMLBaseModel]] = None) -> XMLBaseModel:
        """
        Parse RSS XML data into schema, emitting a DeprecationWarning.

        :param data: string of XML data that needs to be parsed
        :param schema: optional schema to use instead of the class-level one
        :return: "schema" object
        """
        import warnings

        warnings.warn(
            "Class Parser was renamed to RSSParser and will be removed in the next major update",
            DeprecationWarning,
            stacklevel=2,
        )
        # Delegate through super() rather than naming RSSParser directly, so
        # that subclasses of Parser keep their own schema/root_key overrides.
        return super().parse(data, schema=schema)
43 changes: 43 additions & 0 deletions rss_parser/custom_decorators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
def abstract_class_attributes(*names):
    """
    Class decorator that declares one or more abstract class attributes.

    Each attribute in ``names`` is set to ``NotImplemented`` on the decorated
    class, and the class's ``__init_subclass__`` is wrapped so that every
    subclass is checked when it is defined.

    :param names: names of the class attributes that subclasses must set
    :return: a decorator that takes a class and returns that same class
    :raises NotImplementedError: at subclass creation time, if any of the
        attributes still resolves to ``NotImplemented`` on the subclass
    """

    def _func(cls):
        """Mark the attributes as abstract and extend ``__init_subclass__`` of ``cls``."""

        # Add each attribute to the class with the value of NotImplemented
        for name in names:
            setattr(cls, name, NotImplemented)

        # Save the original __init_subclass__ implementation, then wrap
        # it with our new implementation.
        orig_init_subclass = cls.__init_subclass__

        # A Python-level __init_subclass__ is an implicit classmethod, so the
        # attribute looked up above is already bound to the *decorated* class.
        # Unwrap it so it can be called with the subclass being created.
        # The default object.__init_subclass__ is a builtin without __func__.
        orig_func = getattr(orig_init_subclass, "__func__", None)

        def new_init_subclass(subcls, **kwargs):
            """
            New definition of __init_subclass__ that checks that
            attributes are implemented.
            """

            if orig_func is not None:
                # Custom implementation: pass the new subclass explicitly.
                orig_func(subcls, **kwargs)
            else:
                # Default implementation: takes no positional arguments.
                orig_init_subclass(**kwargs)

            # Check that each attribute is defined.
            for name in names:
                if getattr(subcls, name, NotImplemented) is NotImplemented:
                    raise NotImplementedError(f"Class attribute {name} must be set for class {subcls}")

        # Bind this new function to the __init_subclass__.
        # We must manually declare it as a classmethod because that is not
        # done automatically when the attribute is assigned after class
        # creation, as it would be if declared in the class body.
        cls.__init_subclass__ = classmethod(new_init_subclass)

        return cls

    return _func
3 changes: 3 additions & 0 deletions rss_parser/models/atom/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .atom import Atom

__all__ = ("Atom",)
15 changes: 15 additions & 0 deletions rss_parser/models/atom/atom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from typing import Optional

from rss_parser.models import XMLBaseModel
from rss_parser.models.atom.feed import Feed
from rss_parser.models.types.tag import Tag
from rss_parser.pydantic_proxy import import_v1_pydantic

pydantic = import_v1_pydantic()


class Atom(XMLBaseModel):
    """Atom 1.0"""

    # Top-level model for a parsed Atom document: the root dictionary is
    # expected to contain a "feed" element.

    # Populated from the "@version" key; the "@" prefix is how xmltodict
    # names XML attributes by default.
    version: Optional[Tag[str]] = pydantic.Field(alias="@version")
    # The <feed> element, validated against the Feed model.
    feed: Tag[Feed]
56 changes: 56 additions & 0 deletions rss_parser/models/atom/entry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from typing import Optional

from rss_parser.models import XMLBaseModel
from rss_parser.models.atom.person import Person
from rss_parser.models.types.date import DateTimeOrStr
from rss_parser.models.types.only_list import OnlyList
from rss_parser.models.types.tag import Tag
from rss_parser.pydantic_proxy import import_v1_pydantic

pydantic = import_v1_pydantic()


class RequiredAtomEntryMixin(XMLBaseModel):
    # Fields declared without a default: every entry must provide them.

    id: Tag[str]
    "Identifier for the entry."

    title: Tag[str]
    "The title of the entry."

    updated: Tag[DateTimeOrStr]
    "Indicates when the entry was updated."


class RecommendedAtomEntryMixin(XMLBaseModel):
    # Fields that may be absent. The list fields are exposed under plural
    # attribute names but read from the singular XML element name given as
    # the alias, and default to an empty list.
    # NOTE(review): presumably OnlyList wraps a single element into a list -
    # confirm in rss_parser/models/types/only_list.py.

    authors: Optional[OnlyList[Tag[Person]]] = pydantic.Field(alias="author", default=[])
    "Entry authors."

    links: Optional[OnlyList[Tag[str]]] = pydantic.Field(alias="link", default=[])
    "The URL of the entry."

    content: Optional[Tag[str]] = None
    "The main content of the entry."

    summary: Optional[Tag[str]] = None
    "Conveys a short summary, abstract, or excerpt of the entry. Some feeds use this tag as the main content."


class OptionalAtomEntryMixin(XMLBaseModel):
    # Fields that may be absent. The list fields are read from the singular
    # XML element name given as the alias and default to an empty list; the
    # scalar fields default to None.

    categories: Optional[OnlyList[Tag[dict]]] = pydantic.Field(alias="category", default=[])
    "Specifies a categories that the entry belongs to."

    contributors: Optional[OnlyList[Tag[Person]]] = pydantic.Field(alias="contributor", default=[])
    "Entry contributors."

    rights: Optional[Tag[str]] = None
    "The copyright of the entry."

    published: Optional[Tag[DateTimeOrStr]] = None
    "Indicates when the entry was published."

    # NOTE(review): a Source model exists in rss_parser/models/atom/source.py,
    # but this field is typed as Tag[str] - confirm which one is intended.
    source: Optional[Tag[str]] = None
    "Contains metadata from the source feed if this entry is a copy."


# Atom entry model: combines the required, recommended and optional entry
# field groups declared in the mixins above into a single model.
class Entry(RequiredAtomEntryMixin, RecommendedAtomEntryMixin, OptionalAtomEntryMixin, XMLBaseModel):
    """https://validator.w3.org/feed/docs/atom.html"""
61 changes: 61 additions & 0 deletions rss_parser/models/atom/feed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from typing import Optional

from rss_parser.models import XMLBaseModel
from rss_parser.models.atom.entry import Entry
from rss_parser.models.atom.person import Person
from rss_parser.models.types.date import DateTimeOrStr
from rss_parser.models.types.only_list import OnlyList
from rss_parser.models.types.tag import Tag
from rss_parser.pydantic_proxy import import_v1_pydantic

pydantic = import_v1_pydantic()


class RequiredAtomFeedMixin(XMLBaseModel):
    # Fields declared without a default: every feed must provide them.

    id: Tag[str]
    "Identifies the feed using a universally unique and permanent URI."

    title: Tag[str]
    "Contains a human readable title for the feed."

    updated: Tag[DateTimeOrStr]
    "Indicates the last time the feed was modified in a significant way."


class RecommendedAtomFeedMixin(XMLBaseModel):
    # Fields that may be absent. Both are exposed under plural attribute
    # names, read from the singular XML element name given as the alias,
    # and default to an empty list.

    authors: Optional[OnlyList[Tag[Person]]] = pydantic.Field(alias="author", default=[])
    "Names one author of the feed. A feed may have multiple author elements."

    links: Optional[OnlyList[Tag[str]]] = pydantic.Field(alias="link", default=[])
    "The URL to the feed. A feed may have multiple link elements."


class OptionalAtomFeedMixin(XMLBaseModel):
    # Fields that may be absent. The list fields are read from the singular
    # XML element name given as the alias and default to an empty list; the
    # scalar fields default to None.

    # Each <entry> element is validated against the Entry model.
    entries: Optional[OnlyList[Tag[Entry]]] = pydantic.Field(alias="entry", default=[])
    "The entries in the feed. A feed may have multiple entry elements."

    categories: Optional[OnlyList[Tag[dict]]] = pydantic.Field(alias="category", default=[])
    "Specifies a categories that the feed belongs to. The feed may have multiple categories elements."

    contributors: Optional[OnlyList[Tag[Person]]] = pydantic.Field(alias="contributor", default=[])
    "Feed contributors."

    generator: Optional[Tag[str]] = None
    "Identifies the software used to generate the feed, for debugging and other purposes."

    icon: Optional[Tag[str]] = None
    "Identifies a small image which provides iconic visual identification for the feed. Icons should be square."

    logo: Optional[Tag[str]] = None
    "Identifies a larger image which provides visual identification for the feed. \
Images should be twice as wide as they are tall."

    rights: Optional[Tag[str]] = None
    "The copyright of the feed."

    subtitle: Optional[Tag[str]] = None
    "Contains a human readable description or subtitle for the feed."


# Atom feed model: combines the required, recommended and optional feed
# field groups declared in the mixins above into a single model.
class Feed(RequiredAtomFeedMixin, RecommendedAtomFeedMixin, OptionalAtomFeedMixin, XMLBaseModel):
    """https://validator.w3.org/feed/docs/atom.html"""
18 changes: 18 additions & 0 deletions rss_parser/models/atom/person.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import Optional

from rss_parser.models import XMLBaseModel
from rss_parser.models.types.tag import Tag
from rss_parser.pydantic_proxy import import_v1_pydantic

pydantic = import_v1_pydantic()


class Person(XMLBaseModel):
    # Model of a person element; the entry and feed models use it for their
    # authors and contributors. Only the name is required.

    name: Tag[str]
    "Conveys a human-readable name for the person."

    uri: Optional[Tag[str]] = None
    "Contains a home page for the person."

    email: Optional[Tag[str]] = None
    "Contains an email address for the person."
19 changes: 19 additions & 0 deletions rss_parser/models/atom/source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from typing import Optional

from rss_parser.models import XMLBaseModel
from rss_parser.models.types.date import DateTimeOrStr
from rss_parser.models.types.tag import Tag
from rss_parser.pydantic_proxy import import_v1_pydantic

pydantic = import_v1_pydantic()


class Source(XMLBaseModel):
    # Model of the metadata describing the source of an entry. Every field
    # is optional and defaults to None.
    # NOTE(review): Entry.source in rss_parser/models/atom/entry.py is typed
    # as Tag[str] rather than Tag[Source] - confirm where this model is
    # meant to be used.

    id: Optional[Tag[str]] = None
    "Source id."

    title: Optional[Tag[str]] = None
    "Title of the source."

    updated: Optional[Tag[DateTimeOrStr]] = None
    "When source was updated."
3 changes: 3 additions & 0 deletions rss_parser/models/rss/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .rss import RSS

__all__ = ("RSS",)
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Optional

from rss_parser.models import XMLBaseModel
from rss_parser.models.image import Image
from rss_parser.models.item import Item
from rss_parser.models.text_input import TextInput
from rss_parser.models.rss.image import Image
from rss_parser.models.rss.item import Item
from rss_parser.models.rss.text_input import TextInput
from rss_parser.models.types.date import DateTimeOrStr
from rss_parser.models.types.only_list import OnlyList
from rss_parser.models.types.tag import Tag
Expand Down
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion rss_parser/models/rss.py → rss_parser/models/rss/rss.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Optional

from rss_parser.models import XMLBaseModel
from rss_parser.models.channel import Channel
from rss_parser.models.rss.channel import Channel
from rss_parser.models.types.tag import Tag
from rss_parser.pydantic_proxy import import_v1_pydantic

Expand Down
File renamed without changes.
15 changes: 7 additions & 8 deletions rss_parser/models/types/date.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from datetime import datetime
from email.utils import parsedate_to_datetime

from rss_parser.pydantic_proxy import import_v1_pydantic

pydantic_validators = import_v1_pydantic(".validators")


class DateTimeOrStr(datetime):
@classmethod
Expand All @@ -25,16 +29,11 @@ def validate_dt_or_str(value: str) -> datetime:
# Try to parse standard (RFC 822)
try:
return parsedate_to_datetime(value)
except ValueError:
pass
# Try ISO
try:
return datetime.fromisoformat(value)
except ValueError:
except (ValueError, TypeError): # https://github.com/python/cpython/issues/74866
pass
# Try timestamp
# Try ISO or timestamp
try:
return datetime.fromtimestamp(int(value))
return pydantic_validators.parse_datetime(value)
except ValueError:
pass

Expand Down
2 changes: 1 addition & 1 deletion rss_parser/models/types/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class Tag(pydantic_generics.GenericModel, Generic[T]):
>>> m.width.content
48
>>> type(m.width), type(m.width.content)
(<class 'rss_parser.models.image.Tag[int]'>, <class 'int'>)
(<class 'rss_parser.models.rss.image.Tag[int]'>, <class 'int'>)
>>> # The attributes are empty by default
>>> m.width.attributes
{}
Expand Down
Loading

0 comments on commit 1143adb

Please sign in to comment.