Skip to content

Commit

Permalink
Use urlparse in get_domain.
Browse files Browse the repository at this point in the history
  • Loading branch information
skytreader committed Jul 23, 2019
1 parent 744e466 commit 3602f31
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions microdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import html5lib

from collections import defaultdict
from urlparse import urlparse


try:
def get_domain(url_string):
    """
    Get the domain _including_ the protocol specified, if any.

    :param url_string: An absolute URL ("http://example.com/a/b") or a
        scheme-less one ("example.com/a/b").
    :return: "scheme://netloc" when ``urlparse`` finds both a scheme and a
        network location; otherwise everything before the first "/".
    """
    parsed = urlparse(url_string)
    # Require a netloc as well as a scheme: urlparse reads the host of a
    # scheme-less "example.com:8080/path" as the scheme and leaves netloc
    # empty, so such inputs must take the fallback below.
    if parsed.scheme and parsed.netloc:
        # Join with "://"; parsed.scheme carries no trailing colon, so
        # joining the parts with "/" alone would yield "http//example.com".
        return "://".join((parsed.scheme, parsed.netloc))
    return url_string.split("/")[0]

Expand Down

0 comments on commit 3602f31

Please sign in to comment.