Skip to content

Commit

Permalink
use is_valid_url
Browse files Browse the repository at this point in the history
  • Loading branch information
adbar committed Nov 20, 2023
1 parent f4ec8f5 commit ec2463b
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
4 changes: 2 additions & 2 deletions trafilatura/feeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from itertools import islice

from courlan import (check_url, clean_url, filter_urls, fix_relative_urls,
-get_hostinfo, validate_url)
+get_hostinfo, is_valid_url)

from .downloads import fetch_url
from .settings import MAX_LINKS
Expand Down Expand Up @@ -135,7 +135,7 @@ def determine_feed(htmlstring, baseurl, reference):
for link in sorted(set(feed_urls)):
link = fix_relative_urls(baseurl, link)
link = clean_url(link)
-if link is None or link == reference or validate_url(link)[0] is False:
+if link is None or link == reference or not is_valid_url(link):
continue
if BLACKLIST.search(link):
continue
Expand Down
6 changes: 3 additions & 3 deletions trafilatura/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re
from copy import deepcopy

-from courlan import extract_domain, get_base_url, normalize_url, validate_url
+from courlan import extract_domain, get_base_url, is_valid_url, normalize_url, validate_url
from htmldate import find_date
from lxml.html import tostring

Expand Down Expand Up @@ -173,7 +173,7 @@ def extract_opengraph(tree):
title = elem.get('content')
# orig URL
elif elem.get('property') == 'og:url':
-if validate_url(elem.get('content'))[0] is True:
+if is_valid_url(elem.get('content')):
url = elem.get('content')
# description
elif elem.get('property') == 'og:description':
Expand Down Expand Up @@ -250,7 +250,7 @@ def examine_meta(tree):
backup_sitename = content_attr
# url
elif name_attr == 'twitter:url':
-if url is None and validate_url(content_attr)[0] is True:
+if url is None and is_valid_url(content_attr):
url = content_attr
# keywords
elif name_attr in METANAME_TAG: # 'page-topic'
Expand Down

0 comments on commit ec2463b

Please sign in to comment.