Skip to content

Commit

Permalink
z_url_metadata: Add 'site_name' property (#101)
Browse files Browse the repository at this point in the history
* z_url_metadata: Add 'site_name' property

* Add extra types. Also test on OTP27
  • Loading branch information
mworrell authored Nov 25, 2024
1 parent 7dd5737 commit f1fa46e
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:

strategy:
matrix:
otp_version: [25,26]
otp_version: [25,26,27]
os: [ubuntu-latest]

container:
Expand Down
8 changes: 4 additions & 4 deletions include/z_url_metadata.hrl
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
%% @author Marc Worrell
%% @copyright 2014-2021 Marc Worrell
%% @copyright 2014-2024 Marc Worrell
%% @doc Record returned by z_url_metadata:fetch/1
%% @end

%% Copyright 2014-2021 Marc Worrell
%% Copyright 2014-2024 Marc Worrell
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
Expand All @@ -23,7 +24,6 @@
content_length :: integer(),
metadata :: list(),
is_index_page = false :: boolean(),
headers :: list(),
headers :: list({binary(), binary()}),
partial_data :: binary()
}).

40 changes: 38 additions & 2 deletions src/z_url_metadata.erl
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
%% @author Marc Worrell
%% @copyright 2014-2024 Marc Worrell
%% @doc Discover metadata about an url.
%% @doc Discover metadata about a URL. First follows any redirects
%% and URL shorteners, then fetches the data at the final URL to inspect
%% all metadata tags, content headers and the first part of the HTML. The
%% returned opaque metadata can be queried with properties.
%% The Slackbot user-agent is used for fetching URLs so that the URL shorteners
%% return a location header and other sites are coerced to give correct metadata.
%% Only the first MB of data is fetched; this prevents fetching large objects.
%% @end

%% Copyright 2014-2024 Marc Worrell
Expand Down Expand Up @@ -34,6 +40,13 @@


%% The metadata record returned by fetch; inspect it with p/2.
-type metadata() :: #url_metadata{}.

%% Property names accepted by p/2. Besides the fixed atoms a binary is
%% allowed; presumably this names a raw metadata tag (p/2 itself looks up
%% keys like <<"og:site_name">>) -- confirm against the lookup helper.
-type property() :: mime | mime_options | site_name | content_length
                  | url | canonical_url | short_url | final_url
                  | headers | title | h1 | summary | tags | filename
                  | mtitle | description | keywords | author | charset | language
                  | image | image_nav | thumbnail
                  | icon | icon_nav | icon_shortcut | icon_touch
                  | binary().

%% property/0 is exported as well: it appears in the spec of the exported
%% p/2, so callers need it to write their own specs against this module.
-export_type([ metadata/0, property/0 ]).

Expand Down Expand Up @@ -75,7 +88,11 @@ fetch(Url, Options) ->


%% @doc Fetch properties of the fetched metadata
-spec p(atom() | binary() | list( atom() | binary() ), metadata()) -> list(binary()) | binary() | undefined.
-spec p(Property, Metadata) -> Value when
Property :: property() | [ property() ],
Metadata :: metadata(),
Value :: binary() | list( binary() ) | Headers | undefined,
Headers :: list({binary(), binary()}).
p(mime, MD) ->
MD#url_metadata.content_type;
p(mime_options, MD) ->
Expand All @@ -87,6 +104,25 @@ p(url, MD) ->
undefined -> MD#url_metadata.final_url;
PrefUrl -> z_url:abs_link(PrefUrl, MD#url_metadata.final_url)
end;
%% Site name: use the "og:site_name" / "twitter:site" metadata when p1/2
%% finds one; otherwise derive it from the hostname of the canonical URL
%% (or of the final URL if there is no canonical URL), with any leading
%% "www." removed. Returns 'undefined' if no hostname can be determined.
p(site_name, MD) ->
    case p1([<<"og:site_name">>, <<"twitter:site">>], MD) of
        undefined ->
            Url = case p1([canonical_url], MD) of
                undefined ->
                    MD#url_metadata.final_url;
                Canonical ->
                    %% The canonical link may be relative; resolve it against
                    %% the final URL, the same way the 'url' property does.
                    z_url:abs_link(Canonical, MD#url_metadata.final_url)
            end,
            case uri_string:parse(Url) of
                #{ host := Host } ->
                    case unicode:characters_to_binary(Host) of
                        <<"www.", H/binary>> -> H;
                        H -> H
                    end;
                _ ->
                    %% Either an {error, _, _} tuple or a parsed URI map
                    %% without a host part (uri_string:parse/1 omits the
                    %% 'host' key for relative references). Matching only
                    %% the error tuple would crash with case_clause here.
                    undefined
            end;
        Sitename ->
            Sitename
    end;
p(content_length, MD) ->
MD#url_metadata.content_length;
p(headers, MD) ->
Expand Down

0 comments on commit f1fa46e

Please sign in to comment.