Skip to content

Commit

Permalink
z_url_metadata: use Slack UA for fetching link data (#93)
Browse files Browse the repository at this point in the history
  • Loading branch information
mworrell authored Mar 20, 2024
1 parent d21a1db commit c42d132
Showing 1 changed file with 16 additions and 5 deletions.
21 changes: 16 additions & 5 deletions src/z_url_metadata.erl
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
%% @author Marc Worrell
%% @copyright 2014-2023 Marc Worrell
%% @copyright 2014-2024 Marc Worrell
%% @doc Discover metadata about a URL.
%% @end

%% Copyright 2014-2023 Marc Worrell
%% Copyright 2014-2024 Marc Worrell
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -36,10 +37,16 @@

-export_type([ metadata/0 ]).

% Per default we fetch max 1MB of data to analyze
% User-agent sent when fetching a page for metadata sniffing - we pretend to be
% Slack so that some websites with bot-protection allow us to sniff the metadata.
% This is only a default: fetch/2 prepends it to the options when the caller
% did not define a `user_agent` option.
-define(USER_AGENT, <<"Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)">>).

% By default we fetch at most 1MB of data to analyze.
% We need to fetch this much as (for example) YouTube adds a lot of css/scripts
% above the metadata of the page.
% Passed to the fetcher as the `max_length` option in fetch/2
% (presumably only when the caller did not supply one - confirm in fetch/2).
-define(FETCH_LENGTH, 1024*1024).

% Below this size an image is considered too small to be a representative image or icon
% Below this size an image is considered too small to be a representative image or icon.
% NOTE(review): the unit is presumably pixels (width and/or height) - confirm
% against the code that uses this macro.
-define(IMG_SMALL_SIZE, 16).


Expand All @@ -55,7 +62,11 @@ fetch(Url, Options) ->
true -> Options;
false -> [ {max_length, ?FETCH_LENGTH} | Options ]
end,
case z_url_fetch:fetch_partial(Url, Options1) of
Options2 = case proplists:is_defined(user_agent, Options1) of
true -> Options1;
false -> [ {user_agent, ?USER_AGENT} | Options1 ]
end,
case z_url_fetch:fetch_partial(Url, Options2) of
{ok, {FinalUrl, Headers, _Size, Data}} ->
{ok, partial_metadata(FinalUrl, Headers, Data)};
{error, _} = Error ->
Expand Down

0 comments on commit c42d132

Please sign in to comment.