From efdca3debc60e60b63b3b5e7973906a62040d9f9 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Mon, 10 Feb 2020 11:19:18 +0100 Subject: [PATCH 01/16] configure : use numeric sort --- tv_grab_fr_telerama | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 0bc26ff..8aabc97 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -580,7 +580,7 @@ if ($mode eq 'configure') { my %asked; # Ask about each channel (unless already asked). - my @chs = grep { not $asked{$_}++ } sort keys %channels; + my @chs = grep { not $asked{$_}++ } sort {$a <=> $b} keys %channels; my @names = map { $channels{$_}{name} } @chs; my @qs = map { "add channel $_?" } @names; my @want = ask_many_boolean(1, @qs); From c6fa7e1528b390aa9940eabd0e568d10de7784a4 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Wed, 12 Feb 2020 17:04:00 +0100 Subject: [PATCH 02/16] mimic android api calls --- tv_grab_fr_telerama | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 8aabc97..30b3318 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -420,8 +420,17 @@ $XMLTV::Get_nice::ua = LWP::UserAgent->new( requests_redirectable => ['GET', 'POST','HEAD'], max_redirect => 3, keep_alive => 1, - agent=>"okhttp/3.2.0" + agent=>"okhttp/3.12.3" ); +$XMLTV::Get_nice::ua->default_header( + 'lmd-systemname' => 'Android', + 'lmd-systemversion' => '8.1.0', + 'lmd-devicetype' => 'tablet', + 'lmd-devicemodel' => 'Samsung Galaxy', + 'lmd-bundleid' => 'com.telerama.fr', + 'lmd-appversion' => '3.6.4', +); + $XMLTV::Get_nice::ua->env_proxy; $XMLTV::Get_nice::Delay = $Delay; # tell XMLTV::Get_nice, we handle error our self @@ -790,7 +799,7 @@ my $script_duration = time(); # The website stores channel information by hour area for a whole week ! 
my $ind; -foreach $ind (sort { $a <=> $b } keys %channels) { +foreach $ind (sort { $channels{$a}{chid} <=> $channels{$b}{chid} } keys %channels) { my $chid = $channels{$ind}{chid}; my $url; my $i; @@ -905,9 +914,11 @@ sub get_channels( $ ) { my %channels; + # create random hash to mimic api trace + my $hash; + for(1..32) { $hash .= sprintf("%X", rand(16)); } # Get the current page - - my $my_url = mkurl($CHANNEL_GRID, {}); + my $my_url = mkurl($CHANNEL_GRID, {'hash'=>lc $hash}); if ($show_url) { print STDERR $my_url."\n"; } @@ -950,12 +961,7 @@ sub process_channel_grid_all_pages ( $$$$ ) { # appli api trace use 3200 for nb_par_page my $nb_par_page = 3200; do { - my %params; - if($page > 1) { - %params = ( 'date' => $dayoff, 'id_chaines' => $chid, 'nb_par_page' => $nb_par_page, 'page' => $page ); - } else { - %params = ( 'date' => $dayoff, 'id_chaines' => $chid, 'nb_par_page' => $nb_par_page ); - } + my %params = ( 'date' => $dayoff, 'id_chaines' => $chid, 'nb_par_page' => $nb_par_page, 'page' => $page ); my $url = mkurl($CHANNEL_GRID_PAGE, \%params); if ($show_url) { print STDERR $url."\n"; } if ($save_json) { $jsname = mkjsonname("", \%params); } From be481f2e3a9b81945b83ec24f641ea494af885e1 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Wed, 12 Feb 2020 17:12:24 +0100 Subject: [PATCH 03/16] move get genres before chans grab loop --- tv_grab_fr_telerama | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 30b3318..56b87eb 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -827,7 +827,10 @@ foreach $ind (sort { $channels{$a}{chid} <=> $channels{$b}{chid} } keys %channel my $bar = new XMLTV::ProgressBar('getting listings', scalar @to_get) if not $opt_quiet; Date_Init('SetDate=now,UTC'); - +# get genres +if (!@genres) { + get_channels("categories.json"); +} foreach (@to_get) { my ($chid, $offset, $chid_name) = @$_; process_channel_grid_all_pages($writer, $chid, $offset, $chid_name); 
@@ -972,10 +975,6 @@ sub process_channel_grid_all_pages ( $$$$ ) { sub process_channel_grid_page ( $$$$$$$$ ) { my ($writer, $chid, $url, $slot, $jsname, $chid_name, $dayoff, $page) = @_; - - if (!@genres) { - get_channels("categories.json"); - } my $json_hash = get_page_json( $url, $jsname ); # no data ? if(!defined($json_hash->{'donnees'})) { From f72f6491b58bd62ffdcbb07398bc43b6491adf74 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Wed, 12 Feb 2020 20:16:54 +0100 Subject: [PATCH 04/16] simplify loops, remove obsolete comments, unused variables, use globals for channels/writer --- tv_grab_fr_telerama | 153 +++++++++++--------------------------------- 1 file changed, 39 insertions(+), 114 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 56b87eb..91e369c 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -14,7 +14,7 @@ tv_grab_fr_telerama - Grab TV listings for France. tv_grab_fr --configure [--config-file FILE] To grab listings: tv_grab_fr [--config-file FILE] [--output FILE] [--days N] - [--offset N] [--quiet] [--perdays] [--perweeks] + [--offset N] [--quiet] [--ch_prefix prefix] [--ch_postfix postfix] [--no_episodedesc] [--no_aggregatecat] [--show_url] [--save_json] @@ -322,8 +322,8 @@ sub get_channels( $ ); sub return_other_channels( ); sub build_other_channel_filename(); sub get_more_channel_icon( $ ); -sub process_channel_grid_all_pages( $$$$ ); -sub process_channel_grid_page( $$$$$$$$ ); +sub process_channel_grid_all_pages( $$ ); +sub process_channel_grid_page( $$$$$ ); sub debug_print( @ ); sub get_page_json( $$ ); @@ -331,27 +331,12 @@ sub get_page_json( $$ ); # Main declarations #*************************************************************************** my $LANG = "fr"; - -# Grid id defined by the website according to channel types (needed to build the URL) -# my %GridType = ( "ALL" => "all"); - -# Slot of hours according to the website (needed to build the URL) -my @offsets = (2, 3, 4, 5, 6, 7); -# Slot of days 
for day per day grabbing -# my @days = (2, 3, 4, 5, 6, 7, 8, 9); - -my $Delay = 5; # in seconds +# delay between api requests in seconds +my $Delay = 5; my $FailOnError = 1; my %errors = (); my $last_get_time; - -# my $progexist; -# my %prevprog; -# my $prevtitle; -# my $prevstart; -# my $prevstop; - my $channel_postfix = ".api.telerama.fr"; my $channel_prefix = "C"; @@ -366,20 +351,18 @@ XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux') # cache on disk or memoize('XMLTV::Get_nice::get_nice_aux') # cache in memory or die "cannot memoize 'XMLTV::Get_nice::get_nice_aux': $!"; -##patch: tigerlol: correction des chevauchements d'horaire -my ($opt_days, $opt_help, $opt_output, $opt_per_days, $opt_per_weeks, $opt_offset, $opt_gui, $opt_quiet, +my ($opt_days, $opt_help, $opt_output, $opt_offset, $opt_gui, $opt_quiet, $opt_list_channels, $opt_config_file, $opt_configure, $opt_morechannels, $opt_logo_path, $no_episodedesc, $no_aggregatecat, $show_url, $save_json, $no_cryptedcplus, $no_cryptedpprem, - $no_htmltags,$opt_casting ); -##/patch + $no_htmltags, $opt_casting ); # debug if ($DEBUG_CMD) { print $0." 
| ".join(" | ", @ARGV), "\n\n"; } -$opt_per_weeks = 0; $opt_quiet = 0; + # The website is able to store at least 11 days from now my $default_opt_days = 11; @@ -392,8 +375,6 @@ GetOptions('days=i' => \$opt_days, 'config-file=s' => \$opt_config_file, 'gui:s' => \$opt_gui, 'list-channels' => \$opt_list_channels, - 'perdays' => \$opt_per_days, - 'perweeks' => \$opt_per_weeks, 'ch_prefix=s' => \$channel_prefix, 'ch_postfix=s' => \$channel_postfix, 'no_episodedesc' => \$no_episodedesc, @@ -454,11 +435,6 @@ XMLTV::Ask::init($opt_gui); $opt_offset = 0 if not defined $opt_offset; $opt_days = $default_opt_days if not defined $opt_days; -# Force the per days option in all cases -if ( $opt_per_weeks == 0 ) { - $opt_per_days = 1; -} - if ( (($opt_offset + $opt_days) > $default_opt_days) or ($opt_offset > $default_opt_days) ) { $opt_days = $default_opt_days - $opt_offset; if ($opt_days < 0) { @@ -513,10 +489,6 @@ if (-e "./logo-path.txt") { } } - - - - #*************************************************************************** # Sub sections #*************************************************************************** @@ -580,14 +552,10 @@ if ($mode eq 'configure') { XMLTV::Config_file::check_no_overwrite($config_file); open(CONF, '>:utf8', $config_file) or die "Cannot write to $config_file: $!"; - #my $bar = new XMLTV::ProgressBar('getting channel lists', scalar grep { $_ } @gtwant) if not $opt_quiet; - my %channels_for; my %channels = get_channels("configure.json"); die 'No channels could be found' if not %channels; - - + my %asked; - # Ask about each channel (unless already asked). my @chs = grep { not $asked{$_}++ } sort {$a <=> $b} keys %channels; my @names = map { $channels{$_}{name} } @chs; @@ -694,7 +662,6 @@ if ($mode eq 'confmorechannels') { # Check mode checking and get configuration file #*************************************************************************** die if $mode ne 'grab' and $mode ne 'list-channels'; -# debug_print( "my Mode : " . 
$mode ."\n"); my @config_lines; if ($mode eq 'grab') { @@ -712,9 +679,6 @@ if (defined $opt_output) { } $w_args{encoding} = 'UTF-8'; -#$w_args{days} = $opt_days; -#$w_args{offset} = $opt_offset; -#$w_args{cutoff} = "000000"; my $writer = new XMLTV::Writer(%w_args); $writer->start @@ -727,22 +691,8 @@ $writer->start #*************************************************************************** # List channels only case #*************************************************************************** -# debug_print( "my Mode : " . $mode ."\n"); if ($mode eq 'list-channels') { - # Get a list of available channels, according to the grid type - # my @gts = sort keys %GridType; - # my @gtnames = map { $GridType{$_} } @gts; - # my @gtqs = map { "List channels for grid : $_?" } @gts; - # my @gtwant = ask_many_boolean(1, @gtqs); - my %seen; - # debug_print( "Entering list-channels\n"); - # foreach (@gts) { - # debug_print( "In foreach\n"); - # my $gtw = shift @gtwant; - # my $gtname = shift @gtnames; - # if ($gtw) { - # say "Now getting grid : $_ \n"; my %channels = get_channels("list_chan.json"); die 'no channels could be found' if (scalar(keys(%channels)) == 0); foreach my $ch_did (sort(keys %channels)) { @@ -752,10 +702,8 @@ if ($mode eq 'list-channels') { 'icon' => [{src=> $channels{$ch_did}{icon} }] }) unless $seen{$ch_xid}++; } - # } - # } $writer->end(); - exit(); + exit; } #*************************************************************************** @@ -784,57 +732,52 @@ foreach (@config_lines) { } else { $chid_name = ''; } - $channels{$line_num} = {'chid'=>$chid, 'name'=>$chname, 'icon'=>$chicon, 'chid_name'=>$chid_name}; + $channels{$chid} = {'name'=>$chname, 'icon'=>$chicon, 'chid_name'=>$chid_name}; } else { warn "$config_file:$line_num: bad line $_\n"; } } - #*************************************************************************** # Now process the days by getting the main grids. 
#*************************************************************************** -my @to_get; warn "No working channels configured, so no listings\n" if not %channels; my $script_duration = time(); -# The website stores channel information by hour area for a whole week ! -my $ind; -foreach $ind (sort { $channels{$a}{chid} <=> $channels{$b}{chid} } keys %channels) { - my $chid = $channels{$ind}{chid}; +# loop on channels +foreach my $chid (sort { $a <=> $b } keys %channels) { my $url; my $i; my $dayoff; my $json_name = ""; my $chid_name = ""; - if($channels{$ind}{chid_name} ne '') { - $chid_name = $channels{$ind}{chid_name}; + if($channels{$chid}{chid_name} ne '') { + $chid_name = $channels{$chid}{chid_name}; } else { $chid_name = $channel_prefix.$chid.$channel_postfix; + $channels{$chid}{chid_name} = $channel_prefix.$chid.$channel_postfix; } - $writer->write_channel({ id => $chid_name, 'display-name' => [[$channels{$ind}{name}]], 'icon' => [{src=>$channels{$ind}{icon}}]}); - if ( $opt_per_days ) { - for ($i=$opt_offset; $i < $opt_offset+$opt_days; $i++ ) { - push @to_get, [ $chid, $i, $chid_name ]; - } - } else { - foreach (@offsets) { - push @to_get, [ $chid, $_, $chid_name ]; - } - } + $writer->write_channel({ id => $chid_name, 'display-name' => [[$channels{$chid}{name}]], 'icon' => [{src=>$channels{$chid}{icon}}]}); } +# count needed api calls +my $to_get = scalar(keys %channels) * ($opt_days - $opt_offset); +my $bar = new XMLTV::ProgressBar('getting listings', $to_get) if not $opt_quiet and not $show_url; -my $bar = new XMLTV::ProgressBar('getting listings', scalar @to_get) if not $opt_quiet; Date_Init('SetDate=now,UTC'); -# get genres + +# initialisation genres if (!@genres) { get_channels("categories.json"); } -foreach (@to_get) { - my ($chid, $offset, $chid_name) = @$_; - process_channel_grid_all_pages($writer, $chid, $offset, $chid_name); - update $bar if not $opt_quiet; + +# loop on days +for (my $offset=$opt_offset; $offset < $opt_offset+$opt_days; $offset++ 
) { + # loop on channels + foreach my $chid (sort { $a <=> $b } keys %channels) { + process_channel_grid_all_pages($chid, $offset); + update $bar if not $opt_quiet and not $show_url; + } } $writer->end(); @@ -955,9 +898,8 @@ sub get_channels( $ ) { return %channels; } -sub process_channel_grid_all_pages ( $$$$ ) { - # some channels need more than 1 page - my ($writer, $chid, $offset, $chid_name) = @_; +sub process_channel_grid_all_pages ( $$ ) { + my ($chid, $offset) = @_; my $dayoff = strftime("%Y-%m-%d", gmtime(time() + 3600 * 24 * $offset)); my ($jsname, $nb); my $page = 1; @@ -968,18 +910,18 @@ sub process_channel_grid_all_pages ( $$$$ ) { my $url = mkurl($CHANNEL_GRID_PAGE, \%params); if ($show_url) { print STDERR $url."\n"; } if ($save_json) { $jsname = mkjsonname("", \%params); } - $nb = process_channel_grid_page($writer, $chid, $url, $offset, $jsname, $chid_name, $dayoff, $page); + $nb = process_channel_grid_page($chid, $url, $jsname, $dayoff, $page); $page++; } until ($nb < $nb_par_page); } -sub process_channel_grid_page ( $$$$$$$$ ) { - my ($writer, $chid, $url, $slot, $jsname, $chid_name, $dayoff, $page) = @_; +sub process_channel_grid_page ( $$$$$ ) { + my ($chid, $url, $jsname, $dayoff, $page) = @_; my $json_hash = get_page_json( $url, $jsname ); # no data ? if(!defined($json_hash->{'donnees'})) { if($page == 1) { - if(!$opt_quiet) { print STDERR "Aucun programme pour la chaîne $chid $chid_name le $dayoff\n"; } + if(!$opt_quiet) { print STDERR "Aucun programme pour la chaîne ".$channels{$chid}{chid_name}." 
\"".$channels{$chid}{name}."\" le $dayoff\n"; } } return 0; } @@ -1049,7 +991,7 @@ sub process_channel_grid_page ( $$$$$$$$ ) { $startdate = utc_offset( $startdate, "+0100"); $enddate = utc_offset( $enddate , "+0100"); - my %prog = (channel => $chid_name, + my %prog = (channel => $channels{$chid}{chid_name}, title => [ [ $line->{'titre'} ] ], # lang unknown start => $startdate, stop => $enddate @@ -1267,34 +1209,17 @@ sub process_channel_grid_page ( $$$$$$$$ ) { push @{$prog{desc}}, [$description, $LANG ]; } - - - # if ($rating2) { - # push @{$prog{rating}}, [ "Tout public", "CSA", [] ]; - # } - + # CSA Icons if ($age == 0) { push @{$prog{rating}}, [ "Tout public", "CSA", [] ]; } else { push @{$prog{rating}}, [ $age, "CSA", [ {src => $icon}] ]; } - - - - - - # étoiles + # étoiles T $prog{'star-rating'} = [$stars."/5"] if ($stars); - #debug_print($title." - ".$startdate." - ".$enddate."\n"); - #debug_print("genre : ".$genretext." - ".$subgenre."\n"); - #debug_print("description : ".$description."\n"); - #debug_print("genre : ".$genretext."\n"); - #debug_print(Dumper(%prog)); - $writer->write_programme(\%prog); - } return $nb; } From 905d2d80486b50a96a65f5cae51f0a87624d16a0 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Thu, 13 Feb 2020 11:55:29 +0100 Subject: [PATCH 05/16] fix LMD headers --- tv_grab_fr_telerama | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 91e369c..38652b4 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -404,12 +404,12 @@ $XMLTV::Get_nice::ua = LWP::UserAgent->new( agent=>"okhttp/3.12.3" ); $XMLTV::Get_nice::ua->default_header( - 'lmd-systemname' => 'Android', - 'lmd-systemversion' => '8.1.0', - 'lmd-devicetype' => 'tablet', - 'lmd-devicemodel' => 'Samsung Galaxy', - 'lmd-bundleid' => 'com.telerama.fr', - 'lmd-appversion' => '3.6.4', + 'LMD-SystemName' => 'Android', + 'LMD-SystemVersion' => '8.1.0', + 'LMD-DeviceType' => 'tablet', + 'LMD-DeviceModel' 
=> 'samsung Galaxy Tab S6 Samsung (Android 8.1.0) API-27', + 'LMD-BundleId' => 'com.telerama.fr', + 'LMD-AppVersion' => '3.6.4', ); $XMLTV::Get_nice::ua->env_proxy; From 08fd4df4371633484632afccf2b18274a578ebca Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Thu, 13 Feb 2020 12:12:22 +0100 Subject: [PATCH 06/16] save pretty json instead of raw json --- tv_grab_fr_telerama | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 38652b4..9871293 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -405,9 +405,9 @@ $XMLTV::Get_nice::ua = LWP::UserAgent->new( ); $XMLTV::Get_nice::ua->default_header( 'LMD-SystemName' => 'Android', - 'LMD-SystemVersion' => '8.1.0', + 'LMD-SystemVersion' => '9.1.0', 'LMD-DeviceType' => 'tablet', - 'LMD-DeviceModel' => 'samsung Galaxy Tab S6 Samsung (Android 8.1.0) API-27', + 'LMD-DeviceModel' => 'samsung Galaxy Tab S6 Samsung (Android 9.1.0) API-28', 'LMD-BundleId' => 'com.telerama.fr', 'LMD-AppVersion' => '3.6.4', ); @@ -1249,16 +1249,17 @@ sub get_page_json( $$ ) { } } - if ($save_json) { - open (JSFIC, ">$jsname") || die ("Vous ne pouvez pas créer le fichier \"$jsname\""); - print JSFIC $content; - close (JSFIC); - } - eval { $json = JSON->new->utf8(1)->decode($content); } or do { die ("malformed json on $url : ".$Response->status_code."\n"); }; + + if ($save_json) { + open (JSFIC, ">$jsname") || die ("Vous ne pouvez pas créer le fichier \"$jsname\""); + print JSFIC JSON->new->pretty->encode($json); + close (JSFIC); + } + return $json; } From 5c1959653ecb628d0b52f94fd84d7780062e0108 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Thu, 13 Feb 2020 17:18:07 +0100 Subject: [PATCH 07/16] add stats, move critic and notule to casting api --- tv_grab_fr_telerama | 62 ++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 9871293..290a0d0 100755 --- 
a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -325,7 +325,7 @@ sub get_more_channel_icon( $ ); sub process_channel_grid_all_pages( $$ ); sub process_channel_grid_page( $$$$$ ); sub debug_print( @ ); -sub get_page_json( $$ ); +sub get_page_json( $$$ ); #*************************************************************************** # Main declarations @@ -336,7 +336,6 @@ my $Delay = 5; my $FailOnError = 1; my %errors = (); -my $last_get_time; my $channel_postfix = ".api.telerama.fr"; my $channel_prefix = "C"; @@ -741,7 +740,8 @@ foreach (@config_lines) { # Now process the days by getting the main grids. #*************************************************************************** warn "No working channels configured, so no listings\n" if not %channels; -my $script_duration = time(); +my %stats; +$stats{start} = time(); # loop on channels foreach my $chid (sort { $a <=> $b } keys %channels) { @@ -784,8 +784,13 @@ $writer->end(); $bar->finish() if not $opt_quiet; # Print the duration -$script_duration = time() - $script_duration; -print STDERR "Grabber process finished in " . $script_duration . " seconds.\n" if not $opt_quiet; +if (!$opt_quiet) { + my @apis; + foreach my $api(sort { $stats{api}{$b} <=> $stats{api}{$a} } keys %{$stats{api}}) { + if($api ne 'total') { push @apis, "api_".$api." ".$stats{api}{$api}; } + } + print STDERR "Grabber process finished in ".(time() - $stats{start})." seconds, ".$stats{api}{total}." 
api calls : ".join(', ',@apis)."\n"; +} #*************************************************************************** # Specific functions for grabbing information @@ -869,7 +874,7 @@ sub get_channels( $ ) { print STDERR $my_url."\n"; } - my $json_hash = get_page_json($my_url, $jsname); + my $json_hash = get_page_json('init',$my_url, $jsname); my $chicon = ""; my @lines = @{ $json_hash->{'donnees'}{'chaines'} }; @@ -917,7 +922,7 @@ sub process_channel_grid_all_pages ( $$ ) { sub process_channel_grid_page ( $$$$$ ) { my ($chid, $url, $jsname, $dayoff, $page) = @_; - my $json_hash = get_page_json( $url, $jsname ); + my $json_hash = get_page_json('grille', $url, $jsname); # no data ? if(!defined($json_hash->{'donnees'})) { if($page == 1) { @@ -967,17 +972,6 @@ sub process_channel_grid_page ( $$$$$ ) { $description = trim($line->{'resume'}) if ($line->{'resume'}) ; my $genre = $line->{'id_genre'} ? $line->{'id_genre'} : 0; my $genretext = $genre ? $genres[$genre] : ""; - my $critic = ""; - # debug_print("possede_critique : ".$line->{'possede_critique'}."\n"); - # debug_print("possede_notule : ".$line->{'possede_notule'}."\n"); - - if ($line->{'possede_critique'} == 1) { - $critic = trim($line->{'critique'}) if ($line->{'critique'}) ; - # debug_print("critique : ".$critic."\n"); - } elsif ($line->{'possede_notule'} == 1) { - $critic = trim($line->{'notule'}) if ($line->{'notule'}) ; - # debug_print("notule : ".$critic."\n"); - } my $stars = $line->{'note_telerama'}; my $chname = $channelnames[$chid]; @@ -1068,13 +1062,16 @@ sub process_channel_grid_page ( $$$$$ ) { $genretext = "Théâtre"; } } + + my $critic = ""; # if casting is enable, we need another api call if($opt_casting && (!$subgenre || $subgenre !~ /(animation|réaliste|jeunesse|téléréalité|sentimental|burlesque)$/i) && ($genretext =~ /^(Film|Série|Feuilleton|Téléfilm|Théâtre)$/i)) { - my $jsname_programme = ''; - my $url_programme = mkurl($CHANNEL_PROGRAMME_PAGE.$line->{'id_programme'}, {} ); - if ($show_url) { 
print STDERR $url_programme."\n"; } - if ($save_json) { $jsname_programme = mkjsonname("", { "id_programme" => $line->{'id_programme'}}); } - my $json_hash_programme = get_page_json( $url_programme, $jsname_programme ); + my $jsname_programme = ''; + my $url_programme = mkurl($CHANNEL_PROGRAMME_PAGE.$line->{'id_programme'}, {} ); + if ($genretext eq 'Film') { print $line->{'id_programme'}." ".$line->{'titre'}."\n"; } + if ($show_url) { print STDERR $url_programme."\n"; } + if ($save_json) { $jsname_programme = mkjsonname("", { "id_programme" => $line->{'id_programme'}}); } + my $json_hash_programme = get_page_json('casting', $url_programme, $jsname_programme); #print Dumper($json_hash_programme->{'donnees'}[0]->{intervenants})."\n"; my @cast = @{ $json_hash_programme->{'donnees'}[0]->{intervenants} } if ($json_hash_programme->{'donnees'} && $json_hash_programme->{'donnees'}[0]->{intervenants} ); #if(scalar(@cast) == 0) { print $genretext." ".$subgenre."\n"; } @@ -1094,7 +1091,7 @@ sub process_channel_grid_page ( $$$$$ ) { push @{$prog{credits}{presenter}}, $cname; } elsif ($ctype =~ m/Musique/) { push @{$prog{credits}{composer}}, $cname; - } elsif ($ctype =~ m/Cr.+ateur/ || $ctype =~ m/Auteur/ || $ctype =~ m/Sc.+nariste/ || $ctype =~ m/Sc.+nario/ || $ctype =~ m/Dialogue/) { + } elsif ($ctype =~ m/Cr.+ateur/ || $ctype =~ m/Auteur/ || $ctype =~ m/Sc.+nariste/ || $ctype =~ m/Sc.+nario/ || $ctype =~ m/Dialogue/) { push @{$prog{credits}{writer}}, $cname; } else { if ($roleid eq "1") { @@ -1110,6 +1107,11 @@ sub process_channel_grid_page ( $$$$$ ) { } } } + if ($line->{'possede_critique'} == 1) { + $critic = trim($json_hash_programme->{'donnees'}[0]->{critique}) if($json_hash_programme->{'donnees'}[0]->{critique}) ; + } elsif ($line->{'possede_notule'} == 1) { + $critic = trim($json_hash_programme->{'donnees'}[0]->{notule}) if ($json_hash_programme->{'donnees'}[0]->{notule}) ; + } } $prog{'date'} = $line->{'annee_realisation'} if ($line->{'annee_realisation'}); @@ 
-1193,7 +1195,7 @@ sub process_channel_grid_page ( $$$$$ ) { if ( $description ne "" ) { if (!$critic) { - ; # Nothing to do + ;# Nothing to do } elsif ($critic eq $description) { $description = "Critique : ".$critic; } else { @@ -1205,7 +1207,7 @@ sub process_channel_grid_page ( $$$$$ ) { $description =~ s/<\/?[^>]+>//g; } $description =~ s/\r//g; - + print $no_htmltags.":".$description."\n"; push @{$prog{desc}}, [$description, $LANG ]; } @@ -1226,9 +1228,11 @@ sub process_channel_grid_page ( $$$$$ ) { # use XMLTV::get_nice # with our error_handling -sub get_page_json( $$ ) { - my $url = shift; - my $jsname = shift ; +sub get_page_json( $$$ ) { + my ($api_name, $url, $jsname) = @_; + # stats on api calls + $stats{api}{total}++; + $stats{api}{$api_name}++; my $content = get_nice($url); my $json; From 2b1c2335fec27c7385d3467ee5eb9c023beadfb2 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Thu, 13 Feb 2020 18:05:16 +0100 Subject: [PATCH 08/16] add casting cache on id_emission --- tv_grab_fr_telerama | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 290a0d0..c658e3f 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -771,6 +771,10 @@ if (!@genres) { get_channels("categories.json"); } +# cache emission to reduce casting/programme api call +my %emissions; +my $emissions_cache = 1; + # loop on days for (my $offset=$opt_offset; $offset < $opt_offset+$opt_days; $offset++ ) { # loop on channels @@ -967,22 +971,16 @@ sub process_channel_grid_page ( $$$$$ ) { my $startdate = $line->{'horaire'}{'debut'}; my $enddate = $line->{'horaire'}{'fin'}; + $startdate =~ tr/:\$\ -//d; + $enddate =~ tr/:\$\ -//d; + my $title = $line->{'titre'}; my $description = ""; $description = trim($line->{'resume'}) if ($line->{'resume'}) ; - my $genre = $line->{'id_genre'} ? $line->{'id_genre'} : 0; - my $genretext = $genre ? 
$genres[$genre] : ""; - my $stars = $line->{'note_telerama'}; my $chname = $channelnames[$chid]; - - my $imgurl= $line->{'vignette'}{'grande'}; - - $startdate =~ tr/:\$\ -//d; - $enddate =~ tr/:\$\ -//d; - $startdate = utc_offset( $startdate, "+0100"); $enddate = utc_offset( $enddate , "+0100"); my %prog = (channel => $channels{$chid}{chid_name}, @@ -990,8 +988,6 @@ sub process_channel_grid_page ( $$$$$ ) { start => $startdate, stop => $enddate ); - #debug_print($start.">".$stop."\n"); - ##### my $crypted = 0; if ( $no_cryptedcplus && $channelnames[$chid] =~ m/Canal+/ ) { @@ -1000,7 +996,9 @@ sub process_channel_grid_page ( $$$$$ ) { if ( $no_cryptedpprem && $channelnames[$chid] =~ m/Paris Première/ ) { $crypted = 1; } - #### + + my $genre = $line->{'id_genre'} ? $line->{'id_genre'} : 0; + my $genretext = $genre ? $genres[$genre] : ""; my $subgenre = $line->{'genre_specifique'}; my $episode = $line->{'serie'}{'numero_episode'}; my $season = $line->{'serie'}{'saison'}; @@ -1068,10 +1066,20 @@ sub process_channel_grid_page ( $$$$$ ) { if($opt_casting && (!$subgenre || $subgenre !~ /(animation|réaliste|jeunesse|téléréalité|sentimental|burlesque)$/i) && ($genretext =~ /^(Film|Série|Feuilleton|Téléfilm|Théâtre)$/i)) { my $jsname_programme = ''; my $url_programme = mkurl($CHANNEL_PROGRAMME_PAGE.$line->{'id_programme'}, {} ); - if ($genretext eq 'Film') { print $line->{'id_programme'}." ".$line->{'titre'}."\n"; } + #if ($genretext eq 'Film') { print $line->{'id_programme'}." 
".$line->{'titre'}."\n"; } if ($show_url) { print STDERR $url_programme."\n"; } if ($save_json) { $jsname_programme = mkjsonname("", { "id_programme" => $line->{'id_programme'}}); } - my $json_hash_programme = get_page_json('casting', $url_programme, $jsname_programme); + my $json_hash_programme; + # try to use emissions cache + if(exists $emissions{$line->{id_emission}} && $emissions_cache) { + #use cache + $stats{api}{cache}++; + #print "use cache for ".$line->{titre}.", id_programme ".$line->{id_programme}.", id_emmission ".$line->{id_emission}."\n"; + $json_hash_programme = $emissions{$line->{id_emission}}; + } else { + $json_hash_programme = get_page_json('casting', $url_programme, $jsname_programme); + $emissions{$line->{id_emission}} = $json_hash_programme if $emissions_cache; + } #print Dumper($json_hash_programme->{'donnees'}[0]->{intervenants})."\n"; my @cast = @{ $json_hash_programme->{'donnees'}[0]->{intervenants} } if ($json_hash_programme->{'donnees'} && $json_hash_programme->{'donnees'}[0]->{intervenants} ); #if(scalar(@cast) == 0) { print $genretext." 
".$subgenre."\n"; } @@ -1207,7 +1215,6 @@ sub process_channel_grid_page ( $$$$$ ) { $description =~ s/<\/?[^>]+>//g; } $description =~ s/\r//g; - print $no_htmltags.":".$description."\n"; push @{$prog{desc}}, [$description, $LANG ]; } @@ -1219,7 +1226,7 @@ sub process_channel_grid_page ( $$$$$ ) { } # étoiles T - $prog{'star-rating'} = [$stars."/5"] if ($stars); + $prog{'star-rating'} = [$line->{'note_telerama'}."/5"] if ($line->{'note_telerama'}); $writer->write_programme(\%prog); } From 7f5c3ef8d3b24ca5cd51c0e95e80801679bc72d3 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Fri, 14 Feb 2020 12:19:30 +0100 Subject: [PATCH 09/16] group channels to make less api calls --- tv_grab_fr_telerama | 118 ++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 69 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index c658e3f..1cae505 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -318,14 +318,14 @@ use Digest::HMAC_SHA1 qw(hmac_sha1_hex); use open ':std', ':encoding(UTF-8)'; # subs -sub get_channels( $ ); -sub return_other_channels( ); +sub get_channels($); +sub return_other_channels(); sub build_other_channel_filename(); -sub get_more_channel_icon( $ ); -sub process_channel_grid_all_pages( $$ ); -sub process_channel_grid_page( $$$$$ ); -sub debug_print( @ ); -sub get_page_json( $$$ ); +sub get_more_channel_icon($); +sub grab_day($); +sub grab_day_channel($$$$$$); +sub debug_print(@); +sub get_page_json($$$); #*************************************************************************** # Main declarations @@ -334,6 +334,7 @@ my $LANG = "fr"; # delay between api requests in seconds my $Delay = 5; my $FailOnError = 1; +my $group_size = 32; my %errors = (); my $channel_postfix = ".api.telerama.fr"; @@ -761,7 +762,8 @@ foreach my $chid (sort { $a <=> $b } keys %channels) { } # count needed api calls -my $to_get = scalar(keys %channels) * ($opt_days - $opt_offset); +my $nb_chans = scalar(keys %channels); +my $to_get = 
int($nb_chans / $group_size + 1) * ($opt_days - $opt_offset); my $bar = new XMLTV::ProgressBar('getting listings', $to_get) if not $opt_quiet and not $show_url; Date_Init('SetDate=now,UTC'); @@ -777,15 +779,11 @@ my $emissions_cache = 1; # loop on days for (my $offset=$opt_offset; $offset < $opt_offset+$opt_days; $offset++ ) { - # loop on channels - foreach my $chid (sort { $a <=> $b } keys %channels) { - process_channel_grid_all_pages($chid, $offset); - update $bar if not $opt_quiet and not $show_url; - } + grab_day($offset); } $writer->end(); -$bar->finish() if not $opt_quiet; +$bar->finish() if not $opt_quiet and not $show_url; # Print the duration if (!$opt_quiet) { @@ -907,69 +905,51 @@ sub get_channels( $ ) { return %channels; } -sub process_channel_grid_all_pages ( $$ ) { - my ($chid, $offset) = @_; +sub grab_day ($) { + my $offset = shift; + my $dayoff = strftime("%Y-%m-%d", gmtime(time() + 3600 * 24 * $offset)); my ($jsname, $nb); my $page = 1; - # appli api trace use 3200 for nb_par_page my $nb_par_page = 3200; + # loop on group chans + my $g_deb = 0; + my $g_end = ($group_size > $nb_chans-1) ? 
$nb_chans-1 : $group_size - 1; do { - my %params = ( 'date' => $dayoff, 'id_chaines' => $chid, 'nb_par_page' => $nb_par_page, 'page' => $page ); - my $url = mkurl($CHANNEL_GRID_PAGE, \%params); - if ($show_url) { print STDERR $url."\n"; } - if ($save_json) { $jsname = mkjsonname("", \%params); } - $nb = process_channel_grid_page($chid, $url, $jsname, $dayoff, $page); - $page++; - } until ($nb < $nb_par_page); + my @chids = (sort { $a <=> $b } keys %channels) [$g_deb..$g_end]; + my $chans = join(',',sort { $a <=> $b } @chids); + my $page = 1; + # loop on page if needed + my $nb = 0; + do { + my %params = ( 'date' => $dayoff, 'id_chaines' => $chans, 'nb_par_page' => $nb_par_page, 'page' => $page ); + my $url = mkurl($CHANNEL_GRID_PAGE, \%params); + if ($show_url) { print STDERR $url."\n"; } + if ($save_json) { $jsname = mkjsonname("", \%params); } + my $json = get_page_json('grille', $url, $jsname); + $nb = $json->{'pagination'}{'nb_sur_page'}; + update $bar if not $opt_quiet and not $show_url; + # loop on chid + foreach my $chid (sort { $a <=> $b } @chids) { + my @progs = grep { $_->{id_chaine} == $chid } @{$json->{'donnees'}}; + grab_day_channel($chid, $url, $jsname, $dayoff, $page, \@progs); + } + $page++; + } until ($nb < $nb_par_page); + # next group of chans + $g_deb += $group_size; + $g_end += $group_size; + if($g_end > $nb_chans-1) { $g_end = $nb_chans-1; } + } until ($g_end == $nb_chans-1) } -sub process_channel_grid_page ( $$$$$ ) { - my ($chid, $url, $jsname, $dayoff, $page) = @_; - my $json_hash = get_page_json('grille', $url, $jsname); +sub grab_day_channel($$$$$$) { + my ($chid, $url, $jsname, $dayoff, $page, $progs) = @_; # no data ? - if(!defined($json_hash->{'donnees'})) { - if($page == 1) { - if(!$opt_quiet) { print STDERR "Aucun programme pour la chaîne ".$channels{$chid}{chid_name}." 
\"".$channels{$chid}{name}."\" le $dayoff\n"; } - } - return 0; - } - - # flag overlapping - # check all, starting at end - my $nb = scalar @{ $json_hash->{'donnees'} }; - my $start = 0; - $json_hash->{'donnees'}[$nb-1]{'overlap'} = 0; - for (my $i=$nb-1;$i>=1;$i--) { - my $stop_prev = $json_hash->{'donnees'}[$i-1]{'horaire'}{'fin'}; - $stop_prev =~ tr/:\$\ -//d; - if($json_hash->{'donnees'}[$i]{'overlap'} != 1) { - $start = $json_hash->{'donnees'}[$i]{'horaire'}{'debut'}; - $start =~ tr/:\$\ -//d; - } - if($stop_prev > $start) { - $json_hash->{'donnees'}[$i-1]{'overlap'} = 1; - } else { - $json_hash->{'donnees'}[$i-1]{'overlap'} = 0; - } - } - # if last programme ends >= 06:00:00 flag it - # to avoid duplicate between day and day+1 - if($json_hash->{'donnees'}[$nb-1]{'horaire'}{'fin'} =~ m/(\d+):\d+:\d+$/) { - if( $1 >= 6) { $json_hash->{'donnees'}[$nb-1]{'overlap'} = 1; } - } - - my @lines = @{ $json_hash->{'donnees'} }; - foreach my $line ( @lines ) { - # debug_print "Found line : " . $line . 
"\n"; - # print STDERR Dumper($line); - - # skip overlaping and duplicate - if($line->{'overlap'}) { next;} - - $chid = $line->{'id_chaine'}; - + my @lines = @$progs; + foreach my $line ( sort { $a->{'horaire'}{'debut'} cmp $b->{'horaire'}{'debut'} } @lines ) { my $startdate = $line->{'horaire'}{'debut'}; + #print $startdate."\n"; my $enddate = $line->{'horaire'}{'fin'}; $startdate =~ tr/:\$\ -//d; $enddate =~ tr/:\$\ -//d; @@ -1230,7 +1210,7 @@ sub process_channel_grid_page ( $$$$$ ) { $writer->write_programme(\%prog); } - return $nb; + return; } # use XMLTV::get_nice From 079467b4c5b5c64d357c13e05a00e2bb8ea46dc3 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Fri, 14 Feb 2020 12:33:23 +0100 Subject: [PATCH 10/16] group channels to make less api calls --- tv_grab_fr_telerama | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 1cae505..99740f0 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -940,7 +940,7 @@ sub grab_day ($) { $g_deb += $group_size; $g_end += $group_size; if($g_end > $nb_chans-1) { $g_end = $nb_chans-1; } - } until ($g_end == $nb_chans-1) + } until ($g_deb > $nb_chans-1) } sub grab_day_channel($$$$$$) { From 60d686aafbef59899c2aca33ffde40c113d8f722 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Fri, 14 Feb 2020 13:07:58 +0100 Subject: [PATCH 11/16] group channels to make less api calls --- tv_grab_fr_telerama | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 99740f0..f7d5b44 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -1046,22 +1046,19 @@ sub grab_day_channel($$$$$$) { if($opt_casting && (!$subgenre || $subgenre !~ /(animation|réaliste|jeunesse|téléréalité|sentimental|burlesque)$/i) && ($genretext =~ /^(Film|Série|Feuilleton|Téléfilm|Théâtre)$/i)) { my $jsname_programme = ''; my $url_programme = mkurl($CHANNEL_PROGRAMME_PAGE.$line->{'id_programme'}, {} ); - #if 
($genretext eq 'Film') { print $line->{'id_programme'}." ".$line->{'titre'}."\n"; } if ($show_url) { print STDERR $url_programme."\n"; } if ($save_json) { $jsname_programme = mkjsonname("", { "id_programme" => $line->{'id_programme'}}); } - my $json_hash_programme; + my $json_p; # try to use emissions cache if(exists $emissions{$line->{id_emission}} && $emissions_cache) { #use cache - $stats{api}{cache}++; - #print "use cache for ".$line->{titre}.", id_programme ".$line->{id_programme}.", id_emmission ".$line->{id_emission}."\n"; - $json_hash_programme = $emissions{$line->{id_emission}}; + $stats{cache_emission}++; + $json_p = $emissions{$line->{id_emission}}; } else { - $json_hash_programme = get_page_json('casting', $url_programme, $jsname_programme); - $emissions{$line->{id_emission}} = $json_hash_programme if $emissions_cache; + $json_p = get_page_json('casting', $url_programme, $jsname_programme); + $emissions{$line->{id_emission}} = $json_p if $emissions_cache; } - #print Dumper($json_hash_programme->{'donnees'}[0]->{intervenants})."\n"; - my @cast = @{ $json_hash_programme->{'donnees'}[0]->{intervenants} } if ($json_hash_programme->{'donnees'} && $json_hash_programme->{'donnees'}[0]->{intervenants} ); + my @cast = @{ $json_p->{donnees}[0]->{intervenants} } if ($json_p->{donnees} && $json_p->{donnees}[0]->{intervenants} ); #if(scalar(@cast) == 0) { print $genretext." 
".$subgenre."\n"; } foreach my $people (@cast) { my $ctype = $people->{'libelle'}; @@ -1096,9 +1093,9 @@ sub grab_day_channel($$$$$$) { } } if ($line->{'possede_critique'} == 1) { - $critic = trim($json_hash_programme->{'donnees'}[0]->{critique}) if($json_hash_programme->{'donnees'}[0]->{critique}) ; + $critic = trim($json_p->{'donnees'}[0]->{critique}) if($json_p->{'donnees'}[0]->{critique}) ; } elsif ($line->{'possede_notule'} == 1) { - $critic = trim($json_hash_programme->{'donnees'}[0]->{notule}) if ($json_hash_programme->{'donnees'}[0]->{notule}) ; + $critic = trim($json_p->{'donnees'}[0]->{notule}) if ($json_p->{'donnees'}[0]->{notule}) ; } } From 184264400677a116889be7f35e5cd96c8406c5e5 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Fri, 14 Feb 2020 13:39:52 +0100 Subject: [PATCH 12/16] fix missing title --- tv_grab_fr_telerama | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index f7d5b44..81a36a6 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -923,9 +923,10 @@ sub grab_day ($) { my $nb = 0; do { my %params = ( 'date' => $dayoff, 'id_chaines' => $chans, 'nb_par_page' => $nb_par_page, 'page' => $page ); + my %params_jsname = ( 'date' => $dayoff =~ s/-//gr, 'id_chaines' => $chans =~ s/,/_/gr, 'page' => $page ); my $url = mkurl($CHANNEL_GRID_PAGE, \%params); if ($show_url) { print STDERR $url."\n"; } - if ($save_json) { $jsname = mkjsonname("", \%params); } + if ($save_json) { $jsname = mkjsonname("", \%params_jsname); } my $json = get_page_json('grille', $url, $jsname); $nb = $json->{'pagination'}{'nb_sur_page'}; update $bar if not $opt_quiet and not $show_url; @@ -949,12 +950,13 @@ sub grab_day_channel($$$$$$) { my @lines = @$progs; foreach my $line ( sort { $a->{'horaire'}{'debut'} cmp $b->{'horaire'}{'debut'} } @lines ) { my $startdate = $line->{'horaire'}{'debut'}; - #print $startdate."\n"; my $enddate = $line->{'horaire'}{'fin'}; $startdate =~ tr/:\$\ -//d; $enddate =~ 
tr/:\$\ -//d; my $title = $line->{'titre'}; + if($title eq '' && $line->{'soustitre'}) { $title = $line->{'soustitre'} } else { $title = "sans titre"; } + my $description = ""; $description = trim($line->{'resume'}) if ($line->{'resume'}) ; @@ -964,7 +966,7 @@ sub grab_day_channel($$$$$$) { $startdate = utc_offset( $startdate, "+0100"); $enddate = utc_offset( $enddate , "+0100"); my %prog = (channel => $channels{$chid}{chid_name}, - title => [ [ $line->{'titre'} ] ], # lang unknown + title => [ [ $title ] ], # lang unknown start => $startdate, stop => $enddate ); @@ -1102,7 +1104,6 @@ sub grab_day_channel($$$$$$) { $prog{'date'} = $line->{'annee_realisation'} if ($line->{'annee_realisation'}); $prog{country} = [[$line->{'libelle_nationalite'}]] if ($line->{'libelle_nationalite'}); $prog{'audio'}{stereo} = "bilingual" if ($line->{'flags'}{'est_vm'}); - #$prog{title_orig} = $line->{'titre_original'} if ($line->{'titre_original'}); $crypted = 0 if ($line->{'flags'}{'est_clair'}); $prog{subtitles} = [ { type => 'onscreen', language => ['fr'] } ] if ($line->{'flags'}{'est_vost'}); From cdf708ad64b14cb1b4ac05e52b6acc0f3dec719f Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Fri, 14 Feb 2020 13:59:21 +0100 Subject: [PATCH 13/16] add warning when there is no shows --- tv_grab_fr_telerama | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index 81a36a6..e4e17c2 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -933,6 +933,11 @@ sub grab_day ($) { # loop on chid foreach my $chid (sort { $a <=> $b } @chids) { my @progs = grep { $_->{id_chaine} == $chid } @{$json->{'donnees'}}; + if(scalar @progs) { + grab_day_channel($chid, $url, $jsname, $dayoff, $page, \@progs); + } elsif(!$opt_quiet) { + print STDERR "Aucun programme pour la chaîne $chid ".$channels{$chid}{chid_name}." 
le $dayoff\n"; + } grab_day_channel($chid, $url, $jsname, $dayoff, $page, \@progs); } $page++; From 207386f1ac9dc9b21fce00a5c09cc8c395fc7cbf Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Fri, 14 Feb 2020 15:49:23 +0100 Subject: [PATCH 14/16] better stats, canonical json files --- tv_grab_fr_telerama | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index e4e17c2..bab7b9a 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -776,6 +776,7 @@ if (!@genres) { # cache emission to reduce casting/programme api call my %emissions; my $emissions_cache = 1; +$stats{cache_casting} = 0; # loop on days for (my $offset=$opt_offset; $offset < $opt_offset+$opt_days; $offset++ ) { @@ -789,9 +790,9 @@ $bar->finish() if not $opt_quiet and not $show_url; if (!$opt_quiet) { my @apis; foreach my $api(sort { $stats{api}{$b} <=> $stats{api}{$a} } keys %{$stats{api}}) { - if($api ne 'total') { push @apis, "api_".$api." ".$stats{api}{$api}; } + if($api ne 'total') { push @apis, $stats{api}{$api}." api_".$api; } } - print STDERR "Grabber process finished in ".(time() - $stats{start})." seconds, ".$stats{api}{total}." api calls : ".join(', ',@apis)."\n"; + print STDERR "Grabber process finished in ".(time() - $stats{start})." seconds for ".$nb_chans." chans, ".$stats{api}{total}." api calls : ".join(', ',@apis).", ".$stats{cache_casting}." cached api_casting\n"; } #*************************************************************************** @@ -936,7 +937,7 @@ sub grab_day ($) { if(scalar @progs) { grab_day_channel($chid, $url, $jsname, $dayoff, $page, \@progs); } elsif(!$opt_quiet) { - print STDERR "Aucun programme pour la chaîne $chid ".$channels{$chid}{chid_name}." le $dayoff\n"; + print STDERR "Aucun programme pour la chaîne $chid ".$channels{$chid}{name}." 
le $dayoff\n"; } grab_day_channel($chid, $url, $jsname, $dayoff, $page, \@progs); } @@ -1059,7 +1060,7 @@ sub grab_day_channel($$$$$$) { # try to use emissions cache if(exists $emissions{$line->{id_emission}} && $emissions_cache) { #use cache - $stats{cache_emission}++; + $stats{cache_casting}++; $json_p = $emissions{$line->{id_emission}}; } else { $json_p = get_page_json('casting', $url_programme, $jsname_programme); @@ -1251,7 +1252,7 @@ sub get_page_json( $$$ ) { if ($save_json) { open (JSFIC, ">$jsname") || die ("Vous ne pouvez pas créer le fichier \"$jsname\""); - print JSFIC JSON->new->pretty->encode($json); + print JSFIC JSON->new->pretty->canonical->encode($json); close (JSFIC); } From e845c553e009f7f79919579187dec06eeba2c718 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Fri, 14 Feb 2020 16:06:17 +0100 Subject: [PATCH 15/16] remove obsolete comment --- tv_grab_fr_telerama | 1 - 1 file changed, 1 deletion(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index bab7b9a..a9ce50d 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -952,7 +952,6 @@ sub grab_day ($) { sub grab_day_channel($$$$$$) { my ($chid, $url, $jsname, $dayoff, $page, $progs) = @_; - # no data ? my @lines = @$progs; foreach my $line ( sort { $a->{'horaire'}{'debut'} cmp $b->{'horaire'}{'debut'} } @lines ) { my $startdate = $line->{'horaire'}{'debut'}; From ae7ba74cf8b65d89ae67c3fca2dabd581ea09c25 Mon Sep 17 00:00:00 2001 From: Laurent Haond Date: Fri, 14 Feb 2020 20:27:11 +0100 Subject: [PATCH 16/16] fix title, add option --no-optim --- tv_grab_fr_telerama | 85 +++++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 27 deletions(-) diff --git a/tv_grab_fr_telerama b/tv_grab_fr_telerama index a9ce50d..3a1d7ec 100755 --- a/tv_grab_fr_telerama +++ b/tv_grab_fr_telerama @@ -19,7 +19,7 @@ tv_grab_fr_telerama - Grab TV listings for France. 
[--no_episodedesc] [--no_aggregatecat] [--show_url] [--save_json] [--no_cryptedcplus] [--no_cryptedpprem] - [--casting] + [--casting] [--no-optim] To show capabilities: tv_grab_fr --capabilities To show version: @@ -264,6 +264,11 @@ along with this program. If not, see . 1.42 Fix params/page +2.0 Nettoyage, Réécriture et optimisations pour avoir moins d'appels d'api + * un appel d'api grille pour 32 chaînes à la fois (32x plus rapide si vous n'utilisez pas --casting) + * cache pour le casting car les programmes_id qui ont le même emission_id sont la même émission (2 à 3 fois plus rapide si vous utilisez --casting) + * Les critiques et notules sont récupérées avec le casting + * Quand il n'y a pas de titre on prend soustitre. =cut use XMLTV::Usage < \$opt_days, 'no_cryptedpprem' => \$no_cryptedpprem, 'no_htmltags' => \$no_htmltags, 'casting' => \$opt_casting, + 'no-optim' => \$opt_no_optim, ##Gestion des channels non declares dans les listes "officielles" 'configure-more-channels' => \$opt_morechannels, 'delay=i' => \$Delay ) or usage(0); -my $CHANNEL_GRID; -my $CHANNEL_GRID_PAGE; -my $CHANNEL_PROGRAMME_PAGE; -my $ROOT_URL; +my $CHANNEL_GRID = '/v1/application/initialisation'; +my $CHANNEL_GRID_PAGE = '/v1/programmes/grille'; +my $CHANNEL_PROGRAMME_PAGE = '/v1/programmes/'; +my $ROOT_URL = 'https://api.telerama.fr'; + +my %stats; +my $group_size = 32; +my $emissions_cache = 1; +if($opt_no_optim) { + $group_size = 1; + $emissions_cache = 0; +} +$stats{cache_casting} = 0; # use keep-alive to avoid useless ssl handshakes ! 
$XMLTV::Get_nice::ua = LWP::UserAgent->new( @@ -417,11 +432,6 @@ $XMLTV::Get_nice::Delay = $Delay; # tell XMLTV::Get_nice, we handle error our self $XMLTV::Get_nice::FailOnError = 0; -$CHANNEL_GRID = '/v1/application/initialisation'; -$CHANNEL_GRID_PAGE = '/v1/programmes/grille'; -$CHANNEL_PROGRAMME_PAGE = '/v1/programmes/'; -$ROOT_URL = 'https://api.telerama.fr'; - #*************************************************************************** # Options processing, warnings, checks and default parameters #*************************************************************************** @@ -481,11 +491,8 @@ if (-e "./logo-path.txt") { close(FILE); foreach $chan (@data) { - # print STDERR "Ligne Logo : " . $chan . "\n"; @name_url = split('\|', $chan); - # $xmltvid_map{$name_url[0]} = $name_url[1]; $icon_map{$name_url[0]} = $name_url[2]; - # print $name_url[0]."|".$name_url[2]."\n"; } } @@ -741,7 +748,6 @@ foreach (@config_lines) { # Now process the days by getting the main grids. #*************************************************************************** warn "No working channels configured, so no listings\n" if not %channels; -my %stats; $stats{start} = time(); # loop on channels @@ -775,9 +781,6 @@ if (!@genres) { # cache emission to reduce casting/programme api call my %emissions; -my $emissions_cache = 1; -$stats{cache_casting} = 0; - # loop on days for (my $offset=$opt_offset; $offset < $opt_offset+$opt_days; $offset++ ) { grab_day($offset); @@ -929,7 +932,7 @@ sub grab_day ($) { if ($show_url) { print STDERR $url."\n"; } if ($save_json) { $jsname = mkjsonname("", \%params_jsname); } my $json = get_page_json('grille', $url, $jsname); - $nb = $json->{'pagination'}{'nb_sur_page'}; + $nb = $json->{'pagination'}{'nb_sur_page'} if (defined $json->{'pagination'}{'nb_sur_page'}); update $bar if not $opt_quiet and not $show_url; # loop on chid foreach my $chid (sort { $a <=> $b } @chids) { @@ -939,7 +942,6 @@ sub grab_day ($) { } elsif(!$opt_quiet) { print STDERR "Aucun 
programme pour la chaîne $chid ".$channels{$chid}{name}." le $dayoff\n"; } - grab_day_channel($chid, $url, $jsname, $dayoff, $page, \@progs); } $page++; } until ($nb < $nb_par_page); @@ -952,16 +954,45 @@ sub grab_day ($) { sub grab_day_channel($$$$$$) { my ($chid, $url, $jsname, $dayoff, $page, $progs) = @_; - my @lines = @$progs; - foreach my $line ( sort { $a->{'horaire'}{'debut'} cmp $b->{'horaire'}{'debut'} } @lines ) { + my @lines = sort { $a->{'horaire'}{'debut'} cmp $b->{'horaire'}{'debut'} } @$progs; + + # flag overlapping + # check all, starting at end + my $nb = scalar @lines; + my $start = 0; + $lines[$nb-1]{'overlap'} = 0; + for (my $i=$nb-1;$i>=1;$i--) { + my $stop_prev = $lines[$i-1]{'horaire'}{'fin'}; + $stop_prev =~ tr/:\$\ -//d; + if($lines[$i]{'overlap'} != 1) { + $start = $lines[$i]{'horaire'}{'debut'}; + $start =~ tr/:\$\ -//d; + } + if($stop_prev > $start) { + $lines[$i-1]{'overlap'} = 1; + } else { + $lines[$i-1]{'overlap'} = 0; + } + } + # if last programme ends >= 06:00:00 flag it + # to avoid duplicate between day and day+1 + if($lines[$nb-1]{'horaire'}{'fin'} =~ m/(\d+):\d+:\d+$/) { + if( $1 >= 6) { $lines[$nb-1]{'overlap'} = 1; } + } + + foreach my $line (@lines) { + # skip overlaping and duplicate + if($line->{'overlap'}) { next;} + my $startdate = $line->{'horaire'}{'debut'}; my $enddate = $line->{'horaire'}{'fin'}; $startdate =~ tr/:\$\ -//d; $enddate =~ tr/:\$\ -//d; my $title = $line->{'titre'}; - if($title eq '' && $line->{'soustitre'}) { $title = $line->{'soustitre'} } else { $title = "sans titre"; } - + if($title eq '') { + if($line->{'soustitre'} ne '') { $title = $line->{'soustitre'}; } else { $title = 'sans titre'; } + } my $description = ""; $description = trim($line->{'resume'}) if ($line->{'resume'}) ;