#!/usr/bin/perl

=pod

=head1 NAME

tv_grab_pt - Grab TV listings for Portugal.

=head1 SYNOPSIS

tv_grab_pt --help

tv_grab_pt [--config-file FILE] --configure

tv_grab_pt [--config-file FILE] [--output FILE] [--days N]
           [--offset N] [--quiet]

tv_grab_pt --list-channels

=head1 DESCRIPTION

Output TV listings for several channels available in Portugal.
It supports the public network and the private NetCabo network.

First run B<tv_grab_pt --configure> to choose, which channels you want
to download. Then running B<tv_grab_pt> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_pt.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--output FILE> write to FILE rather than standard output.

B<--days N> grab N days.  The default is 3.

B<--offset N> start N days in the future.  The default is to start
from today.

B<--quiet> suppress the progress messages normally written to standard
error.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

Bruno Tavares, gawen@users.sourceforge.net, based on tv_grab_es, from Ramon Roca.

=head1 BUGS

=cut

######################################################################
# initializations

use warnings;
use strict;
use XMLTV::Version '$Id: tv_grab_pt,v 1.4 2004/04/28 18:45:17 epaepa Exp $ ';
use Getopt::Long;
use Date::Manip;
use Data::Dumper;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use IO::File;

use XMLTV;
use XMLTV::Memoize;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Get_nice;
use XMLTV::Mode;
# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get Portuguese television listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
        [--offset N] [--quiet]
To list channels: $0 --list-channels
END
  ;

# Use Term::ProgressBar if installed.
use constant Have_bar => eval { require Term::ProgressBar; 1 };

# Attributes of the root element in output.
my $HEAD = { 'source-info-url'     => 'http://tv.clix.pt',
	     'source-data-url'     => "http://tv.clix.pt",
	     'generator-info-name' => 'XMLTV',
	     'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
	   };

# default language
my $LANG="pt";

# Global channel_data
our @ch_all;

######################################################################
# get options

# Get options, including undocumented --cache option.
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
my ($opt_days, $opt_offset, $opt_help, $opt_output,
    $opt_configure, $opt_config_file, $opt_quiet,
    $opt_list_channels);
$opt_days  = 2; # default
$opt_offset = 0; # default
$opt_quiet  = 0; # default
GetOptions('days=i'        => \$opt_days,
	   'offset=i'      => \$opt_offset,
	   'help'          => \$opt_help,
	   'configure'     => \$opt_configure,
	   'config-file=s' => \$opt_config_file,
	   'output=s'      => \$opt_output,
	   'quiet'         => \$opt_quiet,
	   'list-channels' => \$opt_list_channels
	  )
  or usage(0);
die 'number of days must not be negative'
  if (defined $opt_days && $opt_days < 0);
warn "site normally has only two days of listings\n"
  if $opt_days + $opt_offset > 2;
usage(1) if $opt_help;

my $mode = XMLTV::Mode::mode('grab', # default
			     $opt_configure => 'configure',
			     $opt_list_channels => 'list-channels',
			    );

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_pt', $opt_quiet);

my @config_lines; # used only in grab mode
if ($mode eq 'configure') {
    XMLTV::Config_file::check_no_overwrite($config_file);
}
elsif ($mode eq 'grab') {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}
elsif ($mode eq 'list-channels') {
    # Config file not used.
}
else { die }

# Whatever we are doing, we need the channels data.
my %channels = get_channels(); # sets @ch_all
my @channels;

######################################################################
# write configuration

if ($mode eq 'configure') {
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";

    # Ask about each channel.
    my @chs = sort keys %channels;
    my @names = map { $channels{$_} } @chs;
    my @qs = map { "add channel $_?" } @names;
    my @want = askManyBooleanQuestions(1, @qs);
    foreach (@chs) {
	my $w = shift @want;
	warn("cannot read input, stopping channel questions"), last
	  if not defined $w;
	# No need to print to user - XMLTV::Ask is verbose enough.

	# Print a config line, but comment it out if channel not wanted.
	print CONF '#' if not $w;
	my $name = shift @names;
	print CONF "channel $_\n";
	# TODO don't store display-name in config file.
    }

    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");

    exit();
}


# Not configuration, we must be writing something, either full
# listings or just channels.
#
die if $mode ne 'grab' and $mode ne 'list-channels';

# Options to be used for XMLTV::Writer.
my %w_args;
if (defined $opt_output) {
    my $fh = new IO::File(">$opt_output");
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'ISO-8859-1';
my $writer = new XMLTV::Writer(%w_args);
$writer->start($HEAD);

if ($mode eq 'list-channels') {
    $writer->write_channel($_) foreach @ch_all;
    $writer->end();
    exit();
}

######################################################################
# We are producing full listings.
die if $mode ne 'grab';

# Read configuration
my $line_num = 1;
foreach (@config_lines) {
    ++ $line_num;
    next if not defined;
    if (/^channel:?\s+(\S+)(s+)?/) {
	my $ch_did = $1;
	push @channels, $ch_did;
    }
    else {
	warn "$config_file:$line_num: bad line\n";
    }
}

######################################################################
# begin main program

# Assume the listings source uses CET (see BUGS above).
my $now = DateCalc(ParseDate('now'), "$opt_offset days");
die "No channels specified, run me with --configure\n"
  if not keys %channels;
my @to_get;

# the order in which we fetch the channels matters
# This progress bar is for both downloading and parsing.  Maybe
# they could be separate.
#
my $bar = new Term::ProgressBar('getting listings',
				scalar(@channels) * $opt_days)
  if Have_bar && not $opt_quiet;
foreach my $ch_did (@channels) {
    my $ch_name=$channels{$ch_did};
    $writer->write_channel({ id => $ch_did,
                             'display-name' => [ [ $ch_name ] ] });
}

my $date = UnixDate($now,'%Q');
for (my $i = 0; $i < $opt_days; $i++) {
    my $some = 0;
    foreach my $ch_did (@channels) {
	foreach (process_table($ch_did, $date)) {
	    $writer->write_programme($_);
	    $some = 1;
	}
	update $bar;
    }
    if (not $some) {
	die "no programmes found\n" if $i == 0;
	warn "only one day of listings found\n" if $i == 1;
	warn "only $i days of listings found\n" if $i > 1;
	last;
    }
    $date = nextday($date); die if not defined $date;
}

$writer->end();

######################################################################
# subroutine definitions

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
	*t = sub {};
	*d = sub { '' };
    }
    else {
	*t = \&Log::TraceMessages::t;
	*d = \&Log::TraceMessages::d;
	Log::TraceMessages::check_argv();
    }
}

# Clean up bad characters in HTML.
sub tidy( $ ) {
    for (my $s = shift) {
	# Character 150 seems to be used for 'versus' in sporting
	# events, but I don't know what that is in Portuguese.
	#
	s/\s\226\s/ vs /g;
	return $_;
    }
}

sub process_table {
    my ($ch_xmltv_id, $date) = @_;

    t "Getting channel $ch_xmltv_id, date $date\n";

    die unless $date =~ /(\d{4})(\d{2})(\d{2})/;
    my $my_date = "$1-$2-$3";
    my $url = $HEAD->{'source-info-url'}."/canais.html?dia=$my_date&channel=$channels{$ch_xmltv_id}";
    #print STDERR "Getting url : $url"; 
    t $url;
    my $data=tidy(get_nice($url));
    if (not defined $data) {
	die "could not fetch $url, aborting\n";
    }
    local $SIG{__WARN__} = sub {
	warn "$url: $_[0]";
    };

    # parse the page to a document object
    my $tree = HTML::TreeBuilder->new();
    $tree->parse($data);
    my @program_data = get_program_data($tree);
    if (not @program_data) {
	warn "$url: no programmes found\n";
	return ();
    }

    my $first = $program_data[0];
    my @r;
    foreach my $p (@program_data) {
	push @r, make_programme_hash($ch_xmltv_id, $p, $first, $date);
    }
    return @r;
}

sub make_programme_hash {
    my ($ch_xmltv_id, $cur, $first, $date) = @_;

    my %prog;

    $prog{channel}=$ch_xmltv_id;
    $prog{title}=[ [ $cur->{title}, $LANG ] ];
    $prog{"sub-title"}=[ [ $cur->{subtitle}, $LANG ] ] if $cur->{subtitle};
    $prog{category}=[ [ $cur->{category}, $LANG ] ] if $cur->{category};


    if ( $cur->{time} < $first->{time} ) {
	t "Jumping for next day of (".$cur->{time}.",".$first->{time}.") $date...";
	$date = nextday($date);
	t "Got $date\n";
    }

    my $time = $date.$cur->{time}."00";

    #print STDERR "Date built = $time\n";

    $prog{start}=utc_offset($time, '+0000');
    t "...got $prog{start}";
    unless ($prog{start}) {
	warn "bad time string: $cur->{time}";
	return undef;
    }

    $prog{desc}=[ [ $cur->{desc}, $LANG ] ] if $cur->{desc};
	
    return \%prog;
}


#
sub get_program_data {
    my ($tree) = @_;

    my @data;

    my @tables = $tree->find_by_tag_name("_tag"=>"table");

    # Actually time and title are required, but we don't check that.
    foreach my $table (@tables) {
	
	my @trs = $table->find_by_tag_name("_tag"=>"tr");
	next unless $trs[2];
	my $tr = $trs[3];
	my @tds = $tr->find_by_tag_name("_tag"=>"td");
	next unless (scalar(@tds) >= 2);
	my $should_be_hour = $tds[0]->as_trimmed_text."\n";
	#print STDERR "*".$should_be_hour."*\n";
	next unless ($should_be_hour =~ /^Hora/);
	#print STDERR "Found the leading html\n";

	my $index = 4;
	while ($trs[$index]) {
		my @tds = $trs[$index]->find_by_tag_name("_tag"=>"td");
		my $time = $tds[0]->as_trimmed_text; 
		my $title = $tds[1]->as_trimmed_text;
		my $cat = $tds[2]->as_trimmed_text;

		#print STDERR "Found $time | $title | $cat\n";
		$time =~ s/://g;

                my %h = (       time =>         $time,
                                category=>      $cat,
                                title=>         $title,
                                subtitle=>      "",
                                desc =>         "");
                push @data, \%h;
        	$index = $index + 1;
	}
	last;
    }
    return @data;
}

# get channel listing
sub get_channels {
    my $bar = new Term::ProgressBar('getting list of channels', 1)
	if Have_bar && not $opt_quiet;
    my %channels;
    my $url=$HEAD->{'source-info-url'};
    t $url;
    my $local_data=get_nice($url);
    die "could not get channel listing $url, aborting\n"
      if not defined $local_data;

    my $tree = HTML::TreeBuilder->new();
    $tree->parse($local_data);
    my @menus = $tree->find_by_tag_name("_tag"=>"select");

    foreach my $elem (@menus) {
	my $cname = $elem->attr('name');
	next unless $cname eq 'Channel';	
        my @ocanals = $elem->find_by_tag_name("_tag"=>"option");
        @ocanals = sort @ocanals;
	foreach my $opt (@ocanals) {
		    my $channel_id  = $opt->content->[0];
		    $channel_id =~ s/\s*$//;
		    $channel_id =~ s/\s/\_/g;
		    my $channel_name= $opt->content->[0];
		    next if ($channel_id =~ /^[\-]+$/);
		    next if ($channel_id =~ /^Canal$/);
		    for ($channel_name) { s/^\s+//; s/\s+$// }
		    $channels{$channel_id}=$channel_name;
		    push @ch_all, { 'display-name' => [ [ $channel_name,
							  $LANG ] ],
				    'id'=> "$channel_id" };
	} #foreach
    } #while

    if (not %channels) {
	if ($local_data =~ /(P.gina tempor.riamente indisponivel)/) {
	    die "$url says $1, cannot grab\n";
	}
	die "no channels could be found in $url\n";
    }
    update $bar if Have_bar && not $opt_quiet;
    return %channels;
}

sub nextday {
    my $d = shift;
    my $p = ParseDate($d);
    my $n = DateCalc($p, '+ 1 day');
    return UnixDate($n, '%Q');
}


