#!/usr/bin/perl # Copyright (C) 2010 Modestas Vainius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see use strict; use warnings; use Debian::PkgKde; use Getopt::Long; use File::Copy qw(); use File::Temp qw(); use File::Spec; use IO::Uncompress::Inflate qw(inflate); use IO::Uncompress::Gunzip qw(gunzip); # Load extra modules (from libwww-perl and its dependencies) INIT { eval "use HTTP::Request"; if ($@) { error "in order to use this utility, you have to install libwww-perl package"; } eval "use URI; use URI::QueryParam; use URI::Escape;"; eval "use HTTP::Response; use LWP::UserAgent; use HTML::LinkExtor; use HTML::Parser;"; } sub usage { usageerr "[ -d destdir ] [ -v version ] [ -a arch ] [ -o ] [ package ] [ distribution ]"; } sub construct_url { my ($path, %params) = @_; my $url = URI->new($path); foreach my $param (keys %params) { my @value = (ref $params{$param} eq "ARRAY") ? @{$params{$param}} : ( $params{$param} ); $url->query_param_append($param, @value); } return $url->as_string(); } sub as_array { my $scalar = shift; my @ret; if (defined $scalar) { if (ref($scalar) eq 'ARRAY') { @ret = @$scalar; } else { push @ret, $scalar; } } return @ret; } sub in_array { my $value = shift; foreach my $member (@_) { return 1 if $member eq $value; } return 0; } sub get_command_output { my @lines; open(my $cmd, "-|", @_) or syserr("unable to execute command %s", $_[0]); while (<$cmd>) { chop; push @lines, $_; } close $cmd; return @lines; } sub get_rfc822_field_value { my ($field, $input) = @_; foreach my $line (@$input) { if ($line =~ /^\Q$field\E:\s*(.*)$/) { return "$1" } } } sub html2text { my ($in, $out) = @_; my $body; my $parser = HTML::Parser->new( api_version => 3, start_h => [ sub { if (shift() eq "body") { $body = 1 } }, "tagname" ], end_h => [ sub { if (shift() eq "body") { $body = 0 } }, "tagname" ], text_h => [ sub { if ($body) { print $out shift(); } }, "dtext" ] ); $parser->ignore_elements("head", "a", "img"); return defined($parser->parse_file($in)) && defined($body); } sub fetch_log { my ($browser, $uri, %opts) = @_; my $ok = 0; my @status; my ($filename, $file); # Construct log filename $filename = sprintf("%s_%s_%s_%s.build", $uri->query_param("pkg"), $uri->query_param("ver"), $uri->query_param("arch"), $uri->query_param("stamp")); $file = File::Spec->catfile($opts{destdir}, $filename) if defined $opts{destdir}; # Check overwrite option if (!$opts{overwrite} && -e $file) { info "Not overwriting existing build log at $file ..."; } else { # Create a temporary file my %tmpfileopts = ( TEMPLATE => $filename . ".XXXXXX" ); $tmpfileopts{DIR} = $opts{destdir} if exists $opts{destdir}; my $tmpfile1 = File::Temp->new(%tmpfileopts); my $tmpfile2 = File::Temp->new(%tmpfileopts); info "Fetching build log to $filename ..."; my $request = HTTP::Request->new(GET => $uri); $request->header("Accept-Encoding" => "gzip, deflate, identity"); $browser->show_progress(1); $tmpfile1->close(); my $response = $browser->request($request, $tmpfile1->filename); if ($response->is_success()) { # Inflate/gunzip contents if needed my $content_encoding = $response->header("Content-Encoding"); if (defined $content_encoding && $content_encoding ne "identity") { my $uncompress_func; if ($content_encoding eq "deflate") { push @status, "decoded:deflate"; $uncompress_func = \&inflate; } elsif ($content_encoding eq "gzip") { push @status, "decoded:gzip"; $uncompress_func = \&gunzip; } else { push @status, "unsupported content encoding: $content_encoding"; } if (defined $uncompress_func) { if (&$uncompress_func($tmpfile1->filename => $tmpfile2, BinModeOut => 1)) { $tmpfile2->close(); ($tmpfile1, $tmpfile2) = ($tmpfile2, $tmpfile1); open($tmpfile2, ">:utf8", $tmpfile2->filename) or syserr "unable to reopen temporary file"; $ok = 1; } } } else { # Identity encoding (no compression) $ok = 1; binmode($tmpfile2, ":utf8"); } if ($ok) { open($tmpfile1, "<:utf8", $tmpfile1->filename); if ($ok = html2text($tmpfile1 => $tmpfile2)) { $tmpfile1->close(); $tmpfile1 = $tmpfile2; } else { push @status, "html unstripped"; } $tmpfile1->close(); $tmpfile2->close(); File::Copy::move($tmpfile1->filename, $file) or error "unable to rename '%s' to '%s'", $tmpfile1->filename, $file; } } } return ($ok, [ $filename, @status ]); } sub download_logs { my ($pkg, %opts) = @_; my $distro = $opts{distro}; my @arches = as_array($opts{arch}); my @vers = as_array($opts{ver}); my $url; # Construct index URL if (defined $distro) { $url = construct_url('https://buildd.debian.org/status/package.php', p => $pkg, suite => $distro); } elsif (defined $opts{ver}) { if (scalar(@arches) == 1 && scalar(@vers) == 1) { $url = construct_url('https://buildd.debian.org/build.php', pkg => $pkg, ver => \@vers, arch => \@arches); } else { # Only single ver + single arch query is supported by build.php. # For anything else fallback to url filtering (see below) $url = construct_url('https://buildd.debian.org/build.php', pkg => $pkg); } } else { error "neither version(s) nor distribution was specified"; } # Download index document and extract links info "Downloading build log index from $url ..."; my $browser = LWP::UserAgent->new( agent => get_program_name(), timeout => 10, keep_alive => 1, env_proxy => 1, ); my $request = HTTP::Request->new(GET => $url); if (my $response = $browser->request($request)) { error "unable to access log index at URL $url: ".$response->status_line unless $response->is_success(); my $urlbase = $url; $urlbase =~ s,[^/]*$,,; my $linkextor = HTML::LinkExtor->new(undef, $urlbase); $linkextor->parse($response->content()); if (my @links = grep { $_->[0] eq "a" } $linkextor->links()) { @links = map { shift @{$_}; +{ @{$_} }->{href} } @links; my @ok; my @failed; my %latest_logs; # Collect links we need to fetch foreach my $link (@links) { # Check if it is the link we need if ($link =~ m,/fetch\.(cgi|php)(\?[^/]+)$,) { # We might need to filter out links based on the arch and # version we need my $loguri = URI->new($link); my $ver = $loguri->query_param("ver"); my $arch = $loguri->query_param("arch"); my $stamp = $loguri->query_param("stamp"); next if @vers && !in_array($ver, @vers); next if @arches && !in_array($arch, @arches); # Ensure that we are fetching the latest log for ver/arch my $k = $ver . "_" . $arch; if (!exists $latest_logs{$k} || $latest_logs{$k}{stamp} < $stamp) { $latest_logs{$k} = { stamp => $stamp, uri => $loguri }; } } } # Fetch logs foreach my $k (sort keys %latest_logs) { my $loguri = $latest_logs{$k}{uri}; my ($ok, $status) = fetch_log($browser, $loguri, %opts); if ($ok) { push @ok, $status; } else { push @failed, $status; } } return (@ok || @failed) ? (\@ok, \@failed) : (); } return (); } else { error "unable to access log index URL $url"; } } sub print_summary { my $logs = shift; my $is_warning = shift; my $msg = shift; if (@$logs) { info $msg, @_ unless $is_warning; warning $msg, @_ if $is_warning; foreach my $log_info (@$logs) { my ($filename, @info) = @$log_info; if (@info) { printmsg " - %s [%s]", $filename, join(", ", @info); } else { printmsg " - %s", $filename; } } } } my $opt_destdir; my @opt_versions; my @opt_archs; my $opt_force; # Get and verify options unless (GetOptions( "destdir|d=s" => \$opt_destdir, "version|v=s" => \@opt_versions, "arch|a=s" => \@opt_archs, "force|f!" => \$opt_force)) { usage(); } my ($opt_package, $opt_distro) = @ARGV; my @dpkg_parsechangelog; if (!$opt_package && -f "debian/changelog") { @dpkg_parsechangelog = get_command_output("dpkg-parsechangelog"); $opt_package = get_rfc822_field_value("Source", \@dpkg_parsechangelog); } if (!$opt_package) { errormsg "source package was not specified and could not be autoguessed"; usage(); } if ($opt_distro && @opt_versions) { errormsg "version and distribution options are mutually exclusive"; usage(); } if (!@opt_versions) { if (!$opt_distro && -f "debian/changelog") { @dpkg_parsechangelog = get_command_output("dpkg-parsechangelog") unless @dpkg_parsechangelog; $opt_distro = get_rfc822_field_value("Distribution", \@dpkg_parsechangelog); if ($opt_distro eq "UNRELEASED") { # Get distro from the next to current entry $opt_distro = get_rfc822_field_value("Distribution", [ get_command_output("dpkg-parsechangelog", "-c1", "-o1") ]); } } if (!$opt_distro) { errormsg "neither distribution nor version(s) was specified and could not be autoguessed"; usage(); } } # Determine destination directory to store logs unless ($opt_destdir) { $opt_destdir = sprintf("%s_%s_logs", $opt_package, ($opt_distro) ? $opt_distro : $opt_versions[0]); } info("Selected output directory for logs: %s/", $opt_destdir); unless (-d $opt_destdir) { mkdir $opt_destdir; } my ($ok_logs, $failed_logs) = download_logs($opt_package, overwrite => $opt_force, destdir => $opt_destdir, distro => $opt_distro, ver => \@opt_versions, arch => \@opt_archs); if (defined $ok_logs) { print_summary $ok_logs, 0, "Successfully downloaded build logs (stored to %s):", $opt_destdir; print_summary $failed_logs, 1, "Failed to fetch/ignored the following build logs:"; } else { error "no build logs referenced in the build log index"; } END { rmdir $opt_destdir if $opt_destdir && $opt_destdir ne "."; } exit 0 # vim: noexpandtab tabstop=8 shiftwidth=4