2011-11-05
■ [AnyEvent][AnyEvent_HTTP]いまさら「YAPC::ASIA 2011 Tokyo 行ってきたので」(追記: 2011.11.07)

YAPC::ASIA 2011 Tokyoでは Marc Lehmann さんのトークの中で「アニメ」を連呼していたのがよかったですね。
(中略)ということで、マルチなダウンロードには AnyEvent を使うのがいいんだな! と勝手に合点して、AnyEvent::Pixiv::Download というのを書いた。今更感がすごいけど。
追記: github に上げました https://github.com/ishiduca/p5-AnyEvent-Pixiv-Download 原理的には同じなんだけど、メソッド名とか変わってます (2011.11.07)
サンプルのダウンロードスクリプト(dl.pl)
#!/usr/bin/env perl
# dl.pl - sample script: fetch several pixiv illustrations concurrently
# with AnyEvent::Pixiv::Download.
use strict;
use warnings;
use Config::Pit;
use AnyEvent;
use AnyEvent::Pixiv::Download;

# Credentials are read through Config::Pit so they never appear in the script.
my $config = pit_get('www.pixiv.net', require => {
    pixiv_id => '',
    pass     => '',
});

# Counts outstanding asynchronous work; recv() below returns once every
# begin() has been matched by an end().
my $cv = AE::cv;

my $client = AnyEvent::Pixiv::Download->new(
    pixiv_id => $config->{pixiv_id},
    pass     => $config->{pass},
);

for my $illust_id (qw/22854495 22856417 22855011 22849905/) {
    # Register the page lookup itself *before* it is started.  If begin()
    # were only called inside the callback, the counter could drop to zero
    # (and recv() return) as soon as the first download finished, while
    # other lookups were still in flight.
    $cv->begin;

    $client->to_mode_medium($illust_id, 'deep', sub {
        my $information    = shift;
        my $illust_top_url = $information->{illust_top_url};

        for my $img_src (@{ $information->{contents} }) {
            $cv->begin;    # one unit per image download
            $client->download($img_src, $illust_top_url, sub {
                my (undef, $headers) = @_;
                warn "!! finish ", $headers->{URL}, "\n";
                $cv->end;
            });
        }

        # The lookup is finished; its downloads now hold the counter open.
        $cv->end;
    });
}

$cv->recv;

1;
多重コールバックになるUIとか、リ○○ラをいちいち指定しなくちゃいけないのがださくて仕方ないので、ツッコミください
lib/AnyEvent/Pixiv/Download.pm
package AnyEvent::Pixiv::Download;

# Small pixiv client built on AnyEvent::HTTP: logs in, scrapes an
# illustration page for metadata and image URLs, and downloads images.

use warnings;
use strict;
use Carp;
use AnyEvent;
use AnyEvent::HTTP;
use Web::Scraper;
use File::Basename;
use Data::Dumper;

our $VERSION = '0.01';

my $www_pixiv_net = 'http://www.pixiv.net';
my $login_php     = "${www_pixiv_net}/login.php";
my $mypage_php    = "${www_pixiv_net}/mypage.php";
my $illust_top    = "${www_pixiv_net}/member_illust.php?mode=medium&illust_id=";

# new(pixiv_id => ..., pass => ..., [verbose => 0|1|2])
#
# Builds the client and logs in immediately (login() blocks until done).
# Croaks when "pixiv_id" or "pass" is missing.
# verbose: 1 prints short progress messages, 2 dumps headers/structures,
# any other value is silent.  Defaults to 1 when not given.
sub new {
    my $class = shift;
    my %args  = @_;
    my $self  = bless {}, $class;

    $self->{pixiv_id} = delete $args{pixiv_id}
        || Carp::croak qq(! failed: "pixiv_id" not found);
    $self->{pass} = delete $args{pass}
        || Carp::croak qq(! failed: "pass" not found);

    # Only fall back to 1 when the option is absent, so that an explicit
    # verbose => 0 can switch the messages off.
    my $verbose = delete $args{verbose};
    $self->{verbose} = defined $verbose ? $verbose : 1;

    $self->{cookie_jar}              = {};
    $self->{information_mode_medium} = {};

    $self->login;

    return $self;
}

# login()
#
# POSTs the credentials to login.php without following the redirect, turns
# the "set-cookie" response header into the cookie jar used by all later
# requests, then GETs the redirect target with that jar.  Blocks on a
# private condvar until the second request has completed.
# Returns $self.  Croaks (from inside the HTTP callbacks) when the cookie
# or the redirect location is missing, or when the redirect ends elsewhere.
sub login {
    my $self   = shift;
    my $sub_cv = AE::cv;

    # Percent-encode the credentials: a raw "&", "=", "%" or "+" in the
    # password would otherwise corrupt the form body.
    my $form_body = join '&',
        'mode=login',
        'pixiv_id=' . _uri_escape($self->{pixiv_id}),
        'pass='     . _uri_escape($self->{pass});

    my $login;
    $login = http_request('POST' => $login_php,
        headers => { 'content-type' => 'application/x-www-form-urlencoded' },
        body    => $form_body,
        recurse => 0,
        sub {
            my ($body, $headers) = @_;

            warn Dumper $headers               if $self->{verbose} == 2;
            warn qq(fetch: "${login_php}"\n)   if $self->{verbose} == 1;

            Carp::croak qq(! failed: "set-cookie" not found at $headers->{URL}\n)
                unless $headers->{'set-cookie'};

            $self->{cookie_jar} = _cookie_jar_hogehoge($headers->{'set-cookie'})
                or Carp::croak qq(! failed: something wrong...\n);

            warn Dumper $self->{cookie_jar} if $self->{verbose} == 2;
            warn qq(get_cookie: "$headers->{'set-cookie'}"\n)
                if $self->{verbose} == 1;

            # Without a redirect target there is nothing to request next.
            my $location = $headers->{'location'};
            Carp::croak qq(! failed: "location" not found at $headers->{URL}\n)
                unless defined $location && length $location;

            undef $login;

            my $redirect;
            $redirect = http_request('GET' => $location,
                cookie_jar => $self->{cookie_jar},
                sub {
                    my ($body, $headers) = @_;

                    Carp::croak qq(! failed: "redirect" failed\n $headers->{URL}\n)
                        if $headers->{URL} ne $location;

                    _log_fetch($self, $headers);

                    undef $redirect;
                    $sub_cv->send("success: login !\n");
                }
            );
        }
    );

    my $message = $sub_cv->recv;
    warn $message;

    return $self;
}

# to_mode_medium($illust_id, [$deep,] $cb)
#
# Fetches the "mode=medium" page of $illust_id, scrapes it with
# _scrape_mode_medium() and passes the resulting hash ref to $cb.
# With a true $deep the linked contents page is fetched as well and the
# hash gains "mode" ('manga' or 'big') and "contents" (array ref of image
# URLs).  The result is also cached in
# $self->{information_mode_medium}{$illust_id}.
# Returns $self at once; $cb runs later from the HTTP callback.
# Croaks when $illust_id is missing, and (inside the callbacks) when a
# response comes from a URL other than the one requested.
sub to_mode_medium {
    my $self      = shift;
    my $cb        = pop;
    my $illust_id = shift || Carp::croak qq(! failed: "illust_id" not found\n);
    my $deep      = shift;

    my $medium_url = "${illust_top}${illust_id}";

    my $mode_medium;
    $mode_medium = http_request('GET', $medium_url,
        cookie_jar => $self->{cookie_jar},
        sub {
            my ($body, $headers) = @_;

            _log_fetch($self, $headers);

            Carp::croak qq(! failed: something wrong...\n $headers->{URL}\n)
                if $headers->{URL} ne $medium_url;

            my $information = _scrape_mode_medium($body, $headers->{URL});

            unless ($deep) {
                _store_information($self, $illust_id, $information);
                undef $mode_medium;
                $cb->($information);
                return;
            }

            my $mode_big;
            $mode_big = http_request('GET', $information->{contents_url},
                cookie_jar => $self->{cookie_jar},
                headers    => { referer => $headers->{URL} },
                sub {
                    my ($body, $headers) = @_;

                    Carp::croak qq(! failed: something wrong...\n $headers->{URL}\n)
                        if $headers->{URL} ne $information->{contents_url};

                    _log_fetch($self, $headers);

                    if ($information->{contents_url} =~ /mode=manga/) {
                        # Manga pages list their images inside single-quoted
                        # strings in the page source.
                        $information->{mode}     = 'manga';
                        $information->{contents} = [];
                        while ($body =~ m!(http://img\d\d\.pixiv\.net/img/[^']+?)'!g) {
                            push @{ $information->{contents} }, $1;
                        }
                    }
                    else {
                        # $information->{contents_url} =~ /mode=big/
                        $information->{mode} = 'big';
                        my $scraper = scraper {
                            process '//div/a/img[1]', 'img_src' => '@src';
                        };
                        $information->{contents}
                            = [ ($scraper->scrape($body))->{img_src} ];
                    }

                    # Cache per illustration, exactly like the shallow
                    # branch (previously this replaced the whole cache).
                    _store_information($self, $illust_id, $information);

                    undef $mode_big;
                    undef $mode_medium;
                    $cb->($information);
                }
            );
        }
    );

    return $self;
}

# download($img_src, $referer, [\%options,] $cb)
#
# GETs $img_src, sending $referer as the referer header.
# %options:
#   on_body  - code ref receiving ($partial_body, $headers); replaces the
#              default handler.
#   on_error - code ref called with a message when the status is not 200;
#              the default dies with that message.
# Default body handler: writes the data to a file in the current directory
# named after the basename of $img_src (query string removed).
# $cb is called with the arguments of the final http_request callback.
# Returns $self.  Croaks on missing arguments or when the file cannot be
# opened.
sub download {
    my $self    = shift;
    my $cb      = pop;
    my $img_src = shift || Carp::croak qq(! failed: "img_src" not found\n);
    my $referer = shift || Carp::croak qq(! failed: "referer" not found\n);
    my $options = shift || {};

    # $fh/$filename stay undefined when the caller supplies on_body.
    my ($fh, $filename);
    my $on_body = $options->{on_body};

    unless ($on_body) {
        $filename = basename $img_src;
        $filename =~ s/\?.*$//;
        open $fh, '>', $filename
            or Carp::croak qq(! failed: "${filename}" $!\n);
        binmode $fh;

        $on_body = sub {
            my ($partial_body, $headers) = @_;
            if ($headers->{Status} =~ /^2/) {
                print {$fh} $partial_body;
            }
            return 1;
        };
    }

    my $done;
    $done = http_request('GET' => $img_src,
        cookie_jar => $self->{cookie_jar},
        headers    => { referer => $referer },
        on_header  => sub {
            my $headers = shift;
            if ($headers->{Status} ne '200') {
                ($options->{on_error} || sub { die @_ })->(
                    qq(failed: "${img_src}" $headers->{Status} $headers->{Reason}\n)
                );
                return;
            }
            return 1;
        },
        on_body => $on_body,
        sub {
            undef $done;

            # Close explicitly so buffered write errors are reported; only
            # warn, so the caller's callback still runs.
            if ($fh) {
                close $fh
                    or warn qq(! failed: close "${filename}" $!\n);
                undef $fh;
            }

            $cb->(@_);
        }
    );

    return $self;
}

# _scrape_mode_medium($body, $illust_top_url)
#
# Extracts title, description, author, contents_url and img_src from the
# HTML of a "mode=medium" page.  Returns a hash ref with the keys
# title, description, contents_url, img_src (as found by the scraper),
# author => { name, url } and illust_top_url.
# Croaks when either argument is missing.
sub _scrape_mode_medium {
    my $body = shift
        || Carp::croak qq(! failed: "body" not found\n);
    my $illust_top_url = shift
        || Carp::croak qq(! failed: "illust_top_url" not found\n);

    my $scraper = scraper {
        process '//h3[1]', 'title' => 'TEXT';
        process '//p[@class="works_caption"]', 'description' => 'HTML';
        process '//a[@class="avatar_m"]', 'author_name' => '@title';
        process '//a[@class="avatar_m"]',
            'author_url' => [ '@href', sub { return $www_pixiv_net . $_; } ];
        process '//div[@class="works_display"]/a[1]',
            'contents_url' => [ '@href', sub { return join '/', $www_pixiv_net, $_; } ];
        process '//div[@class="works_display"]/a[1]/img[1]', 'img_src' => '@src';
    };

    my $information = $scraper->scrape($body);

    # Fold the two flat author_* fields into one nested "author" hash.
    $information->{author} = {
        name => delete $information->{author_name},
        url  => delete $information->{author_url},
    };
    $information->{illust_top_url} = $illust_top_url;

    return $information;
}

# _cookie_jar_hogehoge($set_cookie)
#
# Builds a cookie-jar hash ref for http_request's cookie_jar option from a
# "set-cookie" header value, keeping only the PHPSESSID cookie:
#   { $domain => { $path => { PHPSESSID => { value, _expires } } },
#     version => 1 }
# Returns nothing for an empty argument; croaks when PHPSESSID is absent.
# NOTE(review): attribute names ("domain", "path", "expires") are matched
# case-sensitively in lower case, as before - confirm against real headers.
sub _cookie_jar_hogehoge {
    my $set_cookie = shift || return;
    my $phpsessid  = 'PHPSESSID';

    my %cookie;
    for my $pair (split /;\s*/, $set_cookie) {
        # Limit of 2 keeps values that themselves contain "=" intact.
        my ($key, $value) = split /=/, $pair, 2;
        next unless defined $key && length $key;
        $cookie{$key} = $value;
    }

    Carp::croak qq(! failed: "${phpsessid}" not found)
        unless $cookie{$phpsessid};

    # Evaluate into a scalar first so the hash constructor below always
    # receives exactly one value for _expires.
    my $expires = defined $cookie{expires}
        ? AnyEvent::HTTP::parse_date($cookie{expires})
        : undef;

    return {
        $cookie{domain} => {
            $cookie{path} => {
                $phpsessid => {
                    _expires => $expires,
                    value    => $cookie{$phpsessid},
                },
            },
        },
        version => 1,
    };
}

# Percent-encodes everything except RFC 3986 unreserved characters.
# Character strings are converted to UTF-8 bytes first; undef becomes ''.
sub _uri_escape {
    my $value = shift;
    return '' unless defined $value;

    utf8::encode($value) if utf8::is_utf8($value);
    $value =~ s/([^A-Za-z0-9\-_.~])/sprintf '%%%02X', ord $1/ge;

    return $value;
}

# Emits the per-response trace: full header dump at verbose 2, one
# "fetch:" line at verbose 1.
sub _log_fetch {
    my ($self, $headers) = @_;

    warn Dumper $headers                   if $self->{verbose} == 2;
    warn qq(fetch: "$headers->{URL}"\n)    if $self->{verbose} == 1;

    return;
}

# Caches $information under its illustration id and dumps it when verbose
# is 1 or 2.
sub _store_information {
    my ($self, $illust_id, $information) = @_;

    $self->{information_mode_medium}->{$illust_id} = $information;
    warn Dumper $information
        if $self->{verbose} == 1 || $self->{verbose} == 2;

    return;
}

1;
コメントを書く
トラックバック - http://perl.g.hatena.ne.jp/ishiduca/20111105