Plagger::Plugin::CustomFeed::AmazonAssociateReportJP を試してみたところ、スクレイピングがうまくいっていなかったので修正してみました。
どこかに最新の変更が反映された版がすでにあるのかもしれませんし、うまい修正ではないかもしれませんが、今日現在、無事にレポートが届いています。
AmazonAssociateReportJPパッチ
--- AmazonAssociateReportJP.pm.org 2008-12-15 05:12:40.000000000 +0900
+++ AmazonAssociateReportJP.pm 2008-12-15 05:39:15.000000000 +0900
@@ -71,7 +71,7 @@
mech => $mech,
email => $plugin->conf->{email},
password => $plugin->conf->{password},
- start_url => 'http://affiliate.amazon.co.jp/gp/associates/join/main.html',
+ start_url => 'https://affiliate.amazon.co.jp/',
}, $class;
}
@@ -80,18 +80,17 @@
my $mech = $self->mech;
my $res = $mech->get($self->start_url);
return unless $mech->success;
- $mech->follow_link(url_regex => qr!associates/login/login\.html!);
- $mech->form_number(1);
+ $mech->form_number(2);
$mech->field(email => $self->email);
$mech->field(password => $self->password);
$mech->click;
- return if ($mech->content =~ m!<input name="email" type="text"!); # oops, login failed!
+ return if ($mech->content =~ m!<input type="text" name="email"!); # oops, login failed!
return 1;
}
sub summary_html {
my $self = shift;
- if ($self->mech->content =~ m!(<table class="report" id="earningsSummary">.*?</table>)!is) {
+ if ($self->mech->content =~ m!</form>\n(<div class="note">.*\*[^<]+</div>\n</div>)!is) {
my $html = $1;
$html =~ s!<a [^>]+>.+?</a>!!isg;
$html =~ s!<img [^>]+/>!!isg;
@@ -119,10 +118,11 @@
$self->mech->follow_link(url_regex => qr/report\.html.*?ordersReport/);
$self->mech->submit_form(form_number => 8);
my $content = $self->mech->content;
+ $content =~ s/<form/\n<form/g;
if ($content =~ m!(<table class="report" id="ordersReport">.*?</table>)!is) {
$html = $1;
}
- if ($content =~ m!(<table class="ordersReportSummary">.*?</table>)!is) {
+ if ($content =~ m!(<table class="report" id="ordersReportSummary">.*?</table>)!is) {
$html .= $1;
}
return $html;
