#!/usr/bin/perl
###############################################################################
# Integrates with Delicious Library and scrapes data from comics.org          #
#                                                                             #
# v1.0: 2007-11-06, morbus@disobey.com, email me if you use/modify.           #
###############################################################################
# This script requires a healthy dose of external dependencies. Please read   #
# http://www.disobey.com/node/1820 for this pre-configuration and history.    #
###############################################################################
# changes (2007-11-06, version 1.0):                                          #
#   - initial public release.                                                 #
###############################################################################
use warnings;
use strict;
use CGI qw/:standard/;
use LWP::Simple;

# Flow: read the ItemId CGI parameter, fetch the matching comics.org detail
# and cover pages, scrape the fields below out of the raw HTML, and print
# them back to the client through the heredoc at the bottom.
#
# FIXME(review): every place flagged "markup lost" below looks like it once
# contained HTML/XML tags that are missing from this copy of the script.
# Those literals are kept exactly as found (or, where that could not
# compile, replaced by a marked placeholder). Restore them from the upstream
# script before relying on the output.

print "Content-type: text/html\n\n";

# delicious library sends us B0xxx69268: "B0", one or more "x", then the
# comics.org id. The id is interpolated into the URLs fetched below, so the
# whole value must match and only digits are accepted. A missing or
# malformed ItemId ends the request with an empty body.
my $item_id = param('ItemId');
exit unless defined $item_id;
my ($gcd_id) = $item_id =~ m/\AB0x+(\d+)\z/;
exit unless defined $gcd_id;

my %gcd_data; # holder of goodies used in final xml.

my $gcd_lookup = 'http://comics.org/details.lasso?id='. $gcd_id;
my $gcd_dump   = get($gcd_lookup); # 'ere she goes, keptin.

# LWP::Simple::get returns undef when the fetch fails; there is nothing to
# scrape in that case, so stop instead of matching against undef.
exit unless defined $gcd_dump;
$gcd_dump =~ tr/\r\n//d; # strip all newlines.

# and start slurping in relevant data by any means necessary.
# FIXME(review): markup lost -- as written, the 'title' pattern captures the
# whole page and the 'genre' pattern always captures an empty string.
($gcd_data{'title'})        = $gcd_dump =~ m!(.*)!;
($gcd_data{'publisher'})    = $gcd_dump =~ m!- (.*?), .*? ?\d{4}, coverprice!;
($gcd_data{'release_date'}) = $gcd_dump =~ m!.*?, (.*? ?\d{4}), coverprice!;
($gcd_data{'price_retail'}) = $gcd_dump =~ m!coverprice (\d+\.\d+)!;
($gcd_data{'genre'})        = $gcd_dump =~ m!Genre: (.*?)!;
($gcd_data{'pages'})        = $gcd_dump =~ m!(\d{1,}) pages!;

# now, get the cover image URL.
my $gcd_image      = 'http://comics.org/coverview.lasso?id='. $gcd_id .'&zoom=4';
my $gcd_image_dump = get($gcd_image);
$gcd_image_dump = '' unless defined $gcd_image_dump; # failed fetch: no cover.

# this is a quickie lookup to fill in LargeImage:URL.
# FIXME(review): markup lost -- the original pattern is cut off right after
# its opening delimiter. The pattern below is a PLACEHOLDER that grabs the
# src of the first <img> tag on the page; confirm against upstream.
($gcd_data{'image'}) = $gcd_image_dump =~ m!<img[^>]*\bsrc="([^"]+)"!i;

# A field whose pattern did not match is undef; emit it as an empty string
# rather than triggering "uninitialized value" warnings in the output below.
for my $field (qw(title publisher release_date price_retail genre pages image)) {
    $gcd_data{$field} = '' unless defined $gcd_data{$field};
}

# FIXME(review): markup lost -- only the text between the original tags
# survives here, so this is not yet the "final xml" the client expects.
# Field order and the literal text are kept as found; the line breaks
# between the entries are reconstructed.
print <<EVIL_HEREDOC_OF_ORMS_BY_GORE;
$gcd_data{image}
Comic Book
\$$gcd_data{price_retail}
$gcd_data{publisher}
$gcd_data{release_date}
$gcd_data{title}
$gcd_data{pages}
$gcd_data{genre}
EVIL_HEREDOC_OF_ORMS_BY_GORE