#!perl -w # # Form2WSDL Invoker # March 2005 # by Ross Shannon # ################################################################################ # Pull in required Perl modules use SOAP::Transport::HTTP; # Receive SOAP request and fire it to method below (should ask for 'invoke') SOAP::Transport::HTTP::CGI -> dispatch_to('SOAP2Invoke') -> handle; ################################################################################ # Package that deals with the SOAP call, the invoke method does it all package SOAP2Invoke; use strict; use CGI; use CGI::Carp qw(fatalsToBrowser); use URI; use LWP::UserAgent; use HTML::Form; use HTML::TreeBuilder; use HTML::Element; use SOAP::Lite; ################################################################################ # Set constants and initialise global variables my $doc_root = $ENV{'DOCUMENT_ROOT'}; # filesystem location my $timestamp = scalar localtime(); # report timestamp # User-Agent string and contact address for robot my $version = "0.4.0"; my $botUA = "Form2WSDL/$version"; my $botFrom = 'ross.shannon@gmail.com'; # A little metadata... my $browser = $ENV{'HTTP_USER_AGENT'}; ############################################################################### # Variables, variables everywhere my $userAgent; my $document; my $request; my $response; my @forms; my $formid; ################################################################################ sub invoke { # @_ holds all arguments passed to this script. # First one (after classname) will always be the URL to work on. my ($class, $url, $formid, %formfields) = @_; &getDocument($url); &extractForms($url); # Now we have the form in memory. # Change elements in the form to match the data sent in the SOAP envelope, # then submit it and pipe the output back foreach my $field (%formfields) { if ($field eq "submit") { last; } # Stop if we're about to change submit, we don't want to if ($forms[$formid]->find_input($field)) { $forms[$formid]->find_input($field)->value($formfields{$field}); } } # Submit form # If a submit button has been chosen from the "submit" enumeration, # submit with that button selected (as if it had been clicked) my $formresponse; if (defined $formfields{"submit"}) { $formresponse= $userAgent->request($forms[$formid]->click($formfields{"submit"})); } else { $formresponse= $userAgent->request($forms[$formid]->click); } my $tree = HTML::TreeBuilder->new; # empty tree $tree->parse($formresponse->content); my $h = $tree->look_down("_tag", "head"); my $base = HTML::Element->new("base", "href" => $url); # Add a base href to the page, so that all links and images are resolved to # the right address. Don't add it if the submit button clicked had the name # "btnI", which is Google's I'm Feeling Lucky button. Helps in some cases. unless ($formfields{"submit"} eq "btnI") { $h->push_content($base); } return $tree->as_HTML; ##return $document; } ################################################################################ # Functions # Fetches document and returns it. Assumes basic validation check has been done on the URL. sub getDocument { my $url = shift; # Configure the UA $userAgent = LWP::UserAgent->new; $userAgent->agent("$botUA"); $userAgent->from("$botFrom"); $userAgent->timeout(20); $userAgent->proxy(['http', 'ftp', 'https'], 'http://proxy5.ucd.ie:8585'); $userAgent->no_proxy('localhost', 'htmlsource',); # Send request $request = HTTP::Request->new('GET', $url); # Get response $response = $userAgent->request($request); # Check how the bot got on if ($response->is_success) { # Store the response text $document = $response->content; } else { # Something's gone wrong with the request &print_error("Could not get page “$url”. Please check that the address is valid and that the page exists.

\n

The server’s response was: “".$response->status_line."”", 1); } } # Parses the page for HTML forms sub extractForms { @forms = HTML::Form->parse($response); } # Prints error page. If second argument is 1, the script'll die right here. sub print_error { my $message = shift; my $shoulddie = shift; print "

\n

Error

\n\n"; print "

$message

\n"; print "
"; if ($shoulddie == 1) { &print_footer(); exit; } }