#!perl -w
#
# Form2WSDL Invoker
# March 2005
# by Ross Shannon
#
################################################################################
# Pull in required Perl modules
use SOAP::Transport::HTTP;
# Receive SOAP request and fire it to method below (should ask for 'invoke')
SOAP::Transport::HTTP::CGI
-> dispatch_to('SOAP2Invoke')
-> handle;
################################################################################
# Package that deals with the SOAP call, the invoke method does it all
package SOAP2Invoke;
use strict;
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use URI;
use LWP::UserAgent;
use HTML::Form;
use HTML::TreeBuilder;
use HTML::Element;
use SOAP::Lite;
################################################################################
# Set constants and initialise global variables
my $doc_root = $ENV{'DOCUMENT_ROOT'}; # filesystem location
my $timestamp = scalar localtime(); # report timestamp
# User-Agent string and contact address for robot
my $version = "0.4.0";
my $botUA = "Form2WSDL/$version";
my $botFrom = 'ross.shannon@gmail.com';
# A little metadata...
my $browser = $ENV{'HTTP_USER_AGENT'};
###############################################################################
# Variables, variables everywhere
my $userAgent;
my $document;
my $request;
my $response;
my @forms;
my $formid;
################################################################################
sub invoke {
# @_ holds all arguments passed to this script.
# First one (after classname) will always be the URL to work on.
my ($class, $url, $formid, %formfields) = @_;
&getDocument($url);
&extractForms($url);
# Now we have the form in memory.
# Change elements in the form to match the data sent in the SOAP envelope,
# then submit it and pipe the output back
foreach my $field (%formfields) {
if ($field eq "submit") { last; } # Stop if we're about to change submit, we don't want to
if ($forms[$formid]->find_input($field)) {
$forms[$formid]->find_input($field)->value($formfields{$field});
}
}
# Submit form
# If a submit button has been chosen from the "submit" enumeration,
# submit with that button selected (as if it had been clicked)
my $formresponse;
if (defined $formfields{"submit"}) {
$formresponse= $userAgent->request($forms[$formid]->click($formfields{"submit"}));
} else {
$formresponse= $userAgent->request($forms[$formid]->click);
}
my $tree = HTML::TreeBuilder->new; # empty tree
$tree->parse($formresponse->content);
my $h = $tree->look_down("_tag", "head");
my $base = HTML::Element->new("base", "href" => $url);
# Add a base href to the page, so that all links and images are resolved to
# the right address. Don't add it if the submit button clicked had the name
# "btnI", which is Google's I'm Feeling Lucky button. Helps in some cases.
unless ($formfields{"submit"} eq "btnI") {
$h->push_content($base);
}
return $tree->as_HTML;
##return $document;
}
################################################################################
# Functions
# Fetches document and returns it. Assumes basic validation check has been done on the URL.
sub getDocument {
my $url = shift;
# Configure the UA
$userAgent = LWP::UserAgent->new;
$userAgent->agent("$botUA");
$userAgent->from("$botFrom");
$userAgent->timeout(20);
$userAgent->proxy(['http', 'ftp', 'https'], 'http://proxy5.ucd.ie:8585');
$userAgent->no_proxy('localhost', 'htmlsource',);
# Send request
$request = HTTP::Request->new('GET', $url);
# Get response
$response = $userAgent->request($request);
# Check how the bot got on
if ($response->is_success) {
# Store the response text
$document = $response->content;
} else {
# Something's gone wrong with the request
&print_error("Could not get page “$url”. Please check that the address is valid and that the page exists.
\nThe server’s response was: “".$response->status_line."”", 1);
}
}
# Parses the page for HTML forms
sub extractForms {
@forms = HTML::Form->parse($response);
}
# Prints error page. If second argument is 1, the script'll die right here.
sub print_error {
my $message = shift;
my $shoulddie = shift;
print "
\n
Error
\n\n";
print "
$message
\n";
print "
";
if ($shoulddie == 1) {
&print_footer();
exit;
}
}