#!/usr/bin/perl
# make perl reasonably restrictive
use strict;
use warnings;
# parsers
use HTML::Form;
use HTML::TreeBuilder;
use CGI (qw/escape/);
# modules for outbound requests
require HTTP::Request;
use LWP::UserAgent;
##########################################################
# start from there
my $base_uri="http://www.gem.or.at/cgi-bin/gem.pl?aktion=suche&" .
"funktion=maske&sprache=de";
# for requests
my ($ua, $request, $response);
my (@forms, $form, %sc_urls);
##########################################################
# make a request to collect the dropdown options
$request = HTTP::Request->new(GET => "$base_uri");
$ua = LWP::UserAgent->new;
$response = $ua->request($request);
# for testing: local version
#$response = cat("gem.html");
@forms = HTML::Form->parse($response, $base_uri);
# subroutine (see below) for extraction
my %out=listoptions($forms[0],"stichwort1");
##########################################################
# q'n'd rss header ...
print <<"EOF";
EOF
my $escaped_uri=escape($base_uri);
for my $stichwort (keys %out) {
if ($stichwort) {
print <<"EOF";
$out{$stichwort}
$escaped_uri
EOF
print STDERR ">>>>>>>> $stichwort\n";
$forms[0]->param('stichwort1',$stichwort);
$ua = LWP::UserAgent->new;
$forms[0]->action =~ m#(.*/)#;
my $base_uri=$1;
# caching
my $content;
if (-f "gem-db-$stichwort.html") {
$content=cat("gem-db-$stichwort.html");
} else {
$response = $ua->request($forms[0]->click);
open O,">gem-db-$stichwort.html";
print O $response->content;
close O;
$content=$response->content;
}
my $tree=HTML::TreeBuilder->new_from_content($content);
for ([$tree->find('table')]->[3]->find('td')) {
my $link=$base_uri.[$_->extract_links]->[0][0][0];
if ($link =~ /gem.pl/) {
#my $content=join(" ",keys %$_);
# _parent _content align valign _tag
my $content=${$$_{_content}[0]}{"_content"}[0];
#>>>>>>>> 51
#Use of uninitialized value in concatenation (.) or string at
# ./gender-rdf.pl line 61.
#Use of uninitialized value in concatenation (.) or string at
# ./gender-rdf.pl line 61.
#Can't use string ("Es wurde kein Eintrag in der Dat") as a HASH ref
# while "strict refs" in use at ./gender-rdf.pl line 64.
if ($content && $link) {
$escaped_uri=escape($link);
print <<"EOF";
$content
$escaped_uri
EOF
}
}
#print [$_->find("td")]->[0]->attr("text");
#print " > ",$_->find("a")->attr("text"),"\n";
}
print <<"EOF";
EOF
print STDERR "<<<<<<<< $stichwort\n";
}
}
##########################################################
# q'n'd rss footer ...
print <<"EOF";
EOF
sub cat {
my ($name, $oldifd, $data);
($name)=@_;
$oldifd=$/;
$/=undef;
open I,$name;
$data=;
close I;
$/=$oldifd;
return $data;
}
sub listoptions ($$) {
my ($form,$name)=@_;
my %liste;
my @values=$form->find_input($name)->possible_values;
my @names=$form->find_input($name)->value_names;
for my $i (0..$#names) {
$liste{$values[$i]}=$names[$i];
}
return %liste;
}