#!/usr/bin/perl -w


# Fetch one page from the CAREO site and return its content as a string.
#
#   $dl  - link relative to http://careo.ucalgary.ca/ (callers pass hrefs
#          scraped from a category page)
#
# Returns everything wget wrote to stdout; the empty string if wget
# produced no output.  Dies if the wget process cannot be started.
sub download_detail
{
    my ($dl) = @_;

    my $url = "http://careo.ucalgary.ca/$dl";

    # List-form pipe open: wget is exec'd directly with $url as a single
    # argument, so quotes, backticks, '$' or ';' in a scraped link are
    # never seen by a shell.  "--" ends option parsing before the URL.
    open(my $pin, "-|", "wget", "-q", "-O", "-", "--", $url)
        or die "Unable to open wget command: $!\n";

    # List-context read slurps every line; join yields "" on no output.
    my $detail_html = join("", <$pin>);

    # close() on a pipe reports the child's exit status.  A failed
    # download is only reported, not fatal, so the crawl keeps going.
    close($pin)
        or warn "wget exited with status " . ($? >> 8) . " for $url\n";

    return $detail_html;
}

# Locate the LOM metadata link(s) in a detail page, download each one and
# save the embedded LOM record as "$object.xml" in the current directory.
#
#   $wosid       - session id parsed from the detail link (not used here;
#                  kept so existing callers need no change)
#   $object      - object id; used as the output file's base name
#   $detail_html - HTML of the detail page to scan for links
#
# Links are recognised by containing "1.1.1.1.1".  If several match, each
# one overwrites the same "$object.xml", so the last one wins.  Pages
# without an escaped <lom>...</lom> block are skipped with a warning.
# Dies if wget cannot be started or the output file cannot be written.
sub download_lom_html
{
    my ($wosid,$object,$detail_html) = @_;

    my @links = ($detail_html =~ m/<a\s+href=\"(.*?)\">/sg);

    my @lom_links = grep { $_ =~ m/1\.1\.1\.1\.1/ } @links;

    foreach my $ll (@lom_links) {

        # href values are HTML-escaped; undo &amp; before using the link
        # as a URL (browse_category does the same for detail links).
        (my $path = $ll) =~ s/&amp;/&/g;
        my $url = "http://careo.ucalgary.ca/$path";

        # List-form pipe open keeps the scraped link away from the shell.
        open(my $pin, "-|", "wget", "-q", "-O", "-", "--", $url)
            or die "Unable to open wget command: $!\n";

        my $lom_html = join("", <$pin>);

        close($pin)
            or warn "wget exited with status " . ($? >> 8) . " for $url\n";

        # The page shows the XML as escaped text; grab from the first
        # "&lt;lom" to the last "/lom&gt;".
        my ($lom_xml) = ($lom_html =~ m/(&lt;lom.*\/lom&gt;)/s);

        unless (defined $lom_xml) {
            # Nothing to save: avoid undef warnings and an empty file.
            warn "No LOM record found at $url; skipping\n";
            next;
        }

        # Undo the HTML escaping.  &amp; must be decoded LAST, otherwise
        # an escaped entity such as "&amp;quot;" would be decoded twice
        # and turn into a bare quote, corrupting the XML.
        $lom_xml =~ s/&lt;/</g;
        $lom_xml =~ s/&gt;/>/g;
        $lom_xml =~ s/&quot;/"/g;
        $lom_xml =~ s/&amp;/&/g;

        print "Saving lom as $object.xml\n";
        open(my $hout, ">", "$object.xml")
            or die "Unable to open file out for writing: $!\n";

        print $hout $lom_xml;

        # Buffered write errors only surface when the handle is closed.
        close($hout)
            or die "Unable to finish writing $object.xml: $!\n";
    }

    return;
}


# Crawl one CAREO category listing: download the page at $cat_url, follow
# every "details?" link on it and save the LOM record of each object via
# download_detail() and download_lom_html().
#
#   $cat_url - absolute URL of the category listing page
#
# Prints "wosid, object" for every object processed.  Detail links from
# which no wosid/object pair can be parsed are skipped with a warning.
# Dies if wget cannot be started.
sub browse_category
{
    my ($cat_url) = @_;

    # List-form pipe open: the URL is passed to wget as one argument and
    # never interpreted by a shell.  "--" ends option parsing.
    open(my $pin, "-|", "wget", "-q", "-O", "-", "--", $cat_url)
        or die "Unable to open wget command: $!\n";

    my $file = join("", <$pin>);

    close($pin)
        or warn "wget exited with status " . ($? >> 8) . " for $cat_url\n";

    my @links = ($file =~ m/<a\s+href=\"(.*?)\">/sg);

    my @detail_links = grep { $_ =~ m/details\?/ } @links;

    foreach my $dl (@detail_links) {
        # href values are HTML-escaped; restore the real query separators.
        $dl =~ s/&amp;/&/g;
        my ($wosid,$object) = ($dl  =~ m/wosid=(\w+).*object=(\w+)$/);

        unless (defined $wosid && defined $object) {
            # Without an object id the record would be saved as ".xml".
            warn "Skipping detail link without wosid/object: $dl\n";
            next;
        }

        print "$wosid, $object\n";

##      print "$dl\n";

        my $detail_html = download_detail($dl);

        download_lom_html($wosid,$object,$detail_html);
    }

    return;
}


# Crawl the hard-coded arts category listing.
# NOTE(review): the URL embeds what looks like a WebObjects session id, so
# it presumably stops working once that session expires -- TODO confirm.
sub browse_arts
{
    return browse_category(
        "http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wo/tUkzqf9mt6KIbfa9Dmbd0g/40.0.6.7.5.1.7"
    );
}

# Crawl the hard-coded science category listing.
# NOTE(review): the URL embeds what looks like a WebObjects session id, so
# it presumably stops working once that session expires -- TODO confirm.
sub browse_science
{
    return browse_category(
        "http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wo/BeWs1oNbUyFqlcTa4wWTuM/8.0.5.3.1.1.1.0.2"
    );
}

# Script entry point: crawl each category in turn, arts first.
foreach my $crawl (\&browse_arts, \&browse_science) {
    $crawl->();
}


#http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wo/Nvlr48HkBbktBsDlEKDZWM/34.0.6.1.3.0.1.1.1.1.1.0


#http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wo/Nvlr48HkBbktBsDlEKDZWM/34.0.6.1.3.0.1.1.1.1.1.0


#http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wa/details?theme=careo&wosid=tUkzqf9mt6KIbfa9Dmbd0g&object=651479
#http://careo.ucalgary.ca/cgi-bin/WebObjects/CAREO.woa/1/wa/discuss?theme=careo&wosid=tUkzqf9mt6KIbfa9Dmbd0g&object=651479
