Was this page helpful?

HMDB Updated script

    Table of contents
    No headers

    #!/usr/bin/perl
    #
    # Submit a molecular-weight search to the HMDB MS search form, follow the
    # redirect to the result page, scrape the HTML result table and print one
    # record per metabolite.

    use strict;
    use warnings;

    use HTTP::Request::Common qw(GET POST);
    use LWP::UserAgent;

    novice_try();
    exit 0;

    # novice_try()
    #
    # Runs one hard-coded search (negative ion mode, mw 853.3 +/- 0.1) against
    # www.hmdb.ca and prints the parsed result rows to STDOUT.
    #
    # Takes no arguments (any that are passed are ignored) and returns nothing.
    # Dies if the search POST does not yield a Location header or if fetching
    # the result page fails.
    sub novice_try {
        my $site   = "http://www.hmdb.ca";
        my $target = "/labm/servlet/labm.mlims.doMsdb";
        my $agent  = LWP::UserAgent->new();

        my $request = POST $site . $target,
            Content_Type => 'form-data',
            Content      => [
                typeofsearch => "findMSDbParent.jsp",
                checkbox     => 'checkbox',
                ion_mode     => 'Negative',
                mw           => '853.3',
                mw_tol       => '0.1',
            ];

        # The result page address is taken from the Location header of the
        # POST response. LWP::UserAgent does not follow redirects for POST by
        # default, so the redirect response itself is what comes back here.
        my $response = $agent->request($request);
        my $loc      = $response->header('Location');
        unless ( defined $loc && length $loc ) {
            die "HMDB search did not return a Location header: "
                . $response->status_line . "\n";
        }

        # use ?ec_crd=<integer>&ec_p=<integer> to specify number of records per
        # output page and page number if desired
        # e.g., GET "$loc?ec_crd=1000&ec_p=5" to retrieve records 4000-4999
        $request  = GET "$loc?ec_crd=1000";
        $response = $agent->request($request);
        unless ( $response->is_success ) {
            die "fetching HMDB result page failed: "
                . $response->status_line . "\n";
        }

        my @lines = split /\n/, $response->as_string;

        # Discard everything up to and including the table header line ...
        while (@lines) {
            my $line = shift @lines;
            last if $line =~ /HMDB ID/;
        }

        # ... and then up to and including the start of the table body.
        while (@lines) {
            my $line = shift @lines;
            last if $line =~ /tableBody/;
        }

        print "metabolite section\n\n";

        #
        # item 0 - 5 tell which field of the hmdb output a token is extracted from
        #   0 - HMDB ID
        #   1 - compound name
        #   2 - chemical formula
        #   3 - adduct molec. wt. [matching mol. wt]
        #   4 - difference between query and target molec. wt.
        #   5 - adduct type
        #
        my @itemfield
            = ( "HMDB_ID", "compound", "formula", "molwt", "molwtdiff", "adduct" );

        my @store;    # one hash ref per completed table row
        my $metab;    # row currently being filled; undef outside a <tr>
        my $item;     # index into @itemfield of the next cell in this row

        LINE:
        while (@lines) {
            my $line = shift @lines;
            last LINE if $line =~ m{</tbody>};

            if ( $line =~ /<tr/ ) {
                # beginning of section for a metabolite, start a new hash
                $metab = {};
                $item  = 0;
            }
            elsif ( $line =~ m{</tr>} ) {
                # store result for this metabolite; ignore a stray </tr>
                # that was not preceded by an opening <tr
                push @store, $metab if defined $metab;
                $metab = undef;
            }
            elsif ( $line =~ /<td>/ ) {
                # a cell outside of any row has nowhere to be stored
                next LINE unless defined $metab;

                # a cell may span several lines: keep appending until the
                # closing tag shows up, but never read past the end of input
                my $bigline = $line;
                until ( $line =~ m{</td} ) {
                    unless (@lines) {
                        warn "unterminated <td> cell at end of response; "
                            . "stopping\n";
                        last LINE;
                    }
                    $line = shift @lines;
                    $bigline .= $line;
                }

                $bigline =~ s{</?td>}{}g;              # delete <td> tags
                $bigline =~ s/<a[^>]*>|<\/a>//gi;      # remove <a> tags
                if ( $item == 1 ) {
                    # compound name: only trim whitespace at both ends
                    $bigline =~ s/^\s+|\s+$//g;
                }
                else {
                    # every other field: strip ALL whitespace
                    $bigline =~ s/\s+//g;
                }

                print "item:$item   bigline:$bigline\n";

                # cells beyond the six known columns have no field name
                $metab->{ $itemfield[$item] } = $bigline
                    if $item <= $#itemfield;
                $item++;
            }
        }

        foreach my $record (@store) {
            foreach my $field (@itemfield) {
                # a short row leaves trailing fields unset; print them empty
                my $value = defined $record->{$field} ? $record->{$field} : '';
                print "$field   ", $value, "\n";
            }
            print "\n";
        }

        return;
    }

    Was this page helpful?
    Tag page (Edit tags)
    • No tags
    You must log in to post a comment.