#!/usr/bin/perl -w
use strict;
## Set up the Perl modules we need
## Read the command line options
use Getopt::Long;
## Read in the fasta sequence file
use Bio::SeqIO;
## Run the blast query
use LWP::UserAgent;
#use HTTP::Cookies;
## Parse the results
use HTML::Strip;
use Bio::SearchIO;
## Set up the net stuff
## Set up a 'web user agent' object (something like a browser).
my $ua = LWP::UserAgent->new;
## Configure the user agent:
## Response time out (in seconds)
$ua->timeout(10);
## How the user agent should handle cookies:
#$ua->cookie_jar({ file => "lwpcookies.txt", autosave => 1})
## Note the following URI is the form behind the login page:
## http://yh.genomics.org.cn/potato/login.jsp
my $loginUri = 'http://yh.genomics.org.cn/potato/check.jsp';
## Use the user agent to login to the webserver
my $login = $ua->
post( $loginUri, [ username => 'test', password => 'abc123' ]);
#warn $login->as_string, "\n";
#exit;
## Don't know why the above dosn't trigger writing a cookie to the
## cookie.jar! (So cookie code is commented out.)
## Grab the cookie manually
die "CANT FIND MY COOKIES!!!!\n"
unless $login->as_string =~ /^Set-Cookie: JSESSIONID=(\w*);/sm;
my $JSESSIONID = $1;
## Manuall set the cookie in the user agents header, so it will be
## used for all subsequent communication.
$ua->default_header('Set-Cookie' => $JSESSIONID);
## The following URI is the form behind the blast page:
## http://yh.genomics.org.cn/potato/search.jsp
my $blastUri = 'http://yh.genomics.org.cn:8099/blast.cgi';
## Now we are set to loop through the sequences in the fasta sequence
## file and blast each one!
## Process the command line
my $fastaFile;
GetOptions(
"fasta=s" => \$fastaFile,
)
or die "failed to parse command line options!\n";
die "please pass a fasta file to blast\n"
unless $fastaFile && -e $fastaFile;
## Process the fasta file
my $s = Bio::SeqIO->
new( -file => $fastaFile,
-format => 'fasta'
);
while(my $seq = $s->next_seq){
## Lets just dump the results!
my $outFile = 'Scratch/'. $seq->id. '.blast.html';
if(-s $outFile ){
warn "skipping, file exists : $outFile\n";
next;
}
warn "doing ". $seq->id. " (". $seq->length. ")\n";
#warn $seq->seq, "\n";
open(OUT, '>', $outFile)
or die "cant open $outFile\n";
## Blast it!
## Set any blast options you want here!
my $blast = $ua->
post( $blastUri, [ DATALIB => 'potato',
SEQUENCE => $seq->seq ]);
#warn $blast->as_string, "\n";
#exit;
print OUT $blast->as_string;
}
__END__
## below we already moved onto processing the results, which is too much for one script.
## Get the results (scalar) as a file handle for processing with
## SearchIO. Here we also strip out the HTML from the results.
my $string = HTML::Strip->new->parse( $blast->as_string );
open my $fh, '<', \$string
or die "FUCK!\n";
my $in =
Bio::SearchIO->new( -format => "blast", -fh => $fh );
while( my $result = $in->next_result ) {
while( my $hit = $result->next_hit ) {
while( my $hsp = $hit->next_hsp ) {
## Apply some basic filtering
if( $hsp->length('total') > 50 ) {
if ( $hsp->percent_identity >= 75 ) {
print
join("\t",
$hit->name(),
$hsp->percent_identity,
$hsp->length('total'),
# No. mismatches?
$hsp->percent_identity('total'),
# No. gaps
$hsp->gaps('total'),
$hsp->start('query'),
$hsp->end('query'),
$hsp->start('hit'),
$hsp->end('hit'),
$hsp->evalue(),
$hsp->score(),
), "\n";
}
}
}
}
}
last;
}
warn "OK\n";