Monday, 27 June 2011

here it is wrapped in Perl

#!/usr/bin/perl -w

use strict;

## Set up the Perl modules we need

## Read the command line options
use Getopt::Long;

## Read in the fasta sequence file
use Bio::SeqIO;

## Run the blast query
use LWP::UserAgent;
#use HTTP::Cookies;

## Parse the results
use HTML::Strip;
use Bio::SearchIO;






## Set up the net stuff

## Set up a 'web user agent' object (something like a browser).

my $ua = LWP::UserAgent->new;



## Configure the user agent:

## Response time out (in seconds)
$ua->timeout(10);

## How the user agent should handle cookies:
#$ua->cookie_jar({ file => "lwpcookies.txt", autosave => 1})



## Note the following URI is the form behind the login page:
## http://yh.genomics.org.cn/potato/login.jsp

my $loginUri = 'http://yh.genomics.org.cn/potato/check.jsp';



## Use the user agent to login to the webserver

my $login = $ua->
post( $loginUri, [ username => 'test', password => 'abc123' ]);

#warn $login->as_string, "\n";
#exit;



## Don't know why the above dosn't trigger writing a cookie to the
## cookie.jar! (So cookie code is commented out.)

## Grab the cookie manually

die "CANT FIND MY COOKIES!!!!\n"
unless $login->as_string =~ /^Set-Cookie: JSESSIONID=(\w*);/sm;

my $JSESSIONID = $1;



## Manuall set the cookie in the user agents header, so it will be
## used for all subsequent communication.

$ua->default_header('Set-Cookie' => $JSESSIONID);



## The following URI is the form behind the blast page:
## http://yh.genomics.org.cn/potato/search.jsp

my $blastUri = 'http://yh.genomics.org.cn:8099/blast.cgi';





## Now we are set to loop through the sequences in the fasta sequence
## file and blast each one!

## Process the command line

my $fastaFile;

GetOptions(
"fasta=s" => \$fastaFile,
)
or die "failed to parse command line options!\n";

die "please pass a fasta file to blast\n"
unless $fastaFile && -e $fastaFile;



## Process the fasta file

my $s = Bio::SeqIO->
new( -file => $fastaFile,
-format => 'fasta'
);

while(my $seq = $s->next_seq){
## Lets just dump the results!
my $outFile = 'Scratch/'. $seq->id. '.blast.html';
if(-s $outFile ){
warn "skipping, file exists : $outFile\n";
next;
}
warn "doing ". $seq->id. " (". $seq->length. ")\n";
#warn $seq->seq, "\n";
open(OUT, '>', $outFile)
or die "cant open $outFile\n";
## Blast it!
## Set any blast options you want here!
my $blast = $ua->
post( $blastUri, [ DATALIB => 'potato',
SEQUENCE => $seq->seq ]);
#warn $blast->as_string, "\n";
#exit;
print OUT $blast->as_string;
}




__END__

## below we already moved onto processing the results, which is too much for one script.
## Get the results (scalar) as a file handle for processing with
## SearchIO. Here we also strip out the HTML from the results.
my $string = HTML::Strip->new->parse( $blast->as_string );
open my $fh, '<', \$string
or die "FUCK!\n";

my $in =
Bio::SearchIO->new( -format => "blast", -fh => $fh );
while( my $result = $in->next_result ) {
while( my $hit = $result->next_hit ) {
while( my $hsp = $hit->next_hsp ) {

## Apply some basic filtering
if( $hsp->length('total') > 50 ) {
if ( $hsp->percent_identity >= 75 ) {
print
join("\t",
$hit->name(),
$hsp->percent_identity,
$hsp->length('total'),
# No. mismatches?
$hsp->percent_identity('total'),
# No. gaps
$hsp->gaps('total'),
$hsp->start('query'),
$hsp->end('query'),
$hsp->start('hit'),
$hsp->end('hit'),
$hsp->evalue(),
$hsp->score(),
), "\n";
}
}
}
}
}
last;
}

warn "OK\n";

Post a Comment