#! /usr/bin/perl -w
#
#
# DAB-GUI.pl
#
# A graphical user interface for the Divide and BLAST program
#
# (C)2002 Soren M. Johnson
#
# Based on the original Divide and BLAST program by Rahul Karnik
#
# Divides up a protein sequence file into subsequences with overlap,
# for BLASTing individually and filters results for unique hits
#------------------------------------------------------------------------
# Rahul Karnik
# 12/16/1999 -- Original version
# 12/30/1999 -- Enhanced output file - input parameters
#                                     - sub-sequence positions
#            -- BLAST directory option
# Soren Johnson
# 11/17/2002 -- First GUI version using Perl/Tk
#------------------------------------------------------------------------

use Tk;
use Tk::DialogBox;
use Tk::DirTree;
use Tk::LabEntry;
use File::Basename qw(basename);
use strict;     # Always!

# Variables
# ------------------------------------------------------
my $input_file;         # File to process (name only, no directory)
my $input_file_path;    # File to process with pathname
my $part_length;        # Length of each sub-sequence
my $part_overlap;       # Overlap between subsequences in amino acids
my $expect_1;           # Expect value for full sequence BLAST
my $expect_2;           # Expect value for subsequence BLAST
my $output_dir;         # Output directory
my $matrix;             # Matrix to use for BLAST
my $html_out = 0;       # Flag for HTML output
my $output_file = "";   # Final output file (chosen in filter_results)

my $seq_name = "";      # Header line of the FASTA input
my $full_seq = "";      # Concatenated sequence lines of the FASTA input
my @full_seq_hits;      # Hit lines reported for the full sequence
my $num_parts;          # Number of sub-sequences BLASTed

# Create the main GUI window
# ------------------------------------------------------
my $top = MainWindow->new();
$top->title("Divide and BLAST");

# Create frames to divide the windowspace
# -------------------------------------------------------
my $frame1 = $top->Frame()->pack(-pady => 10, -padx => 20,
                                 -ipadx => 0, -ipady => 0);
my $frame2 = $top->Frame()->pack(-pady => 10);
my $frame3 = $top->Frame()->pack(-pady => 10);
my $frame4 = $top->Frame()->pack(-pady => 10);

# Frame 1: Input File and Output Directory
# -------------------------------------------------------
# Option names are written in full (-text, -column, ...): "-col" is a
# prefix of both -column and -columnspan.
$frame1->Label(-text => "Input File: "
              )->grid(-row => 0, -column => 0, -sticky => 'e');
$input_file = "None Selected";
my $infile_label = $frame1->Label(-text   => "$input_file",
                                  -relief => 'sunken',
                                  -width  => 30
                                 )->grid(-row => 0, -column => 1,
                                         -padx => 5, -pady => 10);
my $open = $frame1->Button(-text    => 'Open File',
                           -command => \&openfile
                          )->grid(-row => 0, -column => 2);

$frame1->Label(-text => "Output Directory: "
              )->grid(-row => 1, -column => 0);
$output_dir = "Current Path";
my $output_Label = $frame1->Label(-text   => "$output_dir",
                                  -relief => 'sunken',
                                  -width  => 30
                                 )->grid(-row => 1, -column => 1);
my $output_Button = $frame1->Button(-text    => 'Change...',
                                    -command => \&display_out_dir
                                   )->grid(-row => 1, -column => 2);

# Frame 2: Expect Value Scales
# -------------------------------------------------------

# Large Sequence Expect Scale
my $label_e1 = $frame2->Label(-text => "Full Sequence Expect Value: "
                             )->grid(-row => 0, -column => 0, -sticky => 'e');
$expect_1 = 10;
$frame2->Scale(-orient       => 'horizontal',
               -from         => 0,
               -to           => 20,
               -tickinterval => 5,
               -font         => '-adobe-times-*-r-normal--12-120-75-75-p-56-iso8859-1',
               -length       => 300,    # in pixels
               -variable     => \$expect_1,
              )->grid(-row => 0, -column => 1);

# Sub-Sequence Expect Scale
my $label_e2 = $frame2->Label(-text => "Sub-Sequence Expect Value: "
                             )->grid(-row => 1, -column => 0, -sticky => 'e');
$expect_2 = 10;
$frame2->Scale(-orient       => 'horizontal',
               -from         => 0,
               -to           => 20,
               -tickinterval => 5,
               -font         => '-adobe-times-*-r-normal--12-120-75-75-p-56-iso8859-1',
               -length       => 300,    # in pixels
               -variable     => \$expect_2,
              )->grid(-row => 1, -column => 1);

# Frame 3: Sub-Sequence Settings
# -------------------------------------------------------
$frame3->Label(-text => "Sub-Sequence Length: "
              )->grid(-row => 4, -column => 0, -sticky => 'e');
$part_length = 20;
$frame3->Entry(-width        => 3,
               -textvariable => \$part_length
              )->grid(-row => 4, -column => 1, -sticky => 'w');
$frame3->Label(-text => "amino acids"
              )->grid(-row => 4, -column => 2, -sticky => 'w');

$frame3->Label(-text => "Sub-Sequence Overlap: "
              )->grid(-row => 5, -column => 0, -sticky => 'e');
$part_overlap = 10;
$frame3->Entry(-width        => 3,
               -textvariable => \$part_overlap
              )->grid(-row => 5, -column => 1, -sticky => 'w');
$frame3->Label(-text => "amino acids"
              )->grid(-row => 5, -column => 2, -sticky => 'w');

$frame3->Checkbutton(-variable => \$html_out,
                     -text     => 'HTML Output'
                    )->grid(-row => 6, -columnspan => 3);

# Frame 4: Execute Button
# -------------------------------------------------------
my $DAB_Button = $frame4->Button(-text    => 'Execute',
                                 -command => \&execute
                                )->grid(-row => 6, -column => 2);

# Directory Sidebox
# -------------------------------------------------------
# NOTE(review): this second window is created at start-up and is not wired
# to $output_dir or to the 'Change...' button; it looks like unfinished
# work for display_out_dir -- confirm before removing or completing it.
my $directory = "";
my $top2 = $top->Toplevel;
$top2->DirTree()->pack();
$top2->Label(-text => $directory)->pack();
$top2->Button(-text => 'OK', -command => [$top2 => 'destroy'])->pack();

# Initiate the infinite event loop
# -------------------------------------------------------
MainLoop();

# Subroutine definitions
# -------------------------------------------------------

# Callback for the 'Change...' button.  Currently a stub: the directory
# chooser sketched below was never finished, so the button does nothing.
sub display_out_dir {
#   $top->TopLevel(-title => "Select Output Directory",
#                  -buttons => ["OK", "Cancel"]);
#   $dirBox->DirTree()->pack;
#   $output_Label->configure(text => "$output_dir");
    return;
}

# Callback for the 'Open File' button.  Asks for a file, stores its full
# path in $input_file_path and its bare name in $input_file, then shows the
# bare name in the input-file label.  A cancelled dialog changes nothing.
sub openfile {
    my $chosen = $top->getOpenFile();
    return unless defined $chosen && length $chosen;

    $input_file_path = $chosen;
    $input_file      = basename($input_file_path);

    # Update the label only after $input_file holds the new name.
    $infile_label->configure(-text => "$input_file");
    return;
}

# Prints the current settings to standard output.
sub print_options {
    print "\n";
    print "\nInput File: $input_file";
    print "\nOutput Directory: $output_dir";
    print "\nOutput File: $output_file";
    print "\nExpect Value 1: $expect_1";
    print "\nExpect Value 2: $expect_2";
    print "\nSub-Sequence Length: $part_length";
    print "\nSub-Sequence Overlap: $part_overlap";
    print "\nHTML: $html_out";
    print "\n\n";
    return;
}

# Callback for the 'Execute' button: runs the whole pipeline in order.
# The steps report failures with die; Perl/Tk traps a die raised inside a
# callback and reports it (see Tk::Error), which abandons the current run.
sub execute {
    print_options();
    process_options();
    print_options();
    get_file();
    make_files_and_blast();
    compile_full_results();
    filter_results();
    return;
}

# Validates the GUI settings and derives the values the later steps need:
# normalises $html_out to 0/1, sets $output_dir and picks $matrix from the
# sub-sequence length.  Dies with a message when a setting is unusable.
sub process_options {
    $html_out = ($html_out ? 1 : 0);

    # NOTE(review): the output directory is fixed here regardless of what
    # the GUI label shows, because display_out_dir is still a stub.
    $output_dir = "output";

    die "No input file selected\n"
        unless defined $input_file_path && length $input_file_path;

    # The entry fields deliver free text, so insist on whole numbers.
    for my $setting (["Sub-sequence length",  $part_length],
                     ["Sub-sequence overlap", $part_overlap]) {
        my ($label, $value) = @$setting;
        die "$label must be a whole number\n"
            unless defined $value && $value =~ /^\d+$/;
    }

    # The window advances by (length - overlap) residues, so the length
    # must be strictly greater than the overlap; equal values would make
    # the step zero.
    if ($part_length <= $part_overlap) {
        die "Sub-sequence length must be greater than sub-sequence overlap\n";
    }

    # Decide on matrix to use based on subsequence length
    if    ($part_length < 35) { $matrix = "PAM30"; }
    elsif ($part_length < 50) { $matrix = "PAM70"; }
    elsif ($part_length < 85) { $matrix = "BLOSUM80"; }
    else                      { $matrix = "BLOSUM62"; }
    return;
}

#-------------------Subroutine get_file----------------------------------
# Reads the FASTA file at $input_file_path.  Lines starting with '>' set
# $seq_name; every other line is appended to $full_seq.  Both variables are
# cleared first so a second Execute does not append to the previous run.
sub get_file {
    $seq_name = "";
    $full_seq = "";

    open(my $fasta, '<', $input_file_path)
        or die "Cannot open input file $input_file_path: $!\n";
    while (my $line = <$fasta>) {
        chomp $line;            # chomp, not chop: keep the last residue of
                                # a file that lacks a trailing newline
        $line =~ s/\r$//;       # tolerate CRLF line endings
        if ($line =~ /^>/) {
            $seq_name = $line;  # Use header line of FASTA file as name of sequence
        }
        else {
            $full_seq .= $line; # Append to the $full_seq variable if sequence
        }
    }
    close $fasta;
    return;
}
#-------------------End of get_file--------------------------------------

#-------------------Subroutine make_files--------------------------------
# BLASTs the full input file, then writes each overlapping sub-sequence to
# "$output_dir/$input_file.N" and BLASTs it into
# "$output_dir/$input_file.N.blast".  Sets $num_parts.
sub make_files_and_blast {
    local $| = 1;   # show the "BLASTing ..." progress text immediately

    my $step = $part_length - $part_overlap;

    # Number of windows needed to cover the sequence, rounded up.
    $num_parts = int((length($full_seq) + $step - 1) / $step);

    unless (-d $output_dir) {
        mkdir($output_dir, 0777)
            or die "Cannot create output directory $output_dir: $!\n";
    }

    # List-form system() keeps file names away from the shell.
    print "BLASTing full sequence...";
    system('./blastcl3', '-p', 'blastp',
           '-i', $input_file_path,
           '-o', "$output_dir/$input_file.blast",
           '-b', 0, '-e', $expect_1) == 0
        or warn "blastcl3 failed for the full sequence (status $?)\n";
    print "done.\n";

    for my $index (0 .. $num_parts - 1) {
        my $part_number = $index + 1;
        my $curr_part   = substr($full_seq, $index * $step, $part_length);
        my $blast_file  = "$input_file.$part_number";

        open(my $part_fh, '>', "$output_dir/$blast_file")
            or die "Cannot write $output_dir/$blast_file: $!\n";
        print {$part_fh} $seq_name, ", Sub-sequence #", $part_number, "\n";
        print {$part_fh} $curr_part;
        print {$part_fh} "\n";
        close($part_fh)
            or die "Cannot finish writing $output_dir/$blast_file: $!\n";

        print "BLASTing sub-sequence number ", $part_number, "...";
        system('./blastcl3', '-p', 'blastp',
               '-i', "$output_dir/$blast_file",
               '-o', "$output_dir/$blast_file.blast",
               '-b', 0, '-e', $expect_2, '-M', $matrix) == 0
            or warn "blastcl3 failed for sub-sequence $part_number (status $?)\n";
        print "done.\n";
    }
    return;
}
#-------------------End of make_file--------------------------------------

#-------------------Subroutine compile_full_results----------------------
# Collects the hit lines of the full-sequence BLAST report into
# @full_seq_hits.  A hit line is one containing two '|' characters that does
# not start with "Query="; only its first 66 characters are kept.
sub compile_full_results {
    @full_seq_hits = ();

    my $report_path = "$output_dir/$input_file.blast";
    open(my $full_hits, '<', $report_path)
        or die "Cannot read full-sequence BLAST results $report_path: $!\n";
    while (my $line = <$full_hits>) {
        next unless $line =~ /^.*\|.*\|/;
        next if $line =~ /^Query=/;
        push @full_seq_hits, substr($line, 0, 66);
    }
    close $full_hits;
    return;
}
#-------------------End of compile_full_results---------------------------

# Returns a copy of the given text with &, < and > replaced by HTML
# entities, so file names and BLAST lines cannot break the report markup.
sub html_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

#-------------------Subroutine filter_results----------------------------
# Appends a report to "$output_dir/unique.html" or "$output_dir/unique.out"
# listing, for every sub-sequence, the hit lines whose first 66 characters
# do not occur among the full-sequence hits.
# NOTE(review): the report is opened in append mode, so repeated runs
# accumulate in one file -- confirm that is wanted for the HTML variant.
sub filter_results {
    # Hash lookup replaces a scan of @full_seq_hits for every hit line.
    my %is_full_seq_hit = map { $_ => 1 } @full_seq_hits;

    $output_file = ($html_out ? "unique.html" : "unique.out");

    open(my $report, '>>', "$output_dir/$output_file")
        or die "Cannot write $output_dir/$output_file: $!\n";

    if ($html_out) {
        my $file_html = html_escape($input_file);
        print {$report} "<html>\n";
        print {$report} "<head><title>DAB results</title></head>\n";
        print {$report} "<body>";
        print {$report} "DAB results for input file $file_html using the following parameters:<br>\n";
        print {$report} "Input file: $file_html<br>\n";
        print {$report} "Sub-sequence length: $part_length<br>\n";
        print {$report} "Subsequence overlap: $part_overlap<br>\n";
        print {$report} "Expect value for full sequence BLAST: $expect_1<br>\n";
        print {$report} "Expect value for sub-sequence BLAST: $expect_2<br>\n";
        print {$report} "<hr>\n";
    }
    else {
        print {$report} "----------------------------------------\n";
        print {$report} "Output for input file $input_file using the following parameters:\n";
        print {$report} "Input file: $input_file\n";
        print {$report} "Sub-sequence length: $part_length\n";
        print {$report} "Subsequence overlap: $part_overlap\n";
        print {$report} "Expect value for full sequence BLAST: $expect_1\n";
        print {$report} "Expect value for sub-sequence BLAST: $expect_2\n";
        print {$report} "----------------------------------------\n\n";
    }

    my $step       = $part_length - $part_overlap;
    my $seq_length = length($full_seq);

    for my $part_number (1 .. $num_parts) {
        my $blast_path = "$output_dir/$input_file.$part_number.blast";
        my $opened     = open(my $unfiltered, '<', $blast_path);
        warn "Cannot read $blast_path: $!\n" unless $opened;

        # 1-based positions of this sub-sequence within the full sequence;
        # the last window may run past the end, so clamp it.
        my $curr_start = ($step * ($part_number - 1)) + 1;
        my $curr_end   = ($curr_start + $part_length) - 1;
        $curr_end = $seq_length if $curr_end > $seq_length;

        if ($html_out) {
            print {$report} "<p>\n<b>Sub-sequence $part_number ($curr_start to $curr_end):</b><br>\n";
            print {$report} "<pre>\n";
        }
        else {
            print {$report} "\nSub-sequence $part_number ($curr_start to $curr_end):\n";
            print {$report} "---------------\n";
        }

        my $num_hits = 0;
        if ($opened) {
            while (my $line = <$unfiltered>) {
                next unless $line =~ /^.*\|.*\|/;
                next if $line =~ /^Query=/;
                next if $is_full_seq_hit{ substr($line, 0, 66) };

                if ($html_out) {
                    chomp(my $hit = $line);
                    print {$report} html_escape($hit), "\n";
                }
                else {
                    print {$report} $line;
                }
                $num_hits++;
            }
            close $unfiltered;
        }

        if ($html_out) {
            print {$report} "</pre>\n";
            print {$report} "Sub-sequence $part_number has $num_hits unique hits.</p>\n\n";
        }
        else {
            print {$report} "Sub-sequence $part_number has $num_hits unique hits.\n\n";
        }
    }

    if ($html_out) {
        print {$report} "</body>";
        print {$report} "</html>";
    }
    close($report)
        or die "Cannot finish writing $output_dir/$output_file: $!\n";
    return;
}
#-------------------End of filter_results--------------------------------