Program for finding the longest open reading frame among the 6 possible reading frames of each contig. The start can be either the first codon of the frame or a methionine codon (M, ATG) after a stop codon.
Each input record must be on a single line. An .fsa (FASTA) file should first be converted with the following one-liner: perl -ape 'chomp;if(/>/){s/^/\n/;s/$/ xx/;}' so that there are two x characters (xx) right in front of the sequence.
#!/usr/bin/perl -w
# findstop6frames.pl - find the longest start..stop stretch (open reading
# frame) among the three forward and three reverse-complement frames.
#
# Usage: findstop6frames.pl [3] < records
#   Records are read from STDIN, one per line.  Each record must carry the
#   marker "xx" directly in front of the sequence.  A first argument of 3
#   restricts the search to the three forward frames.
#
# Every record that contains the marker and yields an open reading frame is
# printed with an annotation inserted in front of the marker:
#   " fr F START Query: S CODON STOP E CODON LEN l= ORFLEN RATIO xx"
# When the winning frame lies on the reverse strand, F is renumbered 1..3,
# the word "reversed" is added and the sequence is replaced by its reverse
# complement.  All other lines are printed unchanged.
use strict;
use warnings;

# Return the upper-cased reverse complement of a nucleotide string.
# Characters other than A, C, G and T (e.g. N) are kept as they are.
sub reverse_complement {
    my ($seq) = @_;
    my $rc = scalar reverse( uc $seq );
    $rc =~ tr/ACGT/TGCA/;
    return $rc;
}

# Scan $frames reading frames (3 = forward only, 6 = both strands) of $seq
# and return a hash ref describing the longest open reading frame, or undef
# when no frame contains a start followed by a stop.
#
# Keys of the result:
#   frame        1..6 (4..6 are the frames of the reverse complement)
#   start, stop  1-based positions of the start and stop codon on the strand
#   start_codon, stop_codon   the codons found at those positions
#   length       distance in bases from start codon to stop codon
#   strand       the sequence that was scanned for the winning frame
sub find_longest_orf {
    my ( $seq, $frames ) = @_;
    my $len    = length $seq;
    my $strand = $seq;
    my $best;

    for my $frame ( 1 .. $frames ) {

        # Frames 4..6 are frames 1..3 of the reverse complement.
        $strand = reverse_complement($seq) if $frame == 4;

        my $first = ( $frame - 1 ) % 3;    # offset of the frame's first codon
        my $pos   = $first;

        while ( $pos < $len - 2 ) {

            # Find a start: the first codon of the frame, an ATG, or any
            # codon that begins fewer than 9 bases before the end.
            my ( $start_pos, $start_codon );
            while ( !defined $start_pos && $pos < $len - 2 ) {
                my $codon = substr $strand, $pos, 3;
                if (   $codon =~ /\AATG\z/i
                    || $pos > $len - 9
                    || $pos == $first )
                {
                    ( $start_pos, $start_codon ) = ( $pos, $codon );
                }
                $pos += 3;
            }
            last if !defined $start_pos;

            # Find the end: a stop codon, or the last complete codon of this
            # frame so that an ORF running off the end of the sequence is
            # counted in every frame, not only the one aligned with the
            # final base.
            while ( $pos < $len - 2 ) {
                my $codon   = substr $strand, $pos, 3;
                my $here    = $pos;
                my $is_last = $pos + 3 >= $len - 2;
                $pos += 3;
                next if !$is_last && $codon !~ /\A(?:TAA|TGA|TAG)\z/i;

                my $span = $here - $start_pos;
                if ( !$best || $span > $best->{length} ) {
                    $best = {
                        frame       => $frame,
                        start       => $start_pos + 1,
                        start_codon => $start_codon,
                        stop        => $here + 1,
                        stop_codon  => $codon,
                        length      => $span,
                        strand      => $strand,
                    };
                }
                last;
            }
        }
    }
    return $best;
}

# Annotate one input record and return the line to print.
#   $line    the record, normally still carrying its trailing newline
#   $frames  3 or 6 (defaults to 6)
# Lines without the "xx" marker, and records in which no open reading frame
# is found, are returned unchanged.
sub annotate_record {
    my ( $line, $frames ) = @_;
    $frames = 6 if !defined $frames;

    # The sequence is everything between the first "xx" and the end of line.
    return $line if $line !~ /xx(.*)/;
    my $seq  = $1;
    my $head = substr $line, 0, $-[0];
    my $tail = substr $line, $+[0];

    my $orf = find_longest_orf( $seq, $frames );
    return $line if !$orf;

    my $len      = length $seq;
    my $ratio    = $orf->{length} / $len;
    my $reversed = $orf->{frame} > 3;
    my $shown    = $reversed ? $orf->{frame} - 3 : $orf->{frame};

    my $note = " fr $shown START Query: $orf->{start} $orf->{start_codon}"
      . " STOP $orf->{stop} $orf->{stop_codon} $len l= $orf->{length} $ratio ";

    # A reverse-strand hit replaces the sequence by the strand it was found on.
    return $head . $note . 'reversed xx' . $orf->{strand} . "\n" if $reversed;
    return $head . $note . 'xx' . $seq . $tail;
}

# Read records from STDIN and print them, annotated, to STDOUT.
sub main {
    my $mode   = @ARGV ? $ARGV[0] : q{};
    my $frames = $mode eq '3' ? 3 : 6;    # "3" means forward frames only

    while ( my $line = <STDIN> ) {
        print annotate_record( $line, $frames );
    }
    return;
}

main() unless caller;

1;
>Contig1072_32458_IMA8 GAAGGTTGGGTGCCCAGGGCTGGAGGAGGGGCTTGTGGAATTAGTGTCTGATGGGGACAGACTTTCAGTTTGGGAAGTGGAAAAGGTT CTGGAGACAGATGGTGGCGACAGCTGCCCAGCAGCGTGAAGGTACTTCATGCACTGAACTGTATACTCCGAAGCGCTTAAAACGGTGAATTACTGCTTCCCGTCAATATGCCGA CTTTAGACGCTCCAGAAGGGAGGCTGAGAAAATTCAAGTACCGGGGCAAAGATGCGTCTATCCGGCGGCACCAGCGCATGGCAGTCAGCCTGGAACTCCGCAAGGCCAAGAAAG ATGAGCAGGCCTTAAAGAGAAGAAATATCACCATTTTCTCCCCTGAACCAGCTTCTGGAGAGCTGACCAAAGGGGTCAGCCTCACCCTGCAAGAAATCATCAGTGGCGTGAATG CCTCAGATCCAGACCTGTGTTTCCAGGCCACCCAGGCAGCCAGGAAAATGCTGTCCCAAGAAAAGAACCCTCCTCTAAAATTGATTGTTGAAGCAGGCCTCATCCCCAGGCTGG TGGAGTTCCTGAAGTTGTCACCTCACCCCTGCTTGCAGTTTGAGGCAGCCTGGGCTCTGACCAACATCGCTTCTGGGACTTCAGAGCAGACTCAAGCTGTTGTGGAAGGTGGGG CCATCCCACCTTTGGTTGAGCTCCTGTCTTCCCCCCACATGACTGTGTGCGAACAGGCGGTGTGGGCTCTTGGTAATATCGCAGGTGATGGCCCAGAATTCAGAGATCTCGTTA TCTCGAGCAATGCTATTCCATATCTGCTGGCCCTCGTTTCATCAACCATACCAATCACGTTTCTACGGAACATCACGTGGACCTTGTCCAACTTGTGCCGAAACAAGAACCCTT ACCCTTCCGTGAAAGCCGTGAAGCAGATGTTGCCTGTCCTGTCCCACCTCCTGCAGCACCAAGACAGCGAAATTCTCTCGGACACCTGCTGG
>Contig1072_32458_IMA8 fr 1 START Query: 196 ATG STOP 975 TGG 978 l= 780 0.797546012269939 xxGAAGGTTGGGTGCCCAGGGCTGGA GGAGGGGCTTGTGGAATTAGTGTCTGATGGGGACAGACTTTCAGTTTGGGAAGTGGAAAAGGTTCTGGAGACAGATGGTGGCGACAGCTGCCCAGCAGCGTGAAGGTACTTCAT GCACTGAACTGTATACTCCGAAGCGCTTAAAACGGTGAATTACTGCTTCCCGTCAATATGCCGACTTTAGACGCTCCAGAAGGGAGGCTGAGAAAATTCAAGTACCGGGGCAAA GATGCGTCTATCCGGCGGCACCAGCGCATGGCAGTCAGCCTGGAACTCCGCAAGGCCAAGAAAGATGAGCAGGCCTTAAAGAGAAGAAATATCACCATTTTCTCCCCTGAACCA GCTTCTGGAGAGCTGACCAAAGGGGTCAGCCTCACCCTGCAAGAAATCATCAGTGGCGTGAATGCCTCAGATCCAGACCTGTGTTTCCAGGCCACCCAGGCAGCCAGGAAAATG CTGTCCCAAGAAAAGAACCCTCCTCTAAAATTGATTGTTGAAGCAGGCCTCATCCCCAGGCTGGTGGAGTTCCTGAAGTTGTCACCTCACCCCTGCTTGCAGTTTGAGGCAGCC TGGGCTCTGACCAACATCGCTTCTGGGACTTCAGAGCAGACTCAAGCTGTTGTGGAAGGTGGGGCCATCCCACCTTTGGTTGAGCTCCTGTCTTCCCCCCACATGACTGTGTGC GAACAGGCGGTGTGGGCTCTTGGTAATATCGCAGGTGATGGCCCAGAATTCAGAGATCTCGTTATCTCGAGCAATGCTATTCCATATCTGCTGGCCCTCGTTTCATCAACCATA CCAATCACGTTTCTACGGAACATCACGTGGACCTTGTCCAACTTGTGCCGAAACAAGAACCCTTACCCTTCCGTGAAAGCCGTGAAGCAGATGTTGCCTGTCCTGTCCCACCTC CTGCAGCACCAAGACAGCGAAATTCTCTCGGACACCTGCTGG