Find repeats with a unit length of 1 to 6 - print only repeats whose total length is at least 1+$len (set to 20)
Input should be one sequence per line; a fsa file can be converted with the following script perl -ape 'chomp;if(/>/){s/^/\n/;s/$/ xx/;}' so that there are two x characters (xx) right in front of the sequence
#!/usr/bin/perl -w
#findmicro.pl: input is one sequence per line, the name separated from the sequence by 2 \t or by xx in front of the seq
#finds repeat units of length 1 to 6 - prints only repeats whose total length is at least 1+$len
use strict;
use warnings;

# Microsatellite (tandem repeat) finder.
#
# Input : one record per line, read from the files named on the command
#         line or from STDIN.  A record is either
#           NAME xxSEQUENCE        (split at the last "xx" on the line), or
#           NAME <tab><tab>SEQUENCE (fields 0 and 2 of a tab split).
#         Lines from which no sequence can be extracted are skipped.
# Output: a two-line header, then one tab-separated line per reported
#         repeat: repeat count, repeat unit, 1-based position of the last
#         base of the repeat, total sequence length, record name.

my $len = 19; #set the length of the microsatellite (reported when total > $len)
my $max_unit = 6;     # longest repeat unit that is searched for
my $include_mono = 1; # 1: mononucleotide repeats are reported, 0: they are dropped
my $locus_window = 10; # runs ending closer than this to an earlier run are the same locus

main() unless caller;

# Entry point: print the header and process every input line.
sub main {
    print "rpt No\t repeat\t end at\t totall\t name\n";
    print "----------------------------------------\n";

    while (my $line = <>) {
        chomp $line;
        my ($name, $seq) = _parse_record($line);

        # Blank lines and lines without a separator carry no sequence.
        next unless defined $seq && length $seq;

        my @runs = _find_repeats($seq, $len, $max_unit);
        for my $run (_select_reportable(\@runs, $include_mono, $locus_window)) {
            print join("\t ",
                $run->{count}, $run->{motif}, $run->{end},
                length($seq), $name), "\n";
        }
    }
    return;
}

# Split one chomped input line into (name, sequence).
# The "xx" marker wins over the tab layout; the greedy match splits at the
# last "xx" on the line.  The sequence is undef when the line has neither.
sub _parse_record {
    my ($line) = @_;

    if ($line =~ /\A(.*)xx(.*)\z/s) {
        return ($1, $2);
    }

    my @fields = split /\t/, $line;
    return ($fields[0], $fields[2]);
}

# Scan $seq in blocks of $unit bases starting at offset $frame and return
# every run of identical consecutive blocks whose total length exceeds
# $min_len.  Each run is a hash ref with keys count, motif and end, where
# end is the 1-based position of the last base of the run.
sub _scan_frame {
    my ($seq, $unit, $frame, $min_len) = @_;

    my $seq_len = length $seq;
    my @runs;
    my ($motif, $count, $start) = (q{}, 0, $frame);

    # Close the run that is currently open and keep it if it is long enough.
    my $close_run = sub {
        return if $count == 0;
        my $total = $count * length $motif;
        return if $total <= $min_len;
        push @runs, { count => $count, motif => $motif, end => $start + $total };
        return;
    };

    for (my $pos = $frame; $pos < $seq_len; $pos += $unit) {
        my $block = substr $seq, $pos, $unit;
        if ($count > 0 && $block eq $motif) {
            $count++;
            next;
        }
        $close_run->();
        ($motif, $count, $start) = ($block, 1, $pos);
    }

    # A run that reaches the end of the sequence has no mismatching block
    # behind it, so it must be closed explicitly.
    $close_run->();

    return @runs;
}

# Collect the runs for every unit length 1..$max_unit and every reading
# frame 0..unit-1, shortest unit first, so that the simplest description
# of a locus (e.g. "ac" rather than "acac") comes first in the list.
sub _find_repeats {
    my ($seq, $min_len, $max_unit) = @_;

    my @runs;
    for my $unit (1 .. $max_unit) {
        for my $frame (0 .. $unit - 1) {
            push @runs, _scan_frame($seq, $unit, $frame, $min_len);
        }
    }
    return @runs;
}

# Keep only one run per locus: a run is dropped when any earlier run in the
# list (reported or not) ends fewer than $window bases away from it.
# When $include_mono is false, runs whose unit consists of a single
# repeated character are dropped as well.
sub _select_reportable {
    my ($runs, $include_mono, $window) = @_;
    $window = 10 unless defined $window;

    my @seen_ends;
    my @keep;
    for my $run (@{$runs}) {
        my $end = $run->{end};
        my $same_locus = grep { abs($end - $_) < $window } @seen_ends;
        push @seen_ends, $end;

        next if $same_locus;
        next if !$include_mono && $run->{motif} =~ /\A(.)\1*\z/s;
        push @keep, $run;
    }
    return @keep;
}