#! /usr/bin/perl -w

# Do a piece of a profmark benchmark, for SSEARCH searches by FPS
# (family-pairwise-search; best E-value of all individual queries).
#
# This script is normally called by pmark_master.pl; its command line
# syntax is tied to pmark_master.pl.
#
# Usage:     x-ssearch-fps <top_builddir>          <top_srcdir> <resultdir> <tblfile> <msafile> <fafile> <outfile>
# Example: ./x-ssearch-fps /usr/local/fasta-36x2   ~/src/hmmer  testdir     test.tbl  pmark.msa test.fa  test.out
#        
# SRE, Tue Apr 20 10:18:56 2010 [Janelia]
# SVN $Id$

BEGIN {
    $top_builddir  = shift;
    $top_srcdir    = shift;
    $wrkdir        = shift;
    $tblfile       = shift;
    $msafile       = shift;
    $fafile        = shift;
    $outfile       = shift;
}
use lib "${top_srcdir}/easel/demotic";
use demotic_fasta;

$fasta      = "${top_builddir}/bin/ssearch36";
$opts       = "-q";

if (! -d $top_builddir)                                 { die "didn't find BLAST build directory $top_builddir"; }
if (! -d $top_srcdir)                                   { die "didn't find H3 source directory $top_srcdir"; }
if (! -x $fasta)                                        { die "didn't find executable $fasta"; }
if (! -e $wrkdir)                                       { die "$wrkdir doesn't exist"; }

open(OUTFILE,">$outfile") || die "failed to open $outfile";
open(TABLE, "$tblfile")   || die "failed to open $tblfile";
MSA:
while (<TABLE>)
{
    ($msaname) = split;

    %seen          = ();
    %best_pval     = ();
    %best_bitscore = ();

    `esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname`;
    if ($?) { print "FAILED: esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname\n"; next MSA; }

    # Extract a list of individual sequence names from the multiple alignment.
    $output = `esl-seqstat -a $wrkdir/$msaname.sto | grep "^=" | awk '{print \$2}'`;
    if ($?) { print "FAILED: esl-seqstat -a $wrkdir/$msaname.sto\n"; next MSA; }

    @qnames = split(/^/,$output);
    chop (@qnames);

    # Loop over each query; blast; accumulate best pval for each target
    foreach $qname (@qnames) 
    {
	$output = `esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname`;
	if ($?) { print "FAILED: esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname\n"; next MSA; }

	if (! open(FASTA, "$fasta $opts $wrkdir/$msaname.query.fa $fafile |")) {
	    print "FAILED: $fasta $opts $wrkdir/$msaname.query.fa $fafile |\n"; next MSA;
	}
	
	if (! demotic_fasta::parse(\*FASTA)) {
	    print "FAILED: demotic parser for fasta output\n"; next MSA; 
	}
	
	for ($i = 0; $i < $demotic_fasta::nhits; $i++)
	{
	    $target   = $demotic_fasta::hit_target[$i];
	    $pval     = $demotic_fasta::hit_Eval[$i];
	    $bitscore = $demotic_fasta::hit_bitscore[$i];

	    if (! $seen{$target} || $pval < $best_pval{$target})
	    { 		
		$seen{$target}          = 1;
		$best_pval{$target}     = $pval; 
		$best_bitscore{$target} = $bitscore; 
	    }
	}
	close FASTA;
    }
    
    # Append to the outfile.
    foreach $target (keys(%seen)) 
    {
	printf OUTFILE "%g %.1f %s %s\n", $best_pval{$target}, $best_bitscore{$target}, $target, $msaname;
    }

    unlink "$wrkdir/$msaname.sto";
    unlink "$wrkdir/$msaname.query.fa";
}
close TABLE;
close OUTFILE;
