#!/usr/bin/perl -w 

# sntcooc.perl [-sort-buffer-size 200M] [-sort-batch-size 253] [-sort-compress gzip] output vcb1 vcb2 snt12 
#
# This file is part of mgiza++.  Its use is licensed under the GNU General
# Public License version 2 or, at your option, any later version.

use strict;
use File::Basename;
use FindBin qw($Bin);

sub systemCheck($);

# Quote a string so that /bin/sh passes it on as exactly one word, whatever
# whitespace or shell metacharacters it contains: wrap it in single quotes
# and spell every embedded single quote as '\''.
my $shellQuote = sub {
  my ($word) = @_;
  $word =~ s/'/'\\''/g;
  return "'$word'";
};

my $usage = "Usage: $0 [-sort-buffer-size SIZE] [-sort-batch-size N] "
          . "[-sort-compress PROG] output vcb1 vcb2 snt12\n";

# The last four arguments are always the positional ones; everything in
# front of them is treated as options.  Without this check the negative
# array indices used below would pick up the wrong (or undefined) values.
if (@ARGV < 4)
{
  print STDERR $usage;
  exit(1);
}
my $numOptArgs = @ARGV - 4;

# Each option of this script is forwarded to sort as the flag given here.
my %sortFlagFor = (
  "-sort-buffer-size" => "-S",
  "-sort-batch-size"  => "--batch-size",
  "-sort-compress"    => "--compress-program",
);

my $sortArgs = "";
for (my $i = 0; $i < $numOptArgs; ++$i)
{
  my $arg  = $ARGV[$i];
  my $flag = $sortFlagFor{$arg};

  if (!defined($flag))
  {
    # Unrecognised arguments are still skipped, but no longer silently.
    print STDERR "Ignoring unknown option $arg\n";
    next;
  }

  # The option's value must come from the option region; never swallow
  # the first positional argument (the output file) as a value.
  if ($i + 1 >= $numOptArgs)
  {
    print STDERR "Missing value for option $arg\n", $usage;
    exit(1);
  }

  $sortArgs .= " $flag " . $shellQuote->($ARGV[++$i]);
}

my ($out, $vcb1, $vcb2, $snt12) = @ARGV[-4 .. -1];

# Use gsort when running it produces any --help output, otherwise plain sort.
my $SORT_EXEC = `gsort --help 2>/dev/null` ? 'gsort' : 'sort';

# sort writes its temporary files next to the output file.
my $TMPDIR = dirname($out);

# Pipeline: snt2coocrmp (looked up in this script's own directory) feeds
# sort, and uniq writes the de-duplicated result to the output file.
# NOTE: the shell reports only the exit status of the last pipeline stage
# (uniq), so a failure of snt2coocrmp or sort is not noticed by systemCheck.
my $cmd;
$cmd  = join(" ", map($shellQuote->($_), "$Bin/snt2coocrmp", $vcb1, $vcb2, $snt12)) . " ";
$cmd .= "| $SORT_EXEC $sortArgs -T " . $shellQuote->($TMPDIR)
      . " -nk 1 -nk 2 | uniq > " . $shellQuote->($out);
systemCheck($cmd);

#############################

# Run a command through system(), echoing it to STDERR first, and terminate
# the whole script with exit status 1 if it does not succeed.
#
# Parameter: the command line, passed to system() as a single string.
# Returns:   only when system() returned 0; the return value is not meaningful.
#
# The ($) prototype is kept so the definition agrees with the forward
# declaration near the top of the file.
sub systemCheck($)
{
  my $cmd = shift;
  print STDERR "Executing $cmd \n";

  my $retVal = system($cmd);
  return if $retVal == 0;

  # Decode the value returned by system() so the reason for the failure is
  # reported instead of exiting without any diagnostic.
  if ($retVal == -1)
  {
    print STDERR "Failed to execute command: $!\n";
  }
  elsif ($retVal & 127)
  {
    printf STDERR "Command died with signal %d\n", $retVal & 127;
  }
  else
  {
    printf STDERR "Command exited with status %d\n", $retVal >> 8;
  }

  exit(1);
}