#!/usr/bin/perl

# #####################################################
#
# This file is part of the Perl scripts of the MASV System.
# MASV = Munich Automatic Speaker Verification
#
# Copyright 2002-2003, Ulrich Türk
# Institute of Phonetics and Speech Communication
# University of Munich
# tuerk@phonetik.uni-muenchen.de
#
#
#   MASV is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   MASV is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with MASV; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# #####################################################

# CVS keyword strings identifying this revision of the script; they are
# printed together with the script name when the -version option is given.
my $CVS_Version_String = '$Id: mod_hmm.pl,v 1.7 2004/03/31 17:32:04 tuerk Exp $';
my $CVS_Name_String = '$Name: rel-1-4-01 $';


use strict;

use FileHandle;
use Getopt::Long qw( GetOptions );

# ---------------------------------------------------------------------
# Command line handling.
#
#   -help | -?                 print the usage text and exit
#   -version                   print script name and CVS identification
#   -use_USER_format           rewrite the parameter kind tag to <USER>
#   -include_features=<list>   comma separated feature positions to keep
#   -exclude_features=<list>   comma separated feature positions to drop
#
# Exactly two positional arguments are required: <infile> <outfile>.
# ---------------------------------------------------------------------
my $help = 0;
my $Identify = 0;

my $use_USER_flag = 0;

# Both list options may be given several times and/or carry comma separated
# values; they are flattened into plain lists further below.
my @exclude_list = ();
my @include_list = ();

# GetOptions returns false when it met an unknown or malformed option (it has
# already printed a diagnostic then).  Remember the result so that a mistyped
# option leads to the usage text instead of a run with a partial option set.
my $options_ok = GetOptions('include_features=s' => \@include_list,
                            'exclude_features=s' => \@exclude_list,
                            'use_USER_format' => \$use_USER_flag,
                            'help|?' => \$help,
                            'version' => \$Identify);

if ($Identify) {
	# print, not printf: the interpolated text must not be interpreted as a
	# format string (a '%' in the script path would be misread).
	print "$0\n$CVS_Version_String\n$CVS_Name_String\n\n";
}

@include_list = split( /,/, join(',', @include_list));
@exclude_list = split( /,/, join(',', @exclude_list));

my $usage_error = (!$options_ok) || (@ARGV != 2);

if ($usage_error || $help) {
   print "Usage: mod_hmm.pl [options] <infile> <outfile>\n";
   print "   options:\n";
   print "     -use_USER_format  : change format to USER>\n";
   print "     -include_features= <comma seperated list of pos. numbers>\n";
   print "     -exclude_features= <comma seperated list of pos. numbers>\n";
   print "   default: include all features\n\n";
   # Explicit requests for help/version are not failures; everything else
   # that ends up here is a usage error and is reported to the caller.
   exit (($help || $Identify) ? 0 : 1);
 }


my $in_file = $ARGV[0];
my $out_file = $ARGV[1];

###########################
# Read the complete HMM definition into memory, determine the vector size and
# the stream layout from its header tags and check that both agree.
# The output file is opened only after these checks have passed, so that an
# unusable input never truncates the output file (which may be the very same
# file as the input).

my $display = 0;	# set to a true value for debug output on STDERR

my $fh_in = open_file("<", $in_file);
my @hmm_in = <$fh_in>;
close ($fh_in);

my @streamWidths = ();
my $vecSize = 0;
# $line aliases the array element, i.e. the chomp also strips the line end of
# the stored line; the lines are written back with an explicit "\n" later.
foreach my $line (@hmm_in) {
	chomp($line);
	if ($line =~ /<STREAMINFO>\s+(\d+)\s+(.+)/i) {
		# "<STREAMINFO> <number of streams> <width 1> <width 2> ..."
		@streamWidths = split(/\s+/, $2);
		if ((scalar @streamWidths) != $1) {
			die ("Error in streaminfo!\n");
		}
	}
	if ($line =~ /<VECSIZE>\s+(\d+)/i ) {
		$vecSize = $1;
	}
}

# A definition without a <STREAMINFO> entry is handled as a single stream
# that spans the whole vector (HTK's default when the entry is omitted).
if (((scalar @streamWidths) == 0) && ($vecSize > 0)) {
	@streamWidths = ($vecSize);
}


if ($display) {
print STDERR "streamWidths: @streamWidths\n";
print STDERR "vecSize: $vecSize\n";
}


my $tempSum = 0;
$tempSum += $_ foreach (@streamWidths);
die ("sum of stream widths is not equal vector size!\n") if ($tempSum != $vecSize);

my $fh_out = open_file(">", $out_file);

# ---------------------------------------------------------------------
# Build the list of vector components to keep.
#
# Feature positions on the command line are 1-based; @selected_list holds
# the corresponding 0-based array indices in ascending order.
# ---------------------------------------------------------------------
my $currentStream = 1;
my $numFeatures = $vecSize; # no stream handling yet!
if ( ( ( scalar @include_list ) == 0) || ( ((scalar @include_list) == 1) && ($include_list[0] eq "all")))  {
	@include_list = (1 .. $numFeatures);
}

# Reject positions that cannot address a vector component.  Without this
# check a 0 would turn into index -1 (silently selecting the LAST component)
# and too large or non-numeric values would yield undefined components.
foreach my $feature (@include_list) {
	if (($feature !~ /^\d+$/) || ($feature < 1) || ($feature > $numFeatures)) {
		die ("Invalid feature position '$feature' in include list (valid: 1 .. $numFeatures)!\n");
	}
}

# Hash lookup instead of scanning the exclude list once per included feature;
# the comparison is by string equality of the list items.
my %is_excluded = map { $_ => 1 } @exclude_list;

my @selected_list = sort { $a <=> $b } grep { !$is_excluded{$_} } @include_list;

# convert the 1-based positions to 0-based indices
@selected_list = map { $_ - 1 } @selected_list;

if ($display) {
	print STDERR "@selected_list \n";
}

# only one stream for now:
# every stream width is reduced by the total number of removed components,
# which is only meaningful for a single stream.
my $newVecSize = scalar @selected_list;
my @newStreamWidths = map { $_ - ($vecSize - $newVecSize) } @streamWidths;

# ---------------------------------------------------------------------
# Write the modified definition.
#
# Every input line is copied to the output, except:
#   <STREAM> n      remembered as current stream, copied unchanged
#   <STREAMINFO>    rewritten with the new stream widths
#   <VECSIZE>       rewritten with the new vector size (optionally the
#                   parameter kind tag is replaced by <USER>)
#   <MEAN> / <VARIANCE>
#                   size replaced by the new width of the current stream;
#                   the following line holds the vector, of which only the
#                   selected components are written
#   <GCONST>        dropped
# ---------------------------------------------------------------------
my $writeLine;
my $line;
my $line_counter = 0;
while ($line_counter <= $#hmm_in) {
	$line = $hmm_in[$line_counter];
	if ($line =~ /<STREAM>\s*(\d*)/i) {
		# a missing (or zero) stream number counts as stream 1
		$currentStream = ($1) ? $1 : 1;
		print $fh_out $line . "\n";
	}
	elsif ($line =~ /(.*)(<STREAMINFO>)([\s\d]+)(.*)/i) {
		my ($prefix, $tag, $suffix) = ($1, $2, $4);
		$writeLine = $prefix . $tag . " " . join(" ", (scalar @newStreamWidths), @newStreamWidths) . $suffix;
		print $fh_out $writeLine . "\n";
	}
	elsif ($line =~ /(.*)(<VECSIZE>\s*)(\d+)(.*)/i ) {
		$writeLine = $1 . $2 .  $newVecSize .  $4;
		if ($use_USER_flag) {
			# replace the parameter kind tag (including qualifiers) by <USER>
			$writeLine =~ s/<(USER|MFCC|LPC|LPCEPSTRA|LPDELCEP|IREFC|FBANK|MELSPEC)[^>]*>/<USER>/;
		}
		print $fh_out $writeLine . "\n";
	}
	elsif ($line =~ /(.*)(<(?:MEAN|VARIANCE)>\s*)(\d+)(.*)/i ) {
		# mean and variance vectors are treated alike
		my ($prefix, $tag, $suffix) = ($1, $2, $4);
		print $fh_out $prefix . $tag . $newStreamWidths[$currentStream - 1] . $suffix . "\n";

		# the vector itself is on the line after the tag
		$line_counter++;
		if ($line_counter > $#hmm_in) {
			die ("Unexpected end of file after '$line' in $in_file!\n");
		}
		# split(' ', ...) ignores leading and trailing whitespace
		my @vectorItems = split(' ', $hmm_in[$line_counter]);
		@vectorItems = @vectorItems[@selected_list];
		print $fh_out " @vectorItems\n";
	}
	elsif ($line =~ /<GCONST>/i ) {
		# do nothing: GCONST lines are not copied to the output
		# (presumably because the stored constant would no longer match
		# the reduced variance vector -- TODO confirm)
	}
	else {
		print $fh_out $line . "\n";
	}
	$line_counter++;
}

# buffered write errors (e.g. a full disk) only show up when closing
close ($fh_out) || die ("Error while writing $out_file: $!\n");

exit 0;


# Tests whether an item occurs in an array.
#
#   $_[0] : item to look for
#   $_[1] : reference to the array to search
#
# The items are compared as strings (eq).
# Returns 1 if at least one element equals the item, 0 otherwise.
sub test_containedInArray {
	my ($testitem, $array_ref) = @_;

	# grep in scalar context yields the number of matching elements
	my $matches = grep { $testitem eq $_ } @$array_ref;

	return $matches ? 1 : 0;
}

# Opens a file and returns the handle.
#
#   $filemode : open mode, e.g. "<" for reading or ">" for writing
#   $filename : path of the file; it is used literally
#
# Returns a FileHandle object for the opened file.
# Dies with the system error message if the file cannot be opened.
sub open_file {
	my ($filemode, $filename) = @_;

	my $fh = FileHandle->new;
	# Three-argument open: mode and name are passed separately, so the name
	# is neither stripped of leading/trailing whitespace nor searched for
	# mode characters.
	open ($fh, $filemode, $filename) ||  die ("Cannot open $filename: $!");
	return $fh;
}


# Comparison routine for sort(): orders the values numerically ascending.
# Reads the two candidates from the sort variables $a and $b and returns
# 1, 0 or -1 for "greater", "equal" and everything else.
sub sorting_num {
        return  1 if ($a >  $b);
        return  0 if ($a == $b);
        return -1;
}
														
