DNWA/11/wywolania/Data/stide_v1.1/Seq-code/stide.cc

/*********************************************************************
 *                                                                   *
 * STIDE: Sequence Time-Delay Embedding v1.1                         *
 *                                                                   *
 * Written by Steve Hofmeyr 7/21/96                                  *
 * Revised by Julie Rehmeyer 3/98                                    *
 *                                                                   *
 * Copyright (C) 1996, 1998 Regents of the University of New Mexico. *
 * All Rights Reserved.                                              *
 *                                                                   *
 * This program is free software; you can redistribute it and/or     *
 * modify it under the terms of the GNU General Public License as    *
 * published by the Free Software Foundation; either version 2 of    *
 * the License, or (at your option) any later version.               *
 *                                                                   *
 * This program is distributed in the hope that it will be useful,   *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of    *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the     *
 * GNU General Public License for more details.                      *
 *                                                                   *
 * You should have received a copy of the GNU General Public         *
 * License along with this program; if not, write to the Free        *
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,     *
 * USA.                                                              *
 *                                                                   *
 ********************************************************************/

#include <stdlib.h>
#include <string.h>
#include <iostream.h>
#include <fstream.h>
#include "../Utils/arrays.h"
#include "../Utils/hash.h"
#include "seq_config.h"
#include "seq_stream.h"
#include "flexitree.h"

#define DBREV 1

int counter = 0;

Stream *GetReadyStream(Array<Stream> &streams, HashTableInt
		       &sid_table, int &num_streams_fnd, int
		       &total_pairs_read, const Config &cfg);
int ReadDB(SeqForest &db_forest, const string &db_name, 
	   int &seq_len); 
void WriteDB(const SeqForest &db_forest, const string &db_name, const
	     int db_size, const int seq_len);
void FinalReport(const Config &cfg, const SeqForest &normal, const int
		 num_streams_fnd, const int num_seqs_added, const
		 Array<Stream> &streams, const int db_size); 
void WriteDBStats(const SeqForest &db_forest, ostream &out_stream,
		  const int db_size); 
void OutputGraph(const SeqForest &db_forest, string db_name);
int GetPrimeLargerThan(const int n);

/*********************************************************************
 *   main()                                                          *
 *   Input:  int argc:  Number of command-line arguments             *
 *           char *argv[]:  array of strings containing              *
 *                          command-line arguments                   *
 *   Output: 0 if successful, -1 if unsuccessful                     *
 *********************************************************************/

int main(int argc, char *argv[])

{
  Config cfg((const int) argc, (const char **) argv);    
                             // Declare configuration object and do
			     // the configuration on the basis of the
			     // command line arguments and the
			     // configuration file  
  Stream *active_stream;     // This will point to the stream that
			     // currently has a sequence to be worked
			     // on (either added to the database or
			     // compared).   
  HashTableInt sid_table(GetPrimeLargerThan(cfg.max_streams));  
			     // Hash table relating external stream ids to 
                             // internal sids; make size of table
			     // smallest prime larger than the number
			     // of streams
  SeqForest normal(cfg.max_elements);     // Uninitialized forest of
				          // normal sequences
  Array<Stream> streams(cfg.max_streams); // Array of stream objects,
					  // one for each data stream
					  // in input, which are
					  // allocated as needed
  int num_streams_fnd = 0;                // Number of data streams
					  // encountered to date
  int total_pairs_read = cfg.pair_offset; // Number of pairs read from
					  // input to date from all
					  // the data streams combined
					  // -- can be offset using
					  // the "-n" switch
  int db_size;                            // Total number of unique
					  // sequences in the database
  int init_db_size = 0;                   // Number of unique
					  // sequences in the
					  // pre-existing database 


  // Read database into normal, if database exists
  db_size = init_db_size = ReadDB(normal, cfg.db_name, cfg.seq_len);

  if (cfg.add_to_db) {
    while ((active_stream = 
            GetReadyStream(streams, sid_table, num_streams_fnd,
			   total_pairs_read, cfg))
	   != NULL) { 
      active_stream->AddToDB(normal, db_size, total_pairs_read, cfg); 
    }
    WriteDB(normal, cfg.db_name, db_size, cfg.seq_len);
    if (cfg.output_graph) {
      OutputGraph(normal,cfg.db_name);
    }
  
  }
  else {
    int i = 0;
    while ((active_stream = 
	    GetReadyStream(streams, sid_table, num_streams_fnd,
			   total_pairs_read, cfg))  
	   != NULL) { 
      active_stream->CompareSeq(cfg, normal, total_pairs_read);
    }
  }
  FinalReport(cfg, normal, num_streams_fnd, db_size - init_db_size,
	      streams, db_size); 
  return(0);
}

/**********************************************************************
 * GetReadyStream()                                                   *
 *   This function reads a pair from the input, appends the element   *
 *   to the current sequence string in the appropriate data stream,   *
 *   finds out if that data stream has a complete sequence to be      *
 *   processed, continues until it has found such a data stream, and  *
 *   returns a pointer to it.  It updates num_streams_fnd,            *
 *   total_pairs_read, sid_table, and streams.                        * 
 *                                                                    *
 * Input: Array<Stream> &streams: the array of streams that we have   *
 *                                found so far                        *
 *        HashTableInt &sid_table: hash table relating external sids  *
 *                                 to internal sids                   *
 *        int &num_streams_fnd: the number of streams found so far;   *
 *        int &total_pairs_read:  the number of pairs read from the   *
 *                                input stream so far                 *
 *        const Config &cfg: configuration information                *
 *                                                                    *
 * Output: a pointer to the next stream that is ready for processing  *
 **********************************************************************/

Stream *GetReadyStream(Array<Stream> &streams, HashTableInt
		       &sid_table, int &num_streams_fnd, int
		       &total_pairs_read, const Config &cfg)

{
  Stream *ready_stream = NULL;
  int ext_sid;
  int int_sid;
  int sval;

  cin >> ext_sid;
  while (!cin.eof()) {
    if (ext_sid == -1) {
      break;
    }
    int_sid = sid_table.ExtToInt(ext_sid, num_streams_fnd);
    cin >> sval;
    ++total_pairs_read;

    // Update num_streams_fnd, if necessary
    if (int_sid >= num_streams_fnd) {
      if (int_sid > cfg.max_streams) {
	cerr<<"ERROR: Too many streams to follow, aborting..."<<endl;
	exit(-1);
      }
      // We need a new stream object
      streams[num_streams_fnd].Init(cfg, int_sid, ext_sid);
      num_streams_fnd = int_sid + 1;
    }
    streams[int_sid].Append(sval);
    if (streams[int_sid].Ready()) {
      ready_stream = &streams[int_sid];
      break;
    }
    cin >> ext_sid;
  }

  return ready_stream;
}

/*********************************************************************
 * ReadDB()                                                          *
 *   Reads the database from a file and returns the number of unique *
 *   sequences in the database.  Checks for appropriate revision     *
 *   number.  If it is a revision DBREV database, the second line    *
 *   will be "#DBseq_len: " followed by the sequence length.  The    *
 *   next line will contain a single number, giving the root of the  *
 *   first tree.  The following lines will contain the tree itself.  *
 *   The first seq_len numbers make up the first sequence (so the    *
 *   first number of the second line will be the same as the number  *
 *   on the first line).  The next number will be a negative number  *
 *   between -(seq_len-1) and -2, indicating how far to backtrack in *
 *   the first sequence, and the following positive numbers give the *
 *   rest of the second sequence.  So, for example, -3 would mean    *
 *   backtrack 3 numbers, take the previous numbers including the    *
 *   one you're on, and append the next two numbers.  So after the   *
 *   -3 you would find two positive numbers, followed by a negative  *
 *   number (which you would use the same way as you used the -3, on *
 *   the most recent sequence).  Each tree is terminated by the      *
 *   number -1.  So the sample input file                            *
 *     3                                                             *
 *     3 4 2 9 10 3 -4 3 9 8 -2 3 -3 4 9 -1                          *
 *     2                                                             *
 *     2 3 4 5 6 7 -3 2 9 -1                                         *
 *   yields the sequences:                                           *
 *     3 4 2 9 10 3                                                  *
 *     3 4 2 3 9 8                                                   *
 *     3 4 2 3 9 3                                                   *
 *     3 4 2 3 4 9                                                   *
 *     2 3 4 5 6 7                                                   *
 *     2 3 4 5 2 9                                                   *
 *                                                                   *
 *   Input:   SeqForest &db_forest   Forest of sequences             *
 *            const string &db_name  Name of database                *
 *            int &seq_len           User-specified sequence length  *
 *                                                                   *
 *   Output:  the number of unique sequences in the database         *
 *                                                                   *
 ********************************************************************/

int ReadDB(SeqForest &db_forest, const string &db_name, 
	   int &seq_len)  
{
  ifstream in_db_file(db_name.c_str());  // file to read the database from
  int db_size = 0;               // size of the database
  int root;                      // the first element of the sequences
				 // we are reading in at the moment;
				 // i.e., the root of this tree
  string buff;
  int db_seq_len;
  int rev_num;

  if (!in_db_file.is_open()) {
    cerr<<"WARNING: Cannot open database file " << db_name 
	<< " for input"<<endl<<"Creating a new file"<<endl;
    return 0;
  } 

  // Check to see if the first line contains "#DBrev:"
  in_db_file>>buff;
  if (buff == "#DBrev:") {
    in_db_file>>rev_num;
    if (rev_num > DBREV) {
      cerr << "ERROR: The revision number is greater than " << DBREV
	   << ".  This version of STIDE is only capable of dealing "
           << "with databases through DBrev " << DBREV 
	   << ".  Aborting..."<<endl; 
      exit(-1);
    }
    if (rev_num < DBREV) {
      cerr << "ERROR: Revision number of database must be >= " << DBREV 
	   << endl;
      exit(-1);
    }
    // Now we know that it is revision DBREV.  Check sequence length of
    // database against user-indicated sequence length 
    in_db_file>>buff;
    // Now check to see if next line is "#DBseq_len: " followed by a
    // number 
    if (buff != "#DBseq_len:") {
      cerr << "ERROR: The second line of the database does not "
	   << "contain the string \"#DBseq_len: \"" << endl
	   << "followed by the sequence length of the database, as "
	   << "required of revision " << DBREV 
	   << " databases.  Aborting..."<< endl; 
      exit(-1);
    }
    in_db_file>>db_seq_len;
    if (db_seq_len != seq_len) {
      cerr << "WARNING: Database sequence length is " << db_seq_len 
           << ", which does not match "
	   << "sequence length specified" << endl 
	   << "by user (or by default if no specification was given), "
           << "which is " << seq_len << endl
	   << "I will use the database sequence length.  If that is "
           << "not what you intended, type Ctrl-C to abort." << endl;
      seq_len = db_seq_len;
    }
    // Read next number into root
    in_db_file >> root;
  }
  // Otherwise, we assume we have an old-style database, and let the
  // user know that that's our assumption
  else {
    cerr << "WARNING: The string \"DBrev: \" is not in the first "
         << "line of the database." << endl
	 << "I'm assuming that it's an older style of database, and "
         << "will read it in" << endl
         << "based on that assumption.  If that is not what you want " 
         << "me to do, type CTRL-C" << endl << endl;
    // we have just read the first root into buff -- put it in root
    // instead 
    root = atoi(buff.c_str());
  }

  while (!in_db_file.eof()) {
    if (root == -1) break;
    db_forest.trees_found[root]++; 
    in_db_file>>db_forest.trees[root];
    db_size += db_forest.trees[root].NumLeaves();
    in_db_file>>root;
  }
  in_db_file.close();

  return db_size;
}


/*********************************************************************
 * WriteDB()                                                         *
 *   Writes db_forest to the file db_name, with the format described *
 *   in the header of ReadDB().  Prints database statistics at the   *
 *   end of the file.                                                *
 *                                                                   *
 *   Input:  const SeqForest &db_forest  Forest of sequences in      *
 *                                       database                    * 
 *           const string &db_name        Name of file in which to    *
 *                                       put database.               *
 *           const int db_size           Number of unique sequences  *
 *                                       in the database             *
 *           const int seq_len           Sequence length             *
 *                                                                   *
 *   Output: none                                                    *
 ********************************************************************/

void WriteDB(const SeqForest &db_forest, const string &db_name, const
	     int db_size, const int seq_len) 
{

  ofstream out_db_file(db_name.c_str());

  if (!out_db_file.is_open()) {
    cerr << "ERROR: Cannot open database file " << db_name
         << "for output, aborting..." << endl ;
    exit(-2);
  }
  out_db_file << "#DBrev: " << DBREV << endl;
  out_db_file << "#DBseq_len: " << seq_len << endl;

  for (int i = 0; i < db_forest.trees.Size(); i++) {
    if (db_forest.trees_found[i]) {
      out_db_file<<i<<endl;
      out_db_file<<db_forest.trees[i]<<endl;
    }
  }
  out_db_file<<" -1"<<endl;
  // we can now write anything, so I will write the db stats
  out_db_file<<"; DB STATS"<<endl;
  WriteDBStats(db_forest, out_db_file, db_size);
  out_db_file.close();

}

/*********************************************************************
 * FinalReport()                                                     *
 *   Reports data at end of run.  The number of streams, the number  *
 *   of input pairs, and the number of sequences in the input are    *
 *   always reported.  If we have done a comparison run, we report   *
 *   the number of anomalies, and the precentage of sequences that   *
 *   were anomalous.  Additionally, if asked for, the Hamming        *
 *   distance or locality frame count is reported.  If we have added *
 *   to the database, we report having done so and report the number *
 *   of sequences added.  If database statistics are asked for, we   *
 *   report the number of nodes, the number of unique sequences, the *
 *   number of branches, and the average database branch factor.     * 
 *                                                                   *
 *   Input:  const Config &cfg:         Configuration information    *
 *           const SeqForest &normal:   DB of normal sequences       *
 *           const int num_streams_fnd: Total number of streams found*
 *           const int num_seqs_added:  Number of unique sequences   *
 *                                      added                        *
 *           const Array<Stream> &streams:  Array of data streams    *
 *           const int db_size:         Number of unique sequences   *
 *                                      in DB                        *
 *                                                                   *
 *   Output: none                                                    *
 *                                                                   *
 *********************************************************************/

void FinalReport(const Config &cfg, const SeqForest &normal, const int
		 num_streams_fnd, const int num_seqs_added, const
		 Array<Stream> &streams, const int db_size) 
{
  int total_pairs = 0;
  int total_seqs = 0;
  int total_anoms = 0;
  int total_max_lfc = 0;
  int total_max_hdist = 0;
  int db_nodes = 0;
  int db_seqs = 0;
  int db_branches = 0;
  int j;

  // Sum up number of pairs input and number of seqs from all the streams
  for (j = 0; j < num_streams_fnd; j++) {
    total_seqs += streams[j].GetNumSeqsFnd();
    total_pairs += streams[j].GetNumPairsRead();
  }

  cout << endl;
  cout << "Number of different streams in input = " 
       << num_streams_fnd << endl;
  cout << "Total number of input pairs = " 
       << total_pairs << endl;
  cout << "Total number of sequences in input = " 
       << total_seqs << endl;

  if (cfg.add_to_db) {
    cout << "File added to database" << endl;
    cout << "Number of new sequences added to the database: " 
         << num_seqs_added << endl;
  }
  else {
    cout << "Scan completed" << endl;
    // Sum up number of anomalies from all the streams
    for (j = 0; j < num_streams_fnd; j++) {
      total_anoms += streams[j].GetNumAnoms();
    }

    cout << "Number of anomalies = " 
         << total_anoms << endl;
    cout << "Percentage anomalous = " 
	 << ((float)total_anoms * 100.0)/total_seqs << endl;

    // If asked for, compute Hamming distances across streams and report
    if (cfg.compute_hdist) {
      for (j = 0; j < num_streams_fnd; j++) {
	if (streams[j].GetMaxHDist() > total_max_hdist) {
	  total_max_hdist = streams[j].GetMaxHDist();
	}
      }
      cout << "Largest minimum Hamming distance = "
           << total_max_hdist << endl; 
    }

    // If asked for, compute lfc across streams and report
    if (cfg.lf_size > 1) {
      for (j = 0; j < num_streams_fnd; j++) {
	if (streams[j].GetMaxLFC() > total_max_lfc) {
	  total_max_lfc = streams[j].GetMaxLFC();
	}
      }
      cout << "Maximum lfc = " << total_max_lfc << endl;
    }
  }

  // If asked for, compute db stats and report
  if (cfg.write_db_stats) {
    WriteDBStats(normal, cout, db_size);
  }
}


/*********************************************************************
 * WriteDBStats()                                                    *
 *   Computes and writes to standard output the number of nodes in   *
 *   the database, the number of unique sequences, the number of     *
 *   branches, and the average database branch factor.               * 
 *                                                                   *
 *   Input:  const SeqForest &db_forest  Forest of sequences in      *
 *                                database                           *
 *           ostream &out_stream  Where to write info                *
 *           const int db_size    Number of unique sequences in the  *
 *                                database                           *
 *                                                                   *
 *   Output: none                                                    *
 *********************************************************************/

void WriteDBStats(const SeqForest &db_forest, ostream &out_stream,
		  const int db_size)   
{
  int db_nodes = 0;
  int db_branches = 0;

  for (int i = 0; i < db_forest.trees.Size(); i++) {
      if (db_forest.trees_found[i]) {
	db_nodes += db_forest.trees[i].NumNodes();
        db_branches += db_forest.trees[i].NumBranches();
      }
    }

  out_stream << "Number of DB nodes = " << db_nodes << endl;
  out_stream << "Number of unique sequences = "<<db_size << endl;
  out_stream << "Number of branches (edges) = "<<db_branches << endl;
  out_stream << "Average DB branch factor = "  
        <<((float)db_branches/(db_nodes - db_size))<<endl;
  
}


/*********************************************************************
 * OutputGraph()                                                     *
 *   Writes a file db_name.dot containing input for the program Dot. *
 *   Running Dot on db_name.dot produces a PostScript file           *
 *   containing a picture of the whole database tree.                * 
 *                                                                   *
 *   Input:  const SeqForest &db_forest  Forest of sequences in      *
 *                                       database                    *
 *           const string db_name        Filename to use             *
 *                                                                   *
 *   Output: none                                                    *
 *********************************************************************/

void OutputGraph(const SeqForest &db_forest, const string db_name) 
{
  char *dot_filename;
  dot_filename = new char [strlen(db_name.c_str())+4];
  strcpy(dot_filename, db_name.c_str());
  ofstream output_file(strcat(dot_filename,".dot"));

  output_file<<"digraph \""<<db_name<<"\" {"<<endl;
  output_file<<"  ratio=auto;"<<endl;
  output_file<<"  page=\"8.5,11\";"<<endl;
  for (int i = 0; i < db_forest.trees.Size(); i++) {
    if (db_forest.trees_found[i]) 
      db_forest.trees[i].OutputGraph(output_file);
  }
  output_file<<"}"<<endl;
  output_file.close();
}


/****************************************************************************
 *  GetPrimeLargerThan(int n)                                               * 
 *    Returns the smallest prime larger than the input integer.             *
 *    Changes no values.                                                    * 
 *                                                                          * 
 *  Input:  const int n                                                     *
 *  Output: smallest prime larger than n                                    *
 ***************************************************************************/

int GetPrimeLargerThan(const int n) 
{
  int primes[n];
  int primes_fnd = 1;
  int curr_num = 3;
  int is_prime = 1;

  primes[0] = 2;
  while(1) {
    for (int i = 0; i < primes_fnd; i++) {
      if ((curr_num % primes[i]) == 0) {
	is_prime = 0;
	break;
      }
    }
    if (is_prime == 1) {
      primes[primes_fnd++] = curr_num;
      if (curr_num > n) {
	break;
      }
    }
    curr_num = curr_num + 2;
    is_prime = 1;
  }
  return curr_num;
}
... 2021-01-28 18:33:55 +01:00			`/*********************************************************************`
			`* *`
			`* STIDE: Sequence Time-Delay Embedding v1.1 *`
			`* *`
			`* Written by Steve Hofmeyr 7/21/96 *`
			`* Revised by Julie Rehmeyer 3/98 *`
			`* *`
			`* Copyright (C) 1996, 1998 Regents of the University of New Mexico. *`
			`* All Rights Reserved. *`
			`* *`
			`* This program is free software; you can redistribute it and/or *`
			`* modify it under the terms of the GNU General Public License as *`
			`* published by the Free Software Foundation; either version 2 of *`
			`* the License, or (at your option) any later version. *`
			`* *`
			`* This program is distributed in the hope that it will be useful, *`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of *`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *`
			`* GNU General Public License for more details. *`
			`* *`
			`* You should have received a copy of the GNU General Public *`
			`* License along with this program; if not, write to the Free *`
			`* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, *`
			`* USA. *`
			`* *`
			`********************************************************************/`

			`#include <stdlib.h>`
			`#include <string.h>`
			`#include <iostream.h>`
			`#include <fstream.h>`
			`#include "../Utils/arrays.h"`
			`#include "../Utils/hash.h"`
			`#include "seq_config.h"`
			`#include "seq_stream.h"`
			`#include "flexitree.h"`

			`#define DBREV 1`

			`int counter = 0;`

			`Stream *GetReadyStream(Array<Stream> &streams, HashTableInt`
			`&sid_table, int &num_streams_fnd, int`
			`&total_pairs_read, const Config &cfg);`
			`int ReadDB(SeqForest &db_forest, const string &db_name,`
			`int &seq_len);`
			`void WriteDB(const SeqForest &db_forest, const string &db_name, const`
			`int db_size, const int seq_len);`
			`void FinalReport(const Config &cfg, const SeqForest &normal, const int`
			`num_streams_fnd, const int num_seqs_added, const`
			`Array<Stream> &streams, const int db_size);`
			`void WriteDBStats(const SeqForest &db_forest, ostream &out_stream,`
			`const int db_size);`
			`void OutputGraph(const SeqForest &db_forest, string db_name);`
			`int GetPrimeLargerThan(const int n);`

			`/*********************************************************************`
			`* main() *`
			`* Input: int argc: Number of command-line arguments *`
			`* char argv[]: array of strings containing `
			`* command-line arguments *`
			`* Output: 0 if successful, -1 if unsuccessful *`
			`*********************************************************************/`

			`int main(int argc, char *argv[])`

			`{`
			`Config cfg((const int) argc, (const char **) argv);`
			`// Declare configuration object and do`
			`// the configuration on the basis of the`
			`// command line arguments and the`
			`// configuration file`
			`Stream *active_stream; // This will point to the stream that`
			`// currently has a sequence to be worked`
			`// on (either added to the database or`
			`// compared).`
			`HashTableInt sid_table(GetPrimeLargerThan(cfg.max_streams));`
			`// Hash table relating external stream ids to`
			`// internal sids; make size of table`
			`// smallest prime larger than the number`
			`// of streams`
			`SeqForest normal(cfg.max_elements); // Uninitialized forest of`
			`// normal sequences`
			`Array<Stream> streams(cfg.max_streams); // Array of stream objects,`
			`// one for each data stream`
			`// in input, which are`
			`// allocated as needed`
			`int num_streams_fnd = 0; // Number of data streams`
			`// encountered to date`
			`int total_pairs_read = cfg.pair_offset; // Number of pairs read from`
			`// input to date from all`
			`// the data streams combined`
			`// -- can be offset using`
			`// the "-n" switch`
			`int db_size; // Total number of unique`
			`// sequences in the database`
			`int init_db_size = 0; // Number of unique`
			`// sequences in the`
			`// pre-existing database`




			`// Read database into normal, if database exists`
			`db_size = init_db_size = ReadDB(normal, cfg.db_name, cfg.seq_len);`

			`if (cfg.add_to_db) {`
			`while ((active_stream =`
			`GetReadyStream(streams, sid_table, num_streams_fnd,`
			`total_pairs_read, cfg))`
			`!= NULL) {`
			`active_stream->AddToDB(normal, db_size, total_pairs_read, cfg);`
			`}`
			`WriteDB(normal, cfg.db_name, db_size, cfg.seq_len);`
			`if (cfg.output_graph) {`
			`OutputGraph(normal,cfg.db_name);`
			`}`

			`}`
			`else {`
			`int i = 0;`
			`while ((active_stream =`
			`GetReadyStream(streams, sid_table, num_streams_fnd,`
			`total_pairs_read, cfg))`
			`!= NULL) {`
			`active_stream->CompareSeq(cfg, normal, total_pairs_read);`
			`}`
			`}`
			`FinalReport(cfg, normal, num_streams_fnd, db_size - init_db_size,`
			`streams, db_size);`
			`return(0);`
			`}`

			`/**********************************************************************`
			`* GetReadyStream() *`
			`* This function reads a pair from the input, appends the element *`
			`* to the current sequence string in the appropriate data stream, *`
			`* finds out if that data stream has a complete sequence to be *`
			`* processed, continues until it has found such a data stream, and *`
			`* returns a pointer to it. It updates num_streams_fnd, *`
			`* total_pairs_read, sid_table, and streams. *`
			`* *`
			`* Input: Array<Stream> &streams: the array of streams that we have *`
			`* found so far *`
			`* HashTableInt &sid_table: hash table relating external sids *`
			`* to internal sids *`
			`* int &num_streams_fnd: the number of streams found so far; *`
			`* int &total_pairs_read: the number of pairs read from the *`
			`* input stream so far *`
			`* const Config &cfg: configuration information *`
			`* *`
			`* Output: a pointer to the next stream that is ready for processing *`
			`**********************************************************************/`

			`Stream *GetReadyStream(Array<Stream> &streams, HashTableInt`
			`&sid_table, int &num_streams_fnd, int`
			`&total_pairs_read, const Config &cfg)`

			`{`
			`Stream *ready_stream = NULL;`
			`int ext_sid;`
			`int int_sid;`
			`int sval;`

			`cin >> ext_sid;`
			`while (!cin.eof()) {`
			`if (ext_sid == -1) {`
			`break;`
			`}`
			`int_sid = sid_table.ExtToInt(ext_sid, num_streams_fnd);`
			`cin >> sval;`
			`++total_pairs_read;`

			`// Update num_streams_fnd, if necessary`
			`if (int_sid >= num_streams_fnd) {`
			`if (int_sid > cfg.max_streams) {`
			`cerr<<"ERROR: Too many streams to follow, aborting..."<<endl;`
			`exit(-1);`
			`}`
			`// We need a new stream object`
			`streams[num_streams_fnd].Init(cfg, int_sid, ext_sid);`
			`num_streams_fnd = int_sid + 1;`
			`}`
			`streams[int_sid].Append(sval);`
			`if (streams[int_sid].Ready()) {`
			`ready_stream = &streams[int_sid];`
			`break;`
			`}`
			`cin >> ext_sid;`
			`}`

			`return ready_stream;`
			`}`

			`/*********************************************************************`
			`* ReadDB() *`
			`* Reads the database from a file and returns the number of unique *`
			`* sequences in the database. Checks for appropriate revision *`
			`* number. If it is a revision DBREV database, the second line *`
			`* will be "#DBseq_len: " followed by the sequence length. The *`
			`* next line will contain a single number, giving the root of the *`
			`* first tree. The following lines will contain the tree itself. *`
			`* The first seq_len numbers make up the first sequence (so the *`
			`* first number of the second line will be the same as the number *`
			`* on the first line). The next number will be a negative number *`
			`* between -(seq_len-1) and -2, indicating how far to backtrack in *`
			`* the first sequence, and the following positive numbers give the *`
			`* rest of the second sequence. So, for example, -3 would mean *`
			`* backtrack 3 numbers, take the previous numbers including the *`
			`* one you're on, and append the next two numbers. So after the *`
			`* -3 you would find two positive numbers, followed by a negative *`
			`* number (which you would use the same way as you used the -3, on *`
			`* the most recent sequence). Each tree is terminated by the *`
			`* number -1. So the sample input file *`
			`* 3 *`
			`* 3 4 2 9 10 3 -4 3 9 8 -2 3 -3 4 9 -1 *`
			`* 2 *`
			`* 2 3 4 5 6 7 -3 2 9 -1 *`
			`* yields the sequences: *`
			`* 3 4 2 9 10 3 *`
			`* 3 4 2 3 9 8 *`
			`* 3 4 2 3 9 3 *`
			`* 3 4 2 3 4 9 *`
			`* 2 3 4 5 6 7 *`
			`* 2 3 4 5 2 9 *`
			`* *`
			`* Input: SeqForest &db_forest Forest of sequences *`
			`* const string &db_name Name of database *`
			`* int &seq_len User-specified sequence length *`
			`* *`
			`* Output: the number of unique sequences in the database *`
			`* *`
			`********************************************************************/`

			`int ReadDB(SeqForest &db_forest, const string &db_name,`
			`int &seq_len)`
			`{`
			`ifstream in_db_file(db_name.c_str()); // file to read the database from`
			`int db_size = 0; // size of the database`
			`int root; // the first element of the sequences`
			`// we are reading in at the moment;`
			`// i.e., the root of this tree`
			`string buff;`
			`int db_seq_len;`
			`int rev_num;`

			`if (!in_db_file.is_open()) {`
			`cerr<<"WARNING: Cannot open database file " << db_name`
			`<< " for input"<<endl<<"Creating a new file"<<endl;`
			`return 0;`
			`}`

			`// Check to see if the first line contains "#DBrev:"`
			`in_db_file>>buff;`
			`if (buff == "#DBrev:") {`
			`in_db_file>>rev_num;`
			`if (rev_num > DBREV) {`
			`cerr << "ERROR: The revision number is greater than " << DBREV`
			`<< ". This version of STIDE is only capable of dealing "`
			`<< "with databases through DBrev " << DBREV`
			`<< ". Aborting..."<<endl;`
			`exit(-1);`
			`}`
			`if (rev_num < DBREV) {`
			`cerr << "ERROR: Revision number of database must be >= " << DBREV`
			`<< endl;`
			`exit(-1);`
			`}`
			`// Now we know that it is revision DBREV. Check sequence length of`
			`// database against user-indicated sequence length`
			`in_db_file>>buff;`
			`// Now check to see if next line is "#DBseq_len: " followed by a`
			`// number`
			`if (buff != "#DBseq_len:") {`
			`cerr << "ERROR: The second line of the database does not "`
			`<< "contain the string \"#DBseq_len: \"" << endl`
			`<< "followed by the sequence length of the database, as "`
			`<< "required of revision " << DBREV`
			`<< " databases. Aborting..."<< endl;`
			`exit(-1);`
			`}`
			`in_db_file>>db_seq_len;`
			`if (db_seq_len != seq_len) {`
			`cerr << "WARNING: Database sequence length is " << db_seq_len`
			`<< ", which does not match "`
			`<< "sequence length specified" << endl`
			`<< "by user (or by default if no specification was given), "`
			`<< "which is " << seq_len << endl`
			`<< "I will use the database sequence length. If that is "`
			`<< "not what you intended, type Ctrl-C to abort." << endl;`
			`seq_len = db_seq_len;`
			`}`
			`// Read next number into root`
			`in_db_file >> root;`
			`}`
			`// Otherwise, we assume we have an old-style database, and let the`
			`// user know that that's our assumption`
			`else {`
			`cerr << "WARNING: The string \"DBrev: \" is not in the first "`
			`<< "line of the database." << endl`
			`<< "I'm assuming that it's an older style of database, and "`
			`<< "will read it in" << endl`
			`<< "based on that assumption. If that is not what you want "`
			`<< "me to do, type CTRL-C" << endl << endl;`
			`// we have just read the first root into buff -- put it in root`
			`// instead`
			`root = atoi(buff.c_str());`
			`}`

			`while (!in_db_file.eof()) {`
			`if (root == -1) break;`
			`db_forest.trees_found[root]++;`
			`in_db_file>>db_forest.trees[root];`
			`db_size += db_forest.trees[root].NumLeaves();`
			`in_db_file>>root;`
			`}`
			`in_db_file.close();`

			`return db_size;`
			`}`


			`/*********************************************************************`
			`* WriteDB() *`
			`* Writes db_forest to the file db_name, with the format described *`
			`* in the header of ReadDB(). Prints database statistics at the *`
			`* end of the file. *`
			`* *`
			`* Input: const SeqForest &db_forest Forest of sequences in *`
			`* database *`
			`* const string &db_name Name of file in which to *`
			`* put database. *`
			`* const int db_size Number of unique sequences *`
			`* in the database *`
			`* const int seq_len Sequence length *`
			`* *`
			`* Output: none *`
			`********************************************************************/`

			`void WriteDB(const SeqForest &db_forest, const string &db_name, const`
			`int db_size, const int seq_len)`
			`{`

			`ofstream out_db_file(db_name.c_str());`

			`if (!out_db_file.is_open()) {`
			`cerr << "ERROR: Cannot open database file " << db_name`
			`<< "for output, aborting..." << endl ;`
			`exit(-2);`
			`}`
			`out_db_file << "#DBrev: " << DBREV << endl;`
			`out_db_file << "#DBseq_len: " << seq_len << endl;`

			`for (int i = 0; i < db_forest.trees.Size(); i++) {`
			`if (db_forest.trees_found[i]) {`
			`out_db_file<<i<<endl;`
			`out_db_file<<db_forest.trees[i]<<endl;`
			`}`
			`}`
			`out_db_file<<" -1"<<endl;`
			`// we can now write anything, so I will write the db stats`
			`out_db_file<<"; DB STATS"<<endl;`
			`WriteDBStats(db_forest, out_db_file, db_size);`
			`out_db_file.close();`

			`}`

			`/*********************************************************************`
			`* FinalReport() *`
			`* Reports data at end of run. The number of streams, the number *`
			`* of input pairs, and the number of sequences in the input are *`
			`* always reported. If we have done a comparison run, we report *`
			`* the number of anomalies, and the precentage of sequences that *`
			`* were anomalous. Additionally, if asked for, the Hamming *`
			`* distance or locality frame count is reported. If we have added *`
			`* to the database, we report having done so and report the number *`
			`* of sequences added. If database statistics are asked for, we *`
			`* report the number of nodes, the number of unique sequences, the *`
			`* number of branches, and the average database branch factor. *`
			`* *`
			`* Input: const Config &cfg: Configuration information *`
			`* const SeqForest &normal: DB of normal sequences *`
			`* const int num_streams_fnd: Total number of streams found*`
			`* const int num_seqs_added: Number of unique sequences *`
			`* added *`
			`* const Array<Stream> &streams: Array of data streams *`
			`* const int db_size: Number of unique sequences *`
			`* in DB *`
			`* *`
			`* Output: none *`
			`* *`
			`*********************************************************************/`

			`void FinalReport(const Config &cfg, const SeqForest &normal, const int`
			`num_streams_fnd, const int num_seqs_added, const`
			`Array<Stream> &streams, const int db_size)`
			`{`
			`int total_pairs = 0;`
			`int total_seqs = 0;`
			`int total_anoms = 0;`
			`int total_max_lfc = 0;`
			`int total_max_hdist = 0;`
			`int db_nodes = 0;`
			`int db_seqs = 0;`
			`int db_branches = 0;`
			`int j;`

			`// Sum up number of pairs input and number of seqs from all the streams`
			`for (j = 0; j < num_streams_fnd; j++) {`
			`total_seqs += streams[j].GetNumSeqsFnd();`
			`total_pairs += streams[j].GetNumPairsRead();`
			`}`

			`cout << endl;`
			`cout << "Number of different streams in input = "`
			`<< num_streams_fnd << endl;`
			`cout << "Total number of input pairs = "`
			`<< total_pairs << endl;`
			`cout << "Total number of sequences in input = "`
			`<< total_seqs << endl;`

			`if (cfg.add_to_db) {`
			`cout << "File added to database" << endl;`
			`cout << "Number of new sequences added to the database: "`
			`<< num_seqs_added << endl;`
			`}`
			`else {`
			`cout << "Scan completed" << endl;`
			`// Sum up number of anomalies from all the streams`
			`for (j = 0; j < num_streams_fnd; j++) {`
			`total_anoms += streams[j].GetNumAnoms();`
			`}`

			`cout << "Number of anomalies = "`
			`<< total_anoms << endl;`
			`cout << "Percentage anomalous = "`
			`<< ((float)total_anoms * 100.0)/total_seqs << endl;`

			`// If asked for, compute Hamming distances across streams and report`
			`if (cfg.compute_hdist) {`
			`for (j = 0; j < num_streams_fnd; j++) {`
			`if (streams[j].GetMaxHDist() > total_max_hdist) {`
			`total_max_hdist = streams[j].GetMaxHDist();`
			`}`
			`}`
			`cout << "Largest minimum Hamming distance = "`
			`<< total_max_hdist << endl;`
			`}`

			`// If asked for, compute lfc across streams and report`
			`if (cfg.lf_size > 1) {`
			`for (j = 0; j < num_streams_fnd; j++) {`
			`if (streams[j].GetMaxLFC() > total_max_lfc) {`
			`total_max_lfc = streams[j].GetMaxLFC();`
			`}`
			`}`
			`cout << "Maximum lfc = " << total_max_lfc << endl;`
			`}`
			`}`

			`// If asked for, compute db stats and report`
			`if (cfg.write_db_stats) {`
			`WriteDBStats(normal, cout, db_size);`
			`}`
			`}`


			`/*********************************************************************`
			`* WriteDBStats() *`
			`* Computes and writes to standard output the number of nodes in *`
			`* the database, the number of unique sequences, the number of *`
			`* branches, and the average database branch factor. *`
			`* *`
			`* Input: const SeqForest &db_forest Forest of sequences in *`
			`* database *`
			`* ostream &out_stream Where to write info *`
			`* const int db_size Number of unique sequences in the *`
			`* database *`
			`* *`
			`* Output: none *`
			`*********************************************************************/`

			`void WriteDBStats(const SeqForest &db_forest, ostream &out_stream,`
			`const int db_size)`
			`{`
			`int db_nodes = 0;`
			`int db_branches = 0;`

			`for (int i = 0; i < db_forest.trees.Size(); i++) {`
			`if (db_forest.trees_found[i]) {`
			`db_nodes += db_forest.trees[i].NumNodes();`
			`db_branches += db_forest.trees[i].NumBranches();`
			`}`
			`}`

			`out_stream << "Number of DB nodes = " << db_nodes << endl;`
			`out_stream << "Number of unique sequences = "<<db_size << endl;`
			`out_stream << "Number of branches (edges) = "<<db_branches << endl;`
			`out_stream << "Average DB branch factor = "`
			`<<((float)db_branches/(db_nodes - db_size))<<endl;`

			`}`


			`/*********************************************************************`
			`* OutputGraph() *`
			`* Writes a file db_name.dot containing input for the program Dot. *`
			`* Running Dot on db_name.dot produces a PostScript file *`
			`* containing a picture of the whole database tree. *`
			`* *`
			`* Input: const SeqForest &db_forest Forest of sequences in *`
			`* database *`
			`* const string db_name Filename to use *`
			`* *`
			`* Output: none *`
			`*********************************************************************/`

			`void OutputGraph(const SeqForest &db_forest, const string db_name)`
			`{`
			`char *dot_filename;`
			`dot_filename = new char [strlen(db_name.c_str())+4];`
			`strcpy(dot_filename, db_name.c_str());`
			`ofstream output_file(strcat(dot_filename,".dot"));`

			`output_file<<"digraph \""<<db_name<<"\" {"<<endl;`
			`output_file<<" ratio=auto;"<<endl;`
			`output_file<<" page=\"8.5,11\";"<<endl;`
			`for (int i = 0; i < db_forest.trees.Size(); i++) {`
			`if (db_forest.trees_found[i])`
			`db_forest.trees[i].OutputGraph(output_file);`
			`}`
			`output_file<<"}"<<endl;`
			`output_file.close();`
			`}`


			`/****************************************************************************`
			`* GetPrimeLargerThan(int n) *`
			`* Returns the smallest prime larger than the input integer. *`
			`* Changes no values. *`
			`* *`
			`* Input: const int n *`
			`* Output: smallest prime larger than n *`
			`***************************************************************************/`

			`int GetPrimeLargerThan(const int n)`
			`{`
			`int primes[n];`
			`int primes_fnd = 1;`
			`int curr_num = 3;`
			`int is_prime = 1;`

			`primes[0] = 2;`
			`while(1) {`
			`for (int i = 0; i < primes_fnd; i++) {`
			`if ((curr_num % primes[i]) == 0) {`
			`is_prime = 0;`
			`break;`
			`}`
			`}`
			`if (is_prime == 1) {`
			`primes[primes_fnd++] = curr_num;`
			`if (curr_num > n) {`
			`break;`
			`}`
			`}`
			`curr_num = curr_num + 2;`
			`is_prime = 1;`
			`}`
			`return curr_num;`
			`}`