/**********************************************************************
 *
 * suffix.h -- definitions used in suffix.cpp
 *
 * Copyright 2000 Gordon W. Paynter
 * Copyright 2000 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#ifndef SUFFIX_H
#define SUFFIX_H

// Types

// symbol: the type of a single input word. The input as a whole is
// stored as an array of symbols.
// Required range: 0 - vocabulary size (typically hundreds of thousands)
// NOTE(review): the language standard only guarantees 16 bits for
// unsigned int; this presumably relies on a platform where it is at
// least 32 bits wide -- confirm.
typedef unsigned int symbol;

// The cellindex and cellcount types are used to store, respectively,
// indexes into and ranges (counts) of cells in large arrays.
// Required range: 0 - length of input (typically millions)
// NOTE(review): both are unsigned, so subtracting a larger value from a
// smaller one wraps around rather than going negative.
typedef unsigned int cellindex;
typedef unsigned int cellcount;

// The frequency type is used to store the number of times a phrase
// (or some other thing) occurs.
// Required range: 0 - frequency of most common symbol (often "the")
typedef unsigned int frequency;

// Global variables 

// The symbol array holds the input words. These are declarations only;
// the variables are defined in another translation unit.
// NOTE(review): symbol_array_length is presumably the size of the
// symbols array, as distinct from inputLength (the number of words
// read) -- confirm against the definition.
extern cellcount symbol_array_length;
extern symbol   *symbols;

// The number of words read is stored in inputLength.
extern cellcount inputLength;

// Suffix and prefix arrays are used to extract phrases.
// Each is an array of pointers to symbol.
// NOTE(review): the entries presumably point into the symbols array
// -- confirm against the code that builds them.
extern symbol  **suffixArray;
extern symbol  **prefixArray;

// Collection-specific information: the first and last symbol values
// used for stopwords, and the first and last used for content words.
// NOTE(review): presumably each first/last pair is an inclusive range
// of symbol values -- confirm against the code that sets them.
extern symbol firstStopSymbol;
extern symbol lastStopSymbol;
extern symbol firstContentSymbol;
extern symbol lastContentSymbol;


// phraseMode: are we allowed to terminate a phrase on a stopword?
// NOTE(review): presumably holds one of the ANYPHRASE / STOPWORDS
// values defined below -- confirm which value permits phrases to end
// on a stopword.
extern int phraseMode;
// NOTE(review): minOccurs is presumably the minimum number of
// occurrences required of a phrase -- confirm against its use.
extern int minOccurs;

// Phrase mode values
#define ANYPHRASE 0
#define STOPWORDS 1


// Direction values
// NOTE(review): presumably used to choose between the suffix array and
// the prefix array when extracting phrases -- confirm against callers.
#define SUFFIX 0
#define PREFIX 1


// Special symbol values: small reserved numbers (1 - 4) that mark
// structure in the input rather than representing words.
#define COLLECTIONSTART 1
#define COLLECTIONEND 2
#define DOCUMENTSTART 3
#define PHRASELIMIT 4

// LASTDELIMITER has the same value as PHRASELIMIT, the highest special
// symbol defined above.
// NOTE(review): presumably symbol values <= LASTDELIMITER are treated
// as delimiters; if another special symbol is added, this value must
// be updated by hand -- confirm.
#define LASTDELIMITER 4

#endif
