// check.h -- manipulate the suffixCheck array
//
// The suffixCheck array is used to prevent a phrase being added to
// the results of getExpansions.  It works by declaring an array that
// has one bit for each entry in the suffix array, which is cleared at
// the start of getExpansions.  As getExpansions adds each phrase to
// the result set, we set the bits in the suffixCheck array that
// correspond to the positions of the phrase in the suffixArray.

// (The point is that getExpansions can later check whether a phrase
// or its prefix has been seen before by looking up the bit
// corresponding to its first occurrence in the suffixCheck array: if
// the bit is set, a subphrase of this phrase is already in the
// results, and the current phrase can be ignored.)

#include <new>

#include "suffix.h"

// The suffixCheck array
//
// "check" is the type of one cell of the array.  Under __WIN32__ it is
// unsigned char; elsewhere it is uint8_t from <inttypes.h>.  The bit
// helpers below address 8 bits per cell (cell = position >> 3).
#if defined (__WIN32__)
typedef unsigned char check;
#else
#include <inttypes.h>
typedef uint8_t check;
#endif

// Pointer to the cells of the array; assigned by allocateSuffixCheck.
check *suffixCheck;

// The length of the suffixCheck array, counted in cells (not bits);
// computed by allocateSuffixCheck.
cellcount checkLength;

// Bitmask constants for working on one 8-bit cell.
//   ALLBITS         - every bit of a cell set
//   LEFTMOSTBIT     - only the leftmost (most significant) bit set
//   LEFTMOSTBITS[n] - the n leftmost bits set, for n = 0..8
const check ALLBITS        = 0xFF;
const check LEFTMOSTBIT    = 0x80;
const check LEFTMOSTBITS[] = {
  0x00,  // 0 bits: 00000000
  0x80,  // 1 bit:  10000000
  0xC0,  // 2 bits: 11000000
  0xE0,  // 3 bits: 11100000
  0xF0,  // 4 bits: 11110000
  0xF8,  // 5 bits: 11111000
  0xFC,  // 6 bits: 11111100
  0xFE,  // 7 bits: 11111110
  0xFF   // 8 bits: 11111111
};


// Allocate the initial memory for suffixCheck
//
// Sizes the array so that it holds at least one bit per symbol
// (number_of_symbols / 8 cells, plus one extra cell), records the
// number of cells in checkLength, and clears every bit.
// If the allocation fails, prints an error to cerr and exits with
// status 2.
inline void allocateSuffixCheck(cellcount number_of_symbols) 
{
  checkLength = number_of_symbols / (sizeof(check) * 8) + 1;

  // Plain new[] signals failure by throwing std::bad_alloc and never
  // returns NULL, which would make the check below dead code.  The
  // nothrow form returns NULL on failure so the error path can run.
  suffixCheck = new (std::nothrow) check[checkLength];
  if (suffixCheck == NULL) {
    cerr << "Suffix error: out of memory allocating suffixCheck array" << endl; 
    exit(2);
  }
  memset(suffixCheck, 0, sizeof(check) * checkLength);
}


// Reset suffixCheck so that no position is marked: every one of its
// checkLength cells is overwritten with zero.
inline void clearSuffixCheck() 
{
  memset(suffixCheck, 0, checkLength * sizeof(*suffixCheck));
}


// Report whether the bit for a given position is set in suffixCheck.
// Returns 1 if the bit is set and 0 if it is clear.
inline int getSuffixCheck(cellindex position) 
{
  // The upper bits of the position select the cell; the lowest three
  // select the bit inside it, counted from the leftmost bit.
  const cellindex byte_index = position >> 3;
  const unsigned bit_offset = position & 0x07;
  const check probe = static_cast<check>(LEFTMOSTBIT >> bit_offset);

  return ((suffixCheck[byte_index] & probe) != 0) ? 1 : 0;
}

// Mark a single position in suffixCheck by setting its bit to 1.
// Every other bit is left as it was.
inline void setSuffixCheck(cellindex suff) 
{
  // Lowest three bits pick the bit within the cell (0 = leftmost);
  // the remaining bits pick the cell.
  const unsigned bit_offset = suff & 0x07u;
  check &target = suffixCheck[suff >> 3];
  target = static_cast<check>(target | (LEFTMOSTBIT >> bit_offset));
}


// Set a run of consecutive bits inside one cell of suffixCheck.
// Bits are indexed 0-7, with 0 the leftmost bit; first_bit and
// last_bit are both inclusive.
inline void setCellBits(cellindex cell_offset, unsigned first_bit, unsigned last_bit) 
{
  // LEFTMOSTBITS[n] has its n leftmost bits set; shifting it right by
  // first_bit lines the run up with the requested bit range.
  const unsigned run_length = last_bit + 1 - first_bit;
  const check run = static_cast<check>(LEFTMOSTBITS[run_length] >> first_bit);
  suffixCheck[cell_offset] = static_cast<check>(suffixCheck[cell_offset] | run);
}


// Set every bit of suffixCheck from position first to position last,
// both inclusive.  An inverted range (first > last) is treated as
// empty and sets nothing.
inline void setSuffixCheck(cellindex first, cellindex last) 
{
  // Guard against an inverted range: within a single cell it would
  // make setCellBits compute a wrapped-around bit count and index past
  // the end of LEFTMOSTBITS; across cells it would set unrelated bits.
  if (first > last)
    return;

  // Find the first and last cells in which bits are set
  cellindex first_cell = first >> 3;
  cellindex last_cell = last >> 3;

  // If all the values are in the same cell, set them
  if (first_cell == last_cell) {
    setCellBits(first_cell, (first & 0x07u), (last & 0x07u));
    return;
  }

  // Set the bits in the first and last cells: from the starting bit to
  // the end of the first cell, and from the start of the last cell up
  // to the ending bit.
  setCellBits(first_cell, (first & 0x07u), 7);
  setCellBits(last_cell, 0, (last & 0x07u));

  // Set the bits in the intermediate cells (if any): these are covered
  // completely, so whole cells are filled at once.
  ++first_cell;
  if(last_cell > first_cell)
    memset(suffixCheck + first_cell, ALLBITS, 
	   (last_cell - first_cell) * sizeof(check));
}


// Print the suffixCheck array (for debugging)
//
// Writes one line per cell to cout: the cell number, the range of
// positions the cell covers, its eight bits from leftmost to
// rightmost, and the cell's value as an unsigned integer.
//
// Declared inline, like the other functions in this header, so that
// the definition does not cause multiple-definition link errors when
// the header is included from more than one source file.
inline void printSuffixCheck() 
{
  for (cellcount i = 0; i < checkLength; ++i) {
    cout << "cell " << i << "  \t";
    cout << "(" << (i * 8) << "-" << (i * 8 + 7) << ")    \t";
    for (cellindex j = 0; j < 8; ++j) {
      cout << getSuffixCheck(i * 8 + j);
    }
    // Cast so the cell prints as a number rather than as a character.
    cout << " (" << (unsigned int) suffixCheck[i] << ")\n";
  }
}

