/**********************************************************************
 *
 * mgq.c -- cut-down version of mgquery
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "mgq.h"


#include <stdio.h>
#include <string.h>
/* #include <io.h> */
#include <fcntl.h>

#ifdef __cplusplus
extern "C" {
#endif
  
#include "sysfuncs.h"
  
#include "messages.h"
#include "memlib.h"
  
#include "invf.h"
#include "text.h"
#include "lists.h"
#include "backend.h"
#include "environment.h"
#include "globals.h"
#include "mg_errors.h"
#include "commands.h"
#include "text_get.h"
#include "term_lists.h"
#include "local_strings.h"

#include "words.h"
#include "stemmer.h"
#include "stem_search.h"
  
#ifdef __cplusplus
}
#endif


#include "mgq.h"

/* get a reasonable database cache size */
/* MAXNUMDATABASEINFO is the number of slots in the database cache. */
/* It can be predefined by the build; otherwise it is 10 when GSDLSERVER */
/* is defined and 2 when it is not. */
#ifndef MAXNUMDATABASEINFO
#  ifdef GSDLSERVER
#    define MAXNUMDATABASEINFO 10
#  else
#    define MAXNUMDATABASEINFO 2
#  endif
#endif

/* sizes (in chars, terminating NUL included) of the fixed-length string */
/* fields of a DatabaseInfo record */
#define MAXCOLLECTIONLEN  16
#define MAXMGDIRLEN       256
#define MAXGENSUFFIXLEN   256
#define MAXTEXTSUFFIXLEN  256

/* One slot of the database cache. A slot is considered in use when */
/* accessnum >= 0 and qd != NULL. */
typedef struct DatabaseInfo {
  int accessnum; /* -1 = invalid record */
                 /* otherwise the access number assigned the last time */
                 /* this slot was stored or found by a search; the slot */
                 /* with the lowest number is the next one to be reused */
  char collection[MAXCOLLECTIONLEN]; /* collection name, key for search_collect */
  char mgdir[MAXMGDIRLEN];           /* value given to the "mgdir" environment setting */
  char gensuffix[MAXGENSUFFIXLEN];   /* value given to "mgname", key for search_gensuffix */
  char textsuffix[MAXTEXTSUFFIXLEN]; /* value given to "textname" */
  query_data *qd; /* handle returned by InitQuerySystem, NULL when the slot is empty */
} DatabaseInfo;


/* globals needed by some vague part of mg... */
/* (none of these are read or written anywhere else in this file) */
FILE *OutFile = NULL, *InFile = NULL;
int OutPipe = 0, InPipe = 0;
int Quitting = 0;

/* globals needed to handle loading of databases */
/* index into dbcache of the database whose environment is currently */
/* set up, or -1 when there is no current database */
static int cur_cachenum = -1;

/* globals needed by the database cache */
static DatabaseInfo dbcache[MAXNUMDATABASEINFO];
/* the next access number that get_next_accessnum will hand out */
static int cache_nextaccessnum = 0;
/* number of slots that currently hold a loaded database */
static int cache_numloaded = 0;



#if defined(PARADOCNUM) ||  defined(NZDL)
/* GetDocNumFromParaNum maps a paragraph number onto a document number. */
/* It binary-searches qd->paragraph for the entry "mid" (between 1 and */
/* qd->td->cth.num_of_docs) that satisfies */
/*     Paragraph[mid-1] < paranum <= Paragraph[mid] */
/* and returns that mid. If no entry qualifies, FatalError is called */
/* and, should that return, the result is 0. */
static int GetDocNumFromParaNum(query_data *qd, int paranum) {
  int Documents = qd->td->cth.num_of_docs;
  int *Paragraph = qd->paragraph;
  int low = 1, high = Documents;

  /* The search must stop as soon as the interval is empty. Testing only */
  /* that mid stays inside 1..Documents is not enough: for a paranum */
  /* larger than Paragraph[Documents], low settles at Documents+1 while */
  /* mid stays at Documents, so the loop would never terminate. */
  while (low <= high)
    {
      /* written this way so that low+high cannot overflow */
      int mid = low + (high - low) / 2;

      if (paranum > Paragraph[mid])
        low = mid+1;
      else if (paranum <= Paragraph[mid-1])
        high = mid-1;
      else
        return mid;
    }
  FatalError(1, "Bad paragraph number.\n");
  return 0;
}

/* GetParaNumFromDocNum returns Paragraph[docnum-1]+1 for a document */
/* number in the range 1..num_of_docs, and 0 for any other number. */
static int GetParaNumFromDocNum(query_data *qd, int docnum) {
  int doc_count = qd->td->cth.num_of_docs;
  int *para_table = qd->paragraph;

  /* reject document numbers outside the table */
  if (docnum <= 0 || docnum > doc_count)
    return 0;

  return para_table[docnum-1] + 1;
}
#endif


/*****************************************************************************/

/* MGQError reports an unrecoverable problem: the message is written to */
/* stderr with a "Fatal error: " prefix and the process exits with */
/* status 1. This function does not return. */
static void MGQError(char *emsg)
{
  (void) fprintf (stderr, "Fatal error: %s\n", emsg);

  exit (1);
}

/* ProcessDocs steps through the answer list of qd and hands each */
/* document to the sender callback. */
/*   skip    - number of documents to step over (with NextDoc) first; */
/*             if the list runs out while skipping, 0 is returned */
/*   howmany - the loop ends once --howmany is no longer positive; as */
/*             the test sits at the bottom of a do/while, one document */
/*             is processed even when howmany is <= 0 */
/*   kind    - result_docs: the text is loaded, uncompressed and passed */
/*             to sender; result_docnums: sender gets "" with length 0; */
/*             any other kind sends nothing */
/*   sender  - callback (text, length, docnum, weight, ptr); may be */
/*             NULL; a non-zero return value ends the loop */
/*   ptr     - passed through to sender untouched */
/* Returns the number of loop iterations that were run. */
static int ProcessDocs (query_data * qd, int skip, int howmany, 
			enum result_kinds kind, 
			int (*sender)(char *,int,int,float,void *), void *ptr) {
  int max_buf = 0, output_failure = 0;
  int DocCount = 0;
  int need_text = (kind == result_docs);

  /* skip the requested number of documents */
  while (skip > 0) {
    if (!NextDoc(qd)) return 0;
    skip--;
  }

  /* find out the maximum size for the text buffer */
  /* (environment setting "buffer", 1048576 when it is not set) */
  if (need_text) max_buf = atoi (GetDefEnv ("buffer", "1048576"));

  /* process each document */
  do {
    u_char *UDoc = NULL;
    unsigned long ULen=0;

#if defined(PARADOCNUM) ||  defined(NZDL)
    /* adjust the document number for paragraph level result_docs */
    /* this is a bit of a hack ... */
    /* NOTE(review): the entry in qd->DL is overwritten in place, so */
    /* running over the same list twice would presumably convert the */
    /* number a second time -- confirm */
    if (kind==result_docs && qd->id->ifh.InvfLevel == 3 &&
	qd->DL != NULL && (int)qd->doc_pos < (int)qd->DL->num)
      qd->DL->DE[qd->doc_pos].DocNum = GetParaNumFromDocNum(qd, qd->DL->DE[qd->doc_pos].DocNum);
#endif
    
    if (need_text) {
      /* load the compressed text */
      /* (a non-zero result is treated as failure; MGQError exits) */
      if (LoadCompressedText (qd, max_buf))
	MGQError("Unable to load compressed text(memory?).");

      /* uncompress the loaded text */
      UDoc = GetDocText (qd, &ULen);
      if (UDoc == NULL) MGQError("UDoc is unexpectedly NULL");
    }
      
    /* UDoc is only non-NULL for result_docs, so this test lets */
    /* through result_docs and result_docnums and nothing else */
    if (UDoc != NULL  ||  kind == result_docnums) {
      int docnum = GetDocNum(qd);
#if defined(PARADOCNUM) ||  defined(NZDL)
      /* for level 3 indexes the number is converted with */
      /* GetDocNumFromParaNum before it is reported */
      if (qd->id->ifh.InvfLevel == 3) docnum = GetDocNumFromParaNum(qd, docnum);
#endif
      switch (kind) {
      case result_docnums:
	if (sender != NULL)
	  output_failure = (*sender)("",0,docnum,GetDocWeight(qd),ptr);
	break;
      case result_docs:
	if (sender != NULL)
	  output_failure = (*sender)((char *)UDoc,ULen,docnum,GetDocWeight(qd),ptr);
	break;
      default:
	break;
      }
    }
    /* counted whether or not anything was sent */
    DocCount++;
      
    /* NextDoc is evaluated first, so the list position has already */
    /* moved on when one of the later tests ends the loop */
  } while (NextDoc (qd) && output_failure == 0 && --howmany > 0);
  
  /*  if (need_text) FreeTextBuffer (qd);*/
  
  return (DocCount);
}


/* send_query_term_freqs reports every entry of a query term list */
/* through the sender callback. */
/*   qtl    - the query term list; may be NULL, in which case nothing */
/*            is sent */
/*   sender - callback (text, length, count, weight, ptr); may be NULL, */
/*            in which case nothing is sent */
/*   ptr    - passed through to sender untouched */
/* For each entry the callback receives the bytes following the first */
/* byte of Term as the text, the first byte of Term as the length, the */
/* entry's Count in the integer argument and 0.0 as the weight. The */
/* value returned by the callback is ignored. */
static void send_query_term_freqs(QueryTermList *qtl,
				  int (*sender)(char *,int,int,float,void *), void *ptr)
{
  int i = 0;

  /* nothing to do without a list or without somebody to tell */
  if (qtl == NULL || sender == NULL) return;

  for (i = 0; i < qtl->num; i++)
    {
      (* sender)((char *)(qtl->QTE[i].Term+1), qtl->QTE[i].Term[0], 
		 qtl->QTE[i].Count, (float)0.0, ptr);
    }
}


/* send_terms reports every entry of a term list through the sender */
/* callback. */
/*   qtl    - the term list; may be NULL, in which case nothing is sent */
/*   sender - callback (text, length, count, weight, ptr); may be NULL, */
/*            in which case nothing is sent */
/*   ptr    - passed through to sender untouched */
/* For each entry the callback receives the bytes following the first */
/* byte of Word as the text, the first byte of Word as the length, the */
/* entry's Count in the integer argument and 0.0 as the weight. The */
/* value returned by the callback is ignored. */
static void send_terms (TermList * qtl, 
			int (*sender)(char *,int,int,float,void *), void *ptr)
{
  int i = 0;

  /* nothing to do without a list or without somebody to tell */
  if (qtl == NULL || sender == NULL) return;

  for (i = 0; i < qtl->num; i++)
    {
      (* sender)((char *)(qtl->TE[i].Word+1), qtl->TE[i].Word[0], 
		 qtl->TE[i].Count, (float)0.0, ptr);
    }
}


/* MoreDocs () */
/* Records the size of the answer list in qd->num_of_ans and then sends */
/* the requested kind of result through the sender callback: */
/*   result_docs, result_docnums - documents, via ProcessDocs (only */
/*                                 when there is at least one answer) */
/*   result_termfreqs            - the query term list qd->QTL */
/*   result_terms                - the term list qd->TL */
/* Any other kind sends nothing. qd->DL must not be NULL. */

static void
MoreDocs (query_data * qd, enum result_kinds kind, 
	  int skip, int howmany,
	  int (*sender)(char *,int,int,float,void *), void *ptr)
{
  qd->num_of_ans = qd->DL->num;

  if (kind == result_docs || kind == result_docnums) {
    /* an empty answer list has no documents to walk through */
    if (qd->num_of_ans > 0)
      ProcessDocs (qd, skip, howmany, kind, sender, ptr);
  } else if (kind == result_termfreqs) {
    send_query_term_freqs(qd->QTL, sender, ptr);
  } else if (kind == result_terms) {
    send_terms(qd->TL, sender, ptr);
  }
}






/******************************************
 * functions to handle the database cache *
 ******************************************/

/* init_dbcache should be called at the start of each */
/* function which deals with the database cache */
static void init_dbcache (void) {
  static int dbcacheinited = 0;
  int i = 0;

  if (dbcacheinited) return;

  cache_numloaded = 0;

  for (i=0; i<MAXNUMDATABASEINFO; i++) {
    dbcache[i].accessnum = -1;
    dbcache[i].collection[0] = '\0';
    dbcache[i].mgdir[0] = '\0';
    dbcache[i].gensuffix[0] = '\0';
    dbcache[i].textsuffix[0] = '\0';
    dbcache[i].qd = NULL;
  }

  dbcacheinited = 1;
}

/* hands out the current cache access number and advances the counter */
/* so that the following call gets the next higher one */
static int get_next_accessnum (void) {
  int issued = cache_nextaccessnum;

  cache_nextaccessnum = issued + 1;
  return issued;
}

/* get_free_dbcache picks the slot to (re)use next: the one with the */
/* lowest access number. Empty slots carry -1 and therefore win over */
/* loaded ones; among equal numbers the first slot is taken. If no */
/* slot is below the current access counter, slot 0 is returned. */
/* init_dbcache should be called before this function */
static int get_free_dbcache (void) {
  int oldest_slot = 0;
  int oldest_access = cache_nextaccessnum; /* the current max */
  int slot;

  for (slot = 0; slot != MAXNUMDATABASEINFO; ++slot) {
    if (dbcache[slot].accessnum >= oldest_access)
      continue;

    oldest_access = dbcache[slot].accessnum;
    oldest_slot = slot;
  }

  return oldest_slot;
}

/* search_collect will search for an index which */
/* belongs to a certain collection It returns -1 if none could be found. */
/* init_dbcache should be called before this function */
static int search_collect (char *collection) {
  int i = 0;

  for (i=0; i<MAXNUMDATABASEINFO; i++) {
    if ((dbcache[i].accessnum >= 0) &&
	(dbcache[i].qd != NULL) &&
	(strcmp (collection, dbcache[i].collection) == 0)
	/* && (dbcache[i].qd->id->ifh.InvfLevel == 2)*/
	) {
      dbcache[i].accessnum = get_next_accessnum ();
      return i;
    }
  }

  return -1;
}

/* search_gensuffix will search for an index which */
/* has a certain gensuffix. It returns -1 if none could be found. */
/* init_dbcache should be called before this function */
static int search_gensuffix (char *gensuffix) {
  int i = 0;

  for (i=0; i<MAXNUMDATABASEINFO; i++) {
    if ((dbcache[i].accessnum >= 0) &&
	(dbcache[i].qd != NULL) &&
	(strcmp (gensuffix, dbcache[i].gensuffix) == 0)) {
      dbcache[i].accessnum = get_next_accessnum ();
      return i;
    }
  }

  return -1;
}

/* unload_database will unload a certain entry within */
/* the database cache, clearing it for furture use. */
static void unload_database (int i) {
  /* check to see if it contains anything */
  if (dbcache[i].accessnum < 0 || dbcache[i].qd == NULL) 
    return;

  /* unload all the query information  */
  FinishQuerySystem(dbcache[i].qd);
  
  /* reset all the db info */
  dbcache[i].accessnum = -1;
  dbcache[i].collection[0] = '\0';
  dbcache[i].mgdir[0] = '\0';
  dbcache[i].gensuffix[0] = '\0';
  dbcache[i].textsuffix[0] = '\0';
  dbcache[i].qd = NULL;

  cache_numloaded--;
  if (cache_numloaded < 0) cache_numloaded = 0;
}

/* cache_database stores the information about a loaded database in */
/* slot i of the database cache. */
/*   i          - index of the slot to fill; anything still loaded */
/*                there is unloaded first */
/*   collection, mgdir, gensuffix, textsuffix - strings copied into */
/*                the slot's fixed-size fields */
/*   qd         - the query system handle to keep in the slot */
/* The strings are copied with an explicit bound, so a string that is */
/* too long for its field is truncated instead of overrunning the */
/* record. Note that a truncated collection or gensuffix no longer */
/* compares equal to the full name in search_collect/search_gensuffix. */
static void cache_database (int i, char *collection, char *mgdir, char *gensuffix,
			    char *textsuffix, query_data *qd) {
  /* make sure this entry has been unloaded first */
  if (dbcache[i].accessnum >= 0 && dbcache[i].qd != NULL) 
    unload_database (i);

  /* store the db info */
  dbcache[i].accessnum = get_next_accessnum ();

  /* strncpy does not terminate the destination when the source fills */
  /* it completely, so the last char of each field is set by hand */
  strncpy (dbcache[i].collection, collection, sizeof (dbcache[i].collection) - 1);
  dbcache[i].collection[sizeof (dbcache[i].collection) - 1] = '\0';

  strncpy (dbcache[i].mgdir, mgdir, sizeof (dbcache[i].mgdir) - 1);
  dbcache[i].mgdir[sizeof (dbcache[i].mgdir) - 1] = '\0';

  strncpy (dbcache[i].gensuffix, gensuffix, sizeof (dbcache[i].gensuffix) - 1);
  dbcache[i].gensuffix[sizeof (dbcache[i].gensuffix) - 1] = '\0';

  strncpy (dbcache[i].textsuffix, textsuffix, sizeof (dbcache[i].textsuffix) - 1);
  dbcache[i].textsuffix[sizeof (dbcache[i].textsuffix) - 1] = '\0';

  dbcache[i].qd = qd;

  cache_numloaded++;
}

/* make_current switches the environment over to the database in cache */
/* slot i. Nothing happens if that slot is already current. Otherwise */
/* the environment of the previous current database (if any) is torn */
/* down; then, provided i names a loaded slot, a fresh environment is */
/* set up from the slot's mgdir, gensuffix and textsuffix. Passing -1 */
/* (or an empty slot) leaves no database current. */
static void make_current (int i) {
  /* already the current index? */
  if (cur_cachenum == i) return;

  /* drop the environment of the old index */
  if (cur_cachenum >= 0) UninitEnv ();
  cur_cachenum = -1;

  /* only a loaded slot can become current */
  if (i >= 0 && dbcache[i].accessnum >= 0 && dbcache[i].qd != NULL) {
    /* start from the default environment ... */
    InitEnv ();

    /* ... and point it at this database */
    SetEnv("mgdir",dbcache[i].mgdir,NULL);
    SetEnv("mgname",dbcache[i].gensuffix,NULL);
    SetEnv("textname",dbcache[i].textsuffix,NULL);

    PushEnv ();

    cur_cachenum = i;
  }
}



/********************
 * public functions *
 ********************/

/* mgq_ask runs one line of input against the current database. */
/* The line is first handed to ProcessCommands. If that leaves an */
/* error in CommandsErrorStr, the error is printed to stderr and 0 is */
/* returned. If the pointer it returns is at the end of the string, */
/* there is nothing to query and 1 is returned. Otherwise the previous */
/* answers are freed and a boolean, ranked/approximate or docnums */
/* query is run, depending on get_query_type(), with its parameters */
/* taken from the environment. */
/* Returns 0 when there is no current database or a command error */
/* occurred, 1 otherwise. */
int mgq_ask(char *line)
{
  query_data *qd = (query_data *)NULL;
  char QueryType = QUERY_BOOLEAN;
  /* NOTE(review): initialised with a QUERY_* constant although the */
  /* values assigned below are OUTPUT_* ones */
  char OutputType = QUERY_DOCNUMS;
  char *LinePtr = (char *)NULL;

  if (cur_cachenum == -1) return 0;
  qd = dbcache[cur_cachenum].qd;
  if (qd == NULL) return 0;
  
  /* fold the counters of the previous query into the running totals */
  /* and reset the per-query counters */
  ResetFileStats (qd);
  qd->max_mem_in_use = qd->mem_in_use = 0;
  qd->tot_hops_taken += qd->hops_taken;
  qd->tot_num_of_ptrs += qd->num_of_ptrs;
  qd->tot_num_of_accum += qd->num_of_accum;
  qd->tot_num_of_terms += qd->num_of_terms;
  qd->tot_num_of_ans += qd->num_of_ans;
  qd->tot_text_idx_lookups += qd->text_idx_lookups;
  qd->hops_taken = qd->num_of_ptrs = 0;
  qd->num_of_accum = qd->num_of_ans = qd->num_of_terms = 0;
  qd->text_idx_lookups = 0;
  
  LinePtr = ProcessCommands (line, qd);
  if (CommandsErrorStr) {
    fprintf (stderr, "%s\n", CommandsErrorStr);
    return 0;
  }
  /* NOTE(review): LinePtr is dereferenced without a NULL check; this */
  /* assumes ProcessCommands never returns NULL -- confirm */
  if (*LinePtr == '\0') return 1;
  
  FreeQueryDocs (qd);
  
  QueryType = get_query_type ();
  OutputType = get_output_type ();
  /* No point in hiliting words on a docnum query */
  /* NOTE(review): OutputType is not read again in this function, so */
  /* the adjustment below has no effect here */
  if (OutputType == OUTPUT_HILITE && QueryType == QUERY_DOCNUMS)
    OutputType = OUTPUT_TEXT;
  
  /* NOTE(review): every query below is given "line", not LinePtr -- */
  /* confirm that this is intended */
  switch (QueryType)
    {
    case QUERY_BOOLEAN:
      {
	char *maxdocs = (char *)NULL;
	BooleanQueryInfo bqi;
	/* "all" (also the default) becomes -1 */
	maxdocs = GetDefEnv ("maxdocs", "all");
	bqi.MaxDocsToRetrieve = strcmp (maxdocs, "all") ? atoi (maxdocs) : -1;
	/* for an indexed stemmed dictionary the stem method is built */
	/* from the environment: bit 0 = "casefold", bit 1 = "stem"; */
	/* otherwise the method stored in the dictionary header is used */
        if (qd->sd->sdh.indexed)
	  BooleanQuery (qd, line, &bqi, (BooleanEnv (GetEnv ("casefold"), 0) |
					 (BooleanEnv (GetEnv ("stem"), 0) << 1)));
	else
	  BooleanQuery (qd, line, &bqi, qd->sd->sdh.stem_method);
	/*	if (qd->sd->sdh.indexed) BooleanQuery (qd, line, &bqi, 3);
	else BooleanQuery (qd, line, &bqi, qd->sd->sdh.stem_method); */
	break;
      }
    case QUERY_APPROX:
    case QUERY_RANKED:
      {
	char *maxdocs = (char *)NULL;
	char *maxterms = (char *)NULL;
	char *maxaccum = (char *)NULL;
	RankedQueryInfo rqi;
	/* for each of these limits "all" (also the default) becomes -1 */
	maxdocs = GetDefEnv ("maxdocs", "all");
	maxterms = GetDefEnv ("max_terms", "all");
	maxaccum = GetDefEnv ("max_accumulators", "all");
	rqi.Sort = BooleanEnv (GetEnv ("sorted_terms"), 0);
	rqi.QueryFreqs = BooleanEnv (GetEnv ("qfreq"), 1);
	/* Exact is set for QUERY_RANKED and clear for QUERY_APPROX */
	rqi.Exact = QueryType == QUERY_RANKED;
	rqi.MaxDocsToRetrieve = strcmp (maxdocs, "all") ? atoi (maxdocs) : -1;
	rqi.MaxTerms = strcmp (maxterms, "all") ? atoi (maxterms) : -1;
	/* the paragraph limit follows the document limit unless this */
	/* is a level 3 index and "maxparas" is set */
	rqi.MaxParasToRetrieve = rqi.MaxDocsToRetrieve;
	if (qd->id->ifh.InvfLevel == 3 && GetEnv ("maxparas"))
	  rqi.MaxParasToRetrieve = atoi (GetEnv ("maxparas"));
	/* first character of the setting, upper-cased */
	rqi.AccumMethod = toupper (*GetDefEnv ("accumulator_method", "A"));
	rqi.MaxAccums = strcmp (maxaccum, "all") ? atoi (maxaccum) : -1;
	rqi.HashTblSize = IntEnv (GetEnv ("hash_tbl_size"), 1000);
	rqi.StopAtMaxAccum = BooleanEnv (GetEnv ("stop_at_max_accum"), 0);
	rqi.skip_dump = GetEnv ("skip_dump");
	RankedQuery (qd, line, &rqi);
	break;
      }
    case QUERY_DOCNUMS:
      {
	DocnumsQuery (qd, line);
	break;
      }
    }

  return 1;
}

/* mgq_numdocs returns the number of entries in the answer list of the */
/* current database, or 0 when there is no current database or no */
/* answer list. */
int mgq_numdocs(void)
{
  query_data *qd = NULL;

  if (cur_cachenum != -1)
    qd = dbcache[cur_cachenum].qd;

  if (qd == NULL || !qd->DL)
    return 0;

  return qd->DL->num;
}

/* mgq_numterms returns the number of entries in the query term list of */
/* the current database, or 0 when there is no current database or no */
/* query term list. */
int mgq_numterms(void)
{
  query_data *qd = NULL;

  if (cur_cachenum != -1)
    qd = dbcache[cur_cachenum].qd;

  if (qd == NULL || !qd->QTL)
    return 0;

  return qd->QTL->num;
}

/* mgq_results sends the results of the last query on the current */
/* database through the sender callback. The answer list is rewound to */
/* its start and MoreDocs does the sending. Nothing is sent when */
/* there is no current database or no answer list. */
/* The return value is always 0. */
int mgq_results(enum result_kinds kind,int skip,int howmany, int (*sender)(char *, int, int, float, void *), void *ptr)
{
  query_data *qd = NULL;

  if (cur_cachenum != -1)
    qd = dbcache[cur_cachenum].qd;

  if (qd != NULL && qd->DL) {
    /* always start from the first answer */
    qd->doc_pos = 0;
    MoreDocs(qd, kind, skip, howmany, sender, ptr);
  }

  return 0;
}

/* mgq_equivterms reports all the terms that FindWords finds for */
/* wordstem in the stemmed dictionary of the current database. The */
/* callback has the same style as the one used by mgq_results: for */
/* each term it gets the bytes after the first byte of the word as the */
/* text, the first byte as the length, the term's count and 0.0. */
/* Nothing is done when there is no current database or when wordstem */
/* or sender is NULL. The return value is always 0. */
int mgq_equivterms (unsigned char *wordstem, int (*sender)(char *, int, int, float, void *),
		    void *ptr) {
  query_data *qd = NULL;
  TermList *matches = NULL;
  int method = 0;
  int n = 0;

  if (cur_cachenum == -1) return 0;
  qd = dbcache[cur_cachenum].qd;
  if (qd == NULL) return 0;
  if (wordstem == NULL || sender == NULL) return 0;

  /* an indexed dictionary takes its stem method from the environment */
  /* (bit 0 = "casefold", bit 1 = "stem"), any other one from its header */
  if (!qd->sd->sdh.indexed)
    method = qd->sd->sdh.stem_method;
  else
    method = BooleanEnv(GetEnv("casefold"),0) | (BooleanEnv(GetEnv("stem"),0) << 1);

  /* start with an empty list and let FindWords fill it */
  matches = MakeTermList (0);

  if (FindWords (qd->sd, wordstem, method, &matches) > 0) {
    for (n = 0; n < matches->num; n++) {
      (* sender)((char *)(matches->TE[n].Word+1), matches->TE[n].Word[0], 
		 matches->TE[n].Count, (float)0.0, ptr);
    }
  }

  /* the list is no longer needed */
  if (matches != NULL) FreeTermList (&matches);

  return 0;
}

/* gets the total number of documents retrieved. If this is not available */
/* it will set total_retrieved to 0 (even when it obviously isn't) */
/*   total_retrieved - receives the total_retrieved value of the answer */
/*                     list of the current database */
/*   is_approx       - receives the is_approx value of that list */
/* If either pointer is NULL nothing is written. Otherwise both */
/* outputs are always written: they are set to 0 when there is no */
/* current database or no answer list, so a caller never reads back */
/* values it did not initialise. The return value is always 0. */
int mgq_docsretrieved (int *total_retrieved, int *is_approx) {
  query_data *qd = NULL;

  if (total_retrieved == NULL || is_approx == NULL) return 0;

  /* set default values before anything can bail out */
  *total_retrieved = 0;
  *is_approx = 0;

  if (cur_cachenum == -1) return 0;
  qd = dbcache[cur_cachenum].qd;
  if (qd == NULL || qd->DL == NULL) return 0;

  *total_retrieved = qd->DL->total_retrieved;
  *is_approx = qd->DL->is_approx;

  return 0;
}


/* mgq_getmaxstemlen returns MAXSTEMLEN; use it to size the word stems */
/* that are passed to mgq_stemword */
int mgq_getmaxstemlen () {
  int longest_stem = MAXSTEMLEN;

  return longest_stem;
}

/* mgq_stemword stems "word" in place with the stemmer of the current */
/* database. Nothing is done when there is no current database or */
/* when word is NULL. */
/* note: "word" should be at least maxstemlen+1 long and it is a string */
/* that starts with the string length */
void mgq_stemword (unsigned char *word) {
  query_data *qd = NULL;
  int method = 0;

  if (cur_cachenum == -1) return;
  qd = dbcache[cur_cachenum].qd;
  if (qd == NULL) return;
  if (word == NULL) return;

  /* an indexed dictionary takes its stem method from the environment */
  /* (bit 0 = "casefold", bit 1 = "stem"), any other one from its header */
  if (!qd->sd->sdh.indexed)
    method = qd->sd->sdh.stem_method;
  else
    method = BooleanEnv(GetEnv("casefold"),0) | (BooleanEnv(GetEnv("stem"),0) << 1);

  stemmer (method, qd->sd->sdh.stemmer_num, word);
}



/* is_dbcache_full returns 1 when every slot of the database cache */
/* holds a loaded database and 0 when at least one slot is free */
int is_dbcache_full (void) {
  init_dbcache ();

  return (cache_numloaded >= MAXNUMDATABASEINFO) ? 1 : 0;
}

/* load_database makes the index named by gensuffix the current one. */
/* If the cache already holds an index with this gensuffix, that slot */
/* simply becomes current. Otherwise the current environment is torn */
/* down, the least recently used slot is emptied, the index is loaded */
/* with InitQuerySystem and stored in that slot, which then becomes */
/* current. */
/* Returns 1 on success and 0 when InitQuerySystem fails; in that case */
/* no database is current any more and the emptied slot stays empty. */
int load_database (char *collection, char *mgdir, 
		   char *gensuffix, char *textsuffix) {
  int slot = -1;
  query_data *loaded = NULL;

  init_dbcache ();

  /* is this index in the cache already? */
  slot = search_gensuffix (gensuffix);

  if (slot < 0) {
    /* if there was a current database then the */
    /* environment needs uninitialising */
    make_current (-1);

    /* make room in the least recently used slot */
    slot = get_free_dbcache ();
    unload_database (slot);

    /* load the index */
    loaded = InitQuerySystem (mgdir, gensuffix, textsuffix, NULL);
    if (loaded == NULL) return 0;

    /* remember it */
    cache_database (slot, collection, mgdir, gensuffix, textsuffix, loaded);
  }

  /* whichever way the slot was obtained, it is the current one now */
  make_current (slot);

  return 1;
}

/* load_text_database makes an already loaded index of the specified */
/* collection current. Nothing is loaded from disk: it returns 1 when */
/* the cache holds such an index and 0 when it does not. */
int load_text_database (char *collection) {
  int slot = -1;

  init_dbcache ();

  /* look the collection up in the cache */
  slot = search_collect (collection);

  if (slot >= 0) {
    /* found one, switch over to it */
    make_current (slot);
    return 1;
  }

  return 0;
}

/* close_all_databases unloads every database held in the cache and */
/* leaves no database current */
void close_all_databases (void) {
  int slot = MAXNUMDATABASEINFO;

  init_dbcache ();

  /* empty every slot; the order does not matter as each slot is */
  /* handled on its own */
  while (slot-- > 0) {
    unload_database (slot);
  }

  /* if there was a current database then the */
  /* environment needs uninitialising */
  make_current (-1);
}
