/**********************************************************************
 *
 * lucenesearch.cpp -- 
 * Copyright (C) 1999-2002  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/


#if defined(GSDL_USE_OBJECTSPACE)
#  include <ospace\std\iostream>
#elif defined(GSDL_USE_IOS_H)
#  include <iostream.h>
#else
#  include <iostream>
#endif

#include <stdio.h>
#include <time.h>

#include "gsdlconf.h"
#include "gsdltools.h"
#include "lucenesearch.h"
#include "fileutil.h"
#include "queryinfo.h"
#include "gsdlunicode.h"

#include "expat_resultset.h"

// Builds the path of the index to query, relative to the collection
// directory: "index" joined (via filename_cat) with a name made of the
// lower-cased first character of the level, followed by the index,
// subcollection and language values of 'qp'.
// The collection name is not part of the name for Lucene.
text_t lucenesearchclass::getindexsuffix(const queryparamclass &qp) {
  text_t indexsuffix = "index";

  // get the first char of the level to be the start of the index name;
  // an empty level contributes nothing (begin()+1 would otherwise step
  // past the end of the string)
  text_t suffix;
  if (!qp.level.empty()) {
    suffix = substr(qp.level.begin(), qp.level.begin()+1);
    lc(suffix);
  }

  // collection name not added for Lucene
  indexsuffix = filename_cat(indexsuffix,
			     suffix + qp.index + qp.subcollection + qp.language);
  return indexsuffix;
}

////////////////////
// lucenesearch class //
////////////////////

// Default constructor: runs the searchclass base constructor and then
// sets the text level to "Doc".
lucenesearchclass::lucenesearchclass ()
  : searchclass()
{
  // starting value; set_text_level() can replace it later
  textlevel = "Doc";
}

// Destructor: deletes the cache object and clears the pointer.
lucenesearchclass::~lucenesearchclass ()
{
  // deleting a null pointer does nothing, so no explicit test is needed
  delete cache;
  cache = NULL;
}

void lucenesearchclass::set_text_level(const text_t &textlevel_arg)
{
  textlevel = textlevel_arg;
}


bool lucenesearchclass::search(const queryparamclass &queryparams, 
			       queryresultsclass &queryresult) {
 
#ifdef __WIN32__
  char basepath[]="";
#else
  char basepath[] = "/";
#endif
 
  char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr(); 

  // set default stem method from values originally set on prefs page
  int defaultStemMethod = 0;
  if (queryparams.casefolding) {
    defaultStemMethod |= 1;
  }
  if (queryparams.stemming) {
    defaultStemMethod |= 2;
  }

  text_t utf8querystring = to_utf8(queryparams.querystring);

  text_t escaped_utf8querystring = "";
  text_t::const_iterator here = utf8querystring.begin();
  while (here != utf8querystring.end()) {
    if (*here == '"') escaped_utf8querystring.push_back('\\');
    escaped_utf8querystring.push_back(*here);
    ++here;
  }

  // Use this to skip lucene_query.pl and access GS2LuceneQuery directly (Java must be on path)
  // text_t cmd = "java -classpath \"" + filename_cat(gsdlhome, "bin", "java", "LuceneWrapper3.jar") + "\" org.greenstone.LuceneWrapper3.GS2LuceneQuery ";
  text_t cmd = "\"" + filename_cat(gsdlhome, "bin", "script", "lucene_query.pl") + "\"";
  cmd += (text_t)" \""+indexname + (text_t)"\" \"" + escaped_utf8querystring + (text_t)"\""; 

  if (!queryparams.filterstring.empty()) {
    cmd += " -filter \"" + queryparams.filterstring + "\"";
  }
  if (!queryparams.sortfield.empty() && queryparams.sortfield != "rank") {
    cmd += " -sort \"" + queryparams.sortfield + "\"";
  }
  if (queryparams.sortorder == 1) {
    cmd += " -reverse_sort";
  }
  if (!queryparams.fuzziness.empty()) {
    cmd += " -fuzziness " + queryparams.fuzziness;
  }

  // New code to support configuration of the default conjuction operator
  // set default Boolean combiner from all/some setting
  // if match_mode == 1, ie all, default=1 ie AND
  // if match_mode == 0, ie some, default=0, ie OR
  if (queryparams.match_mode)
    {
      cmd += " -dco AND";
    }

  // New code to allow Lucene to do paging of search results. This should
  // substantially improve performance as we don't have to return all 12000
  // hits if we only need the first 20!
  if (queryparams.startresults && queryparams.endresults)
    {
      cmd += (text_t)" -startresults " + (queryparams.startresults - 1);
      cmd += (text_t)" -endresults " + (queryparams.endresults - 1);
    }

  text_t xml_text = "";

  // I don't want to do this, but I have to.
  text_t gsdlhome_env = "GSDLHOME=" + gsdlhome;
  putenv(gsdlhome_env.getcstr());

#ifdef __WIN32__
  putenv("GSDLOS=windows");

  //FILE *PIN = _popen(cmd.getcstr(), "r"); // didn't seem to work
  cmd = (text_t)"perl -S "+cmd;
  // we write the result to a file
  clock_t this_time = clock();
  text_t filename = "luc";
  filename.append(this_time);
  filename.append(".txt");
 
  // Used concat 'collectdir' with 'filename' but this ran into problems
  // when run from a CD-ROM.
  // Since this is Windows only code, now changed to use %TEMP% directory,
  // a location designed to be writable

  // Need to set the Apache server to pass the TEMP envvar (or TMP) with 
  // passenv. Little point using _tempnam() function that generates a 
  // filename and where you need to pass in the dir if *TMP* doesn't exist.  
  // http://msdn.microsoft.com/en-us/library/hs3e7355%28v=vs.80%29.aspx
  // dir parameter: "The path used in the file name if there is no TMP
  // environment variable, or if TMP is not a valid directory." Seems
  // the inverse of what we want: to use TMP/TEMP if collectdir is invalid.

  text_t out_file = filename_cat(getenv("TEMP"),filename); // TMP or TEMP

  cmd += " -out \"" + out_file + "\"";
  int rv = gsdl_system(cmd, true, cerr);
  if (rv != 0) {
    cerr << "tried to run command \""<<cmd<<"\", but it failed\n";
  } else {
    read_file(out_file, xml_text);
    remove(out_file.getcstr()); // now delete it
  }
#else

#if defined(MACOS)
  putenv("GSDLOS=darwin"); 
#else
  putenv("GSDLOS=linux"); 
#endif

  // What about Solaris etc ????


  FILE *PIN = popen(cmd.getcstr(), "r");

  if (PIN==NULL) {
    perror("PIPE");
    cerr << "Error: unable to open pipe to " << cmd << endl;
    
    return false;
  }
  while (!feof(PIN)) {
    char buffer[256];
    int num_bytes = fread(buffer,1,256,PIN);
    xml_text.appendcarr(buffer,num_bytes);
  }

#endif

  expat_resultset(xml_text,queryresult);
  
#ifdef __WIN32__
  //  _pclose(PIN);
#else
  pclose(PIN);
#endif

  return true;
}


// Placeholder: no browse search is carried out.  A diagnostic line is
// written to cerr and the call reports failure; 'queryparams', 'start',
// 'numDocs' and 'queryresult' are left untouched.
bool lucenesearchclass::browse_search(const queryparamclass &queryparams, 
				      int start, int numDocs,
				      queryresultsclass &queryresult)
{
  const bool search_done = false;

  cerr << "**** Not sure what this function does!" << endl;

  return search_done;
}

// The document text for 'docnum' is placed in 'output'.
// docTargetDocument returns 'true' if it was able to
// try to get a document.
// 'collection' is needed to see if an index from the
// collection is loaded. The default index bits are just there because
// the mg version needs them.

bool lucenesearchclass::docTargetDocument(const text_t &/*defaultindex*/,
					const text_t &/*defaultsubcollection*/,
					const text_t &/*defaultlanguage*/,
					const text_t &collection,
					int docnum,
					text_t &output)
{
  // Documents are now fetched directly by lucenesource, so this method
  // is not used: it never touches 'output' and always reports that no
  // attempt was made.
  const bool attempted = false;
  return attempted;
}

// used to clear any cached databases for persistent versions of 
// Greenstone like the Windows local library
void lucenesearchclass::unload_database ()
{
  // intentionally empty: nothing is released here
}

void lucenesearchclass::set_gsdlhome (const text_t &gh)
{
  gsdlhome = gh;
}
