 
/**********************************************************************
 *
 * collectserver.cpp -- 
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "collectserver.h"
#include "OIDtools.h"
#include <assert.h>
#include "display.h"

// Advisory check of the indexer named by buildtype; output goes to cerr.
// For a recognised indexer ("mg", "mgpp", "lucene") a warning is printed
// only when the matching ENABLE_* macro was not defined at compile time.
// Any other name produces an error message. Nothing is returned and the
// caller is never stopped.
void check_if_valid_buildtype(const text_t& buildtype)
{
  if (buildtype == "mg") {
#ifndef ENABLE_MG
    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
#endif
    return;
  }

  if (buildtype == "mgpp") {
#ifndef ENABLE_MGPP
    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
#endif
    return;
  }

  if (buildtype == "lucene") {
#ifndef ENABLE_LUCENE
    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
#endif
    return;
  }

  // none of the known indexer names matched
  cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
}


// Advisory check of the database type named by infodbtype; output goes to
// cerr. For a recognised type a warning is printed only when the matching
// USE_* macro was not defined at compile time ("gdbm" and "gdbm-txtgz" are
// both governed by USE_GDBM). Any other name produces an error message.
void check_if_valid_infodbtype(const text_t& infodbtype)
{
  if (infodbtype == "gdbm") {
#ifndef USE_GDBM
    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
#endif
    return;
  }

  if (infodbtype == "gdbm-txtgz") {
#ifndef USE_GDBM
    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
#endif
    return;
  }

  if (infodbtype == "jdbm") {
#ifndef USE_JDBM
    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
#endif
    return;
  }

  if (infodbtype == "sqlite") {
#ifndef USE_SQLITE
    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
#endif
    return;
  }

  if (infodbtype == "mssql") {
#ifndef USE_MSSQL
    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
#endif
    return;
  }

  // none of the known database type names matched
  cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
}



// Creates an unconfigured collection server. The collection name starts
// out as the placeholder "null"; ping() treats that value as "not
// configured" until configure() supplies a real "collection" entry.
collectserver::collectserver ()
  : collectinfo()
{
  configinfo.collection = "null";
}

// Deletes every source and filter object referenced from the sources list
// and the filters map, then empties both containers.
collectserver::~collectserver () {

  // release the sources (deleting a NULL pointer is a no-op, so no test is needed)
  for (sourcelistclass::iterator src = sources.begin(); src != sources.end(); ++src) {
    delete (*src).s;
  }
  sources.clear();

  // release the filters in the same way
  for (filtermapclass::iterator flt = filters.begin(); flt != filters.end(); ++flt) {
    delete (*flt).second.f;
  }
  filters.clear();
}

// configure should be called for each line in the
// configuration files to configure the collection server and everything
// it contains. The configuration should take place just before initialisation
void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
  if (cfgline.size() >= 1) {
    const text_t &value = cfgline[0];
    if (key == "plugin")
    {
    	//get the plugin name
	const text_t &name = cfgline[0];
	
	if (name == "HTMLPlugin" || name== "PDFPlugin")
	{
		for (int hI = 1; hI < cfgline.size(); hI++)
		{
			const text_t &plugOption = cfgline[hI];
			
			if (plugOption == "-use_realistic_book") 
			{
				collectinfo.useBook = true;
				break;
			}
		}
	}
    }
    else if (key == "gsdlhome") configinfo.gsdlhome = value;
    else if (key == "gdbmhome") configinfo.dbhome = value;
    else if (key == "collecthome") configinfo.collecthome = value;
    else if (key == "collection") {
      configinfo.collection = value;
      collectinfo.shortInfo.name = value;
    } 
    else if (key == "collectdir") configinfo.collectdir = value;
    else if (key == "host") collectinfo.shortInfo.host = value;
    else if (key == "port") collectinfo.shortInfo.port = value.getint();
    else if (key == "public") {
      if (value == "true") collectinfo.isPublic = true;
      else collectinfo.isPublic = false;
    } else if (key == "beta") {
      if (value == "true") collectinfo.isBeta = true;
      else collectinfo.isBeta = false;
    } else if (key == "collectgroup") {
      if (value == "true") collectinfo.isCollectGroup = true;
      else collectinfo.isCollectGroup = false;
    } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
    else if (key == "supercollectionoptions") {
      text_tarray::const_iterator begin = cfgline.begin();
      text_tarray::const_iterator end = cfgline.end();
      while(begin != end) {
	
	if (*begin == "uniform_search_results_formatting") {
	  collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
	}
	begin++;
      }
    }
    else if (key == "builddate") collectinfo.buildDate = value.getint();
    else if (key == "languages") collectinfo.languages = cfgline;
    else if (key == "numdocs") collectinfo.numDocs = value.getint();
    else if (key == "numsections") collectinfo.numSections = value.getint();
    else if (key == "numwords") collectinfo.numWords = value.getint();
    else if (key == "numbytes") collectinfo.numBytes = value.getint();
    else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
    else if (key == "collectionmeta") {
      // genuine collmeta get added as collectionmeta and collection_macros
      // .collmeta just get added as collection_macros
      text_t params;
      if (cfgline.size() == 3) {
	// get the params for later
	text_t::const_iterator first=cfgline[1].begin()+1;
	text_t::const_iterator last=cfgline[1].end()-1;
	params=substr(first, last);
      }
      
      text_t meta_name = cfgline[0];
      if (*(meta_name.begin())=='.') {
	// a .xxx collectionmeta. strip off the . and
	// look it up in the indexmap to get the actual value
	
	text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
	text_t new_name;
	
	// Now that GLI has been fixed to deal with ex. prefixes, and modelcol's collect.cfg does not contain
	// Greenstone ex.* meta in the "collectionmeta" section, we won't encounter ex.* in collectionmeta here. 
	// So we should not remove any "ex." prefixes here, since collectionmeta does not contain ex.* but it can
	// contain ex.dc.* type metadata, which will need to have their ex. prefix preserved for matching below.

	if (indexmap.from2to(name, new_name)) {
	  meta_name = new_name;
	} 
      } else {
	// add them to collectionmeta
	text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
	if (cfgline.size() == 2) {
	  lang_map[g_EmptyText] = cfgline[1];
	} else if (cfgline.size() == 3 ) {
	  // get the lang out of params
	  paramhashtype params_hash;
	  splitparams(params, params_hash);
	  
	  text_t lang = params_hash["l"];
	  lang_map[lang] = cfgline[2];
	  if (lang_map[g_EmptyText].empty()) {
	    // want the first one as the default if no default specified
	    lang_map[g_EmptyText] = cfgline[2];
	  }
	}
	collectinfo.collectionmeta[cfgline[0]] = lang_map;
	
      }
      
      // add all collectionmeta to macro list
      text_tmap params_map = collectinfo.collection_macros[meta_name];
      
      if (cfgline.size() == 2) {// no params for this macro
	params_map[g_EmptyText] = cfgline[1];
      }
      else if (cfgline.size() == 3) {// has params
	params_map[params] = cfgline[2];
	if (params_map[g_EmptyText].empty()) {
	  params_map[g_EmptyText] = cfgline[2];
	}
      }
      collectinfo.collection_macros[meta_name] = params_map;
    }
    else if (key == "collectionmacro") {
      text_t nobrackets;
      text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
      // add all to macro list
      if (cfgline.size() == 2) { // no params for this macro
	params_map[g_EmptyText] = cfgline[1];
      }
      else if (cfgline.size() == 3) {// has params
	// strip [ ] brackets from params
	text_t::const_iterator first=cfgline[1].begin()+1;
	text_t::const_iterator last=cfgline[1].end()-1;
	nobrackets=substr(first, last);
	params_map[nobrackets] = cfgline[2];
      }
      collectinfo.collection_macros[cfgline[0]] = params_map;
      
    } else if (key == "format" && cfgline.size() == 2)
      collectinfo.format[cfgline[0]] = cfgline[1];
    else if (key == "building" && cfgline.size() == 2)
      collectinfo.building[cfgline[0]] = cfgline[1];
    else if (key == "httpdomain") collectinfo.httpdomain = value;
    else if (key == "httpprefix") collectinfo.httpprefix = value;
    else if (key == "receptionist") collectinfo.receptionist = value;
    else if (key == "buildtype") {
      check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
      collectinfo.buildType = value;
    }
    // backwards compatibility - searchytpes is now a format statement
    else if (key == "searchtype") { // means buildtype is mgpp
      if (collectinfo.buildType.empty()) {
	check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
	collectinfo.buildType = "mgpp";
      }
      joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
      //collectinfo.searchTypes = cfgline;
    }
    else if (key == "infodbtype") {
      check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid
      collectinfo.infodbType = value;
    }
    else if (key == "separate_cjk") {
      if (value == "true") collectinfo.isSegmented = true;
      else collectinfo.isSegmented = false;
    }
    // What have we set in our collect.cfg file :  document or collection ?
    else if (key == "authenticate") collectinfo.authenticate = value;

    // What have we set for our group list
    else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);

	// build.cfg, earliestDatestamp of this collection needed for 
	// OAIServer to work out earliestDatestamp of this repository
	else if (key == "earliestdatestamp") {
		collectinfo.earliestDatestamp = cfgline[0]; // get it from build.cfg
	}
	
    // store all the mappings for use when collection meta is read later
    // (build.cfg read before collect.cfg)
    else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
      indexmap.importmap (cfgline, true);
      
    } 
    // In the map the key-value pair contain the same
    // data i.e key == data, if key is 2 then data is 2
    
    // What have we set for our public_documents ACL
    else if (key == "public_documents")
       {
	  text_tarray::const_iterator begin = cfgline.begin();
	  text_tarray::const_iterator end = cfgline.end();
	  while(begin != end)
	     {
		// key = data i.e if key is 2 then data is 2
		// collectinfo.public_documents[*begin] is the key
		// *begin is the data value

		collectinfo.public_documents[*begin] = *begin;
		++begin;
	     }
       }
    
    // What have we set for our private_documents ACL
    else if (key == "private_documents")
       {
	  text_tarray::const_iterator begin = cfgline.begin();
	  text_tarray::const_iterator end = cfgline.end();
	  while(begin != end)
	     {
		// key = data i.e if key is 2 then data is 2
		// collectinfo.public_documents[*begin] is the key
		// *begin is the data value
		
		collectinfo.private_documents[*begin] = *begin;
		++begin;
	     }
       }

    // dynamic_classifier <UniqueID> "<Options>"
    else if (key == "dynamic_classifier")
    {
      collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
    }
  }
  
  // configure the filters
  filtermapclass::iterator filter_here = filters.begin();
  filtermapclass::iterator filter_end = filters.end();
  while (filter_here != filter_end) {
    assert ((*filter_here).second.f != NULL);
    if ((*filter_here).second.f != NULL) 
      (*filter_here).second.f->configure(key, cfgline);

    ++filter_here;
  }

  // configure the sources
  sourcelistclass::iterator source_here = sources.begin();
  sourcelistclass::iterator source_end = sources.end();
  while (source_here != source_end) {
    assert ((*source_here).s != NULL);
    if ((*source_here).s != NULL)
      (*source_here).s->configure(key, cfgline);
    
    ++source_here;
  }
}


void collectserver::configure (const text_t &key, const text_t &value) {
  text_tarray cfgline;
  cfgline.push_back (value);
  configure(key, cfgline);
}

// Reports through wasSuccess whether this collection server looks usable.
// wasSuccess becomes false when the collection name is still the "null"
// placeholder, or when the build date or the document count is zero;
// otherwise it becomes true. The error and logout arguments are not
// touched by this function.
void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
  // never properly configured: it is a foregone conclusion that we
  // cannot be active
  if (this->configinfo.collection == "null") {
    wasSuccess = false;
    return;
  }

  // without a build date the collection was probably not built, and a
  // document count of zero means something is pretty wrong; it is
  // probably okay only when both are present
  wasSuccess = (this->collectinfo.buildDate != 0 &&
		this->collectinfo.numDocs != 0);
}


// Clears the indexmap and then initialises every registered filter,
// followed by every registered source. Returns false as soon as one of
// them reports a failed init (the remaining ones are not initialised);
// returns true when all of them succeed.
bool collectserver::init (ostream &logout) {
  // delete the indexmap
  indexmap.clear();

  // init the filters
  for (filtermapclass::iterator flt = filters.begin(); flt != filters.end(); ++flt) {
    assert ((*flt).second.f != NULL);
    if ((*flt).second.f == NULL) continue; // only reachable when asserts are compiled out

    if (!(*flt).second.f->init(logout)) return false;
  }

  // init the sources
  for (sourcelistclass::iterator src = sources.begin(); src != sources.end(); ++src) {
    assert ((*src).s != NULL);
    if ((*src).s == NULL) continue; // only reachable when asserts are compiled out

    if (!(*src).s->init(logout)) return false;
  }

  return true;
}


// Copies the collection information gathered so far into reponse and
// sets err to noError. The log stream is not used.
void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
				     comerror_t &err, ostream &/*logout*/) {
  err = noError;
  reponse = collectinfo;
}

// Resets response and fills response.filterNames with the name (map key)
// of every registered filter, then sets err to noError. The log stream
// is not used.
void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
				    comerror_t &err, ostream &/*logout*/) {
  response.clear ();

  // collect the names under which the filters were registered
  for (filtermapclass::iterator flt = filters.begin(); flt != filters.end(); ++flt) {
    response.filterNames.insert ((*flt).first);
  }

  err = noError;
}

void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
				       InfoFilterOptionsResponse_t &response, 
				       comerror_t &err, ostream &logout) {
  outconvertclass text_t2ascii;

  filterclass *thisfilter = filters.getfilter(request.filterName);
  if (thisfilter != NULL) {
    thisfilter->get_filteroptions (response, err, logout);
  } else {
    response.clear ();
    err = protocolError;
    text_t& infodbtype = collectinfo.infodbType;

    // Don't print out the warning if were's asking about SQLQueryFilter 
    // when we know the infodbtype is something other than .*sql.*

    if ((request.filterName != "SQLQueryFilter") 
	|| (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) {
      logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
	     << "filter \"" << request.filterName << "\".\n\n";
    }
  }
}

// Runs the filter named in request.filterName and, when FRmetadata is set
// in request.filterResultOptions, fills in the metadata of every returned
// document from the registered sources.
//
//   request  -- in/out: request.docSet is replaced by a copy in which OIDs
//               that need translating (".fc", ".pr" etc.) are translated
//   response -- cleared, then filled in by the filter and the sources
//   err      -- noError on success; protocolError when the filter does not
//               exist or no source could supply metadata for a document;
//               otherwise whatever a filter or source reported
//   logout   -- receives the protocol error messages
void collectserver::filter (FilterRequest_t &request,
			    FilterResponse_t &response,
			    comerror_t &err, ostream &logout) {
  outconvertclass text_t2ascii;

  // translate any ".fc", ".pr" etc. stuff in the docSet
  // NOTE(review): translatedOID is shared between iterations, so an OID that
  // no source translates is replaced by whatever the variable last held --
  // confirm whether that is intended
  text_t translatedOID;
  text_tarray translatedOIDs;
  text_tarray::iterator doc_here = request.docSet.begin();
  text_tarray::iterator doc_end = request.docSet.end();
  while (doc_here != doc_end) {
    if (needs_translating (*doc_here)) {
      // the first source that accepts the OID does the translation
      sourcelistclass::iterator source_here = sources.begin();
      sourcelistclass::iterator source_end = sources.end();
      while (source_here != source_end) {
	assert ((*source_here).s != NULL);
	if (((*source_here).s != NULL) &&
	    ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
	  if (err != noError) return;
	  break;
	}
	++source_here;
      }
      translatedOIDs.push_back (translatedOID);
    } else {
      translatedOIDs.push_back (*doc_here);
    }
    ++doc_here;
  }
  request.docSet = translatedOIDs;

  response.clear();

  filterclass *thisfilter = filters.getfilter(request.filterName);
  if (thisfilter != NULL) {
    // filter the data
    thisfilter->filter (request, response, err, logout);
    if (err != noError) return;

    // fill in the metadata for each of the OIDs (if it is requested)
    if (request.filterResultOptions & FRmetadata) {

      ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
      ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
      while (resultdoc_here != resultdoc_end) {

	// Whether some source has supplied metadata for THIS document. The
	// flag has to start out false for every document: if it were carried
	// over from an earlier document, a failed lookup on the first source
	// would still end the source loop, the remaining sources would never
	// be tried and the "nothing processed" error below could not fire.
	bool processed = false;

	text_t deleted_status = "";
	bool append_metadata = (request.filterResultOptions & FROAI) ? true : false;

	// try each of the sources in turn
	sourcelistclass::iterator source_here = sources.begin();
	sourcelistclass::iterator source_end = sources.end();
	while (source_here != source_end) {
	  assert ((*source_here).s != NULL);

	  // first check for oai metadata from the oai_db, if asked for it (if FROAI is set)
	  if(((*source_here).s != NULL) &&
	     (request.filterResultOptions & FROAI) &&
	     ((*source_here).s->get_oai_metadata(request.requestParams, request.refParams,
						 request.getParents, request.fields, 
						 (*resultdoc_here).OID, deleted_status, (*resultdoc_here).metadata, 
						 err, logout))) {
	    
	    if (err != noError) return;

	    processed = true;
	  }

	  // if OID is the special OAI specific OAI_EARLIESTTIMESTAMP_OID,
	  // then we'd have got its OAI meta above if we were requested to do so.
	  // Either way, we won't be additionally getting regular meta for this OID,
	  // as it's not a real doc OID, so we stop processing this OID here.
	  if((*resultdoc_here).OID == OAI_EARLIESTTIMESTAMP_OID) {
	    ++source_here;
	    if(processed == true) break;
	    else continue;
	  }
  
	  // We may or may not have got oai_meta (depends on if FROAI was set).
	  // If we didn't get oai_meta, then deleted_status would still be "".
	  // If we did get oai_meta, and if the deleted_status for the OID was D for deleted entry,
	  // don't bother getting any other metadata, as there will be no entry for that OID in index db.
	  
	  // Note that if we did get oai_meta and OID marked as existing, we're in append_mode:
	  // don't let get_metadata() clear the metadata list, as there's already stuff in there
	  //if(deleted_status == "E") append_metadata = true;

	  if (((*source_here).s != NULL) &&
	      deleted_status != "D" &&
	      ((*source_here).s->get_metadata(request.requestParams, request.refParams,
					      request.getParents, request.fields, 
					      (*resultdoc_here).OID, (*resultdoc_here).metadata, 
					      err, logout, append_metadata))) {
	    if (err != noError) return; // check for errors again	  
	    
	    // processed would not have been set yet if not doing FROAI. Set now.
	    processed = true;
	  }
	  
	  // the first source that delivers anything for this document wins
	  if(processed) break;

	  ++source_here;
	}
	if (!processed) {

	  logout << text_t2ascii << "Protocol Error: nothing processed for "
	   << "filter \"" << request.filterName << "\".\n\n";

	  err = protocolError;
	  return;
	}
	++resultdoc_here;
      }
    }  
    
    err = noError;
  }
  else
  {
    // no filter registered under the requested name
    response.clear ();
    err = protocolError;
    logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
	   << "filter \"" << request.filterName << "\".\n\n";
  }
}

void collectserver::get_document (const DocumentRequest_t &request,
				  DocumentResponse_t &response,
				  comerror_t &err, ostream &logout) {

  sourcelistclass::iterator source_here = sources.begin();
  sourcelistclass::iterator source_end = sources.end();
  while (source_here != source_end) {
    assert ((*source_here).s != NULL);
    if (((*source_here).s != NULL) &&
	((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
      if (err != noError) return;
      break;
    }
    ++source_here;
  }
}

void collectserver::is_searchable (bool &issearchable, comerror_t &err,
				   ostream &logout) {

  sourcelistclass::iterator source_here = sources.begin();
  sourcelistclass::iterator source_end = sources.end();
  while (source_here != source_end) {
    assert ((*source_here).s != NULL);
    if (((*source_here).s != NULL) &&
	((*source_here).s->is_searchable (issearchable, err, logout))) {
      if (err != noError) return;
      break;
    }
    ++source_here;
  }
}


// Two collectserverptr wrappers are equal when they hold the same
// collectserver pointer (identity, not contents).
bool operator==(const collectserverptr &x, const collectserverptr &y) {
  return x.c == y.c;
}

// Orders collectserverptr wrappers by the value of the pointer they hold.
bool operator<(const collectserverptr &x, const collectserverptr &y) {
  return x.c < y.c;
}


// Registers thecollectserver under its collection name, replacing any
// entry already stored under that name. A NULL server, or one whose
// collection name is empty, is rejected (assertion failure in debug
// builds, silently ignored otherwise).
// thecollectserver remains the property of the calling code but
// should not be deleted until it is removed from this list.
void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
  // can't add a null collection server
  assert (thecollectserver != NULL);
  if (thecollectserver == NULL) return;

  // can't add a collection server with no collection name
  const text_t collection_name = thecollectserver->get_collection_name();
  assert (!collection_name.empty());
  if (collection_name.empty()) return;

  collectserverptr entry;
  entry.c = thecollectserver;
  collectserverptrs[collection_name] = entry;
}

// Looks up the collection server registered under the given collection
// name. Returns NULL when the name is empty or when no server is
// registered under it.
collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
  // a collection with no name can never be found
  if (!collection.empty()) {
    iterator found = collectserverptrs.find (collection);
    if (found != collectserverptrs.end()) return (*found).second.c;
  }

  return NULL;
}
