/**********************************************************************
 *
 * cgiwrapper.cpp -- output pages using the cgi protocol
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include <stdio.h>
#include <cstring>
#ifdef __WIN32__
#include <fcntl.h>
#endif

#include "gsdlconf.h"
#include "cgiwrapper.h"
#include "gsdlsitecfg.h"
#include "maincfg.h"
#include "fileutil.h"
#include "cgiutils.h"
#include <stdlib.h>
#include <assert.h>

#if defined(GSDL_USE_OBJECTSPACE)
#  include <ospace/std/iostream>
#  include <ospace/std/fstream>
#elif defined(GSDL_USE_IOS_H)
#  include <iostream.h>
#  include <fstream.h>
#else
#  include <iostream>
#  include <fstream>
#endif

#ifdef USE_FASTCGI
#include "fcgiapp.h"
#endif

#include "authenaction.h"
#include "browseaction.h"
#include "collectoraction.h"
#include "depositoraction.h"
#include "documentaction.h"
#include "dynamicclassifieraction.h"
#include "extlinkaction.h"
#include "pageaction.h"
#ifdef ENABLE_MGPP
#include "phindaction.h"
#endif
#include "pingaction.h"
#include "queryaction.h"

#if defined(USE_SQLITE)
#include "sqlqueryaction.h"
#endif

#if defined(GSDL_USE_GTI_ACTION)
#include "gtiaction.h"
#endif

#if defined(USE_RSS)
#include "rssaction.h"
#endif

#include "tipaction.h"
#include "statusaction.h"
#include "usersaction.h"
#include "configaction.h"

#include "vlistbrowserclass.h"
#include "hlistbrowserclass.h"
#include "datelistbrowserclass.h"
#include "invbrowserclass.h"
#include "pagedbrowserclass.h"
#include "htmlbrowserclass.h"
#include "phindbrowserclass.h"


#ifdef USE_FASTCGI
// used to output the text from receptionist
class fcgistreambuf : public streambuf {
public:
  fcgistreambuf ();

  // streambuf hooks: sync() pushes the buffered characters to whichever
  // targets are attached, overflow() flushes and then stores one more char
  int sync ();
  int overflow (int ch);

  // nothing can ever be read from this buffer
  int underflow () { return EOF; }

  // detach from both output targets
  void fcgisbreset() {
    fcgx_stream = NULL;
    other_ostream = NULL;
  }

  // attach (or, with NULL, detach) one of the two output targets
  void set_fcgx_stream(FCGX_Stream *newone) { fcgx_stream = newone; }
  void set_other_ostream(ostream *newone) { other_ostream = newone; }

private:
  FCGX_Stream *fcgx_stream;  // FastCGI output stream, NULL when unused
  ostream *other_ostream;    // ordinary output stream, NULL when unused
};

// Starts with no output target attached and the whole reserve area
// available as the put area.
fcgistreambuf::fcgistreambuf() {
  fcgx_stream = NULL;
  other_ostream = NULL;

  // make sure a reserve area exists before handing it to setp()
  if (base() == ebuf()) {
    allocate();
  }
  setp (base(), ebuf());
}

// Writes everything waiting in the put area to the attached targets and
// then empties the put area. Always returns 0.
int fcgistreambuf::sync () {
  char *const pending = pbase();
  const int npending = out_waiting();

  // a failed FastCGI write detaches the FastCGI stream
  if (fcgx_stream != NULL) {
    if (FCGX_PutStr (pending, npending, fcgx_stream) < 0) {
      fcgx_stream = NULL;
    }
  }

  // copy the same characters, one at a time, to the ordinary stream
  if (other_ostream != NULL) {
    for (int k = 0; k < npending; ++k) {
      other_ostream->put (pending[k]);
    }
  }

  // rewind the put pointer to the start of the put area
  setp (pending, epptr());

  return 0;
}

// Called when the put area is full: flush it, then store ch (unless ch
// is EOF). Returns EOF only if the flush reported EOF, otherwise 0.
int fcgistreambuf::overflow (int ch) {
  const bool flushed = (sync () != EOF);
  if (!flushed) return EOF;

  if (ch == EOF) return 0;

  sputc (ch);
  return 0;
}

#endif

// Replaces the contents of errorpage with errortext wrapped for output:
// a plain "ERROR: ..." block when debug is true, otherwise a complete
// cgi response (Content-type header followed by a small html page).
static void format_error_string (text_t &errorpage, const text_t &errortext, bool debug) {

  errorpage.clear();

  // command line debugging: plain text only
  if (debug) {
    errorpage += "\n";
    errorpage += "ERROR: ";
    errorpage += errortext;
    errorpage += "\n";
    return;
  }

  // everything that precedes the error text in the html response
  static const char *const page_head[] = {
    "Content-type: text/html\n\n",
    "<html>\n",
    "<head>\n",
    "<title>Error</title>\n",
    "</head>\n",
    "<body>\n",
    "<h2>Oops!</h2>\n"
  };
  const unsigned int num_head = sizeof(page_head) / sizeof(page_head[0]);
  for (unsigned int i = 0; i < num_head; ++i) {
    errorpage += page_head[i];
  }

  errorpage += errortext;

  errorpage += "</body>\n";
  errorpage += "</html>\n";
}

static void page_errorcollect (const text_t &gsdlhome, text_t &errorpage, bool debug) {

  text_t collecthome = filename_cat (gsdlhome, "collect");

  text_t errortext = "No valid collections were found: Check that your collect directory\n";
  errortext += "(" + collecthome + ") is readable and contains at least one valid collection.\n";
  errortext += "Note that modelcol is NOT a valid collection.\n";
  errortext += "If the path to your collect directory is wrong edit the 'gsdlhome' field\n";
  errortext += "in your gsdlsite.cfg configuration file.\n";

  format_error_string (errorpage, errortext, debug);
}

// Fills errorpage with a message about a problem with gsdlsite.cfg.
//   mode 0: the file could not be found
//   mode 1: the file has no valid gsdlhome entry
// Any other mode adds no problem description. A hint about where the file
// should live is always appended; its wording depends on debug.
static void page_errorsitecfg (text_t &errorpage, bool debug, int mode) {

  text_t message;

  switch (mode) {
  case 0:
    message += "The gsdlsite.cfg configuration file could not be found. This\n";
    message += "file should contain configuration information relating to this\n";
    message += "site's setup.\n";
    break;

  case 1:
    message += "The gsdlsite.cfg configuration file does not contain a valid\n";
    message += "gsdlhome entry.\n";
    break;

  default:
    break;
  }

  if (!debug) {
    message += "gsdlsite.cfg should reside in the same directory as the library\n";
    message += "executable file.\n";
  } else {
    message += "gsdlsite.cfg should reside in the directory from which the\n";
    message += "library executable was run.\n";
  }

  format_error_string (errorpage, message, debug);
}


static void page_errormaincfg (const text_t &gsdlhome, const text_t &collection,
			       bool debug, text_t &errorpage) {

  text_t errortext;

  if (collection.empty()) {
    text_t main_cfg_file = filename_cat (gsdlhome, "etc", "main.cfg");
    errortext += "The main.cfg configuration file could not be found. This file\n";
    errortext += "should contain configuration information relating to the\n";
    errortext += "setup of the interface. As this receptionist is not being run\n";
    errortext += "in collection specific mode the file should reside at\n";
    errortext += main_cfg_file + ".\n";
  } else {
    text_t collect_cfg_file = filename_cat (gsdlhome, "collect", collection, "etc", "collect.cfg");
    text_t main_collect_cfg_file = filename_cat (gsdlhome, "etc", "collect.cfg");
    text_t main_cfg_file = filename_cat (gsdlhome, "etc", "main.cfg");
    errortext += "Either the collect.cfg or main.cfg configuration file could\n";
    errortext += "not be found. This file should contain configuration information\n";
    errortext += "relating to the setup of the interface. As this receptionist is\n";
    errortext += "being run in collection specific mode the file should reside\n";
    errortext += "at either " + collect_cfg_file + ",\n";
    errortext += main_collect_cfg_file + " or " + main_cfg_file + ".\n";
  }

  format_error_string (errorpage, errortext, debug);
}


static void page_errorinit (const text_t &gsdlhome, bool debug, text_t &errorpage) {

  text_t errortext = "An error occurred during the initialisation of the Greenstone Digital\n";
  errortext += "Library software. It is likely that the software has not been setup\n";
  errortext += "correctly.\n";

  text_t error_file = filename_cat (gsdlhome, "etc", "error.txt");
  // This is all commented out because I think it's a really bad idea
  // The error.txt file may be very large, causing out of memory problems and even crashing the machine in extreme
  //   cases where multiple processes are causing this type of error (e.g. automated processes that try to "hack"
  //   the Greenstone site by supplying values such as site URLs for the CGI arguments -- this has happened)
  // Also, the error.txt may contain information that shouldn't be exposed (such as usage or query information)
  // Maybe this should be configurable through a main.cfg configuration setting, but I don't think it's worth it
  // The only people who should need the contents of this file should have access to it through the file system
  // I think you can also view the contents of this file through the statusaction if you have a suitable login
//   char *efile = error_file.getcstr();
//   ifstream errin (efile);
//   delete []efile;
//   if (errin) {
//     errortext += "The error log, " + error_file + ", contains the\n";
//     errortext += "following information:\n\n";
//     if (!debug) errortext += "<pre>\n";

//     char c;
//     errin.get(c);
//     while (!errin.eof ()) {
//       errortext.push_back(c);
//       errin.get(c);
//     }
    
//     if (!debug) errortext += "</pre>\n";

//     errin.close();

//   } else {
    errortext += "Please consult " + error_file + " for more information.\n";
//   }

  format_error_string (errorpage, errortext, debug);
}

static void page_errorparseargs (const text_t &gsdlhome, bool debug, text_t &errorpage) {

  text_t errortext = "An error occurred during the parsing of the cgi arguments.\n";

  text_t error_file = filename_cat (gsdlhome, "etc", "error.txt");
  // This is all commented out because I think it's a really bad idea
  // The error.txt file may be very large, causing out of memory problems and even crashing the machine in extreme
  //   cases where multiple processes are causing this type of error (e.g. automated processes that try to "hack"
  //   the Greenstone site by supplying values such as site URLs for the CGI arguments -- this has happened)
  // Also, the error.txt may contain information that shouldn't be exposed (such as usage or query information)
  // Maybe this should be configurable through a main.cfg configuration setting, but I don't think it's worth it
  // The only people who should need the contents of this file should have access to it through the file system
  // I think you can also view the contents of this file through the statusaction if you have a suitable login
//   char *efile = error_file.getcstr();
//   ifstream errin (efile);
//   delete []efile;
//   if (errin) {
//     errortext += "The error log, " + error_file + ", contains the\n";
//     errortext += "following information:\n\n";
//     if (!debug) errortext += "<pre>\n";

//     char c;
//     errin.get(c);
//     while (!errin.eof ()) {
//       errortext.push_back(c);
//       errin.get(c);
//     }
//     if (!debug) errortext += "</pre>\n";
//     errin.close();

//   } else {
    errortext += "Please consult " + error_file + " for more information.\n";
//   }

  format_error_string (errorpage, errortext, debug);
}

static void page_errorcgipage (const text_t &gsdlhome, bool debug, text_t &errorpage) {

  text_t errortext = "An error occurred during the construction of the cgi page.\n";

  text_t error_file = filename_cat (gsdlhome, "etc", "error.txt");
  // This is all commented out because I think it's a really bad idea
  // The error.txt file may be very large, causing out of memory problems and even crashing the machine in extreme
  //   cases where multiple processes are causing this type of error (e.g. automated processes that try to "hack"
  //   the Greenstone site by supplying values such as site URLs for the CGI arguments -- this has happened)
  // Also, the error.txt may contain information that shouldn't be exposed (such as usage or query information)
  // Maybe this should be configurable through a main.cfg configuration setting, but I don't think it's worth it
  // The only people who should need the contents of this file should have access to it through the file system
  // I think you can also view the contents of this file through the statusaction if you have a suitable login
//   char *efile = error_file.getcstr();
//   ifstream errin (efile);
//   delete []efile;
//   if (errin) {
//     errortext += "The error log, " + error_file + ", contains the\n";
//     errortext += "following information:\n\n";
//     if (!debug) errortext += "<pre>\n";

//     char c;
//     errin.get(c);
//     while (!errin.eof ()) {
//       errortext.push_back(c);
//       errin.get(c);
//     }
//     if (!debug) errortext += "</pre>\n";
//     errin.close();

//   } else {
    errortext += "Please consult " + error_file + " for more information.\n";
//   }

  format_error_string (errorpage, errortext, debug);
}

static void print_debug_info (receptionist &recpt) {

  outconvertclass text_t2ascii;
  const recptconf &configinfo = recpt.get_configinfo ();
  text_t etc_dir = filename_cat (configinfo.gsdlhome, "etc");

  cout << "\n";
  cout << text_t2ascii
       << "------------------------------------------------------------\n"
       << "Configuration and initialization completed successfully.\n"
       << "  Note that more debug information may be available in the\n"
       << "  initialization and error log error.txt in " << etc_dir << ".\n"
       << "------------------------------------------------------------\n\n";

  bool colspec = false;
  if (configinfo.collection.empty()) {
    cout << "Receptionist is running in \"general\" (i.e. not \"collection "
	 << "specific\") mode.\n";
  } else {
    cout << text_t2ascii 
	 << "Receptionist is running in \"collection specific\" mode.\n"
	 << "  collection=" << configinfo.collection << "\n"
	 << "  collection directory=" << configinfo.collectdir << "\n";
    colspec = true;
  }
  
  cout << text_t2ascii << "  gsdlhome=" << configinfo.gsdlhome << "\n";
  if (!configinfo.collecthome.empty())
    cout << text_t2ascii << "  collecthome=" << configinfo.collecthome << "\n";
  if (!configinfo.dbhome.empty())
    cout << text_t2ascii << "  dbhome=" << configinfo.dbhome << "\n";
  cout << text_t2ascii << "  httpprefix=" << configinfo.httpprefix << "\n";
  cout << text_t2ascii << "  httpweb=" << configinfo.httpweb << "\n";
  cout << text_t2ascii << "  gwcgi=" << configinfo.gwcgi << "\n\n"
       << "  Note that unless gwcgi has been set from a configuration\n"
       << "  file it is dependent on environment variables set by your\n"
       << "  webserver. Therefore it may not have the same value when run\n"
       << "  from the command line as it would be when run from your\n"
       << "  web server.\n";
  if (configinfo.usecookiesForUID) 
    cout << "UID will be saved as a cookie\n";
  else 
    cout << "UID not saved as a cookie\n";
  if (configinfo.usecookiesForE) 
    cout << "The e arg will be saved as a cookie\n";
  else 
    cout << "The e arg is not saved as a cookie - set usecookiesForE to true to use cookies for e \n";
  if (configinfo.logcgiargs) 
    cout << "logging is enabled\n";
  else 
    cout << "logging is disabled\n";
  cout << "------------------------------------------------------------\n\n";

  text_tset::const_iterator this_mfile = configinfo.macrofiles.begin();
  text_tset::const_iterator end_mfile = configinfo.macrofiles.end();
  cout << "Macro Files:\n"
       << "------------\n";
  text_t mfile;
  bool found;
  while (this_mfile != end_mfile) {
    cout << text_t2ascii << *this_mfile;
    int spaces = (22 - (*this_mfile).size());
    if (spaces < 2) spaces = 2;
    text_t outspaces;
    for (int i = 0; i < spaces; ++i) outspaces.push_back (' ');
    cout << text_t2ascii << outspaces;

    found = false;
    if (colspec) {
      // collection specific - try collectdir/macros first
      mfile = filename_cat (configinfo.collectdir, "macros", *this_mfile);
      if (file_exists (mfile)) {
	cout << text_t2ascii << "found (" << mfile << ")\n";
	found = true;
      }
    }
 
    if (!found) {
      // try main macro directory
      mfile = filename_cat (configinfo.gsdlhome, "macros", *this_mfile);
      if (file_exists (mfile)) {
	cout << text_t2ascii << "found (" << mfile << ")\n";
	found = true;
      }
    }

    if (!found)
      cout << text_t2ascii << "NOT FOUND\n";

    ++this_mfile;
  }

  cout << "------------------------------------------------------------\n\n"
       << "Collections:\n"
       << "------------\n"
       << "  Note that collections will only appear as \"running\" if\n"
       << "  their build.cfg files exist, are readable, contain a valid\n"
       << "  builddate field (i.e. > 0), and are in the collection's\n"
       << "  index directory (i.e. NOT the building directory)\n\n";

  recptprotolistclass *protos = recpt.get_recptprotolist_ptr();
  recptprotolistclass::iterator rprotolist_here = protos->begin();
  recptprotolistclass::iterator rprotolist_end = protos->end();

  bool is_z3950 = false;
  bool found_valid_col = false;


  while (rprotolist_here != rprotolist_end) {
    comerror_t err;    
    if ((*rprotolist_here).p == NULL) continue;
    else if (is_z3950==false &&
	     (*rprotolist_here).p->get_protocol_name(err) == "z3950proto") {
      cout << "\nZ39.50 Servers:   (always public)\n"
	   << "---------------\n";
      is_z3950=true;
    }

    text_tarray collist;
    (*rprotolist_here).p->get_collection_list (collist, err, cerr);
    if (err == noError) {
      text_tarray::iterator collist_here = collist.begin();
      text_tarray::iterator collist_end = collist.end();
      
      while (collist_here != collist_end) {
	
	cout << text_t2ascii << *collist_here;
	
	int spaces = (22 - (*collist_here).size());
	if (spaces < 2) spaces = 2;
	text_t outspaces;
	for (int i = 0; i < spaces; ++i) outspaces.push_back (' ');
	cout << text_t2ascii << outspaces;
	
	  ColInfoResponse_t *cinfo = recpt.get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, cerr);
	  if (cinfo != NULL) {
	    if (cinfo->isPublic) cout << "public ";
	    else cout << "private";

	    if (cinfo->buildDate > 0) {
	      cout << "   running    ";
	      found_valid_col = true;
	    } else {
	      cout << "   not running";
	    }
	  }

	  cout << "\n";

	  ++collist_here;
      }
    }
    is_z3950=false;
    ++rprotolist_here;
  } // end of while loop

  if (!found_valid_col) {
    cout << "WARNING: No \"running\" collections were found. You need to\n";
    cout << "         build one of the above collections\n";
  }

  cout << "\n------------------------------------------------------------\n";
  cout << "------------------------------------------------------------\n\n";
  cout << "receptionist running in command line debug mode\n";
  cout << "enter cgi arguments as name=value pairs (e.g. 'a=p&p=home'):\n";

}




// Creates every action compiled into this build and registers it with the
// receptionist. udb and kdb are handed to the actions that take a user or
// key database; isPersistentVal is passed to the ispersistentaction.
void add_all_actions(receptionist& recpt, userdbclass* udb, keydbclass* kdb, isPersistentEnum isPersistentVal) 
{
#ifdef GSDL_USE_TIP_ACTION
  recpt.add_action (new tipaction());
#endif

#ifdef GSDL_USE_STATUS_ACTION
  statusaction *status_act = new statusaction();
  status_act->set_receptionist (&recpt);
  recpt.add_action (status_act);
#endif

  pageaction *page_act = new pageaction();
  page_act->set_receptionist (&recpt);
  recpt.add_action (page_act);

#ifdef GSDL_USE_PING_ACTION
  recpt.add_action (new pingaction());
#endif

  recpt.add_action (new ispersistentaction(isPersistentVal));
  
#if defined(USE_RSS)
  recpt.add_action (new rssaction());
#endif

  queryaction *query_act = new queryaction();
  query_act->set_userdb(udb);
  query_act->set_receptionist (&recpt);
  recpt.add_action (query_act);

#if defined(USE_SQLITE)
  sqlqueryaction *sqlquery_act = new sqlqueryaction();
  sqlquery_act->set_receptionist (&recpt);
  recpt.add_action (sqlquery_act);
#endif

  documentaction *document_act = new documentaction();
  document_act->set_receptionist (&recpt);
  recpt.add_action (document_act);

#ifdef GSDL_USE_USERS_ACTION
  usersaction *users_act = new usersaction();
  users_act->set_userdb(udb);
  recpt.add_action (users_act);
#endif

#ifdef GSDL_USE_EXTLINK_ACTION
  extlinkaction *extlink_act = new extlinkaction();
  extlink_act->set_receptionist(&recpt);
  recpt.add_action (extlink_act);
#endif
		
#ifdef GSDL_USE_AUTHEN_ACTION
  authenaction *authen_act = new authenaction();
  authen_act->set_userdb(udb);
  authen_act->set_keydb(kdb);
  authen_act->set_receptionist(&recpt);
  recpt.add_action (authen_act);
#endif

#ifdef GSDL_USE_COLLECTOR_ACTION
  collectoraction *collector_act = new collectoraction();
  collector_act->set_receptionist (&recpt);
  recpt.add_action(collector_act);
#endif

#ifdef GSDL_USE_DEPOSITOR_ACTION
  depositoraction *depositor_act = new depositoraction();
  depositor_act->set_receptionist (&recpt);
  recpt.add_action(depositor_act);
#endif

#ifdef GSDL_USE_BROWSE_ACTION
  browseaction *browse_act = new browseaction();
  browse_act->set_receptionist (&recpt);
  recpt.add_action(browse_act);
#endif

#ifdef GSDL_USE_PHIND_ACTION
  // Phind uses MGPP; do we also need to check whether ENABLE_MGPP is set?
  recpt.add_action(new phindaction());
#endif

#ifdef GSDL_USE_GTI_ACTION
  gtiaction *gti_act = new gtiaction();
  gti_act->set_receptionist(&recpt);
  recpt.add_action(gti_act);
#endif

  dynamicclassifieraction *dynclass_act = new dynamicclassifieraction();
  dynclass_act->set_receptionist(&recpt);
  recpt.add_action(dynclass_act);  

#if defined(USE_MYSQL) || defined(USE_ACCESS)
  orderaction *order_act = new orderaction();
  order_act->set_receptionist(&recpt);
  recpt.add_action(order_act);
#endif

  // The config action allows collections to be added, released etc. when
  // the server is persistent (e.g. fastcgi or when Greenstone is configured
  // as an Apache module). Presumably this includes Windows server.exe too.
  //
  // It is always included in the list of actions, even when compiling
  // Greenstone to be used in a non-persistent way (e.g. library.cgi), so
  // that the e-variable that is formed is consistent between the
  // persistent executable and the non-persistent executable.
  configaction *config_act = new configaction();
  config_act->set_receptionist(&recpt);
  recpt.add_action(config_act);
}



// Creates every browser class compiled into this build and registers it
// with the receptionist; "VList" is made the default browser.
void add_all_browsers(receptionist& recpt) 
{
  vlistbrowserclass *vlist_browser = new vlistbrowserclass();
  vlist_browser->set_receptionist(&recpt);
  recpt.add_browser (vlist_browser);
  recpt.setdefaultbrowser ("VList");

  hlistbrowserclass *hlist_browser = new hlistbrowserclass();
  hlist_browser->set_receptionist(&recpt);
  recpt.add_browser (hlist_browser);

  // the remaining browsers are registered without a receptionist pointer

#ifdef GSDL_USE_DATELIST_BROWSER
  recpt.add_browser (new datelistbrowserclass());
#endif

  recpt.add_browser (new invbrowserclass());

#ifdef GSDL_USE_PAGED_BROWSER
  recpt.add_browser (new pagedbrowserclass());
#endif

#ifdef GSDL_USE_HTML_BROWSER
  recpt.add_browser (new htmlbrowserclass());
#endif

#ifdef GSDL_USE_PHIND_BROWSER
  recpt.add_browser (new phindbrowserclass());
#endif
}


// cgiwrapper does everything necessary to output a page
// using the cgi protocol. If this is being run for a particular
// collection then "collection" should be set, otherwise it
// should equal "".
void cgiwrapper (receptionist &recpt, text_t collection) {
  int numrequests = 0;
  bool debug = false;
  const recptconf &configinfo = recpt.get_configinfo ();

  // find out whether this is being run as a cgi-script
  // or a fastcgi script
#ifdef USE_FASTCGI
  fcgistreambuf outbuf;
  int isfastcgi = !FCGX_IsCGI();
  FCGX_Stream *fcgiin, *fcgiout, *fcgierr;
  FCGX_ParamArray fcgienvp;
#else
  int isfastcgi = 0;
#endif

  // Work out whether we are being run by hand from the command line
  // BEFORE any configuration is read: the configuration error pages below
  // are formatted differently in debug mode. The test mirrors the request
  // handling further down - it is debug mode when the request is neither
  // a POST with a CONTENT_LENGTH, nor a GET, nor has a QUERY_STRING.
  if (!isfastcgi) {
    const char *method = getenv("REQUEST_METHOD");
    const bool method_is_post = (method != NULL && strcmp(method, "POST") == 0);
    const bool method_is_get = (method != NULL && strcmp(method, "GET") == 0);
    const bool have_length = (getenv("CONTENT_LENGTH") != NULL);
    const bool have_query = (getenv("QUERY_STRING") != NULL);

    debug = !(method_is_post && have_length) && !method_is_get && !have_query;
  }

  if (debug) {
    cout << "Configuring Greenstone...\n";
    cout << flush;
  }

  // we need gsdlhome to do fileupload stuff, so moved this configure stuff before the get argstr stuff
  // init stuff - we can't output error pages directly with
  // fastcgi so the pages are stored until we can output them
  text_t errorpage;
  outconvertclass text_t2ascii;

  // set defaults
  int maxrequests = 10000;
  recpt.configure ("collection", collection);
  char *script_name = getenv("SCRIPT_NAME");
  if (script_name != NULL) recpt.configure("gwcgi", script_name);
  else recpt.configure("gwcgi", "/gsdl");

  // read in the configuration files.
  text_t gsdlhome;
  text_t collecthome;
  configurator gsdlconfigurator(&recpt);
  if (!site_cfg_read (gsdlconfigurator, gsdlhome, collecthome, maxrequests)) {
    // couldn't find the site configuration file
    page_errorsitecfg (errorpage, debug, 0);
  } else if (gsdlhome.empty()) {
    // no gsdlhome in gsdlsite.cfg
    page_errorsitecfg (errorpage, debug, 1);
  } else if (!directory_exists(gsdlhome)) {
    // gsdlhome not a valid directory
    page_errorsitecfg (errorpage, debug, 1);
  } else if (!main_cfg_read (recpt, gsdlhome, collecthome, collection)) {
    // couldn't find the main configuration file
    page_errormaincfg (gsdlhome, collection, debug, errorpage);
  } else  if (configinfo.collectinfo.empty() && false) { // commented out for corba
    // don't have any collections
    page_errorcollect (gsdlhome, errorpage, debug);
  }

  // set up the httpweb variable if it hasn't been defined yet
  if (configinfo.httpweb.empty()) {
    recpt.configure("httpweb", configinfo.httpprefix+"/web");
  }
  
  // get the query string if it is not being run as a fastcgi
  // script
  text_t argstr = g_EmptyText;
  fileupload_tmap fileuploads;
  cgiargsclass args;
  char *aURIStr = NULL;
  if (!isfastcgi) {
    char *request_method_str = getenv("REQUEST_METHOD");
    char *content_length_str = getenv("CONTENT_LENGTH");
    if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 &&
	content_length_str != NULL)  {
      // POST form data
      long content_length = strtol(content_length_str, NULL, 10);
      if (content_length > 0) {
#ifdef __WIN32__
	// On Windows it is important that standard input be read in binary
	// mode, otherwise end of line "<CR><LF>" is turned into <LF> only
	// which breaks the MIME standard (and our parsing code!)

	int result = _setmode( _fileno( stdin ), _O_BINARY );
	if( result == -1 ) {
	  cerr << "Warning: Failed to set standard input to binary mode." << endl;
	  cerr << "         Parsing of multi-part MIME will most likely fail" << endl;
	}
#endif

	unsigned char *buffer = new unsigned char[content_length];

	size_t chars_read = fread(buffer, 1, content_length, stdin);

	if (chars_read != static_cast<size_t>(content_length)) {
	  cerr << "Warning: mismatch between CONTENT_LENGTH and data read from standard in" << endl;
	}

	// only hand over the bytes that were really read; after a short
	// read the rest of the buffer is uninitialised
	argstr.setcarr((char *)buffer, chars_read);

	text_t content_type;
	char *content_type_str = getenv("CONTENT_TYPE");
	if (content_type_str) content_type = content_type_str;
	argstr = parse_post_data(content_type, argstr, fileuploads, gsdlhome);

	// the raw request body is no longer needed once it has been parsed
	// NOTE(review): assumes text_t::setcarr copies the bytes it is
	// given rather than keeping a pointer to them - confirm
	delete [] buffer;
      }
    } else {
      aURIStr = getenv("QUERY_STRING");
      if ((request_method_str != NULL && strcmp(request_method_str, "GET") == 0)
	  || aURIStr != NULL) {
	// GET form data
	if (aURIStr != NULL) argstr = aURIStr;
	//kjdon a get form is not unicode
	argstr.setencoding(1); 
      } else {
	// debugging from command line (already detected above; the
	// arguments are read from standard input further down)
	debug = true;
      }
    }
  }


  if (errorpage.empty()) {

    // initialise the library software
    if (debug) {
      cout << "Initializing...\n";
      cout << flush;
    }

    text_t error_file = filename_cat (gsdlhome, "etc", "error.txt");
    char *eout = error_file.getcstr();
    ofstream errout (eout, ios::app);
    delete []eout;
    if (!recpt.init(errout)) {
      // an error occurred during the initialisation
      page_errorinit(gsdlhome, debug, errorpage);
    }
    errout.close();
  }

  if (debug && errorpage.empty()) {
    // get query string from command line
    print_debug_info (recpt);
    char cinURIStr[1024];
    cin.get(cinURIStr, 1024);
    argstr = cinURIStr;
  }

  // cgi scripts only deal with one request
  if (!isfastcgi) maxrequests = 1;

  // Page-request loop. If this is not being run as a fastcgi
  // process then only one request will be processed and then
  // the process will exit.
  while (numrequests < maxrequests) {
#ifdef USE_FASTCGI
    if (isfastcgi) {
      if (FCGX_Accept(&fcgiin, &fcgiout, &fcgierr, &fcgienvp) < 0) break;

      char *request_method_str = FCGX_GetParam ("REQUEST_METHOD", fcgienvp);
      char *content_length_str = FCGX_GetParam ("CONTENT_LENGTH", fcgienvp);

      if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 &&
	  content_length_str != NULL)  {
	// POST form data
	int content_length = text_t(content_length_str).getint();
	if (content_length > 0) {
	  argstr.clear();
	  int c;
	  do {
	    c = FCGX_GetChar (fcgiin);
	    if (c < 0) break;
	    argstr.push_back (c);
	    --content_length;
	  } while (content_length > 0);
	}

      } else {
	// GET form data
	aURIStr = FCGX_GetParam("QUERY_STRING", fcgienvp);
	if (aURIStr != NULL) argstr = aURIStr;
	else argstr = g_EmptyText;
      }
    }
#endif

    // get output streams ready
#ifdef USE_FASTCGI
    outbuf.fcgisbreset ();
    if (isfastcgi) outbuf.set_fcgx_stream (fcgiout);
    else outbuf.set_other_ostream (&cout);
    ostream pageout (&outbuf);
#else
#define pageout cout
#endif

    // if using fastcgi we'll load environment into a map,
    // otherwise simply pass empty map (can't get environment
    // variables using getenv() while using FCGX versions
    // of fastcgi - at least I can't ;-) - Stefan)
    text_tmap fastcgienv;
#ifdef USE_FASTCGI
    if (isfastcgi) {
      for(; *fcgienvp != NULL; ++fcgienvp) {
	text_t fvalue = *fcgienvp;
	text_t::const_iterator begin = fvalue.begin();
	text_t::const_iterator end = fvalue.end();
	text_t::const_iterator equals_sign = findchar (begin, end, '=');
	if (equals_sign != end)
	  fastcgienv[substr(begin, equals_sign)] = substr(equals_sign+1, end);
      }
    }
#endif

    // temporarily need to configure gwcgi here when using fastcgi as I can't
    // get it to pass the SCRIPT_NAME environment variable to the initial 
    // environment (if anyone can work out how to do this using the apache 
    // server, let me know). Note that this overrides the gwcgi field in 
    // site.cfg (which it shouldn't do) but I can't at present set gwcgi 
    // from site.cfg as I have old receptionists laying around that wouldn't
    // appreciate it. The following 5 lines of code should be deleted once
    // I either a: get the server to pass SCRIPT_NAME at initialization
    // time or b: convert all the collections using old receptionists over
    // to this version and uncomment gwcgi in the site.cfg file -- Stefan.
#ifdef USE_FASTCGI
    if (isfastcgi) {
      recpt.configure("gwcgi", fastcgienv["SCRIPT_NAME"]);
    }
#endif


    // if there has been no error so far, perform the production of the
    // output page
    if (errorpage.empty()) {
      text_t error_file = filename_cat (gsdlhome, "etc", "error.txt");
      char *eout = error_file.getcstr();
      ofstream errout (eout, ios::app);
      delete []eout;

#if defined(__WIN32__) && defined(GSDL_USE_IOS_H)
      // old Windows compilers (VC++4.2)
      cerr = errout;
#else
      // can't do this anymore according to c++ standard...
      // cerr = errout;
      // ... but can do this instead
      streambuf* errbuf = cerr.rdbuf(errout.rdbuf());
#endif

      // parse the cgi arguments and produce the resulting page if there
      // has been no errors so far
      if (!recpt.parse_cgi_args (argstr, fileuploads, args, errout, fastcgienv)) {
	errout.close ();
	page_errorparseargs(gsdlhome, debug, errorpage);
      } else {
	// produce the output page

	if (!recpt.produce_cgi_page (args, pageout, errout, fastcgienv)) {
	  errout.close ();
	  page_errorcgipage(gsdlhome, debug, errorpage);
	}
	recpt.log_cgi_args (args, errout, fastcgienv);
	errout.close ();
      }

#if !defined(__WIN32__) || !defined(GSDL_USE_IOS_H)
    // restore the cerr buffer
    cerr.rdbuf(errbuf);
#endif
    }
    // clean up any files that were uploaded 
    fileupload_tmap::const_iterator this_file = fileuploads.begin();
    fileupload_tmap::const_iterator end_file = fileuploads.end();
    while (this_file != end_file)
      {
	if (file_exists((*this_file).second.tmp_name)) 
	  {
	    char *thefile = (*this_file).second.tmp_name.getcstr();
	    unlink(thefile);
	    delete [] thefile;
	  }
	++this_file;
      }
    
    // there was an error, output the error page
    if (!errorpage.empty()) {
      pageout << text_t2ascii << errorpage;
      errorpage.clear();
      numrequests = maxrequests; // make this the last page
    }
    pageout << flush;
    
    // finish with the output streams
#ifdef USE_FASTCGI
    if (isfastcgi) FCGX_Finish();
#endif

    ++numrequests;
  }

  return;
}
