/**********************************************************************
 *
 * gdbmclass.cpp -- 
 * Copyright (C) 1999-2008  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "gdbmclass.h"
#include "gsdltools.h"
#include "gsdlunicode.h"
#include "fileutil.h"
#include "stdlib.h"
#include <cstring>


// Creates a wrapper with no GDBM database attached yet.  gsdlhome is simply
// forwarded to the dbclass base-class constructor.
gdbmclass::gdbmclass(const text_t& gsdlhome)
  : dbclass(gsdlhome)
{
  // A NULL handle is how the other methods recognise "no database open"
  this->gdbmfile = NULL;
}

// Makes sure any database that is still open gets closed when the object
// is destroyed.
gdbmclass::~gdbmclass()
{
  this->closedatabase();
}


// Opens the GDBM database stored in `filename`.
//
//   filename      - path of the database file
//   mode          - DB_WRITER or DB_WRITER_CREATE to open for writing; any
//                   other value opens the database read-only
//   num_retrys    - upper bound on the number of gdbm_open attempts.  A retry
//                   only happens while GDBM reports GDBM_CANT_BE_READER or
//                   GDBM_CANT_BE_WRITER; at least one attempt is always made
//   need_filelock - only named (and used) in MSVC builds, where it is passed
//                   on to gdbm_open
//
// If `filename` is already the open database nothing is done; a different
// open database is closed first.  When opening read-only and the file is
// missing, the old ".ldb"/".bdb" extensions are tried and, failing that, an
// attempt is made to generate the database from a ".txt.gz" file.
//
// Note that `openfile` is set to `filename` even when the open fails.
//
// returns true if opened
bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys, 
#ifdef _MSC_VER
			      bool need_filelock
#else
                              bool
#endif
			      )
{
  const int block_size = 512;

  if (gdbmfile != NULL) {
    // Same file requested again: keep using the handle we already have
    if (openfile == filename) return true;
    closedatabase ();
  }

  // Map the DB mode values into GDBM mode values (read-only by default)
  int gdbm_mode = GDBM_READER;
  if (mode == DB_WRITER) {
    gdbm_mode = GDBM_WRITER;
  }
  else if (mode == DB_WRITER_CREATE) {
    gdbm_mode = GDBM_WRCREAT;
  }

  text_t gdbm_filename = filename;
  if (gdbm_mode == GDBM_READER)
  {
    // Databases should all be .gdb now, but old style collections may still
    // carry .ldb/.bdb files (we can now read both).  Only look for those when
    // the requested .gdb file is missing.
    if (!file_exists(gdbm_filename) && ends_with(gdbm_filename, ".gdb"))
    {
      gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".gdb").size()) + ".ldb";

      if (!file_exists(gdbm_filename)) {
        // try bdb as well
        gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".ldb").size()) + ".bdb";
      }
    }

    // The following fall-back is less likely to be needed now that the GDBM
    // library reads both little-endian and big-endian files
    if (!file_exists(gdbm_filename))
    {
      // We're desperate, so try generating the desired GDBM file from a txtgz file
      gdbm_filename = filename;
      generate_from_txtgz(gdbm_filename);
    }
  }

  // getcstr() hands back a buffer that we must release with delete[]
  char *namebuffer = gdbm_filename.getcstr();
  do {
#ifdef _MSC_VER
    gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
#else
    gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
#endif
    --num_retrys;
  } while (num_retrys>0 && gdbmfile==NULL &&
	   (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
  delete []namebuffer;

  if (gdbmfile == NULL && logout != NULL) {
    outconvertclass text_t2ascii;
    (*logout) << text_t2ascii << "database open failed on: " << gdbm_filename << "\n";
  }

  openfile = filename;

  return (gdbmfile != NULL);
}


// Closes the currently open GDBM database and forgets its filename.
// Does nothing when no database is open.
void gdbmclass::closedatabase ()
{
  if (gdbmfile != NULL) {
    gdbm_close (gdbmfile);
    gdbmfile = NULL;
    openfile.clear();
  }
}


void gdbmclass::deletekey (const text_t &key)
{
  if (gdbmfile == NULL) return;

  // get a utf-8 encoded c string of the unicode key
  datum key_data;
  key_data.dptr = (to_utf8(key)).getcstr();
  if (key_data.dptr == NULL) return;
  key_data.dsize = strlen (key_data.dptr);

  // delete the key
  gdbm_delete (gdbmfile, key_data);

  // free up the key memory
  delete []key_data.dptr;
}


// returns file extension string
//
// GDBM files are always given the ".gdb" extension now (the extension used
// to depend on endian-ness: ".ldb" for little-endian, ".bdb" otherwise).
// opendatabase() still falls back to ".ldb" and ".bdb" for backwards
// compatibility when the ".gdb" file cannot be found.
text_t gdbmclass::getfileextension ()
{
  text_t extension = ".gdb";
  return extension;
}


// returns true on success
bool gdbmclass::getkeydata (const text_t& key, text_t &data)
{
  datum key_data;
  datum return_data;

  if (gdbmfile == NULL) return false;
  
  // get a utf-8 encoded c string of the unicode key
  key_data.dptr = (to_utf8(key)).getcstr();
  if (key_data.dptr == NULL) {
    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
    return false;
  }
  key_data.dsize = strlen (key_data.dptr);
  
  // fetch the result
  return_data = gdbm_fetch (gdbmfile, key_data);
  delete []key_data.dptr;
  
  if (return_data.dptr == NULL) return false;

  data.setcarr (return_data.dptr, return_data.dsize);
  free (return_data.dptr);
  data = to_uni(data);  // convert to unicode

  return true;
}


// returns array of keys
//
// Walks the database with getfirstkey()/getnextkey() and collects every key
// in traversal order.  The walk ends at the first empty key, so the result is
// empty when no database is open.
text_tarray gdbmclass::getkeys ()
{
  text_tarray all_keys;

  for (text_t current = getfirstkey(); !current.empty(); current = getnextkey(current))
  {
    all_keys.push_back(current);
  }

  return all_keys;
}


// returns true on success
bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
{
  if (gdbmfile == NULL) return false;
  
  // store the value
  datum key_data;
  datum data_data;

  // get a utf-8 encoded c string of the unicode key
  key_data.dptr = (to_utf8(key)).getcstr();
  if (key_data.dptr == NULL) {
    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
    return false;
  }
  key_data.dsize = strlen (key_data.dptr);

  data_data.dptr = (to_utf8(data)).getcstr();
  if (data_data.dptr == NULL) {
    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
    delete []key_data.dptr;
    return false;
  }
  data_data.dsize = strlen (data_data.dptr);

  int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
  delete []key_data.dptr;
  delete []data_data.dptr;

  return (ret == 0);
}


// ----------------------------------------------------------------------------------------
//   GDBM-ONLY FUNCTIONS
// ----------------------------------------------------------------------------------------

// getfirstkey and getnextkey are used together to traverse the database.
// No insertions or deletions should be carried out while a traversal is
// in progress.  Once there are no keys left to visit (or when no database
// is open) an empty string is returned.
text_t gdbmclass::getfirstkey ()
{
  if (gdbmfile == NULL) return g_EmptyText;

  // ask GDBM where the traversal starts
  datum first = gdbm_firstkey (gdbmfile);
  if (first.dptr == NULL) return g_EmptyText;

  // copy the raw bytes into a text_t, then release the buffer with free()
  text_t utf8_key;
  utf8_key.setcarr (first.dptr, first.dsize);
  free (first.dptr);

  return to_uni(utf8_key);  // convert to unicode
}


// Returns the key that follows `key` in GDBM's traversal order, converted to
// unicode.  An empty string is returned when no database is open, `key` is
// empty, the key could not be encoded, or there are no more keys.
text_t gdbmclass::getnextkey (const text_t &key)
{
  if (gdbmfile == NULL || key.empty()) return g_EmptyText;

  // get a utf-8 encoded c string of the unicode key (released with delete[])
  char *utf8_key = (to_utf8(key)).getcstr();
  if (utf8_key == NULL) return g_EmptyText;

  datum current;
  current.dptr = utf8_key;
  current.dsize = strlen (utf8_key);

  // get the next key; our own key buffer is not needed after the call,
  // whatever the outcome
  datum next = gdbm_nextkey (gdbmfile, current);
  delete []utf8_key;

  if (next.dptr == NULL) return g_EmptyText;

  // copy the raw bytes into a text_t, then release the buffer with free()
  text_t utf8_next;
  utf8_next.setcarr (next.dptr, next.dsize);
  free (next.dptr);

  return to_uni(utf8_next);  // convert to unicode
}


// The following routine was written before the GDBM library was upgraded
// to read both little-endian and big-endian files, so the reason it was
// originally developed is not so important now.  It is useful to keep
// around, however, because the file format for GDBM is not guaranteed to be
// the same from one computer architecture to the next, even if they both
// have the same endian-ness.  We encountered this issue on the Mac at one
// stage: the size of the header data-structure for GDBM was simply a
// different size to that produced on Linux.  The Mac case resolved to be
// binary compatible with Linux, but it shows that we can't rely on this
// always being the case.  Using this method provides a contingency plan.
// The databases can be shipped as .txt.gz files (i.e. portable), which are
// then converted on the host's machine to a native GDBM database that is
// meaningful to that computer.

// Tries to build the GDBM database `filename` from a ".txt.gz" file that has
// the same name apart from its extension.
//
// The extension of `filename` (everything from the last '.' in its final
// path component) is replaced with ".txt.gz".  If that file exists, the Perl
// script txtgz-to-gdbm.pl is run, via gsdl_system, to produce `filename`.
// Nothing is done when the ".txt.gz" file does not exist.  Problems are
// reported on cerr only; the caller finds out whether generation worked by
// trying to open `filename` afterwards.
void gdbmclass::generate_from_txtgz (text_t filename)
{
  // replace sought after gdbm filename ext with ".txt.gz"

  text_t::const_iterator begin = filename.begin();
  text_t::const_iterator here = filename.end();

  // start the backwards scan on the last character (if there is one)
  if (here != begin) {
    --here;
  }

  bool found_ext = false;

  while (here != begin) {
    if (*here == '.') {
      found_ext = true;
      break;
    }
    // Stop at a path separator: a '.' further to the left belongs to a
    // directory name, not to this file's extension
    if (*here == '/' || *here == '\\') {
      break;
    }
    --here;
  }

  text_t filename_root;

  if (found_ext) {
    filename_root = substr(begin,here);
  }
  else {
    filename_root = filename;
  }

  text_t txtgz_filename = filename_root + ".txt.gz";
  if (!file_exists(txtgz_filename)) {
    return;
  }

  // Test to make sure Perl is on the path
  // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
  text_t cmd_test = "perl -v 1>&2";
  int rv_test = gsdl_system(cmd_test, true, cerr);
  if (rv_test != 0) {
    cerr << "Tried to find Perl. Return exit value of running " 
	 << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
    cerr << "Check that Perl is set in your environment variable PATH." << endl;

    // getenv() returns NULL when PATH is not set, and streaming a NULL
    // char pointer is undefined behaviour, so guard against it
    const char *path_env = getenv("PATH");
    cerr << "At present, PATH=" << (path_env != NULL ? path_env : "(not set)") << endl;
  }

  // The conversion is still attempted even if the Perl check above failed
  text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
  int rv = gsdl_system(cmd, true, cerr);
  // For some reason, launching this command with gsdl_system() still returns 1
  // even when it returns 0 when run from the command-line. We can check whether
  // we succeeded by looking at whether the output database file was created.
  if (rv != 0) {
    cerr << "Warning, non-zero return value on running command \""
	 << cmd << "\": " << rv << endl;
    if (!file_exists(filename)) {
      cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
    }
  }
}
