// Master-worker program to read in a list of files and invoke 
// import on each separately using manifest files in Greenstone 3, 
// with synchronisation using OpenMPI
//
// hussein suleman
// 1 july 2010

#include "mpi.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

// Small buffer size, expressed as a number of elements.
#define KILOBUF 512
// Large buffer size, expressed as a number of chars. A bigger alternative
// value is kept commented out on the next line for reference.
//#define MEGABUF 655360
#define MEGABUF 10240
    
int main( int argc, char *argv [] )
{
   int numtasks, rank, rc;            // MPI variables

   if (argc != 6 ) {
     cerr << "Usage: " << argv[0] << "filelist epoch gsdlhome site collection" << endl;
     exit(-1);
   }

   char *filelist = argv[1];          // list of filenames
   char *epochStr = argv[2];          // number of files per task
   int epoch = atoi (epochStr);       
   char *gsdlhomedir = argv[3];      // location of import script
   char *site = argv[4];              // Greenstone site
   char *collection = argv[5];        // Greenstone collection
                
   // start MPI environment
   rc = MPI_Init(&argc,&argv);
   if (rc != MPI_SUCCESS) {
      printf ("Error starting MPI program. Terminating.\n");
      MPI_Abort(MPI_COMM_WORLD, rc);
   }
   
   // get MPI variables: number of processors and processor number
   MPI_Status stat;
   MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
   MPI_Comm_rank(MPI_COMM_WORLD,&rank);

      
   if (rank == 0)
   // master node processing
   {
      char incoming[KILOBUF];          // buffer for acknowledgments
      char buffer[MEGABUF];         // buffer to send tasks
      MPI_Request request[KILOBUF];    // request monitor for all tasks
      MPI_Status status[KILOBUF];      // status monitor for all tasks
      int actualTasks = 0;         // number of processors running

      // open file listing filenames to process
      ifstream infile;
      infile.open (filelist);
      string line;
      
      // set initial status of all processors to idle
      for ( int j=0; j<KILOBUF; j++ )
         incoming[j] = ' ';
      
      // scan through contents of file listing
      while (!infile.eof ())
      {
         // get a filename
         getline (infile, line);
         if (line.length() > 0)
         {
            // search for idle processor
            int dest=0;
            int found = 0;
            while ((dest<(numtasks-1)) && (found == 0))
               if (incoming[dest] == ' ')
                  found = 1;
               else
                  dest++;

            // if no idle processor, wait for one to become idle
            if (found == 0) {
	      MPI_Waitany (numtasks-1, request, &dest, status);
	    }

            // add the first filename to the instruction
            sprintf (buffer, "<Filename>%s</Filename>", line.c_str ());
            int epochCounter = epoch;

            // if epoch>1 and more filenames, add more filenames
            while ((epochCounter > 1) && (!infile.eof ()))
            {
               getline (infile, line);
               if (line.length () > 0)
               {
                  char buffer2[1024];
                  sprintf (buffer2, "<Filename>%s</Filename>", line.c_str ());
                  strcat (buffer, buffer2);
               }
               epochCounter--;
            }
            
            // mark processors as busy
            incoming[dest] = 'B';
            // send out the job to the processor
            MPI_Send (&buffer, strlen (buffer)+1, MPI_CHAR, dest+1, 1, MPI_COMM_WORLD);
            // wait for a done acknowledgement
            MPI_Irecv (&incoming[dest], 1, MPI_CHAR, dest+1, 1, MPI_COMM_WORLD, &request[dest]);
            // update counter of actual tasks
            if (dest > actualTasks)
               actualTasks = dest;
         }
      }

      infile.close();

      // wait until all outstanding tasks are completed
      int dest;
      for ( int k=0; k<actualTasks; k++ )
         MPI_Waitany (actualTasks, request, &dest, status);

      // send message to end all processing engines      
      char endstr[5] = "end";
      for ( int i=1; i<numtasks; i++ )
         MPI_Send (endstr, 4, MPI_CHAR, i, 1, MPI_COMM_WORLD);

   }
   else
   // slave node processing
   {
      char incoming[MEGABUF];

      do {   
         // wait for instruction from master
         MPI_Recv (&incoming, MEGABUF, MPI_CHAR, 0, 1, MPI_COMM_WORLD, &stat);
         if (strcmp (incoming, "end") != 0)
         {
            // process a received job
            cout << "Processing [" << rank << ":" << incoming << "]" << endl;
            
            // construct manifest filename
            char manifestfile[128];
            sprintf (manifestfile, "%u.manifest.xml", rank);
	    char* manifestfilename = tempnam(NULL,manifestfile);
	    if (manifestfilename != NULL) {
            
	      // create manifest file
	      ofstream manifestfile;
	      manifestfile.open (manifestfilename);
	      manifestfile << "<Manifest><Index>" << incoming << "</Index></Manifest>" << endl;
	      manifestfile.close ();
	      
	      // create Greenstone import command
	      char command[2048];
	      sprintf (command, "%s\\bin\\script\\import.pl -keepold -manifest %s -site %s %s", gsdlhomedir, manifestfilename, site, collection);
	      
	      // cout << "**** cmd = " << command << endl;

	      // invoke Greenstone import with manifest file
	      system (command);
	      
	      char line = ' ';
	      // send completed message
	      MPI_Send (&line, 1, MPI_CHAR, 0, 1, MPI_COMM_WORLD);

	      free(manifestfilename);
	    }
	    else {
	      cerr << "Error: Unable to create temporary manifest file for rank=" << rank << endl;
	    }
	 }
      } while (strcmp (incoming, "end") != 0);
      // stop when "end" instruction is received

   }

   // clean up MPI environment
   MPI_Finalize();
}
                                                       
