/**********************************************************************
 *
 * httpreq.cpp
 * Copyright (C) 1996
 * 
 * A component of the fnord webserver written by bmorin@wpi.edu.
 *
 * Altered for use with the Greenstone digital library software by the
 * New Zealand Digital Library Project at the University of Waikato,
 * New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include <windows.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <memory.h>
#include "httpreq.h"
#include "parse.h"
#include "netio.h"
#include "settings.h"
#include "httpsrv.h"
#include "httpsend.h"
#include "cgiwrapper.h"
#include "d_winsock.h"

/*
Implementation Notes:

HTTP field names, method and version strings are converted to upper case
right after being read from the client in order to allow case insensitive
string comparisons to be done on them.  Since these fields are worked with a
lot, this should help performance.
*/

//Private Data and declarations
#define IO_BUFFER_SIZE 16384        //16K IO Buffer
#define MAX_HTTP_LINE_LEN 1024      //Max length of line in a header of 1024

//Private Function Declarations with Return Constants

/*
Function Name: DispatchRequest
Purpose: Has the request parsed, then hands it to the right function
         to send a response or handle an error.  Afterwards the request's
         buffers are released and the client socket is closed.
Parameters:
         ClientSocket - Socket the client is on
	 ClientSockAddr - Address of client
	 AddrLen - Length of address of client
	 IOBuffer - Pointer to buffer allocated for IO operations.  It is
	            released (through CleanUpHTTPHeaders) before this
	            function returns, so the caller must not use or free it
	            afterwards.
Notes:   I'm still playing with the keep alive support.  I commented out
	 the stuff for giving a client a timeout because I was unable to detect
	 disconnects.  As a result each connection serves exactly one request.
	 There is no thread number parameter; the thread number stored in the
	 request information is always 0.
More Notes: Not sure if this organization will allow me to easily add support
	    for ISAPI filter DLLs.
*/
void DispatchRequest(SOCKET ClientSocket, SOCKADDR_IN ClientSockAddr, int AddrLen, BYTE *IOBuffer);

/*
Function Name: Get HTTP Headers
Purpose: Reads the request line and the header lines from the client socket,
         stores the parsed values in RequestFields and, when a content
         length header is present, reads the attached data as well.
Parameters:
	RequestInfo - Request information structure (see httpreq.h)
	RequestFields - HTTP request fields structure (see httpreq.h)
Returns: GH_BAD_METHOD if the request method is OPTIONS
			GH_ERROR on error (disconnect, bad data, Windows in a bad mood, etc.)
			GH_UNKNOWN_VERSION if the version number is not HTTP/0.9 or HTTP/1.x
			GH_SIMPLE_REQUEST on a properly formatted HTTP/0.9 request
			GH_10_REQUEST on a properly formatted HTTP/1.x request
*/
int GetHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);
#define GH_BAD_METHOD	   -2
#define GH_ERROR           -1
#define GH_UNKNOWN_VERSION  0
#define GH_SIMPLE_REQUEST   1
#define GH_10_REQUEST       2

/*
Function Name: Clean Up HTTP Headers
Purpose: Cleans up memory dynamically allocated for a request: the content
         attached to the request (RequestFields.Content) and the IO buffer
         (RequestInfo.IOBuffer).
Parameters:
	RequestInfo - Request information structure (see httpreq.h)
	RequestFields - HTTP request fields structure (see httpreq.h)
Returns: Nothing
*/
void CleanUpHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);

/*
Function Name: Split Query
Purpose: Splits the file and query part of a URI.  In other words, it
	 puts the parts before and after the "?" in different strings.
Parameters:
	 URIStr - The requested URI
	 FileStr - String to contain the name of the path + file part of the URI
	 QueryStr - String to contain the query part of the URI
	 ThreadNum - Presumably the number of the calling thread, for debugging
	             (the definition is not in this file - TODO confirm)
Returns: TRUE if there is a query, else FALSE
*/
BOOL SplitQuery(char *URIStr, char *FileStr, char *QueryStr, int ThreadNum);

/*
Function Name: Get File
Purpose: Attempts to find a given file, including looking for index.html.
	 Updates the given URI string so it points to the true document location
Parameters:
	 FilePath - Path of file, may be modified to best reflect the retrieved file
				  or directory
	 URIStr - URI string, minus the query
Returns: GF_ERROR on error
			GF_FILE_FOUND on success
			GF_INDEX_FOUND if file is a directory with an index.html file in it
			GF_DIRECTORY if file is a directory
			GF_FILE_NOT_FOUND if file was not found
*/

/*
Function Name: Process Simple Request
Purpose: Sends a reply to a HTTP 0.9 "simple" request
Parameters:
	RequestInfo - Request information structure (see httpreq.h); it carries
	              the client socket, the IO buffer and the thread number
	RequestFields - Structure storing the parsed headers
Notes: I should really test this and see if it works...
       DispatchRequest in this file does not call this function; it answers
       simple requests with a 400 error instead.
*/
void ProcessSimpleRequest(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);

//Public Functions
/******************************************************************************/
/*
Entry point for serving one client connection.

Allocates the IO buffer used while serving the connection, copies the
connection details out of the thread message and passes everything on to
DispatchRequest.  The buffer is not freed here: DispatchRequest releases it
(through CleanUpHTTPHeaders) once the request has been handled.

Parameters:
	Parameters - Thread message holding the client socket, the client
	             address and the length of that address
*/
void RequestThread(RequestThreadMessageT *Parameters) {
  //IO buffer for this request
  BYTE *Buffer = new BYTE[IO_BUFFER_SIZE];

  //Copy the connection details out of the thread message
  SOCKET Socket = Parameters->ClientSocket;
  SOCKADDR_IN SockAddr = Parameters->ClientSockAddr;
  int SockAddrLen = Parameters->AddrLen;

  DispatchRequest(Socket, SockAddr, SockAddrLen, Buffer);
}
/******************************************************************************/

//Private Functions

/******************************************************************************/
/*
Reads one request from the client, sends the matching response (or error),
releases the request's buffers and closes the connection.

Keep alive support is switched off, so exactly one request is handled per
connection.  IOBuffer is released by CleanUpHTTPHeaders before returning.
*/
void DispatchRequest(SOCKET ClientSocket, SOCKADDR_IN ClientSockAddr, int AddrLen,  BYTE *IOBuffer) {
  RequestInfoT RequestInfo;
  RequestFieldsT RequestFields;

  // TrayAddConnection();

  //Start from a zeroed RequestInfo, then fill in the connection details
  memset(&RequestInfo, 0, sizeof(RequestInfo));
  RequestInfo.ClientSocket = ClientSocket;
  RequestInfo.ClientSockAddr = ClientSockAddr;
  RequestInfo.AddrLen = AddrLen;
  RequestInfo.IOBuffer = IOBuffer;
  RequestInfo.IOBufferSize = IO_BUFFER_SIZE;
  RequestInfo.ThreadNum = 0;
  RequestInfo.KeepAlive = FALSE;

  //Read and parse the request, then act on the kind of request received
  const int HeadersResult = GetHTTPHeaders(RequestInfo, RequestFields);

  if (HeadersResult == GH_10_REQUEST) {
    ExamineURIStr(RequestFields.URIStr, &RequestInfo, &RequestFields);
    // TrayIncNumServed();
  }
  else if (HeadersResult == GH_SIMPLE_REQUEST) {
    SendHTTPError(400, "HTTP Request not supported", "Only 1.x requests supported", RequestInfo, RequestFields);
    // TrayIncNumServed();
  }
  else if (HeadersResult == GH_UNKNOWN_VERSION) {
    SendHTTPError(400, "HTTP Version not supported", "Only 1.x requests supported", RequestInfo, RequestFields);
    // TrayIncNumServed();
  }
  else if (HeadersResult == GH_BAD_METHOD) {
    //added Feb 2002 to handle stupid MS behaviour
    SendHTTPError(501, "Not implemented", "Only GET and POST currently implemented", RequestInfo, RequestFields);
  }
  else if (HeadersResult == GH_ERROR) {
    //Disconnect
    RequestInfo.KeepAlive = FALSE;
  }

  //Release the content and the IO buffer, then close the connection
  CleanUpHTTPHeaders(RequestInfo, RequestFields);
  CloseSocket(RequestInfo.ClientSocket);
  // TrayRemoveConnection();
}

/*
Puts a RequestFieldsT back into its empty state.

Every text_t member is cleared one by one.  A plain memset must not be used
for this (as used to be done): it can leave the text_t members in an
inconsistent state, which ultimately results in a segmentation fault.
*/
void resetRequestFieldsT(RequestFieldsT& RequestFields)
{
  //Non text_t members
  RequestFields.Content = NULL;
  RequestFields.ContentLength = 0;
  RequestFields.NumOtherHeaders = 0;

  //Every slot of the "other headers" table
  int slot = 0;
  while (slot < MAX_OTHER_HEADERS) {
    RequestFields.OtherHeaders[slot].Var.clear();
    RequestFields.OtherHeaders[slot].Val.clear();
    ++slot;
  }

  //Request line: method and URI (v0.9), version (added v1.0)
  RequestFields.MethodStr.clear();
  RequestFields.URIStr.clear();
  RequestFields.VersionStr.clear();

  //General header
  RequestFields.DateStr.clear();
  RequestFields.MIMEVerStr.clear();
  RequestFields.PragmaStr.clear();

  //Request header
  RequestFields.AuthorizationStr.clear();
  RequestFields.FromStr.clear();
  RequestFields.IfModSinceStr.clear();
  RequestFields.RefererStr.clear();
  RequestFields.UserAgentStr.clear();

  //Entity header (only CGI stuff)
  RequestFields.ContentEncodingStr.clear();
  RequestFields.ContentTypeStr.clear();
  RequestFields.ContentLengthStr.clear();

  //v1.0 optional (the more common ones)
  RequestFields.AcceptStr.clear();
  RequestFields.AcceptLangStr.clear();

  //v1.1 extensions
  RequestFields.ConnectionStr.clear();
}

/******************************************************************************/
/*
Reads one HTTP request from the client socket and parses it into
RequestFields.

Steps:
  1. RequestFields is reset.
  2. The request line is read (continuation lines, i.e. lines starting with
     a space or tab, are appended to it) and split into method, URI and
     version.  Method and version are converted to upper case.
  3. Each header line is read (again joining continuation lines) and stored
     in the matching member of RequestFields.  Headers without a dedicated
     member go into RequestFields.OtherHeaders; once that table holds
     MAX_OTHER_HEADERS entries any further unknown headers are dropped.
  4. If a content length was given, that many bytes of attached data are
     read into a newly allocated RequestFields.Content.

Parameters:
	RequestInfo - Request information structure; supplies the client
	              socket, the IO buffer and the thread number
	RequestFields - Receives the parsed request

Returns: GH_BAD_METHOD if the method is OPTIONS
	 GH_ERROR if a line or the attached data could not be read
	 GH_SIMPLE_REQUEST if the last word of the request line is not an
	   "HTTP/..." version (URIStr then holds the whole request line)
	 GH_UNKNOWN_VERSION if the version does not start with "HTTP/1."
	 GH_10_REQUEST on a parsed HTTP/1.x request

Memory: RequestFields.Content may be left allocated on every return path,
	including GH_ERROR; DispatchRequest releases it through
	CleanUpHTTPHeaders.
*/
int GetHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields) {
  //Parsing and IO buffers
  text_t CurLine;
  text_t NextLine;
  text_t FieldNameStr;
  text_t FieldValStr;

  //Parsing and IO working vars
  int ReadBufferIndex = 0;
  int DataInBuffer = 0;
  text_t::const_iterator next;
  text_t::const_iterator end;

  resetRequestFieldsT(RequestFields);

  //Get First Line
  if (GetLine(CurLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer,
	      RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer,
	      RequestInfo.ThreadNum) != 0) return GH_ERROR;

  //Get Next Line, append it if the first character is a space or tab
  for (;;) {
    if (GetLine(NextLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer,
		RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer,
		RequestInfo.ThreadNum) != 0) return GH_ERROR;
    //A request without headers is followed directly by the blank line, so
    //the line must be checked for emptiness before its first character is
    //looked at
    if (NextLine.empty()) break;
    if ((*(NextLine.begin()) != ' ') && (*(NextLine.begin()) != '\t')) break;
    CurLine += NextLine;
  }

  //Method String (first word)
  GetWord(RequestFields.MethodStr, CurLine.begin(), CurLine.end(), next);
  uc(RequestFields.MethodStr);

  /* Added Feb 2002 - IE since about version 5 send stupid frontpage requests
     for MS Document formats eg "GET /_vti_inf.html" */
  if (RequestFields.MethodStr == "OPTIONS") {
    return GH_BAD_METHOD;
  }

  //Version String (last word)
  GetLastWord(RequestFields.VersionStr, CurLine.begin(), CurLine.end(), end);
  uc(RequestFields.VersionStr);
  text_t::const_iterator versionbegin = RequestFields.VersionStr.begin();

  //The length test guards the iterator arithmetic; a last word too short to
  //hold "HTTP/" is not a version either
  if ((RequestFields.VersionStr.size() < 5) || (substr(versionbegin, versionbegin+5) != "HTTP/")) {
    //No version, assume simple request
    //part after method is URI
    RequestFields.URIStr = CurLine;
    return GH_SIMPLE_REQUEST;
  }

  //URI String (in between End of first and Start of last)
  //<Method> <WhiteSpace> <URI> <WhiteSpace> <Version> <CRLF>
  //                  next^             end^
  //Whitespace is held back in spacebuffer and only copied once another
  //non-space character follows, which removes the trailing space.
  //"<" rather than "!=" so the scan cannot run past the line if next lies
  //beyond end (presumably the case when the request line is a single word
  //- TODO confirm against GetWord/GetLastWord)
  text_t spacebuffer;
  text_t::const_iterator here = next;
  while (here < end) {
    if (*here == ' ' || *here == '\t') {
      spacebuffer.push_back(*here);
    } else {
      if (!spacebuffer.empty()) {
	RequestFields.URIStr += spacebuffer;
	spacebuffer.clear();
      }
      RequestFields.URIStr.push_back(*here);
    }
    ++here;
  }

  //Only accept requests from HTTP/1.X clients (HTTP/0.9 was handled above),
  //we'll assume that anything else will require an upgrade or patch
  if ((RequestFields.VersionStr.size() < 7) || (substr(versionbegin, versionbegin+7) != "HTTP/1.")) {
    return GH_UNKNOWN_VERSION;
  }

  //Get the rest of the lines
  CurLine = NextLine;

  while (!CurLine.empty()) {//Blank Line, we're done
    //Get Next Line, append it if the first character is a space or tab
    for (;;) {
      if (GetLine(NextLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer,
		  RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer,
		  RequestInfo.ThreadNum) != 0)
	return GH_ERROR;
      if (NextLine.empty()) break;
      if ((*(NextLine.begin()) != ' ') && (*(NextLine.begin()) != '\t')) break;
      CurLine += NextLine;
    }

    GetWord(FieldNameStr, CurLine.begin(), CurLine.end(), next);
    uc(FieldNameStr);

    FieldValStr = substr(next, CurLine.end());

    //Process it
    //In order of expected commonality
    //All constants are canonized, thus in upper case, and case sensitive
    //comparisons are used

    //--Just About Always--
    if (FieldNameStr == "ACCEPT:") {
      //Several Accept headers are joined into one comma separated list
      if (!RequestFields.AcceptStr.empty()) {
	RequestFields.AcceptStr += ", ";
      }
      RequestFields.AcceptStr += FieldValStr;
    }
    else if (FieldNameStr == "DATE:") {
      RequestFields.DateStr = FieldValStr;
    }
    else if (FieldNameStr == "USER-AGENT:") {
      RequestFields.UserAgentStr = FieldValStr;
    }
    else if (FieldNameStr == "CONNECTION:") {
      RequestFields.ConnectionStr = FieldValStr;
    }
    //--Sometimes--
    else if (FieldNameStr == "ACCEPT-LANGUAGE:") {
      RequestFields.AcceptLangStr = FieldValStr;
    }
    else if (FieldNameStr == "REFERER:") {
      RequestFields.RefererStr = FieldValStr;
    }
    else if (FieldNameStr == "IF-MODIFIED-SINCE:") {
      RequestFields.IfModSinceStr = FieldValStr;
    }
    //--Uncommon--
    else if (FieldNameStr == "FROM:") {
      RequestFields.FromStr = FieldValStr;
    }
    else if (FieldNameStr == "MIME-VERSION:") {
      RequestFields.MIMEVerStr = FieldValStr;
    }
    else if (FieldNameStr == "PRAGMA:") {
      RequestFields.PragmaStr = FieldValStr;
    }
    //--Special case--
    else if (FieldNameStr == "AUTHORIZATION:") {
      RequestFields.AuthorizationStr = FieldValStr;
    }
    else if (FieldNameStr == "CONTENT-LENGTH:") {
      RequestFields.ContentLengthStr = FieldValStr;
    }
    else if (FieldNameStr == "CONTENT-TYPE:") {
      RequestFields.ContentTypeStr = FieldValStr;
    }
    else if (FieldNameStr == "CONTENT-ENCODING:") {
      RequestFields.ContentEncodingStr = FieldValStr;
    }
    else if (!FieldNameStr.empty()) {
      //Add it to the other headers, unless the table is already full: the
      //number of headers is chosen by the client and must not be allowed
      //to write past the end of OtherHeaders
      if (RequestFields.NumOtherHeaders < MAX_OTHER_HEADERS) {
	//Remove the colon
	if (*(FieldNameStr.end()-1) == ':') {
	  FieldNameStr.pop_back();
	}
	RequestFields.OtherHeaders[RequestFields.NumOtherHeaders].Var = FieldNameStr;
	RequestFields.OtherHeaders[RequestFields.NumOtherHeaders].Val = FieldValStr;
	++RequestFields.NumOtherHeaders;
      }
    }
    CurLine = NextLine;
  }

  if (RequestFields.ContentLengthStr.empty()) { //No attached data
    RequestFields.Content = NULL;
    RequestFields.ContentLength = 0;
    return GH_10_REQUEST;
  }

  RequestFields.ContentLength = RequestFields.ContentLengthStr.getint();
  if (!(RequestFields.ContentLength > 0)) {
    RequestFields.Content = NULL;
    RequestFields.ContentLength = 0;
    return GH_10_REQUEST;
  }

  //Allocate memory
  RequestFields.Content = new BYTE[RequestFields.ContentLength];

  //Get rest of data from get lines: whatever is still unread in the IO
  //buffer is the start of the attached data
  unsigned int NumRecv = DataInBuffer - ReadBufferIndex;
  if (NumRecv > RequestFields.ContentLength) {
    //Overflow, only read what they said they'd send
    NumRecv = RequestFields.ContentLength;
  }
  memcpy(RequestFields.Content, RequestInfo.IOBuffer + ReadBufferIndex,
	 NumRecv);

  //Read the remainder straight from the socket
  while (NumRecv < RequestFields.ContentLength) {
    //Keep the result in a signed variable: adding an error code directly
    //to the unsigned counter would make the error undetectable
    int Received = GetData(RequestInfo.ClientSocket,
			   RequestFields.Content + NumRecv,
			   RequestFields.ContentLength - NumRecv,
			   RequestInfo.ThreadNum);
    //Negative means failure; zero means no progress (presumably the client
    //closed the connection - TODO confirm against GetData) and would
    //otherwise loop forever
    if (Received <= 0) return GH_ERROR;
    NumRecv += Received;
  }

  // It seems to be important on NT that all available data was read
  // from the socket before the socket is closed (otherwise netscape
  // throws a "connection reset by peer" error). Since netscape seems
  // to send a few extra bytes in certain situations we'll make sure we
  // slurp it all up here.  The result is deliberately ignored.
  char DrainBuffer[100];
  //      unsigned long int nonblockmode=1;
  //      ioctlsocket(RequestInfo.ClientSocket, FIONBIO, &nonblockmode);
  d_recv(RequestInfo.ClientSocket, DrainBuffer, (int)sizeof(DrainBuffer), 0);

  return GH_10_REQUEST;
}

/******************************************************************************/
/*
Releases the memory dynamically allocated for a request.

Frees the attached content (RequestFields.Content) and the IO buffer
(RequestInfo.IOBuffer).  Both pointers are set to NULL afterwards, and the
content length to 0, so that calling this function a second time, or
inspecting the structures afterwards, cannot touch freed memory.

Parameters:
	RequestInfo - Request information structure owning the IO buffer
	RequestFields - HTTP request fields structure owning the content
Returns: Nothing
*/
void CleanUpHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields) {
  //Clean up memory allocated for the Content
  //(delete[] on a NULL pointer does nothing, so no check is needed)
  delete[] RequestFields.Content;
  RequestFields.Content = NULL;
  RequestFields.ContentLength = 0;

  // clean up memory allocated for the IOBuffer
  delete[] RequestInfo.IOBuffer;
  RequestInfo.IOBuffer = NULL;
}
