/**************************************************************************
 *
 * TextEl.cpp -- Data structures for parsed documents
 * Copyright (C) 1999  Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **************************************************************************/

#include "TextEl.h"
#include "ctype.h"

// Control bytes recognised by ReadTextEl when compatMode is set:
// REC_TERM is reported as a closing "Document" tag and PARA_TERM
// opens or closes a "Paragraph" tag.  The same bytes are stored as
// the text of the synthesised closing tags.
#define REC_TERM   '\002'
#define PARA_TERM  '\003'


void TextEl::Clear () {
  elType = TextE;
  tagName.erase (tagName.begin(), tagName.end());
  text.erase (text.begin(), text.end());
}


// Read one "<...>" tag from buf into el.
//
// Every character consumed is appended to el.text; the leading run of
// alphanumeric characters after '<' (or after "</") is also appended to
// el.tagName.  el.elType becomes OpenTagE or CloseTagE, or TextE when
// the tag turns out to have no name.
//
// Returns false when the next character is not '<' or when the input
// runs out before the closing '>' has been seen; true otherwise.
static bool ReadTextTag (FileBuf &buf, TextEl &el) {
  unsigned char ch;

  // a tag has to begin with '<'
  if (!buf.Peek (ch) || ch != '<') return false;
  el.text.push_back (ch);
  if (!(buf.Next() && buf.Peek (ch))) return false;

  // a '/' directly after the '<' marks a closing tag
  const bool isClosing = (ch == '/');
  if (isClosing) {
    el.text.push_back (ch);
    if (!(buf.Next() && buf.Peek (ch))) return false;
  }
  el.elType = isClosing ? CloseTagE : OpenTagE;

  // the tag name is the run of alphanumerics that follows
  while (isalnum (ch)) {
    el.tagName.push_back (ch);
    el.text.push_back (ch);
    if (!(buf.Next() && buf.Peek (ch))) return false;
  }

  // attributes and anything else before the '>' are kept as raw text
  for (; ch != '>'; ) {
    el.text.push_back (ch);
    if (!(buf.Next() && buf.Peek (ch))) return false;
  }

  // store the '>' itself and step past it
  el.text.push_back (ch);
  buf.Next();

  // a nameless tag is handed back as ordinary text
  if (el.tagName.empty()) {
    el.elType = TextE;
  }
  return true;
}

// Read one "<...>" tag from a NUL-terminated byte buffer into el.
//
// buffer  in/out: *buffer is the current read position.  On success it
//         is advanced to the character after the closing '>'.  If the
//         terminating '\0' is reached first, *buffer is left pointing
//         at that '\0'.  If the first character is not '<', *buffer is
//         not modified.
// el      receives the raw tag text in el.text, the leading run of
//         alphanumeric characters in el.tagName, and OpenTagE or
//         CloseTagE in el.elType (TextE if the tag has no name).
//
// Returns true when a complete tag up to and including '>' was read,
// false otherwise.  This function writes nothing to stdout.
static bool ReadTextTag (u_char ** buffer, TextEl &el) {
  u_char * buffer_pos = *buffer;

  // we must have some sort of tag (this also rejects an empty buffer)
  if (*buffer_pos != '<') {
    return false;
  }

  // get the '<'
  el.text.push_back ('<');
  buffer_pos++;
  if (*buffer_pos == '\0') {
    *buffer = buffer_pos;
    return false;
  }

  // see if we have a '/'
  if (*buffer_pos == '/') {
    // we have a closing tag
    el.text.push_back ('/');
    el.elType = CloseTagE;
    buffer_pos++;
    if (*buffer_pos == '\0') {
      *buffer = buffer_pos;
      return false;
    }
  } else {
    // we have an opening tag
    el.elType = OpenTagE;
  }

  // get the tag name
  while (isalnum (*buffer_pos)) {
    el.text.push_back (*buffer_pos);
    el.tagName.push_back (*buffer_pos);
    buffer_pos++;
    if (*buffer_pos == '\0') {
      *buffer = buffer_pos;
      return false;
    }
  }

  // get everything up to the closing '>'
  while (*buffer_pos != '>') {
    el.text.push_back (*buffer_pos);
    buffer_pos++;
    if (*buffer_pos == '\0') {
      *buffer = buffer_pos;
      return false;
    }
  }

  // get the closing '>'
  el.text.push_back ('>');
  buffer_pos++;
  *buffer = buffer_pos;

  // if there is no tag name then make this a text element
  if (el.tagName.empty()) el.elType = TextE;

  return true;
}

// Turn el into a synthetic "Paragraph" tag and flip compatInPara.
//
// When a paragraph was open on entry, el becomes a closing tag whose
// text is the single PARA_TERM byte; otherwise el becomes an opening
// tag with empty text.
static void ToggleParaTag (TextEl &el, bool &compatInPara) {
  const bool wasInPara = compatInPara;

  SetCStr (el.tagName, "Paragraph", 9);
  el.text.erase (el.text.begin(), el.text.end());

  el.elType = wasInPara ? CloseTagE : OpenTagE;
  if (wasInPara) {
    el.text.push_back (PARA_TERM);
  }

  compatInPara = !wasInPara;
}

// Turn el into a synthetic "Document" tag of the requested type.
// The text is emptied; a closing tag then gets the single REC_TERM
// byte as its text.
static void SetRecTag (TextEl &el, TextElType elType) {
  SetCStr (el.tagName, "Document", 8);
  el.text.erase (el.text.begin(), el.text.end());
  el.elType = elType;

  if (elType != CloseTagE) return;
  el.text.push_back (REC_TERM);
}


// Read the next element (a tag or a run of text) from buf into el.
//
// el is cleared first.  With compatMode set, PARA_TERM and REC_TERM
// bytes are turned into synthetic "Paragraph" / "Document" tags and
// compatInPara tracks whether a paragraph is currently open; some
// calls return a synthetic tag without consuming any input.
//
// Returns false when nothing can be peeked from buf, or when a tag
// that starts here cannot be read completely; true otherwise.
bool ReadTextEl (FileBuf &buf, TextEl &el,
		 bool compatMode, bool &compatInPara) {
  el.Clear();

  unsigned char ch;
  if (!buf.Peek (ch)) return false;

  if (compatMode) {
    switch (ch) {
    case PARA_TERM:
      // flip the paragraph state; the terminator is consumed only
      // when this toggle closed the paragraph
      ToggleParaTag (el, compatInPara);
      if (!compatInPara) buf.Next();
      return true;

    case REC_TERM:
      if (!compatInPara) {
        // nothing left open, so the document itself can be closed
        buf.Next();
        SetRecTag (el, CloseTagE);
      } else {
        // an open paragraph has to be closed first; the terminator
        // stays in the buffer for the next call
        ToggleParaTag (el, compatInPara);
      }
      return true;

    default:
      if (!compatInPara) {
        // ordinary content outside a paragraph: open one first
        ToggleParaTag (el, compatInPara);
        return true;
      }
      break;
    }
  }

  // a '<' hands over to the tag reader
  if (ch == '<') return ReadTextTag (buf, el);

  // otherwise gather plain text up to the next tag, the next
  // compat-mode terminator, or the end of the input
  el.elType = TextE;
  for (;;) {
    const bool atTerminator =
      compatMode && (ch == PARA_TERM || ch == REC_TERM);
    if (ch == '<' || atTerminator) break;

    el.text.push_back (ch);
    if (!buf.Next() || !buf.Peek (ch)) break;
  }

  return true;
}
// Read the next element (a tag or a run of text) from a
// NUL-terminated byte buffer into el.
//
// buffer        in/out: *buffer is the current read position and is
//               advanced past whatever this call consumed.
// el            cleared first, then filled with the element read.
// compatMode    when set, PARA_TERM and REC_TERM bytes are turned
//               into synthetic "Paragraph" / "Document" tags; some
//               calls then return a synthetic tag without consuming
//               any input.
// compatInPara  in/out: whether a compat-mode paragraph is open.
//
// Returns false when *buffer already points at the terminating '\0',
// or when a tag that starts here cannot be read completely; true
// otherwise.  This function writes nothing to stdout.
bool ReadTextEl (u_char **buffer, TextEl &el,
		 bool compatMode, bool &compatInPara) {
  el.Clear();

  u_char * buffer_pos = *buffer;
  if (*buffer_pos == '\0') return false;

  if (compatMode) {
    if (*buffer_pos == PARA_TERM) {
      // the terminator is consumed only when the toggle closed the
      // paragraph
      ToggleParaTag (el, compatInPara);
      if (!compatInPara) buffer_pos++;
      *buffer = buffer_pos;
      return true;
    }
    if (*buffer_pos == REC_TERM) {
      if (compatInPara) {
	// need to close this paragraph; the terminator stays in the
	// buffer for the next call
	ToggleParaTag (el, compatInPara);
	*buffer = buffer_pos;
	return true;
      }
      // can close this document
      buffer_pos++;
      SetRecTag (el, CloseTagE);
      *buffer = buffer_pos;
      return true;
    }
    if (!compatInPara) {
      // need to open a paragraph
      ToggleParaTag (el, compatInPara);
      *buffer = buffer_pos;
      return true;
    }
  }

  // see if we have some sort of tag (*buffer has not been moved yet,
  // so the tag reader starts at the same position)
  if (*buffer_pos == '<') return ReadTextTag (buffer, el);

  // read in a text element
  el.elType = TextE;
  while (*buffer_pos != '<' &&
	 !(compatMode && (*buffer_pos == PARA_TERM || *buffer_pos == REC_TERM))) {
    el.text.push_back (*buffer_pos);
    buffer_pos++;
    if (*buffer_pos == '\0') break;
  }
  *buffer = buffer_pos;

  return true;
}
  
// Append a copy of el to doc and add the size of el.text to docLen.
static void AddTextEl (TextElArray &doc, mg_u_long &docLen,
		       const TextEl &el) {
  docLen += el.text.size();
  doc.push_back (el);
}


// Read one document from buf into doc as a sequence of elements.
//
// doc is emptied and docLen reset to 0 first; docLen then accumulates
// the text size of every element appended.
//
// Without compatMode, elements are skipped until an opening docTag is
// found, then collected up to and including the matching closing
// docTag.  False is returned if the input ends before the opening tag;
// if it ends before the closing tag, true is returned with what was
// collected so far.
//
// With compatMode, the document starts with synthetic <Document> and
// <Paragraph> tags.  If no element at all could be read, doc is emptied
// and false is returned; if the input ends before a closing docTag, the
// open paragraph (if any) and the document are closed synthetically.
bool ReadDoc (FileBuf &buf, const UCArray &docTag,
	      TextElArray &doc, mg_u_long &docLen,
	      bool compatMode) {
  TextEl el;
  bool compatInPara = false;

  docLen = 0;
  doc.erase (doc.begin(), doc.end());

  if (!compatMode) {
    // skip ahead to the opening docTag
    for (;;) {
      if (!ReadTextEl (buf, el, compatMode, compatInPara)) return false;
      if (el.elType == OpenTagE && el.tagName == docTag) break;
    }
    AddTextEl (doc, docLen, el);

  } else {
    // start with <Document><Paragraph>
    SetRecTag (el, OpenTagE);
    AddTextEl (doc, docLen, el);
    ToggleParaTag (el, compatInPara);
    AddTextEl (doc, docLen, el);
  }

  // collect elements up to and including the closing docTag
  bool foundDocEl = false;
  while (ReadTextEl (buf, el, compatMode, compatInPara)) {
    foundDocEl = true;
    AddTextEl (doc, docLen, el);

    const bool isDocClose = (el.elType == CloseTagE && el.tagName == docTag);
    if (isDocClose) return true;
  }

  // the input ran out before a closing docTag was seen
  if (!compatMode) return true;

  if (!foundDocEl) {
    // end of text: there was no further document
    doc.erase (doc.begin(), doc.end());
    return false;
  }

  // finish off whatever is still open
  if (compatInPara) {
    ToggleParaTag (el, compatInPara);
    AddTextEl (doc, docLen, el);
  }
  SetRecTag (el, CloseTagE);
  AddTextEl (doc, docLen, el);

  return true;
}

// Buffer counterpart of the FileBuf ReadDoc: read one document from a
// NUL-terminated byte buffer into doc as a sequence of elements.
// *buffer is advanced as elements are consumed.
//
// doc is emptied and docLen reset to 0 first; docLen then accumulates
// the text size of every element appended.
//
// Without compatMode, elements are skipped until an opening docTag is
// found, then collected up to and including the matching closing
// docTag.  False is returned if the buffer ends before the opening tag;
// if it ends before the closing tag, true is returned with what was
// collected so far.
//
// With compatMode, the document starts with synthetic <Document> and
// <Paragraph> tags.  If no element at all could be read, doc is emptied
// and false is returned; if the buffer ends before a closing docTag,
// the open paragraph (if any) and the document are closed synthetically.
bool ReadDoc(u_char ** buffer, const UCArray &docTag, 
	     TextElArray &doc, mg_u_long &docLen,
	     bool compatMode) {
  TextEl el;
  bool compatInPara = false;

  docLen = 0;
  doc.erase (doc.begin(), doc.end());

  if (!compatMode) {
    // skip ahead to the opening docTag
    for (;;) {
      if (!ReadTextEl (buffer, el, compatMode, compatInPara)) return false;
      if (el.elType == OpenTagE && el.tagName == docTag) break;
    }
    AddTextEl (doc, docLen, el);

  } else {
    // start with <Document><Paragraph>
    SetRecTag (el, OpenTagE);
    AddTextEl (doc, docLen, el);
    ToggleParaTag (el, compatInPara);
    AddTextEl (doc, docLen, el);
  }

  // collect elements up to and including the closing docTag
  bool foundDocEl = false;
  while (ReadTextEl (buffer, el, compatMode, compatInPara)) {
    foundDocEl = true;
    AddTextEl (doc, docLen, el);

    const bool isDocClose = (el.elType == CloseTagE && el.tagName == docTag);
    if (isDocClose) return true;
  }

  // the buffer ran out before a closing docTag was seen
  if (!compatMode) return true;

  if (!foundDocEl) {
    // end of text: there was no further document
    doc.erase (doc.begin(), doc.end());
    return false;
  }

  // finish off whatever is still open
  if (compatInPara) {
    ToggleParaTag (el, compatInPara);
    AddTextEl (doc, docLen, el);
  }
  SetRecTag (el, CloseTagE);
  AddTextEl (doc, docLen, el);

  return true;
}


