Solution


/*******************************************************
** Name:        fileparser.cxx
** Author:      Leo Liberti
** Source:      GNU C++
** Purpose:     www exploring topologizer - file parser
**              finds 'parseTag = "value"' or 'parseTag = value'
**              in a file
** History:     060820 work started
*******************************************************/

#include <cctype>
#include <fstream>
#include <iostream>
#include <istream>
#include <iterator>
#include <sstream>
#include <string>
#include "fileparser.h"
#include "wet.h"

// Constants and parser states used by FileParser::parse().
namespace WET {
  // Buffer size limit.
  const int maxBufSize = 1024;

  // States of the tag/value scanner:
  //   outState     - scanning for the parse tag
  //   inTagState   - parse tag seen, waiting for '='
  //   inValueState - reading the value that follows '='
  enum theParsingStatuses {
    outState = 0,
    inTagState = 1,
    inValueState = 2
  };

  // Characters with a special meaning for the scanner.
  const char charCloseTag = '>';
  const char charQuote = '"';
  const char charSpace = ' ';
  const char charNewLine = '\n';
  const char charEqual = '=';
}

// Exception type thrown by FileParser; construction and destruction
// perform no work in these bodies.
FileParserException::FileParserException() {
}

FileParserException::~FileParserException() {
}

// Default constructor: file name and parse tag are left to be supplied
// later through setFileName() / setParseTag().
FileParser::FileParser() {
}

// Builds a parser for file theFileName that looks for tag theParseTag.
FileParser::FileParser(std::string theFileName, std::string theParseTag)
  : fileName(theFileName),
    parseTag(theParseTag) {
}

// Destructor: no explicit cleanup is performed here.
FileParser::~FileParser() {
}

void FileParser::setFileName(std::string theFileName) {
  fileName = theFileName;
}

std::string FileParser::getFileName(void) const {
  return fileName;
}

void FileParser::setParseTag(std::string theParseTag) {
  parseTag = theParseTag;
}

std::string FileParser::getParseTag(void) const {
  return parseTag;
}

// Scans the file named by fileName for every occurrence of parseTag
// (matched case-insensitively) followed by '=' and a value, and appends
// each value found to parsedString.
//
// A value is either
//   - quoted:   everything between the opening '"' and the next '"', or
//   - unquoted: everything up to the next space, newline, '>' or end of file.
// Spaces and newlines between '=' and the value are skipped (tabs and
// carriage returns are not treated as whitespace).
//
// Throws FileParserException (after printing a message on cerr) when
//   - the file cannot be opened,
//   - parseTag is empty,
//   - the file ends right after a '=' that follows the tag.
// A quoted value whose closing quote is missing at end of file is
// silently discarded.
//
// NOTE(review): once the tag has been seen, every character up to the next
// '=' is ignored, so a tag occurrence that is not followed by '=' binds to
// the next '=' found anywhere later in the file. This matches the previous
// behaviour; confirm whether it is intended.
void FileParser::parse(void) throw(FileParserException) {
  using namespace std;
  using namespace WET;
  ifstream is(fileName.c_str());
  // verify that file fileName can be opened
  if (!is) {
    cerr << "FileParser::parse(): cannot open file " << fileName << endl;
    throw FileParserException();
  }
  // verify that parseTag is initialized
  if (parseTag.empty()) {
    cerr << "FileParser::parse(): parse tag not initialized" << endl;
    throw FileParserException();
  }

  // In outState: sliding window over the last parseTag.size() characters.
  // In inValueState: the characters of the value collected so far.
  string window;
  int parseStatus = outState;
  bool openQuote = false;
  char nextChar;

  // Loop on the success of the read itself: testing eof() before reading
  // would process the last character of the file twice.
  while (is.get(nextChar)) {

    if (parseStatus == outState) {

      // generic position in file, just look for parseTag
      window += nextChar;
      if (window.size() > parseTag.size()) {
        // window exceeds parseTag's size, drop the oldest character
        window.erase(0, 1);
      }
      if (isTailCaseInsensitive(window, parseTag)) {
        // parseTag found, change state
        parseStatus = inTagState;
        window.clear();
      }

    } else if (parseStatus == inTagState) {

      // we have already found a parseTag, look for '='
      if (nextChar == charEqual) {
        // found '=', skip spaces and newlines; stop as soon as a read
        // fails so that a file ending in whitespace cannot loop forever
        do {
          is.get(nextChar);
        } while (is && (nextChar == charSpace || nextChar == charNewLine));
        if (!is) {
          // if eof reached, malformed tag, abort
          cerr << "FileParser::parse(): EOF reached before tag finished"
               << endl;
          throw FileParserException();
        }
        // found first character of value: push it back so that the value
        // state sees it, whether it is an opening quote or the first
        // character of an unquoted value
        is.putback(nextChar);
        parseStatus = inValueState;
      }

    } else if (parseStatus == inValueState) {

      // reading the value
      const bool isQuote = (nextChar == charQuote);
      const bool isUnquotedEnd = !openQuote &&
        (nextChar == charSpace || nextChar == charNewLine ||
         nextChar == charCloseTag);
      if (!openQuote && isQuote) {
        // value is in quotes
        openQuote = true;
      } else if ((openQuote && isQuote) || isUnquotedEnd) {
        // found either closing quote or closing space/newline/'>':
        // the window contains the value, record it
        parsedString.push_back(window);
        window.clear();
        openQuote = false;
        // change state
        parseStatus = outState;
      } else {
        // the current character is part of the value
        window += nextChar;
      }
    }
  }

  // End of file also terminates an unquoted value (e.g. a file without a
  // trailing newline); an unterminated quoted value is dropped.
  if (parseStatus == inValueState && !openQuote) {
    parsedString.push_back(window);
  }
}

// Returns how many values are currently stored in parsedString.
int FileParser::getNumberOfParsedStrings(void) const {
  const int count = parsedString.size();
  return count;
}

// Returns the i-th stored value (0-based).
// Prints a message on cerr and throws FileParserException when i is
// negative or not smaller than the number of stored values.
std::string FileParser::getParsedString(int i) const 
  throw(FileParserException) {
  const bool indexIsValid = (i >= 0) && (i < parsedString.size());
  if (indexIsValid) {
    return parsedString[i];
  }
  std::cerr << "FileParser::getParsedString(" << i 
            << "): counter out of bounds" << std::endl;
  throw FileParserException();
}
 
// Three-way, case-insensitive comparison of s1 and s2.
//
// Characters are compared pairwise after upper-casing; the first differing
// pair decides the result. If one string is a (case-insensitive) prefix of
// the other, the shorter string compares as smaller.
//
// Returns -1 if s1 < s2, 1 if s1 > s2, 0 if they are equal ignoring case.
int FileParser::compareCaseInsensitive(const std::string& s1, 
                                       const std::string& s2) const {
  std::string::const_iterator p1 = s1.begin();
  std::string::const_iterator p2 = s2.begin();
  for (; p1 != s1.end() && p2 != s2.end(); ++p1, ++p2) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF); converting first avoids undefined behaviour for bytes with
    // the high bit set on platforms where char is signed.
    const int c1 = std::toupper(static_cast<unsigned char>(*p1));
    const int c2 = std::toupper(static_cast<unsigned char>(*p2));
    if (c1 < c2) {
      return -1;
    }
    if (c1 > c2) {
      return 1;
    }
  }
  // common prefix is equal: the shorter string sorts first
  if (s1.size() < s2.size()) {
    return -1;
  }
  if (s1.size() > s2.size()) {
    return 1;
  }
  return 0;
}
 
// Returns true when s1 ends with s2, ignoring case; false when s1 is
// shorter than s2 or its last s2.size() characters differ from s2.
bool FileParser::isTailCaseInsensitive(const std::string& s1, 
                                       const std::string& s2) const {
  if (s1.size() < s2.size()) {
    // s1 is too short to contain s2 as a suffix
    return false;
  }
  const int tailLength = s2.size();
  const std::string tail = s1.substr(s1.size() - tailLength, tailLength);
  return compareCaseInsensitive(tail, s2) == 0;
}



Leo Liberti 2008-01-12