/******************************************************* ** Name: fileparser.cxx ** Author: Leo Liberti ** Source: GNU C++ ** Purpose: www exploring topologizer - file parser ** finds 'parseTag = "value"' or 'parseTag = value' ** in a file ** History: 060820 work started *******************************************************/ #include<iostream> #include<fstream> #include<istream> #include<sstream> #include<iterator> #include "fileparser.h" #include "wet.h" namespace WET { const int maxBufSize = 1024; enum theParsingStatuses { outState, inTagState, inValueState }; const char charCloseTag = '>'; const char charQuote = '\"'; const char charSpace = ' '; const char charNewLine = '\n'; const char charEqual = '='; }; FileParserException::FileParserException() { } FileParserException::~FileParserException() { } FileParser::FileParser() { } FileParser::FileParser(std::string theFileName, std::string theParseTag) : fileName(theFileName), parseTag(theParseTag) { } FileParser::~FileParser() { } void FileParser::setFileName(std::string theFileName) { fileName = theFileName; } std::string FileParser::getFileName(void) const { return fileName; } void FileParser::setParseTag(std::string theParseTag) { parseTag = theParseTag; } std::string FileParser::getParseTag(void) const { return parseTag; } void FileParser::parse(void) throw(FileParserException) { using namespace std; using namespace WET; ifstream is(fileName.c_str()); // verify that file fileName can be opened if (!is) { cerr << "FileParser::parse(): cannot open file " << fileName << endl; throw FileParserException(); } // verify that parseTag is initialized if (parseTag.size() == 0) { cerr << "FileParser::parse(): parse tag not initialized" << endl; throw FileParserException(); } // local data necessary to parse the tag stringstream buffer; int buffersize = 0; char nextChar; string nextString; int parseStatus = outState; bool openQuote = false; string tmpString; // start parsing up to end of file while(!is.eof()) { is.get(nextChar); if (parseStatus == outState) { // generic position in file, just look for parseTag buffer << nextChar; buffersize++; if (buffersize > parseTag.size()) { // buffer exceeds parseTag's size, shift left all characters tmpString = buffer.str().substr(1, buffer.str().npos); buffer.str(""); buffer << tmpString; buffersize--; } if (isTailCaseInsensitive(buffer.str(), parseTag)) { // parseTag found, change state parseStatus = inTagState; buffer.str(""); } } else if (parseStatus == inTagState) { // we have already found a parseTag, look for '=' if (nextChar == charEqual) { // found '=', skip spaces and newlines is.get(nextChar); while(nextChar == charSpace || nextChar == charNewLine) { is.get(nextChar); } if (is.eof()) { // if eof reached, malformed tag, abort cerr << "FileParser::parse(): EOF reached before tag finished" << endl; throw FileParserException(); } else { // found first character of value, change state parseStatus = inValueState; if (nextChar == charQuote) { // if first char of value is an open quote then push the char // back onto the stream (it will be treated later) is.putback(nextChar); } } } } else if (parseStatus == inValueState) { // reading the value if (!openQuote && nextChar == charQuote) { // value is in quotes openQuote = true; } else if ((openQuote && nextChar == charQuote) || (!openQuote && (nextChar == charSpace || nextChar == charNewLine || nextChar == charCloseTag))) { // found either closing quote or closing space/newline tag // the buffer contains the value, record this parsedString.push_back(buffer.str()); buffer.str(""); buffersize = 0; openQuote = false; // change state parseStatus = outState; } else { // the current token is part of the value buffer << nextChar; } } } } int FileParser::getNumberOfParsedStrings(void) const { return parsedString.size(); } std::string FileParser::getParsedString(int i) const throw(FileParserException) { using namespace std; if (i >= parsedString.size() || i < 0) { cerr << "FileParser::getParsedString(" << i << "): counter out of bounds" << endl; throw FileParserException(); } return parsedString[i]; } int FileParser::compareCaseInsensitive(const std::string& s1, const std::string& s2) const { using namespace std; string::const_iterator p1 = s1.begin(); string::const_iterator p2 = s2.begin(); while(p1 != s1.end() && p2 != s2.end()) { if (toupper(*p1) < toupper(*p2)) { return -1; } else if (toupper(*p1) > toupper(*p2)) { return 1; } p1++; p2++; } if (s1.size() < s2.size()) { return -1; } else if (s1.size() > s2.size()) { return 1; } return 0; } bool FileParser::isTailCaseInsensitive(const std::string& s1, const std::string& s2) const { using namespace std; int s2len = s2.size(); if (s1.size() >= s2.size() && compareCaseInsensitive(s1.substr(s1.size() - s2len, s2len), s2) == 0) { return true; } return false; }