/*******************************************************
** Name: fileparser.cxx
** Author: Leo Liberti
** Source: GNU C++
** Purpose: www exploring topologizer - file parser
** finds 'parseTag = "value"' or 'parseTag = value'
** in a file
** History: 060820 work started
*******************************************************/
#include<iostream>
#include<fstream>
#include<istream>
#include<sstream>
#include<iterator>
#include "fileparser.h"
#include "wet.h"
namespace WET {

// Buffer size limit; not referenced by the code visible in this file.
const int maxBufSize = 1024;

// States of the character scanner used by FileParser::parse().
enum theParsingStatuses {
  outState,      // scanning the file for the parse tag
  inTagState,    // parse tag seen, waiting for '='
  inValueState   // '=' seen, collecting the value
};

// Characters the scanner treats specially.
const char charEqual = '=';
const char charQuote = '\"';
const char charSpace = ' ';
const char charNewLine = '\n';
const char charCloseTag = '>';

}  // namespace WET
// Default constructor: performs no work.
FileParserException::FileParserException()
{
}
// Destructor: performs no work.
FileParserException::~FileParserException()
{
}
// Default constructor: fileName and parseTag are left default-constructed;
// parse() rejects an empty parseTag, so setParseTag() must be called first.
FileParser::FileParser()
{
}
// Construct a parser for the given file name and parse tag.
// Nothing is opened or read here; the file is only accessed by parse().
FileParser::FileParser(std::string theFileName, std::string theParseTag)
  : fileName(theFileName),
    parseTag(theParseTag)
{
}
// Destructor: performs no explicit work.
FileParser::~FileParser()
{
}
void FileParser::setFileName(std::string theFileName) {
fileName = theFileName;
}
std::string FileParser::getFileName(void) const {
return fileName;
}
void FileParser::setParseTag(std::string theParseTag) {
parseTag = theParseTag;
}
std::string FileParser::getParseTag(void) const {
return parseTag;
}
/*
 * Scan the file named by fileName for every occurrence of parseTag (matched
 * case-insensitively) that is followed by '=' and a value, and append each
 * value to parsedString.  Values found by earlier calls are not cleared.
 *
 * Recognised forms (spaces/newlines after '=' are skipped):
 *   parseTag = "value"   - value runs up to the closing quote
 *   parseTag = value     - value runs up to a space, a newline, '>' or EOF
 *
 * Throws FileParserException (after printing a message on cerr) when the file
 * cannot be opened, when parseTag is empty, or when the file ends right after
 * a "parseTag =" with no value.  A quoted value whose closing quote is missing
 * is silently dropped.
 *
 * NOTE(review): once the tag has been seen, every character up to the next
 * '=' is ignored, however far away that '=' is.
 * NOTE(review): the dynamic exception specification is kept because it
 * presumably has to match the declaration in fileparser.h; such
 * specifications were removed from the language in C++17.
 */
void FileParser::parse(void) throw(FileParserException) {
  using namespace std;
  using namespace WET;

  ifstream is(fileName.c_str());
  // verify that file fileName can be opened
  if (!is) {
    cerr << "FileParser::parse(): cannot open file " << fileName << endl;
    throw FileParserException();
  }
  // verify that parseTag is initialized
  if (parseTag.empty()) {
    cerr << "FileParser::parse(): parse tag not initialized" << endl;
    throw FileParserException();
  }

  string window;               // last parseTag.size() characters read in outState
  string value;                // value currently being collected
  int parseStatus = outState;
  bool openQuote = false;      // true while inside a quoted value
  char nextChar;

  // Loop on the success of the read itself: testing eof() before reading
  // would process the last character a second time after the final get() fails.
  while (is.get(nextChar)) {
    if (parseStatus == outState) {
      // generic position in file: slide a window of parseTag.size()
      // characters over the input and compare it with parseTag
      window += nextChar;
      if (window.size() > parseTag.size()) {
        window.erase(0, 1);
      }
      if (isTailCaseInsensitive(window, parseTag)) {
        // parseTag found, change state
        parseStatus = inTagState;
        window.clear();
      }
    } else if (parseStatus == inTagState) {
      // we have already found a parseTag, look for '='
      if (nextChar == charEqual) {
        // found '=', skip spaces and newlines; the stream state is part of
        // the loop condition because a failed get() leaves nextChar unchanged
        // and would otherwise spin forever on trailing whitespace
        do {
          is.get(nextChar);
        } while (is && (nextChar == charSpace || nextChar == charNewLine));
        if (!is) {
          // if eof reached, malformed tag, abort
          cerr << "FileParser::parse(): EOF reached before tag finished"
               << endl;
          throw FileParserException();
        }
        // found first character of value: push it back so that the value
        // state sees it, whether it is an opening quote or already part of
        // an unquoted value
        parseStatus = inValueState;
        is.putback(nextChar);
      }
    } else if (parseStatus == inValueState) {
      // reading the value
      if (!openQuote && nextChar == charQuote) {
        // value is in quotes
        openQuote = true;
      } else if ((openQuote && nextChar == charQuote) ||
                 (!openQuote &&
                  (nextChar == charSpace || nextChar == charNewLine ||
                   nextChar == charCloseTag))) {
        // found either the closing quote or a space/newline/'>' terminator:
        // the value is complete, record it and go back to scanning
        parsedString.push_back(value);
        value.clear();
        openQuote = false;
        parseStatus = outState;
      } else {
        // the current character is part of the value
        value += nextChar;
      }
    }
  }

  // An unquoted value may be terminated by the end of the file.
  if (parseStatus == inValueState && !openQuote) {
    parsedString.push_back(value);
  }
}
// Return how many values are currently stored in parsedString.
int FileParser::getNumberOfParsedStrings(void) const
{
  return static_cast<int>(parsedString.size());
}
/*
 * Return a copy of the i-th entry of parsedString (0-based).
 *
 * Throws FileParserException (after printing a message on cerr) when i is
 * negative or not smaller than the number of stored entries.
 */
std::string FileParser::getParsedString(int i) const
  throw(FileParserException) {
  // Test the sign first, so the cast to unsigned below is value-preserving
  // and the check no longer relies on negative values wrapping around.
  if (i < 0 || static_cast<unsigned int>(i) >= parsedString.size()) {
    std::cerr << "FileParser::getParsedString(" << i
              << "): counter out of bounds" << std::endl;
    throw FileParserException();
  }
  return parsedString[i];
}
/*
 * Three-way, case-insensitive comparison of s1 and s2.
 *
 * Characters are compared pairwise after conversion with toupper().  At the
 * first difference the result is -1 if s1's character is smaller and 1 if it
 * is larger.  If one string is a prefix of the other, the shorter string
 * compares smaller.  Returns 0 when the strings are equal ignoring case.
 */
int FileParser::compareCaseInsensitive(const std::string& s1,
                                       const std::string& s2) const {
  using namespace std;
  string::const_iterator p1 = s1.begin();
  string::const_iterator p2 = s2.begin();
  for (; p1 != s1.end() && p2 != s2.end(); ++p1, ++p2) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF); a plain char may be negative for non-ASCII bytes, so go
    // through unsigned char first.
    const int c1 = toupper(static_cast<unsigned char>(*p1));
    const int c2 = toupper(static_cast<unsigned char>(*p2));
    if (c1 < c2) {
      return -1;
    }
    if (c1 > c2) {
      return 1;
    }
  }
  // the common prefix is equal: the shorter string sorts first
  if (s1.size() < s2.size()) {
    return -1;
  }
  if (s1.size() > s2.size()) {
    return 1;
  }
  return 0;
}
/*
 * Return true when s1 ends with s2, ignoring case (as decided by
 * compareCaseInsensitive()).  An s1 shorter than s2 never matches.
 */
bool FileParser::isTailCaseInsensitive(const std::string& s1,
                                       const std::string& s2) const {
  const std::string::size_type tailLength = s2.size();
  if (s1.size() < tailLength) {
    return false;
  }
  // compare the last tailLength characters of s1 against s2
  const std::string tail = s1.substr(s1.size() - tailLength, tailLength);
  return compareCaseInsensitive(tail, s2) == 0;
}