22 #include "dimension.h" 58 DataSpecSchema::operator=(x);
77 void guessFromStream(std::istream& file, uintmax_t fileSize=uintmax_t(-1));
81 std::ifstream is(fileName);
98 void givenTFguessRemainder(std::istream& initialInput, std::istream& remainingInput,
const T& tf, uintmax_t fileSize);
101 void guessRemainder(std::istream& initialInput, std::istream& remainingInput,
char separator, uintmax_t fileSize);
113 template <
class T,
class U>
130 #include "CSVParser.cd" 131 #include "CSVParser.xcd"
DataSpecSchema toSchema()
std::size_t nColAxes() const
start column of the data area
void reportFromCSVFile(istream &input, ostream &output, const DataSpec &spec, uintmax_t fileSize)
creates a report CSV file from input, with errors sorted at the beginning of the file, with a column for error...
void guessFromFile(const std::string &fileName)
initial stab at dataspec from examining file
std::size_t dataRowOffset
const std::vector< size_t > & uniqueValues() const
number of unique values in each column corrected for header row, so may be slightly inaccurate if hea...
std::size_t dataColOffset
void escapeDoubledQuotes(std::string &line, const DataSpec &spec)
replace doubled quotes with escaped quotes
bool processChunk(std::istream &input, const T &tf, size_t until, U &)
process chunk of input, updating guessed spec
Creation and access to the minskyTCL_obj object, which has code to record whenever Minsky's state cha...
void guessRemainder(std::istream &initialInput, std::istream &remainingInput, char separator, uintmax_t fileSize)
figure out the tokenizer function and call givenTFguessRemainder
CLASSDESC_ACCESS(DataSpec)
void loadValueFromCSVFile(VariableValue &v, const vector< string > &filenames, const DataSpec &spec)
load a variableValue from a list of files according to data spec
void givenTFguessRemainder(std::istream &initialInput, std::istream &remainingInput, const T &tf, uintmax_t fileSize)
try to fill in remainder of spec, given a tokenizer function tf eg boost::escaped_list_separator<char...
std::vector< size_t > starts
std::size_t maxColumn
maximum number of columns that can be configured independently. Columns after this limit are treated ...
void toggleDimension(std::size_t c)
std::size_t nRowAxes() const
start row of the data area
DataSpec & operator=(const DataSpecSchema &x)
void stripByteOrderingMarker(std::istream &s)
checks if the input stream starts with the UTF-8 byte order mark (BOM), and removes it if present ...
void guessFromStream(std::istream &file, uintmax_t fileSize=uintmax_t(-1))
initial stab at dataspec from examining stream
std::set< unsigned > dataCols
std::vector< size_t > m_uniqueValues
number of unique values in each column
void setDataArea(std::size_t row, std::size_t col)
set top left cell of the data area
void populateFromRavelMetadata(const std::string &metadata, const std::string &horizontalName, std::size_t row)
populates this spec from a "RavelHypercube" entry, row is the row being read, used to set the headerR...
std::set< unsigned > dimensionCols
rows and columns that are comment lines to be ignored