23 #include "CSVParser.rcd"    24 #include "dataSpecSchema.rcd"    25 #include "dimension.rcd"    30 #include <memoryapi.h>    34 #include <sys/resource.h>    40 #include <boost/type_traits.hpp>    41 #include <boost/tokenizer.hpp>    42 #include <boost/token_functions.hpp>    43 #include <boost/pool/pool.hpp>    49             class Traits = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
    58         return Traits::eq(e_,c);
    68       return std::find_if(escape_.begin(),escape_.end(),
f)!=escape_.end();
    72       return std::find_if(c_.begin(),c_.end(),
f)!=c_.end();
    76       return std::find_if(quote_.begin(),quote_.end(),
f)!=quote_.end();
    78     template <
typename iterator, 
typename Token>
    79     void do_escape(iterator& next,iterator end,Token& tok) {
    83       if (Traits::eq(*next,
'n')) {
    87       if (is_quote(*next)) {
    95       if (is_escape(*next)) {
   100       tok+=escape_.front()+*next;
   106                                   Char c = 
',',Char  q = 
'\"')
   107       : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { }
   110       : escape_(e), c_(c), quote_(q), last_(false) { }
   114     template <
typename InputIterator, 
typename Token>
   115     bool operator()(InputIterator& next,InputIterator end,Token& tok) {
   116       bool bInQuote = 
false;
   129         if (is_escape(*next)) {
   130           do_escape(next,end,tok);
   132         else if (is_c(*next)) {
   143         else if (is_quote(*next)) {
   163     escape(escape), quote(quote) {}
   173         else if (*next==quote)
   175         else if (!quoted && isspace(*next))
   177             while (isspace(*next)) ++next;
   195     Any(
const any& x): any(x), hash(x.hash()) {}
   196     bool operator<(
const Any& x)
 const {
return static_cast<const any&
>(*this)<x;}
   197     bool operator==(
const Any& x)
 const {
return static_cast<const any&
>(*this)==
static_cast<const any&
>(x);}
   210       auto i=tokens.find(x);
   213           i=tokens.emplace(x, tokenRefs.size()).first;
   214           tokenRefs.push_back(&(i->first));
   219       if (i<tokenRefs.size()) 
return *tokenRefs[i];
   225   using Key=vector<SliceLabelToken, LibCAllocator<SliceLabelToken>>;
   226   template <
class V> 
using Map=map<Key,V,less<Key>,LibCAllocator<pair<const Key,V>>>;
   230     const char* 
what() const noexcept
 override {
return "No data columns specified\nIf dataset has no data, try selecting counter";}
   235     std::string msg=
"Duplicate key";
   240       msg+=
"\nTry selecting a different duplicate key action";
   242     const char* 
what() const noexcept
 override {
return msg.c_str();}
   251     InvalidData(
const string& data, 
const string& type,
const string& colName):
   252       data(data), type(type), colName(colName)
   253     {msg=
"Invalid data: "+data+
" for "+type+
" dimensioned column: "+colName;}
   254     const char* 
what() const noexcept
 override {
return msg.c_str();}
   259     std::string msg=
"Short line";
   264     const char* 
what() const noexcept
 override {
return msg.c_str();}
   269     const char* 
what() const noexcept
 override {
return "exhausted memory - try reducing the rank";}
   275     if (!s.empty() && s[0]==s[s.size()-1] && !isalnum(s[0]))
   277         const double r=
quotedStoD(s.substr(1,s.size()-2),charsProcd);
   281     if (s.empty()) 
return nan(
""); 
   284       const double r=stod(s,&charsProcd);
   285       if (charsProcd==s.size())
   290     auto n=s.find_first_of(
"0123456789,.+-");
   291     return stod(s.substr(n),&charsProcd);
   298       if (!isspace(c) && c!=
',' && c!=
'.')
   308     for (
size_t i=0; i<v.size(); ++i)
   330     for (
size_t i=start; i<v.size(); ++i)
   331       if (!v[i].empty()) 
return false;
   346     catch (...) {
return false;}
   347     return charsProcd==stripped.size();
   359   struct hash<vector<T>>
   363       for (
auto& i: x) r^=std::hash<T>()(i);
   373   m_nColAxes=std::min(col, maxColumn);
   374   numCols=std::max(numCols, m_nColAxes);
   376     headerRow=row>0? row-1: 0;
   377   if (dimensions.size()<nColAxes()) dimensions.resize(nColAxes());
   378   if (dimensionNames.size()<nColAxes()) dimensionNames.resize(nColAxes());
   380   dimensionCols.erase(dimensionCols.lower_bound(nColAxes()), dimensionCols.end());
   382   for (
unsigned i=0; i<m_nColAxes; ++i)
   384   for (
unsigned i=m_nColAxes; i<numCols && i<maxColumn; ++i)
   389 template <
class TokenizerFunction>
   395   firstEmpty=numeric_limits<size_t>::max();
   402   vector<set<size_t,less<size_t>,LibCAllocator<size_t>>> uniqueVals;
   403   m_uniqueValues.clear(); 
   409           m_uniqueValues.resize(uniqueVals.size());
   410           for (
size_t i=0; i<uniqueVals.size(); ++i) m_uniqueValues[i]=uniqueVals[i].size();
   414             pu.
setProgress(
double(remainingInput.tellg())/fileSize);
   417       m_uniqueValues.resize(uniqueVals.size());
   418       for (
size_t i=0; i<uniqueVals.size(); ++i) m_uniqueValues[i]=uniqueVals[i].size();
   422         pu.
setProgress(
double(remainingInput.tellg())/fileSize);
   424   catch (std::exception&)
   427       throw std::runtime_error(
"CSV format guess terminated by user, best guess specification used.");
   435     givenTFguessRemainder(initialInput, remainingInput, 
SpaceSeparatorParser(escape,separator,quote),fileSize); 
   437     givenTFguessRemainder(initialInput, remainingInput, 
Parser(escape,separator,quote),fileSize);
   440 template <
class TokenizerFunction, 
class UniqueVals>
   444   const hash<string> h;
   445   for (; getline(
input, buf) && row<until; ++row)
   447       if (buf.empty()) 
continue;
   449       if (buf.back()==
'\r') buf=buf.substr(0,buf.size()-1);
   453           static const regex ravelHypercube(
"\"RavelHypercube=(.*)\"");
   454           if (regex_match(buf, match, ravelHypercube))
   457                 string metadata=match[1];
   459                 metadata.erase(
remove(metadata.begin(),metadata.end(),
'\\'),metadata.end());
   460                 string horizontalName;
   462                 static const regex re(
"HorizontalDimension=\"(.*)\"");
   463                 if (regex_match(buf, match, re))
   465                     horizontalName=match[1];
   468                 populateFromRavelMetadata(metadata, horizontalName, row);
   476       const boost::tokenizer<TokenizerFunction> tok(buf.begin(),buf.end(), tf);
   477       const vector<string> line(tok.begin(), tok.end());
   478       if (line.size()>uniqueVals.size())
   479         uniqueVals.resize(std::min(maxColumn, line.size()));
   480       for (
size_t i=0; i<std::min(line.size(), uniqueVals.size()); ++i)
   481         uniqueVals[i].insert(h(line[i]));
   483       nCols=std::max(nCols, line.size());
   484       if (starts.back()==line.size())
   486       if (starts.size()-1 < firstEmpty && starts.back()<nCols && 
emptyTail(line, starts.back()))
   487         firstEmpty=starts.size()-1;
   491   for (
unsigned long i=0; i<starts.size(); ++i) 
   493   const double av=sum/(starts.size());
   494   for (; starts.size()>m_nRowAxes && (starts[m_nRowAxes]>av); 
   497   if (m_nRowAxes>=row-1) m_nRowAxes=1;
   499   for (
size_t i=nRowAxes(); i<starts.size(); ++i)
   500     m_nColAxes=std::max(m_nColAxes,starts[i]);
   502   if (m_nRowAxes==0 && nCols-m_nColAxes>1)
   505   if (firstEmpty==m_nRowAxes) ++m_nRowAxes; 
   506   headerRow=nRowAxes()>0? nRowAxes()-1: 0;
   508   dimensionCols.clear();
   509   for (; i<nColAxes() && i<maxColumn; ++i) dimensionCols.insert(i);
   511   for (; i<nCols && i<maxColumn; ++i) dataCols.insert(i);
   519   size_t numCommas=0, numSemicolons=0, numTabs=0;
   522   ostringstream streamBuf;
   539     istringstream inputCopy(streamBuf.str());
   540     if (numCommas>0.9*row && numCommas>numSemicolons && numCommas>numTabs)
   541       guessRemainder(inputCopy,
input,
',',fileSize);
   542     else if (numSemicolons>0.9*row && numSemicolons>numTabs)
   543       guessRemainder(inputCopy,
input,
';',fileSize);
   544     else if (numTabs>0.9*row)
   545       guessRemainder(inputCopy,
input,
'\t',fileSize);
   547       guessRemainder(inputCopy,
input,
' ',fileSize);
   553    vector<NamedDimension> ravelMetadata;
   554    json(ravelMetadata,metadata);
   556    setDataArea(headerRow, ravelMetadata.size());
   557    dimensionNames.clear();
   559    for (
auto& i: ravelMetadata)
   560      if (i.name==horizontalName)
   562          horizontalDimension=i.dimension;
   563          horizontalDimName=i.name;
   567          dimensions.push_back(i.dimension);
   568          dimensionNames.push_back(i.name);
   570    for (
size_t i=0; i<dimensions.size(); ++i)
   571      dimensionCols.insert(i);
   579     if (!buf.empty() && buf.back()==
'\r')
   580       buf.erase(buf.size()-1);
   587     bool r=getline(
input,line).good();
   595         if (quoteCount%2==0) 
break; 
   597         r=getline(
input,buf).good(); 
   602     return r || !line.empty();
   608     for (
size_t i=1; i<line.size(); ++i)
   609       if (line[i]==spec.
quote && line[i-1]==spec.
quote &&
   610           ((i==1 && (i==line.size()-1|| line[i+1]!=spec.
quote)) ||                                       
   612             ((line[i-2]!=spec.
quote && line[i-2]!=spec.
escape &&
   613               (line[i-2]!=spec.
separator || i==line.size()-1|| line[i+1]!=spec.
quote))  
   623     template <
class E> 
void operator()(
const E& ex, 
size_t row) {
throw ex;}
   635     vector<unordered_map<typename Key::value_type, size_t>> 
dimLabels;
   643       parse(
input,spec,fileSize,onError,checkValues);
   647     ParseCSV(
const vector<string>& filenames, 
const DataSpec& spec, uintmax_t, E& onError, 
bool checkValues=
false)
   650       for (
auto& 
f: filenames)
   655               parse(
input,spec,std::filesystem::file_size(
f),onError);
   657           catch (
const std::exception& ex)
   660               throw std::runtime_error(
f+
": "+ex.what());
   667     void parse(istream& 
input, 
const DataSpec& spec, uintmax_t fileSize, E& onError, 
bool checkValues=
false)
   678       uintmax_t bytesRead=0;
   682           if (hc.xvectors.empty()) 
   687                   hc.xvectors.back().dimension=spec.
dimensions[i];
   717                           const boost::tokenizer<P> tok(buf.begin(), buf.end(), csvParser);
   718                           auto field=tok.begin();
   719                           for (
size_t i=0; i<spec.
dimensionNames.size() && field!=tok.end(); ++i, ++field);
   720                           for (; field!=tok.end(); ++field)
   722                               horizontalLabels.emplace_back(sliceLabelTokens[
""]);
   724                               horizontalLabels.emplace_back
   731                   set<typename Key::value_type> uniqueLabels;
   732                   dimLabels.emplace_back();
   733                   for (
auto& i: horizontalLabels)
   734                     if (!sliceLabelTokens[i].empty() && uniqueLabels.insert(i).second)
   736                         dimLabels.back()[i]=hc.xvectors.back().size();
   737                         hc.xvectors.back().emplace_back(sliceLabelTokens[i]);
   750             auto blankToken=sliceLabelTokens[
""];
   754                 const boost::tokenizer<P> tok(buf.begin(), buf.end(), csvParser);
   757                 size_t dim=0, dataCols=0;
   759                 for (
auto field=tok.begin(); field!=tok.end(); ++col, ++field)
   763                       if (spec.
dimensions[dim].type!=Dimension::string && field->empty())
   764                         goto invalidKeyGotoNextLine;
   766                       if (dim>=hc.xvectors.size())
   767                         hc.xvectors.emplace_back(
"?"); 
   770                           auto trimmedField=
trimWS(*field);
   771                           if (trimmedField.empty() && spec.
dimensions[col].type!=Dimension::string)
   773                           auto keyElem=anyVal[dim](trimmedField);
   775                           if (dimLabels[dim].emplace(sliceLabelTokens[skeyElem], dimLabels[dim].size()).second)
   776                             hc.xvectors[dim].emplace_back(keyElem);
   777                           key.emplace_back(sliceLabelTokens[skeyElem]);
   782                             goto invalidKeyGotoNextLine;
   788                 if (key.size()<hc.rank()-tabularFormat)
   791                       goto invalidKeyGotoNextLine;
   792                     onError(ShortLine(key,sliceLabelTokens),row);
   796                 for (
auto field=tok.begin(); field!=tok.end(); ++col,++field)
   801                           if (horizontalLabels[dataCols]==blankToken)
   803                           key.emplace_back(horizontalLabels[dataCols]);
   813                         else if (!checkValues &&
   814                                  ((s.empty() && (!isdigit(c)&&c!=
'-'&&c!=
'+')) ||
   815                                   ((s==
"-"||s==
"+") && !isdigit(c))))
   817                         else if (!isspace(c) && c!=
'.' && c!=
',')
   821                       bool valueExists=!s.empty() && s!=
"\\N" && (isdigit(s[0])||s[0]==
'-'||s[0]==
'+'||s[0]==
'.');
   822                       if (checkValues && !valueExists && !s.empty() && s!=
"\\N") 
   831                               auto i=tmpData.find(key);
   838                                     if (checkValues && end<s.length())
   840                                     if (i==tmpData.end())
   842                                         tmpData.emplace(key,v);
   843                                         onError.rowKeyInsert(key,row);
   846                                 catch (
const std::bad_alloc&)
   850                                     if (checkValues) onError(InvalidData(s,
"value",spec.
dimensionNames[col]),row);
   854                               if (valueExists && i!=tmpData.end())
   859                                       onError(DuplicateKey(key,sliceLabelTokens),row); 
   877                                       i->second=((c+1)*i->second + v)/(c+2);
   896                       onError(ShortLine(key,sliceLabelTokens),row);
   900                 bytesRead+=buf.size();
   902               invalidKeyGotoNextLine:;
   906       catch (
const std::bad_alloc&)
   908           throw MemoryExhausted();
   910       catch (
const std::length_error&)
   912           throw MemoryExhausted();
   914       catch (
const std::exception& ex)
   916           auto msg=string(ex.what())+
" at line:"+to_string(row)+
", col:"+to_string(col);
   919           throw std::runtime_error(msg);
   924   template <
class P,  
class E, 
class S>
   935           throw runtime_error(
"Duplicate dimension: "+spec.
dimensionNames[i]);
   942         auto& tmpData=parseCSV.
tmpData;
   944         auto& hc=parseCSV.
hc;
   947         auto d=dimLabels.begin();
   948         assert(hc.xvectors.size()==dimLabels.size());
   949         for (
auto i=hc.xvectors.begin(); i!=hc.xvectors.end();)
   952               hc.xvectors.erase(i);
   959         assert(hc.xvectors.size()<=dimLabels.size());
   961         for (
auto& xv: hc.xvectors)
   962           xv.imposeDimension();
   965         if (hc.logNumElements()>
log(numeric_limits<size_t>::max()))
   966           throw runtime_error(
"Hypercube dimensionality exceeds maximum size, results are likely to be garbage.\n"   967                               "Suggest rolling up one or more axes by ignoring them, and setting 'Duplicate Key Action' as appropriate");
   969         if (
log(tmpData.size())-hc.logNumElements()>=
log(0.5)) 
   973               throw MemoryExhausted();            
   982             for (
auto& i: tmpData)
   985                 assert (hc.rank()<=i.first.size());
   986                 assert(dimLabels.size()>=hc.rank());
   987                 int j=hc.rank()-1, k=i.first.size()-1; 
   990                     while (dimLabels[k].size()<2) --k; 
   991                     auto dimLabel=dimLabels[k].find(i.first[k]);
   992                     assert(dimLabel!=dimLabels[k].end());
   993                     idx = (idx*dims[j]) + dimLabel->second;
  1003               throw MemoryExhausted();                          
  1004             auto dims=hc.dims();
  1007             map<size_t,double,less<size_t>,LibCAllocator<pair<const size_t,double>>> indexValue; 
  1009               for (
auto& i: tmpData)
  1012                   assert (dims.size()<=i.first.size());
  1013                   assert(dimLabels.size()>=dims.size());
  1014                   int j=dims.size()-1, k=i.first.size()-1;
  1015                   while (j>=0 && k>=0) 
  1017                       while (dimLabels[k].size()<2) --k; 
  1018                       auto dimLabel=dimLabels[k].find(i.first[k]);
  1019                       assert(dimLabel!=dimLabels[k].end());
  1020                       idx = (idx*dims[j]) + dimLabel->second;
  1024                   if (!
isnan(i.second))
  1025                     indexValue.emplace(idx, i.second);
  1026                   ++
minsky().progressState;
  1030               vv.
index(indexValue);
  1032               ++
minsky().progressState;
  1035         minsky().progressState.title=
"Cleaning up";
  1036         minsky().progressState.displayProgress();
  1038     catch (
const std::bad_alloc&)
  1040         throw MemoryExhausted();
  1042     catch (
const std::length_error&)
  1044         throw MemoryExhausted();
  1053       loadValueFromCSVFileT<SpaceSeparatorParser>(v,filenames,spec,onError);
  1055       loadValueFromCSVFileT<Parser>(v,filenames,spec,onError);
  1065     const char* 
what() const noexcept
 override {
return "Failed to rewind input";}
  1077     struct ErrorReporter 
  1079       Map<size_t> firstRow;
  1080       map<size_t,Key> duplicates;
  1081       map<size_t,string> invalidData;
  1082       void operator()(
const DuplicateKey& ex, 
size_t row) {
  1083         duplicates.emplace(firstRow[ex.key],ex.key);
  1084         duplicates.emplace(row,ex.key);
  1086       void operator()(
const InvalidData& ex, 
size_t row) {invalidData.emplace(row, ex.msg);}
  1087       void operator()(
const ShortLine& ex, 
size_t row) {invalidData.emplace(row, ex.msg);}
  1089       void rowKeyInsert(
const Key& key, 
size_t row) {firstRow.emplace(key,row);}
  1101     multimap<Key,string> duplicateLines;
  1102     vector<string> invalidDataLines;
  1110           if (onError.duplicates.contains(row))
  1112               string msg=
"Duplicate key";
  1114               duplicateLines.emplace(onError.duplicates[row],msg);
  1116           if (onError.invalidData.contains(row))
  1118               string msg=onError.invalidData[row];
  1120               invalidDataLines.push_back(msg);
  1122           bytesRead+=buf.size();
  1138             output<<
"Error"<<sep;
  1140           bytesRead+=buf.size();
  1145     for (
auto& i: invalidDataLines)
  1148     for (
auto& i: duplicateLines)
  1149       output<<i.second<<endl;
  1153         if (!onError.duplicates.contains(row) && !onError.invalidData.contains(row))
  1154           output<<sep+buf<<endl;
  1155         bytesRead+=buf.size();
  1164       reportFromCSVFileT<SpaceSeparatorParser>(
input,output,spec,fileSize);
  1166       reportFromCSVFileT<Parser>(
input,output,spec,fileSize);
 
void loadValueFromCSVFileS(VariableValue &v, S &filenames, const DataSpec &spec)
map< Key, V, less< Key >, LibCAllocator< pair< const Key, V > >> Map
std::size_t nColAxes() const
start column of the data area 
size_t operator()(const Any &x) const
void reportFromCSVFile(istream &input, ostream &output, const DataSpec &spec, uintmax_t fileSize)
creates a report CSV file from input, with errors sorted at the beginning of the file, with a column for error...
bool operator()(I &next, I end, std::string &tok)
vector< const string * > tokenRefs
DuplicateKeyAction duplicateKeyAction
boost::tokenizer< Parser > Tokenizer
const Hypercube & hypercube() const override
string stripWSAndDecimalSep(const string &s)
handle reporting errors in loadValueFromCSVFileT when loading files 
escapedListSeparator::EscapedListSeparator< char > Parser
size_t firstNumerical(const vector< string > &v)
CLASSDESC_ACCESS_EXPLICIT_INSTANTIATION(minsky::DataSpec)
Tokens< SliceLabelToken > sliceLabelTokens
std::size_t numCols
number of columns in CSV. Must be > dataColOffset 
bool operator==(const Any &x) const
double quotedStoD(const string &s, size_t &charsProcd)
T operator[](const string &x)
void do_escape(iterator &next, iterator end, Token &tok)
EscapedListSeparator(Char e='\\', Char c=',', Char q='\"') 
civita::Dimension horizontalDimension
void escapeDoubledQuotes(std::string &line, const DataSpec &spec)
replace doubled quotes with escaped quotes 
DuplicateKey(const Key &x, const Tokens< SliceLabelToken > &tokens)
std::vector< civita::Dimension > dimensions
const char * what() const noexcept override
bool operator<(const Any &x) const
bool emptyTail(const vector< string > &v, size_t start)
vector< unordered_map< typename Key::value_type, size_t > > dimLabels
bool counter
count data items rather than reading their values 
std::basic_string< Char, Traits > string_type
ParseCSV(istream &input, const DataSpec &spec, uintmax_t fileSize, E &onError, bool checkValues=false)
bool isNumerical(const std::string &s)
std::string horizontalDimName
bool dontFail
do not throw an error on corrupt data 
bool processChunk(std::istream &input, const T &tf, size_t until, U &)
process chunk of input, updating guessed spec 
EscapedListSeparator(string_type e, string_type c, string_type q)
void guessRemainder(std::istream &initialInput, std::istream &remainingInput, char separator, uintmax_t fileSize)
figure out the tokenizer function and call givenTFguessRemainder 
TensorVal tensorInit
when init is a tensor of values, this overrides the init string 
std::string trimWS(const std::string &s)
const char * what() const noexcept override
std::size_t headerRow
number of header rows 
SpaceSeparatorParser(char escape='\\', char sep=' ', char quote='"')
void setProgress(double fraction)
Sets the progress to a given fraction of this stack's allocation. 
const char * what() const noexcept override
void parse(istream &input, const DataSpec &spec, uintmax_t fileSize, E &onError, bool checkValues=false)
const Index & index(Index &&i) override
std::string str(T x)
utility function to create a string representation of a numeric type 
void reportFromCSVFileT(istream &input, ostream &output, const DataSpec &spec, uintmax_t fileSize)
string colName
column name 
const Minsky & cminsky()
const version, to help with const correctness 
string data
data received in field 
size_t operator()(const vector< T > &x) const
void loadValueFromCSVFile(VariableValue &v, const vector< string > &filenames, const DataSpec &spec)
load a variableValue from a list of files according to data spec 
void loadValueFromCSVFileT(VariableValue &vv, S &stream, const DataSpec &spec, E &onError)
vector< typename Key::value_type > horizontalLabels
std::vector< std::string > dimensionNames
void givenTFguessRemainder(std::istream &initialInput, std::istream &remainingInput, const T &tf, uintmax_t fileSize)
try to fill in the remainder of the spec, given a tokenizer function tf, e.g. boost::escaped_list_separator<char...
ParseCSV(const vector< string > &filenames, const DataSpec &spec, uintmax_t, E &onError, bool checkValues=false)
const string & operator[](T i) const
const char * what() const noexcept override
const char * what() const noexcept override
std::size_t maxColumn
maximum number of columns that can be configured independently. Columns after this limit are treated ...
bool getWholeLine(istream &input, string &line, const DataSpec &spec)
InvalidData(const string &data, const string &type, const string &colName)
std::size_t nRowAxes() const
start row of the data area 
bool operator()(InputIterator &next, InputIterator end, Token &tok)
vector< SliceLabelToken, LibCAllocator< SliceLabelToken > > Key
void rowKeyInsert(const Key &, size_t)
update a map of keys to first rows for duplicate key processing 
const char * what() const noexcept override
Minsky & minsky()
global minsky object 
void guessFromStream(std::istream &file, uintmax_t fileSize=uintmax_t(-1))
initial stab at dataspec from examining stream 
void operator()(const E &ex, size_t row)
called on error - ex message to pass on, row - current row 
std::set< unsigned > dataCols
Map< double > tmpData
map of data by key 
ShortLine(const Key &x, const Tokens< SliceLabelToken > &tokens)
unordered_map< string, T > tokens
static const unsigned numInitialLines
void setDataArea(std::size_t row, std::size_t col)
set top left cell of the data area 
void populateFromRavelMetadata(const std::string &metadata, const std::string &horizontalName, std::size_t row)
populates this spec from a "RavelHypercube" entry, row is the row being read, used to set the headerR...
std::set< unsigned > dimensionCols
rows and columns that are comment lines to be ignored