inlib
1.2.0
|
00001 // Copyright (C) 2010, Guy Barrand. All rights reserved. 00002 // See the file inlib.license for terms. 00003 00004 #ifndef inlib_rcsv_ntuple 00005 #define inlib_rcsv_ntuple 00006 00007 // A simple ntuple class to read at the csv format. 00008 // (csv = comma separated value). 00009 00010 // This reader can be use to read file at the hippodraw format 00011 // which is : 00012 // - one header line for the ntuple title. 00013 // - one csv line for column names. 00014 // - data at csv format. 00015 00016 #include "rntuple" 00017 00018 #include <istream> 00019 #include <sstream> 00020 00021 #include "vfind" 00022 #include "vmanip" 00023 #include "words" 00024 #include "sto" 00025 #include "s2time" 00026 #include "chars" 00027 #include "strip" 00028 #include "cids" 00029 00030 #ifdef INLIB_MEM 00031 #include "mem" 00032 #endif 00033 00034 namespace inlib { 00035 namespace rcsv { 00036 00037 class ntuple : public virtual read::intuple { 00038 public: //read::intuple 00039 virtual void start() { 00040 m_reader.clear(); 00041 m_reader.seekg(0,std::ios::beg); 00042 if(m_hippo) { 00043 skip_line(m_reader,m_sz); 00044 skip_line(m_reader,m_sz); 00045 } 00046 } 00047 virtual bool next() { 00048 if(!m_sep) return false; //not inited. 00049 if(m_reader.tellg()>=m_sz) return false; 00050 // first time we are at bol but else we are at eol. 00051 char c; 00052 m_reader.get(c); 00053 if(c==LF()){ 00054 if(m_reader.tellg()>=m_sz) { 00055 //eof. Tell caller to stop looping on ntuple rows. 00056 return false; 00057 } 00058 //eol. Next char read is going to be at bol. 00059 } else { 00060 m_reader.putback(c); 00061 //bol 00062 } 00063 // ready for a new row : 00064 00065 while(skip_comment(m_reader,m_sz)){} 00066 if(m_reader.tellg()>=m_sz) return false; 00067 00068 return _read_line(); 00069 } 00070 00071 virtual read::icol* find_icol(const std::string& a_name){ 00072 return find_named<read::icol>(m_cols,a_name); 00073 } 00074 00075 virtual const std::vector<read::icol*>& columns() const {return m_cols;} 00076 protected: 00077 static bool _read(std::istream& a_reader, 00078 char,std::streampos, 00079 double& a_v) { 00080 a_reader >> a_v; 00081 if(a_reader.tellg()==std::streampos(-1)) {a_v = 0;return false;} 00082 //std::cout << "debug : _read(double) " << a_v << std::endl; 00083 return true; 00084 } 00085 static bool _read(std::istream& a_reader, 00086 char a_sep,std::streampos a_sz, 00087 time_t& a_v) { 00088 std::string s; 00089 char c; 00090 while(true){ 00091 if(a_reader.tellg()>=a_sz) break; 00092 a_reader.get(c); 00093 if((c==a_sep)||(c==CR())||(c==LF())) { 00094 a_reader.putback(c); 00095 break; 00096 } 00097 s += c; 00098 } 00099 if(!s2time(s,a_v)) return false; 00100 return true; 00101 } 00102 static bool _read(std::istream& a_reader, 00103 char a_sep,std::streampos a_sz, 00104 std::string& a_v) { 00105 a_v.clear(); 00106 char c; 00107 while(true){ 00108 if(a_reader.tellg()>=a_sz) break; 00109 a_reader.get(c); 00110 if((c==a_sep)||(c==CR())||(c==LF())) { 00111 a_reader.putback(c); 00112 break; 00113 } 00114 a_v += c; 00115 } 00116 return true; 00117 } 00118 00119 public: 00120 template <class T> 00121 class column : public virtual read::icolumn<T> { 00122 public: //icol 00123 virtual const std::string& name() const {return m_name;} 00124 public: //icolumn<T> 00125 virtual bool get_entry(T& a_v) const { 00126 a_v = m_tmp; 00127 return true; 00128 } 00129 public: 00130 column(const std::string& a_name) 00131 :m_name(a_name) 00132 ,m_tmp(T()) 00133 {} 00134 virtual ~column(){} 00135 protected: 00136 column(const column& a_from) 00137 :read::intuple(a_from),read::icolumn<T>(a_from) 00138 ,m_name(a_from.m_name) 00139 ,m_tmp(a_from.m_tmp) 00140 {} 00141 column& operator=(const column& a_from){ 00142 m_name = a_from.m_name; 00143 m_tmp = a_from.m_tmp; 00144 return *this; 00145 } 00146 public: 00147 // should be used in ntuple _read_line only : 00148 void set_value(const T& a_v){m_tmp = a_v;} 00149 protected: 00150 std::string m_name; 00151 T m_tmp; 00152 }; 00153 00154 00155 #ifdef INLIB_MEM 00156 public: 00157 static const std::string& s_class() { 00158 static const std::string s_v("inlib::rcsv::ntuple"); 00159 return s_v; 00160 } 00161 #endif 00162 public: 00163 ntuple(std::istream& a_reader) 00164 :m_reader(a_reader) 00165 ,m_sep(0) 00166 ,m_sz(0) 00167 ,m_hippo(false) 00168 { 00169 #ifdef INLIB_MEM 00170 mem::increment(s_class().c_str()); 00171 #endif 00172 } 00173 virtual ~ntuple() { 00174 inlib::clear<read::icol>(m_cols); 00175 #ifdef INLIB_MEM 00176 mem::decrement(s_class().c_str()); 00177 #endif 00178 } 00179 protected: 00180 ntuple(const ntuple& a_from) 00181 :read::intuple(a_from) 00182 ,m_reader(a_from.m_reader) 00183 ,m_sep(a_from.m_sep) 00184 ,m_sz(a_from.m_sz) 00185 ,m_hippo(a_from.m_hippo) 00186 { 00187 #ifdef INLIB_MEM 00188 mem::increment(s_class().c_str()); 00189 #endif 00190 } 00191 ntuple& operator=(const ntuple& a_from){ 00192 m_sep = a_from.m_sep; 00193 m_hippo = a_from.m_hippo; 00194 return *this; 00195 } 00196 public: 00197 void set_hippo(bool a_hippo) {m_hippo = a_hippo;} 00198 00199 std::istream& istrm() {return m_reader;} 00200 00201 static bool find_sep(std::ostream& a_out, 00202 std::istream& a_reader,bool a_hippo, 00203 bool a_verbose, 00204 char& a_sep){ 00205 // analyse first data line to find the char separator. 00206 00207 a_reader.clear(); 00208 a_reader.seekg(0,std::ios::end); 00209 std::streampos sz = a_reader.tellg(); 00210 a_reader.seekg(0,std::ios::beg); 00211 if(!sz) { 00212 a_out << "inlib::rcsv::ntuple::find_sep :" 00213 << " stream is empty." 00214 << std::endl; 00215 a_sep = 0; 00216 return false; 00217 } //file empty. 00218 if(a_verbose) a_out << "file size " << sz << std::endl; 00219 00220 if(a_hippo) { //skip first two lines : 00221 if(!skip_line(a_reader,sz)) {a_sep = 0;return false;} 00222 if(!skip_line(a_reader,sz)) {a_sep = 0;return false;} 00223 } else { 00224 while(skip_comment(a_reader,sz)){} 00225 } 00226 if(a_reader.tellg()>=sz) {a_sep=0;return false;} //no data line. 00227 00228 // get first data line : 00229 std::string sfirst; 00230 {char c; 00231 while(true) { 00232 if(a_reader.tellg()>=sz) break; 00233 a_reader.get(c); 00234 if((c==CR())||(c==LF())) break; 00235 sfirst += c; 00236 }} 00237 if(sfirst.empty()) { 00238 a_out << "inlib::rcsv::ntuple::find_set :" 00239 << " first datat line is empty." 00240 << std::endl; 00241 a_sep = 0; 00242 return false; 00243 } 00244 if(a_verbose) a_out << "first data line \"" << sfirst << "\"" << std::endl; 00245 00246 //guess sep from first data line : 00247 std::istringstream strm(sfirst.c_str()); 00248 double d; 00249 strm >> d; 00250 std::streampos pos = strm.tellg(); 00251 if(pos==std::streampos(-1)) { 00252 a_out << "inlib::rcsv::ntuple::find_sep :" 00253 << " first line does not start with a number." 00254 << std::endl; 00255 a_sep = 0; 00256 return false; 00257 } //not a number. 00258 if(a_verbose) a_out << "first number " << d 00259 << " ending at pos " << pos << std::endl; 00260 if(pos>=(std::streampos)sfirst.size()) { 00261 a_out << "inlib::rcsv::ntuple::find_sep :" 00262 << " no separator found in first line." 00263 << " pos " << pos 00264 << " sfirst.size() " << sfirst.size() 00265 << std::endl; 00266 a_sep = 0; 00267 return false; 00268 } //no sep. 00269 00270 strm.get(a_sep); 00271 00272 return true; 00273 } 00274 00275 public: 00276 bool initialize(std::ostream& a_out, 00277 char a_sep = 0, //guessed 00278 const std::string& a_suffix = "x", //col suffix 00279 bool a_verbose = false) { 00280 inlib::clear<read::icol>(m_cols); 00281 m_sep = 0; 00282 m_sz = 0; 00283 00284 if(a_suffix.empty()) { 00285 a_out << "inlib::rcsv::ntuple::initialize :" 00286 << " expect a column suffix." 00287 << std::endl; 00288 return false; 00289 } 00290 00291 m_reader.clear(); 00292 m_reader.seekg(0,std::ios::end); 00293 m_sz = m_reader.tellg(); 00294 m_reader.seekg(0,std::ios::beg); 00295 if(!m_sz) { 00296 a_out << "inlib::rcsv::ntuple::initialize :" 00297 << " stream is empty." 00298 << std::endl; 00299 return false; //file empty. 00300 } 00301 if(a_verbose) a_out << "file size " << m_sz << std::endl; 00302 00303 std::vector<std::string> labels; 00304 if(m_hippo) { //skip first two lines : 00305 std::string title; 00306 if(!read_line(m_reader,m_sz,title)) {a_sep = 0;return false;} 00307 std::string s; 00308 if(!read_line(m_reader,m_sz,s)) {a_sep = 0;return false;} 00309 inlib::words(s,"\t",false,labels); 00310 } else { 00311 while(skip_comment(m_reader,m_sz)){} 00312 } 00313 if(m_reader.tellg()>=m_sz) {m_sz=0;return false;} 00314 00315 // get first data line : 00316 std::string sfirst; 00317 {{char c; 00318 while(true) { 00319 if(m_reader.tellg()>=m_sz) break; 00320 m_reader.get(c); 00321 if((c==CR())||(c==LF())) break; 00322 sfirst += c; 00323 }} 00324 if(sfirst.empty()) { 00325 a_out << "inlib::rcsv::ntuple::initialize :" 00326 << " first datat line is empty." 00327 << std::endl; 00328 m_sz = 0; 00329 return false; 00330 }} 00331 if(a_verbose) a_out << "first data line \"" << sfirst << "\"" << std::endl; 00332 00333 if(a_sep) { 00334 m_sep = a_sep; 00335 } else { 00336 //guess sep from first data line : 00337 std::istringstream strm(sfirst.c_str()); 00338 double d; 00339 strm >> d; 00340 std::streampos pos = strm.tellg(); 00341 if(pos==std::streampos(-1)) { 00342 a_out << "inlib::rcsv::ntuple::initialize :" 00343 << " first line does not start with a number." 00344 << std::endl; 00345 m_sz = 0; 00346 return false; 00347 } 00348 if(a_verbose) a_out << "first number " << d 00349 << " ending at pos " << pos << std::endl; 00350 if(pos>=(std::streampos)sfirst.size()) { 00351 a_out << "inlib::rcsv::ntuple::initialize :" 00352 << " no separator found in first line." 00353 << std::endl; 00354 m_sz = 0; 00355 return false; 00356 } 00357 strm.get(m_sep); 00358 } 00359 if(a_verbose) a_out << "sep " << (int)m_sep << std::endl; 00360 00361 // in case sep is ' ', there is an ambiguity with some leading 00362 // space in front of first number. 00363 if(m_sep==' ') inlib::strip(sfirst,leading,' '); 00364 00365 std::vector<std::string> words; 00366 {std::string sep; 00367 sep += m_sep; 00368 inlib::words(sfirst,sep,true,words);} 00369 00370 // look if words are numbers : 00371 if(a_verbose) a_out << "words " << words.size() << std::endl; 00372 unsigned int index = 0; 00373 std::vector<std::string>::iterator it; 00374 for(it=words.begin();it!=words.end();++it,index++) { 00375 if(a_verbose) a_out << "word " << sout(*it) << "" << std::endl; 00376 if((*it).empty()) { 00377 // do not accept : 00378 // <num><sep><num><sep><sep><num>... 00379 // but accept a trailing <sep> (glast.tnt) : 00380 // <num><sep><num>....<sep><num><sep> 00381 if(index==(words.size()-1)) { 00382 break; 00383 } else { 00384 a_out << "inlib::rcsv::ntuple::initialize :" 00385 << " empty word." 00386 << std::endl; 00387 m_sep = 0; 00388 m_sz = 0; 00389 return false; 00390 } 00391 } 00392 std::string name(a_suffix+to<unsigned int>(m_cols.size())); 00393 if(m_hippo) { 00394 if(index>=labels.size()) { 00395 a_out << "inlib::rcsv::ntuple::initialize :" 00396 << " warning : not enough labels." 00397 << std::endl; 00398 } else { 00399 name = labels[index]; 00400 } 00401 } 00402 double d; 00403 if(to<double>(*it,d)) { 00404 if(a_verbose) a_out << "number " << d << std::endl; 00405 create_column<double>(name); 00406 } else { 00407 time_t time; 00408 if(s2time(*it,time)) { 00409 create_column<time_t>(name); 00410 } else { 00411 create_column<std::string>(name); 00412 } 00413 } 00414 } 00415 unsigned int num = m_cols.size(); 00416 if(!num) { 00417 a_out << "inlib::rcsv::ntuple::initialize :" 00418 << " zero columns." 00419 << std::endl; 00420 m_sep = 0; 00421 m_sz = 0; 00422 return false; 00423 } 00424 00425 return true; 00426 } 00427 00428 static const std::string& s_cid(cid a_id) { 00429 if(a_id==_cid(double())) { 00430 static const std::string s_v("double"); 00431 return s_v; 00432 } else if(a_id==_cid(time_t())) { 00433 static const std::string s_v("time"); 00434 return s_v; 00435 } else if(a_id==_cid(std::string())) { 00436 static const std::string s_v("string"); 00437 return s_v; 00438 } else { 00439 static const std::string s_v("unknown"); 00440 return s_v; 00441 } 00442 } 00443 00444 void dump_columns(std::ostream& a_out) const { 00445 if((m_sep>=32)&&(m_sep<=126)) { //printable 00446 a_out << "separator is '" << m_sep << "'" << std::endl; 00447 } else { 00448 a_out << "separator is " << (unsigned int)m_sep << std::endl; 00449 } 00450 std::vector<read::icol*>::const_iterator it; 00451 for(it=m_cols.begin();it!=m_cols.end();++it) { 00452 a_out << (*it)->name() 00453 << " " << s_cid((*it)->id_cls()) 00454 << std::endl; 00455 } 00456 } 00457 00458 protected: 00459 template <class T> 00460 column<T>* create_column(const std::string& a_name){ 00461 if(find_named<read::icol>(m_cols,a_name)) return 0; 00462 column<T>* col = new column<T>(a_name); 00463 if(!col) return 0; 00464 m_cols.push_back(col); 00465 return col; 00466 } 00467 00468 static bool read_line(std::istream& a_reader,std::streampos a_sz, 00469 std::string& a_s){ 00470 a_s.clear(); 00471 char c; 00472 while(true) { 00473 if(a_reader.tellg()>=a_sz) {a_s.clear();return false;} 00474 a_reader.get(c); 00475 if(c==CR()) continue; 00476 if(c==LF()) break; //eol. 00477 a_s += c; 00478 } 00479 return true; 00480 } 00481 00482 static bool skip_line(std::istream& a_reader,std::streampos a_sz){ 00483 char c; 00484 while(true) { 00485 if(a_reader.tellg()>=a_sz) return false; 00486 a_reader.get(c); 00487 if(c==LF()) break; 00488 } 00489 return true; 00490 } 00491 00492 static bool skip_comment(std::istream& a_reader,std::streampos a_sz){ 00493 //ret true = we had a commented line, false : a data line or nothing. 00494 if(a_reader.tellg()>=a_sz) return false; 00495 //we should be at bol : 00496 char c; 00497 a_reader.get(c); 00498 if(c=='#') { 00499 return skip_line(a_reader,a_sz); 00500 //eol. Next char should be bol. 00501 } else { 00502 a_reader.putback(c); 00503 return false; 00504 } 00505 } 00506 00507 bool _read_line() { 00508 // have to loop on all columns ! 00509 typedef read::icol icol_t; 00510 typedef ntuple::column<double> cold_t; 00511 typedef ntuple::column<time_t> colt_t; 00512 typedef ntuple::column<std::string> cols_t; 00513 unsigned int index = 0; 00514 unsigned int num = m_cols.size(); 00515 std::vector<icol_t*>::const_iterator it; 00516 for(it=m_cols.begin();it!=m_cols.end();++it,index++) { 00517 if(cold_t* cold = inlib::id_cast<icol_t,cold_t>(*(*it))) { 00518 double v; 00519 if(!_read(m_reader,m_sep,m_sz,v)) return false; 00520 cold->set_value(v); 00521 } else if(colt_t* colt = inlib::id_cast<icol_t,colt_t>(*(*it))) { 00522 time_t v; 00523 if(!_read(m_reader,m_sep,m_sz,v)) return false; 00524 colt->set_value(v); 00525 } else if(cols_t* cols = inlib::id_cast<icol_t,cols_t>(*(*it))) { 00526 std::string v; 00527 if(!_read(m_reader,m_sep,m_sz,v)) return false; 00528 cols->set_value(v); 00529 } else { 00530 //std::cout << "column cast failed." << std::endl; 00531 return false; 00532 } 00533 if(index==(num-1)) { //read up to LF() 00534 char c; 00535 while(true){ 00536 if(m_reader.tellg()>=m_sz) break; 00537 m_reader.get(c); 00538 if(c==LF()) break; 00539 } 00540 } else { //read sep : 00541 char sep; 00542 m_reader.get(sep); 00543 } 00544 } 00545 return true; 00546 } 00547 protected: 00548 std::istream& m_reader; 00549 char m_sep; 00550 std::vector<read::icol*> m_cols; 00551 std::streampos m_sz; 00552 bool m_hippo; 00553 }; 00554 00555 }} 00556 00557 00558 #include <fstream> 00559 00560 namespace inlib { 00561 namespace rcsv { 00562 00563 class fntuple : public ntuple { 00564 public: 00565 fntuple(const std::string& a_file) 00566 :ntuple(m_freader) 00567 ,m_file(a_file) 00568 {} 00569 virtual ~fntuple() {m_freader.close();} 00570 protected: 00571 fntuple(const fntuple& a_from) 00572 :read::intuple(a_from),ntuple(a_from) 00573 ,m_file(a_from.m_file) 00574 {} 00575 fntuple& operator=(const fntuple& a_from){ 00576 m_file = a_from.m_file; 00577 return *this; 00578 } 00579 public: 00580 bool open(){ 00581 m_freader.open(m_file.c_str()); 00582 return m_freader.fail()?false:true; 00583 } 00584 bool initialize(std::ostream& a_out, 00585 char a_sep = 0, //guessed 00586 const std::string& a_suffix = "x", //col suffix 00587 bool a_verbose = false) { 00588 if(!m_freader.is_open()) { 00589 m_freader.open(m_file.c_str()); 00590 if(m_freader.fail()) { 00591 a_out << "inlib::rcsv::ntuple::initialize :" 00592 << " can't open " << m_file << "." 00593 << std::endl; 00594 return false; 00595 } 00596 } 00597 return ntuple::initialize(a_out,a_sep,a_suffix,a_verbose); 00598 } 00599 protected: 00600 std::string m_file; 00601 std::ifstream m_freader; 00602 }; 00603 00604 }} 00605 00606 #endif