inlib  1.2.0
/Users/barrand/private/dev/softinex/old/inexlib-1.2/inlib/inlib/rcsv_ntuple
Go to the documentation of this file.
00001 // Copyright (C) 2010, Guy Barrand. All rights reserved.
00002 // See the file inlib.license for terms.
00003 
00004 #ifndef inlib_rcsv_ntuple
00005 #define inlib_rcsv_ntuple
00006 
00007 // A simple ntuple class to read data in the csv format
00008 // (csv = comma separated values).
00009 
00010 // This reader can also be used to read files in the hippodraw format,
00011 // which is :
00012 // - one header line for the ntuple title.
00013 // - one line for the column names (split on tabs by initialize()).
00014 // - data in the csv format.
00015 
00016 #include "rntuple"
00017 
00018 #include <istream>
00019 #include <sstream>
00020 
00021 #include "vfind"
00022 #include "vmanip"
00023 #include "words"
00024 #include "sto"
00025 #include "s2time"
00026 #include "chars"
00027 #include "strip"
00028 #include "cids"
00029 
00030 #ifdef INLIB_MEM
00031 #include "mem"
00032 #endif
00033 
00034 namespace inlib {
00035 namespace rcsv {
00036 
00037 class ntuple : public virtual read::intuple {
00038 public: //read::intuple
00039   virtual void start() {
00040     m_reader.clear();
00041     m_reader.seekg(0,std::ios::beg);
00042     if(m_hippo) {
00043       skip_line(m_reader,m_sz);
00044       skip_line(m_reader,m_sz);
00045     }
00046   }
00047   virtual bool next() { 
00048     if(!m_sep) return false; //not inited.
00049     if(m_reader.tellg()>=m_sz) return false;
00050     // first time we are at bol but else we are at eol.
00051     char c;
00052     m_reader.get(c);
00053     if(c==LF()){
00054       if(m_reader.tellg()>=m_sz) {
00055         //eof. Tell caller to stop looping on ntuple rows.
00056         return false;
00057       }
00058       //eol. Next char read is going to be at bol.
00059     } else {
00060       m_reader.putback(c);
00061       //bol
00062     }
00063     // ready for a new row :
00064 
00065     while(skip_comment(m_reader,m_sz)){}
00066     if(m_reader.tellg()>=m_sz) return false;
00067 
00068     return _read_line();
00069   }
00070 
00071   virtual read::icol* find_icol(const std::string& a_name){
00072     return find_named<read::icol>(m_cols,a_name);
00073   }
00074 
00075   virtual const std::vector<read::icol*>& columns() const {return m_cols;}
00076 protected:
// Read one floating point field with the formatted input of the stream.
// The separator and stream size arguments are not used for numbers.
// If the stream position can't be obtained after the extraction, a_v is set
// to 0 and false is returned.
// NOTE(review): a number ending exactly at the end of the stream sets
// eofbit, after which tellg() is expected to return -1 with C++11 libraries;
// such a value is then rejected. Confirm whether a last row without a
// trailing newline should be accepted.
static bool _read(std::istream& a_reader,
                  char,std::streampos,
                  double& a_v) {
  a_reader >> a_v;
  const bool position_ok = !(a_reader.tellg()==std::streampos(-1));
  if(!position_ok) a_v = 0;
  //std::cout << "debug : _read(double) " << a_v << std::endl;
  return position_ok;
}
00085   static bool _read(std::istream& a_reader,
00086                            char a_sep,std::streampos a_sz,
00087                            time_t& a_v) {
00088     std::string s;
00089     char c;
00090     while(true){
00091       if(a_reader.tellg()>=a_sz) break;
00092       a_reader.get(c);
00093       if((c==a_sep)||(c==CR())||(c==LF())) {
00094         a_reader.putback(c);
00095         break;
00096       }
00097       s += c;
00098     }
00099     if(!s2time(s,a_v)) return false;
00100     return true;
00101   }
00102   static bool _read(std::istream& a_reader,
00103                            char a_sep,std::streampos a_sz,
00104                            std::string& a_v) {
00105     a_v.clear();
00106     char c;
00107     while(true){
00108       if(a_reader.tellg()>=a_sz) break;
00109       a_reader.get(c);
00110       if((c==a_sep)||(c==CR())||(c==LF())) {
00111         a_reader.putback(c);
00112         break;
00113       }
00114       a_v += c;
00115     }
00116     return true;
00117   }
00118 
00119 public:
00120   template <class T>
00121   class column : public virtual read::icolumn<T> {
00122   public: //icol
00123     virtual const std::string& name() const {return m_name;}
00124   public: //icolumn<T>
00125     virtual bool get_entry(T& a_v) const {
00126       a_v = m_tmp;
00127       return true;
00128     }
00129   public:
00130     column(const std::string& a_name)
00131     :m_name(a_name)
00132     ,m_tmp(T())
00133     {}
00134     virtual ~column(){}
00135   protected:
00136     column(const column& a_from)
00137     :read::intuple(a_from),read::icolumn<T>(a_from)
00138     ,m_name(a_from.m_name) 
00139     ,m_tmp(a_from.m_tmp)
00140     {}
00141     column& operator=(const column& a_from){
00142       m_name = a_from.m_name;
00143       m_tmp = a_from.m_tmp;
00144       return *this;
00145     }
00146   public:
00147     // should be used in ntuple _read_line only :
00148     void set_value(const T& a_v){m_tmp = a_v;}
00149   protected:
00150     std::string m_name;
00151     T m_tmp;
00152   };
00153 
00154 
00155 #ifdef INLIB_MEM
00156 public:
// Class name, passed to mem::increment() and mem::decrement() when
// INLIB_MEM is defined.
static const std::string& s_class() {
  static const std::string s_name("inlib::rcsv::ntuple");
  return s_name;
}
00161 #endif
00162 public:
00163   ntuple(std::istream& a_reader)
00164   :m_reader(a_reader)
00165   ,m_sep(0)
00166   ,m_sz(0)
00167   ,m_hippo(false)
00168   {
00169 #ifdef INLIB_MEM
00170     mem::increment(s_class().c_str());
00171 #endif
00172   }
00173   virtual ~ntuple() {
00174     inlib::clear<read::icol>(m_cols);
00175 #ifdef INLIB_MEM
00176     mem::decrement(s_class().c_str());
00177 #endif
00178   }
00179 protected:
00180   ntuple(const ntuple& a_from)
00181   :read::intuple(a_from)
00182   ,m_reader(a_from.m_reader)
00183   ,m_sep(a_from.m_sep)
00184   ,m_sz(a_from.m_sz)
00185   ,m_hippo(a_from.m_hippo)
00186   {
00187 #ifdef INLIB_MEM
00188     mem::increment(s_class().c_str());
00189 #endif
00190   }
00191   ntuple& operator=(const ntuple& a_from){
00192     m_sep = a_from.m_sep;
00193     m_hippo = a_from.m_hippo;
00194     return *this;
00195   }
00196 public:
00197   void set_hippo(bool a_hippo) {m_hippo = a_hippo;}
00198 
00199   std::istream& istrm() {return m_reader;}
00200 
00201   static bool find_sep(std::ostream& a_out,
00202                               std::istream& a_reader,bool a_hippo,
00203                               bool a_verbose,
00204                               char& a_sep){
00205     // analyse first data line to find the char separator.
00206 
00207     a_reader.clear();
00208     a_reader.seekg(0,std::ios::end);
00209     std::streampos sz = a_reader.tellg();
00210     a_reader.seekg(0,std::ios::beg);
00211     if(!sz) {
00212       a_out << "inlib::rcsv::ntuple::find_sep :"
00213             << " stream is empty."
00214             << std::endl;
00215       a_sep = 0;
00216       return false;
00217     } //file empty.
00218     if(a_verbose) a_out << "file size " << sz << std::endl;
00219 
00220     if(a_hippo) { //skip first two lines :
00221       if(!skip_line(a_reader,sz)) {a_sep = 0;return false;}
00222       if(!skip_line(a_reader,sz)) {a_sep = 0;return false;}
00223     } else {
00224       while(skip_comment(a_reader,sz)){}
00225     }
00226     if(a_reader.tellg()>=sz) {a_sep=0;return false;} //no data line.
00227 
00228     // get first data line :
00229     std::string sfirst;
00230    {char c;
00231     while(true) {
00232       if(a_reader.tellg()>=sz) break;
00233       a_reader.get(c);
00234       if((c==CR())||(c==LF())) break;
00235       sfirst += c;
00236     }}
00237     if(sfirst.empty()) {
00238       a_out << "inlib::rcsv::ntuple::find_set :"
00239             << " first datat line is empty."
00240             << std::endl;
00241       a_sep = 0;
00242       return false;
00243     }
00244     if(a_verbose) a_out << "first data line \"" << sfirst << "\"" << std::endl;
00245 
00246     //guess sep from first data line :
00247     std::istringstream strm(sfirst.c_str());
00248     double d;
00249     strm >> d;
00250     std::streampos pos = strm.tellg();
00251     if(pos==std::streampos(-1)) {
00252       a_out << "inlib::rcsv::ntuple::find_sep :"
00253             << " first line does not start with a number."
00254             << std::endl;
00255       a_sep = 0;
00256       return false;
00257     } //not a number.
00258     if(a_verbose) a_out << "first number " << d
00259                         << " ending at pos " << pos << std::endl;
00260     if(pos>=(std::streampos)sfirst.size()) {
00261       a_out << "inlib::rcsv::ntuple::find_sep :"
00262             << " no separator found in first line."
00263             << " pos " << pos
00264             << " sfirst.size() " << sfirst.size()
00265             << std::endl;
00266       a_sep = 0;
00267       return false;
00268     } //no sep.
00269 
00270     strm.get(a_sep);
00271 
00272     return true;
00273   }
00274 
00275 public:
00276   bool initialize(std::ostream& a_out,
00277                          char a_sep = 0, //guessed
00278                          const std::string& a_suffix = "x", //col suffix
00279                          bool a_verbose = false) {
00280     inlib::clear<read::icol>(m_cols);
00281     m_sep = 0;
00282     m_sz = 0;
00283 
00284     if(a_suffix.empty()) {
00285       a_out << "inlib::rcsv::ntuple::initialize :"
00286             << " expect a column suffix."
00287             << std::endl;
00288       return false;
00289     }
00290 
00291     m_reader.clear();
00292     m_reader.seekg(0,std::ios::end);
00293     m_sz = m_reader.tellg();
00294     m_reader.seekg(0,std::ios::beg);
00295     if(!m_sz) {
00296       a_out << "inlib::rcsv::ntuple::initialize :"
00297             << " stream is empty."
00298             << std::endl;
00299       return false; //file empty.
00300     }
00301     if(a_verbose) a_out << "file size " << m_sz << std::endl;
00302 
00303     std::vector<std::string> labels;
00304     if(m_hippo) { //skip first two lines :
00305       std::string title;
00306       if(!read_line(m_reader,m_sz,title)) {a_sep = 0;return false;}
00307       std::string s;
00308       if(!read_line(m_reader,m_sz,s)) {a_sep = 0;return false;}
00309       inlib::words(s,"\t",false,labels);
00310     } else {
00311       while(skip_comment(m_reader,m_sz)){}
00312     }
00313     if(m_reader.tellg()>=m_sz) {m_sz=0;return false;}
00314 
00315     // get first data line :
00316     std::string sfirst;
00317   {{char c;
00318     while(true) {
00319       if(m_reader.tellg()>=m_sz) break;
00320       m_reader.get(c);
00321       if((c==CR())||(c==LF())) break;
00322       sfirst += c;
00323     }}
00324     if(sfirst.empty()) {
00325       a_out << "inlib::rcsv::ntuple::initialize :"
00326             << " first datat line is empty."
00327             << std::endl;
00328       m_sz = 0;
00329       return false;
00330     }}
00331     if(a_verbose) a_out << "first data line \"" << sfirst << "\"" << std::endl;
00332 
00333     if(a_sep) {
00334       m_sep = a_sep;
00335     } else {
00336       //guess sep from first data line :
00337       std::istringstream strm(sfirst.c_str());
00338       double d;
00339       strm >> d;
00340       std::streampos pos = strm.tellg();
00341       if(pos==std::streampos(-1)) {
00342         a_out << "inlib::rcsv::ntuple::initialize :"
00343               << " first line does not start with a number."
00344               << std::endl;
00345         m_sz = 0;
00346         return false;
00347       }
00348       if(a_verbose) a_out << "first number " << d
00349                           << " ending at pos " << pos << std::endl;
00350       if(pos>=(std::streampos)sfirst.size()) {
00351         a_out << "inlib::rcsv::ntuple::initialize :"
00352               << " no separator found in first line."
00353               << std::endl;
00354         m_sz = 0;
00355         return false;
00356       }
00357       strm.get(m_sep);
00358     }
00359     if(a_verbose) a_out << "sep " << (int)m_sep << std::endl;
00360 
00361     // in case sep is ' ', there is an ambiguity with some leading
00362     // space in front of first number.
00363     if(m_sep==' ') inlib::strip(sfirst,leading,' ');
00364 
00365     std::vector<std::string> words;
00366    {std::string sep;
00367     sep += m_sep;
00368     inlib::words(sfirst,sep,true,words);}
00369 
00370     // look if words are numbers :
00371     if(a_verbose) a_out << "words " << words.size() << std::endl;
00372     unsigned int index = 0;
00373     std::vector<std::string>::iterator it;
00374     for(it=words.begin();it!=words.end();++it,index++) {
00375       if(a_verbose) a_out << "word " << sout(*it) << "" << std::endl;
00376       if((*it).empty()) {
00377         // do not accept :
00378         //   <num><sep><num><sep><sep><num>...
00379         // but accept a trailing <sep> (glast.tnt) :
00380         //   <num><sep><num>....<sep><num><sep>
00381         if(index==(words.size()-1)) {
00382           break;
00383         } else {
00384           a_out << "inlib::rcsv::ntuple::initialize :"
00385                 << " empty word."
00386                 << std::endl;
00387           m_sep = 0;
00388           m_sz = 0;
00389           return false;
00390         }      
00391       }      
00392       std::string name(a_suffix+to<unsigned int>(m_cols.size()));
00393       if(m_hippo) {
00394         if(index>=labels.size()) {
00395           a_out << "inlib::rcsv::ntuple::initialize :"
00396                 << " warning : not enough labels."
00397                 << std::endl;
00398         } else {
00399           name = labels[index];
00400         }
00401       }
00402       double d;
00403       if(to<double>(*it,d)) {
00404         if(a_verbose) a_out << "number " << d << std::endl;
00405         create_column<double>(name);
00406       } else {
00407         time_t time;
00408         if(s2time(*it,time)) {
00409           create_column<time_t>(name);
00410         } else {
00411           create_column<std::string>(name);
00412         }
00413       }
00414     }
00415     unsigned int num = m_cols.size();
00416     if(!num) {
00417       a_out << "inlib::rcsv::ntuple::initialize :"
00418             << " zero columns."
00419             << std::endl;
00420       m_sep = 0;
00421       m_sz = 0;
00422       return false;
00423     }
00424 
00425     return true;
00426   }
00427 
00428   static const std::string& s_cid(cid a_id) {
00429     if(a_id==_cid(double())) {
00430       static const std::string s_v("double");
00431       return s_v;      
00432     } else if(a_id==_cid(time_t())) {
00433       static const std::string s_v("time");
00434       return s_v;      
00435     } else if(a_id==_cid(std::string())) {
00436       static const std::string s_v("string");
00437       return s_v;      
00438     } else {
00439       static const std::string s_v("unknown");
00440       return s_v;      
00441     }
00442   }
00443 
00444   void dump_columns(std::ostream& a_out) const {
00445     if((m_sep>=32)&&(m_sep<=126)) { //printable
00446       a_out << "separator is '" << m_sep << "'" << std::endl;
00447     } else {
00448       a_out << "separator is " << (unsigned int)m_sep << std::endl;
00449     }
00450     std::vector<read::icol*>::const_iterator it;
00451     for(it=m_cols.begin();it!=m_cols.end();++it) {
00452       a_out << (*it)->name()
00453             << " " << s_cid((*it)->id_cls())
00454             << std::endl;
00455     }
00456   }
00457 
00458 protected:
00459   template <class T>
00460   column<T>* create_column(const std::string& a_name){
00461     if(find_named<read::icol>(m_cols,a_name)) return 0;
00462     column<T>* col = new column<T>(a_name);
00463     if(!col) return 0;
00464     m_cols.push_back(col);
00465     return col;
00466   }
00467 
00468   static bool read_line(std::istream& a_reader,std::streampos a_sz,
00469                                std::string& a_s){
00470     a_s.clear();
00471     char c;
00472     while(true) {
00473       if(a_reader.tellg()>=a_sz) {a_s.clear();return false;}
00474       a_reader.get(c);
00475       if(c==CR()) continue;
00476       if(c==LF()) break; //eol.
00477       a_s += c;
00478     }
00479     return true;
00480   }
00481 
00482   static bool skip_line(std::istream& a_reader,std::streampos a_sz){
00483     char c;
00484     while(true) {
00485       if(a_reader.tellg()>=a_sz) return false;
00486       a_reader.get(c);
00487       if(c==LF()) break;
00488     }
00489     return true;
00490   }
00491 
00492   static bool skip_comment(std::istream& a_reader,std::streampos a_sz){
00493     //ret true = we had a commented line, false : a data line or nothing.
00494     if(a_reader.tellg()>=a_sz) return false;
00495     //we should be at bol :
00496     char c;
00497     a_reader.get(c);
00498     if(c=='#') {
00499       return skip_line(a_reader,a_sz);
00500       //eol. Next char should be bol.
00501     } else {
00502       a_reader.putback(c);
00503       return false;
00504     }
00505   }
00506 
00507   bool _read_line() {
00508     // have to loop on all columns !
00509     typedef read::icol icol_t;
00510     typedef ntuple::column<double> cold_t;
00511     typedef ntuple::column<time_t> colt_t;
00512     typedef ntuple::column<std::string> cols_t;
00513     unsigned int index = 0;
00514     unsigned int num = m_cols.size();
00515     std::vector<icol_t*>::const_iterator it;
00516     for(it=m_cols.begin();it!=m_cols.end();++it,index++) {
00517       if(cold_t* cold = inlib::id_cast<icol_t,cold_t>(*(*it))) {
00518         double v;
00519         if(!_read(m_reader,m_sep,m_sz,v)) return false;
00520         cold->set_value(v);
00521       } else if(colt_t* colt = inlib::id_cast<icol_t,colt_t>(*(*it))) {
00522         time_t v;
00523         if(!_read(m_reader,m_sep,m_sz,v)) return false;
00524         colt->set_value(v);
00525       } else if(cols_t* cols = inlib::id_cast<icol_t,cols_t>(*(*it))) {
00526         std::string v;
00527         if(!_read(m_reader,m_sep,m_sz,v)) return false;
00528         cols->set_value(v);
00529       } else {
00530         //std::cout << "column cast failed." << std::endl;
00531         return false; 
00532       }
00533       if(index==(num-1)) { //read up to LF()
00534         char c;
00535         while(true){
00536           if(m_reader.tellg()>=m_sz) break;
00537           m_reader.get(c);
00538           if(c==LF()) break;
00539         }
00540       } else { //read sep :
00541         char sep;
00542         m_reader.get(sep);
00543       }
00544     }
00545     return true;
00546   }
00547 protected:
00548   std::istream& m_reader;
00549   char m_sep;
00550   std::vector<read::icol*> m_cols;
00551   std::streampos m_sz;
00552   bool m_hippo;
00553 };
00554 
00555 }}
00556 
00557 
00558 #include <fstream>
00559 
00560 namespace inlib {
00561 namespace rcsv {
00562 
00563 class fntuple : public ntuple {
00564 public:
00565   fntuple(const std::string& a_file)
00566   :ntuple(m_freader)
00567   ,m_file(a_file)
00568   {}
00569   virtual ~fntuple() {m_freader.close();}
00570 protected:
00571   fntuple(const fntuple& a_from)
00572   :read::intuple(a_from),ntuple(a_from)
00573   ,m_file(a_from.m_file)
00574   {}
00575   fntuple& operator=(const fntuple& a_from){
00576     m_file = a_from.m_file;
00577     return *this;
00578   }
00579 public:
00580   bool open(){
00581     m_freader.open(m_file.c_str());
00582     return m_freader.fail()?false:true;
00583   }
00584   bool initialize(std::ostream& a_out,
00585                          char a_sep = 0, //guessed
00586                          const std::string& a_suffix = "x", //col suffix
00587                          bool a_verbose = false) {
00588     if(!m_freader.is_open()) {
00589       m_freader.open(m_file.c_str());
00590       if(m_freader.fail()) {
00591         a_out << "inlib::rcsv::ntuple::initialize :"
00592               << " can't open " << m_file << "."
00593               << std::endl;
00594         return false;
00595       }
00596     }
00597     return ntuple::initialize(a_out,a_sep,a_suffix,a_verbose);
00598   }
00599 protected:
00600   std::string m_file;
00601   std::ifstream m_freader;
00602 };
00603 
00604 }}
00605 
00606 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines