inlib  1.2.0
/Users/barrand/private/dev/softinex/old/inexlib-1.2/inlib/inlib/net/http
Go to the documentation of this file.
00001 // Copyright (C) 2010, Guy Barrand. All rights reserved.
00002 // See the file inlib.license for terms.
00003 
00004 #ifndef inlib_http
00005 #define inlib_http
00006 
00007 // WARNING : still experimental.
00008 
00009 //inheritance :
00010 #include "inet_socket"
00011 
00012 #include "../typedefs"
00013 #include "../file"
00014 #include "../cstr"
00015 
00016 namespace inlib {
00017 namespace net {
00018 
00019 class http : public inet_socket {
00020 public:
00021   static bool parse(const std::string& a_url,std::string& a_host,std::string& a_path) {
00022     if(a_url.substr(0,7)!="http://") return false;
00023     std::string s = a_url.substr(7,a_url.size()-7);
00024     std::string::size_type pos = s.find('/');
00025     if(pos==std::string::npos) return false;
00026     a_host = s.substr(0,pos);
00027     a_path = s.substr(pos,s.size()-pos);
00028     return true;
00029   }
00030 public:
00031   http(std::ostream& a_out,bool a_verbose)
00032   : inet_socket(a_out,a_verbose)
00033   {}
00034   virtual ~http(){}
00035 protected:
00036   http(const http& a_from): inet_socket(a_from){}
00037   http& operator=(const http&){return *this;}
00038 public:
00039   bool start(const std::string& a_host) {
00040     int port = 80;
00041     if(!connect(a_host,port,10,1)) return false;
00042     m_host = a_host;
00043     return true;
00044   }
00045   
00046   bool fetch(const std::string& a_req,const std::string& a_local) {
00047     if(m_host.empty()) return false;
00048     std::string answer;
00049 
00050     if(m_verbose) {
00051       m_out << "inlib::net::http::fetch :" 
00052             << " fetch " << sout(a_req)
00053             << " to be put in " << sout(a_local) << "."
00054             << std::endl;
00055     }
00056 
00057     //NOTE : use C str. std::string don't like CR,LF.
00058    {char* cmd = inlib::str_new();    
00059     inlib::str_cat(cmd,"GET ");
00060     //inlib::str_cat(cmd,"HEAD ");
00061     inlib::str_cat(cmd,a_req.c_str());
00062     inlib::str_cat(cmd," HTTP/1.1");
00063     inlib::str_cat(cmd,CR());
00064     inlib::str_cat(cmd,LF());
00065 
00066     inlib::str_cat(cmd,"Host: ");
00067     inlib::str_cat(cmd,m_host.c_str());
00068     inlib::str_cat(cmd,CR());
00069     inlib::str_cat(cmd,LF());
00070 /*
00071     inlib::str_cat(cmd,"Accept: *");
00072     inlib::str_cat(cmd,'/');
00073     inlib::str_cat(cmd,"*");
00074     inlib::str_cat(cmd,CR());
00075     inlib::str_cat(cmd,LF());
00076 
00077     inlib::str_cat(cmd,"User-Agent: inlib");
00078     inlib::str_cat(cmd,CR());
00079     inlib::str_cat(cmd,LF());
00080 */
00081     //end :
00082     inlib::str_cat(cmd,CR());
00083     inlib::str_cat(cmd,LF());
00084     if(m_verbose) {
00085       m_out << "inlib::net::http::fetch :"
00086             << " send_command : " << sout(std::string(cmd)) 
00087             << std::endl;
00088     }
00089     if(!send_buffer(cmd,::strlen(cmd))) {
00090       m_out << "inlib::net::http::fetch :"
00091             << " send_buffer failed." 
00092             << std::endl;
00093       inlib::str_del(cmd);
00094       return false;
00095     }
00096     inlib::str_del(cmd);}
00097 
00098     std::string first_line;
00099     char* buffer;
00100     inlib::uint64 length;
00101     char* beg; // begin of document.
00102     char* end; // *end not necessary 0.
00103     if(!get_answer(first_line,buffer,length,beg,end)) return false;
00104     //WARNING : we have to delete [] buffer.
00105 
00106     if(m_verbose) {
00107       m_out << "inlib::net::http::fetch :" 
00108             << " received first line " << sout(first_line) << "."
00109             << std::endl;
00110     }
00111 
00112     if(first_line=="HTTP/1.1 200 OK"){
00113       if(m_verbose) {
00114         m_out << "inlib::net::http::fetch :" 
00115               << " OK."
00116               << std::endl;
00117       }
00118 
00119       inlib::uint64 ldoc = end-beg;
00120       if(m_verbose) {
00121         m_out << "inlib::net::http::fetch :" 
00122               << " document length " << ldoc
00123               << std::endl;
00124       }      
00125 
00126       if(!inlib::file::write_bytes(a_local,beg,(size_t)ldoc)) {
00127         m_out << "inlib::net::http::fetch :" 
00128               << "can't write local file " << inlib::sout(a_local) 
00129               << std::endl;
00130         delete [] buffer;
00131         return false;
00132       }
00133 
00134       if(m_verbose) {
00135         m_out << "inlib::net::http::fetch :" 
00136               << " write local file " << inlib::sout(a_local) << " ok."
00137               << std::endl;
00138       }      
00139 
00140       delete [] buffer;
00141       return true;
00142     }
00143 
00144     // troubles :
00145     *end = 0;    //we have added an extra char when detting data.
00146     std::string doc(beg); 
00147     delete [] buffer;
00148   
00149     if(first_line=="HTTP/1.1 301 Moved Permanently"){
00150       m_out << "inlib::net::http::fetch :" 
00151             << " received a 301 message."
00152             << " Document " << sout(a_req) << " moved."
00153             << std::endl;
00154 
00155       m_out << "inlib::net::http::fetch :" 
00156             << " full message :" << std::endl
00157             << doc
00158             << std::endl;
00159 
00160     } else if(first_line=="HTTP/1.1 404 Not Found"){
00161       m_out << "inlib::net::http::fetch :" 
00162             << " received a 404 message."
00163             << " Document " << sout(a_req) << " not found."
00164             << std::endl;
00165       m_out << "inlib::net::http::fetch :" 
00166             << " full message :" << std::endl
00167             << doc
00168             << std::endl;
00169 
00170     } else if(first_line=="HTTP/1.1 400 Bad Request"){
00171       m_out << "inlib::net::http::fetch :" 
00172             << " received a 400 message."
00173             << " " << sout(a_req) << " is a bad request."
00174             << std::endl;
00175       m_out << "inlib::net::http::fetch :" 
00176             << " full message :" << std::endl
00177             << doc
00178             << std::endl;
00179 
00180     } else {
00181       m_out << "inlib::net::http::fetch :" 
00182             << " first line " << sout(first_line) << " not treated."
00183             << std::endl;
00184       m_out << "inlib::net::http::fetch :" 
00185             << " full message :" << std::endl
00186             << answer
00187             << std::endl;
00188 
00189     }
00190 
00191     return false;
00192   }
00193 protected:
00194   static char LF() {return 10;}
00195   static char CR() {return 13;}
00196 
00197   bool get_answer(std::string& a_first_line,char*& a_buffer,inlib::uint64& a_length,char*& a_beg,char*& a_end) {
00198     char* buffer;
00199     inlib::uint64 length;
00200     char* beg_doc;
00201     if(!get_data(buffer,length,beg_doc)) {
00202       m_out << "inlib::net::http::get_answer :"
00203             << " get_data failed." 
00204             << std::endl; 
00205       a_first_line.clear();
00206       a_buffer = 0;
00207       a_length = 0;
00208       a_beg = 0;
00209       a_end = 0;
00210       return false;
00211     }
00212     if(!length) {
00213       m_out << "inlib::net::http::get_answer :"
00214             << " get nothing from server." 
00215             << std::endl; 
00216       a_first_line.clear();
00217       a_buffer = 0;
00218       a_length = 0;
00219       a_beg = 0;
00220       a_end = 0;
00221       return false; //expect anyway something.
00222     }
00223 
00224     if(!beg_doc) {
00225       m_out << "inlib::net::http::get_answer :"
00226             << " no begin of document." 
00227             << std::endl; 
00228       a_first_line.clear();
00229       a_buffer = 0;
00230       a_length = 0;
00231       a_beg = 0;
00232       a_end = 0;
00233       return false; //expect anyway something.
00234     }
00235 
00236     if(m_verbose) {
00237       m_out << "inlib::net::http::get_answer : length " << length << std::endl;
00238     }
00239 
00240     // get first line :
00241    {char* end = 0;
00242     char* pos = buffer;
00243     char* pend = buffer+length;
00244     for(;pos!=pend;pos++) {
00245       if((*pos)==CR()) {
00246         end = pos;
00247         break;
00248       }
00249     }
00250     if(!end) {
00251       m_out << "inlib::net::http::get_answer :"
00252             << " first line end not found." 
00253             << std::endl; 
00254       delete [] buffer;
00255       a_first_line.clear();
00256       a_buffer = 0;
00257       a_length = 0;
00258       a_beg = 0;
00259       a_end = 0;
00260       return false;
00261     }
00262     char c_end = *end;
00263     *end = 0;
00264     a_first_line = buffer;
00265     *end = c_end;}
00266 
00267     /*
00268     // get header :
00269    {char* bh = beg_doc-4;
00270     char c_bh = *bh;
00271     *bh = 0;
00272     std::string header = buffer;
00273     *bh = c_bh;
00274     m_out << "header :" << std::endl
00275           << header
00276           << std::endl;}
00277     */
00278  
00279     char* end = buffer+length; //*end not necessary 0.
00280 
00281     if(end<beg_doc) {
00282       m_out << "inlib::http::fetch :" 
00283             << " strange answer end<beg_doc."
00284             << std::endl;
00285       delete [] buffer;
00286       a_first_line.clear();
00287       a_buffer = 0;
00288       a_length = 0;
00289       a_beg = 0;
00290       a_end = 0;
00291       return false;
00292     }
00293 
00294     a_buffer = buffer;
00295     a_length = length;
00296     a_beg = beg_doc;
00297     a_end = end;
00298 
00299     return true;
00300   }
00301   
00302   bool get_data(char*& a_buffer,inlib::uint64& a_length,char*& a_beg_doc){
00303     a_buffer = 0;
00304     a_length = 0;    
00305     a_beg_doc = 0;
00306     if(m_socket==(-1)) return false;
00307 
00308     unsigned int cl;
00309     bool found_cl = false;
00310     char* pos_search_cl = 0;
00311 
00312     bool found_start = false;
00313     char* pos_search_start = 0;
00314 
00315     unsigned int to_read = 4096; //sufficient to pass the header.
00316     unsigned int got = 0;
00317 
00318     unsigned int BLOCK = 65536;
00319     char* buf = new char[BLOCK];
00320     if(!buf) return false;
00321 
00322     while(true) {
00323       inlib::uint64 to_get = mn(BLOCK,to_read-got);
00324       if(!to_get) {
00325         if(m_verbose) {
00326           m_out << "inlib::base_socket::get_data :"
00327                 << " finish."
00328                 << std::endl;
00329         }
00330         if(!found_start) {
00331           m_out << "inlib::base_socket::get_data :"
00332                 << " finish but begin of documen not found."
00333                 << std::endl;
00334           disconnect();
00335           delete [] buf;
00336           delete [] a_buffer;
00337           a_buffer = 0;
00338           a_length = 0;    
00339           a_beg_doc = 0;
00340           return false;
00341         }
00342         delete [] buf;
00343         return true;
00344       }
00345 
00346       //m_out << "inlib::base_socket::get_data :"
00347       //      << " recv... "
00348       //      << std::endl;
00349 #ifdef WIN32
00350       int
00351 #else
00352       ssize_t
00353 #endif
00354       num_char = ::recv(m_socket,buf,(size_t)to_get,0);
00355       if(num_char<0) {
00356         m_out << "inlib::base_socket::get_data :"
00357               << " recv : " << serror()
00358               << std::endl;
00359         disconnect();
00360         delete [] buf;
00361         delete [] a_buffer;
00362         a_buffer = 0;
00363         a_length = 0;    
00364         a_beg_doc = 0;
00365         return false;
00366       }
00367       if(num_char==0) {
00368         m_out << "inlib::base_socket::get_data :"
00369               << " recv : returned 0."
00370               << std::endl;
00371 
00372 /*
00373         *(a_buffer+a_length) = 0;
00374         //printf("debug : \n%s\n",a_buffer);        
00375         printf("debug : xxx :\n");        
00376        {for(char* pos=a_buffer;*pos;pos++) {
00377           char c = *pos;
00378           if(is_printable(c)) printf("%c",c);
00379           else printf("%d",c);
00380         }
00381         printf("\n");}
00382 */
00383 
00384         disconnect();
00385         delete [] buf;
00386         delete [] a_buffer;
00387         a_buffer = 0;
00388         a_length = 0;    
00389         a_beg_doc = 0;
00390         return false;
00391       }
00392 
00393       if(m_verbose) {
00394         m_out << "inlib::base_socket::get_data :"
00395               << " recv : " << (int)num_char
00396               << std::endl;
00397       }
00398   
00399       if(!a_buffer) {
00400         //+1 so that "final end" points to something valid.
00401         a_buffer = new char[num_char+1];
00402         if(!a_buffer) {
00403           delete [] buf;    
00404           a_length = 0;    
00405           a_beg_doc = 0;
00406           return false;
00407         }
00408         ::memcpy(a_buffer,buf,num_char);
00409         a_length = num_char;
00410         pos_search_cl = a_buffer;
00411         pos_search_start = a_buffer;
00412       } else {
00413         unsigned long x_cl = pos_search_cl-a_buffer;
00414         unsigned long x_start = pos_search_start-a_buffer;
00415 
00416         unsigned long x_beg_doc = 0;
00417         if(a_beg_doc) x_beg_doc = a_beg_doc-a_buffer;
00418 
00419         //+1 so that "final end" points to something valid.
00420         char* b = new char[(size_t)(a_length+num_char+1)];
00421         if(!b) {
00422           delete [] buf;    
00423           delete [] a_buffer;
00424           a_buffer = 0;
00425           a_length = 0;    
00426           a_beg_doc = 0;
00427           return false;
00428         }
00429         ::memcpy(b,a_buffer,(size_t)a_length);
00430         ::memcpy(b+a_length,buf,num_char);
00431         delete [] a_buffer;
00432         a_buffer = b;
00433         a_length += num_char;
00434 
00435         pos_search_cl = a_buffer + x_cl;
00436         pos_search_start = a_buffer + x_start;
00437 
00438         if(a_beg_doc) a_beg_doc = a_buffer + x_beg_doc;
00439       }
00440 
00441       if(!found_cl) {
00442         while(true) {
00443           char* end = 0;
00444           char* pos = pos_search_cl;
00445           char* pend = a_buffer+a_length;
00446           for(;pos!=pend;pos++) {
00447             if((*pos)==CR()) {
00448               end = pos;
00449               break;
00450             }
00451           }
00452           if(!end) break;
00453           char c_end = *end;
00454           *end = 0; //transform temporarily to C string.
00455           pos = ::strstr(pos_search_cl,"Content-Length: ");
00456           if(pos) { //found it !
00457             if(::sscanf(pos+16,"%u",&cl)!=1) {
00458               m_out << "inlib::base_socket::get_data :"
00459                     << " sscanf error."
00460                     << std::endl;              
00461               disconnect();
00462               delete [] buf;
00463               delete [] a_buffer;
00464               a_buffer = 0;
00465               a_length = 0;    
00466               return false;
00467             }
00468             if(m_verbose) {
00469               m_out << "inlib::base_socket::get_data :"
00470                     << " found content-length " << cl
00471                     << std::endl;              
00472             }
00473             found_cl = true;
00474             *end = c_end;
00475             pos_search_start = end+1;
00476             break;
00477           }
00478           *end = c_end;
00479           pos_search_cl = end+1;
00480         }
00481 
00482       } 
00483 
00484       if(found_cl && !found_start) {
00485         // We have the document length. Search the beginning :
00486         if((a_length-(pos_search_start-a_buffer))>=4) {
00487           char* pos = pos_search_start;
00488           //the +1 is needed to scan end of buffer. (case of empty document).
00489           char* pend = a_buffer+a_length+1;
00490           char c1 = *pos;pos++;
00491           char c2 = *pos;pos++;
00492           char c3 = *pos;pos++;
00493           char c4 = *pos;pos++;
00494           for(;pos!=pend;pos++) {
00495             //{if(is_printable(c1)) printf("%c",c1);
00496             // else printf("%d",c1);}
00497             //{if(is_printable(c2)) printf("%c",c2);
00498             // else printf("%d",c2);}
00499             //{if(is_printable(c3)) printf("%c",c3);
00500             // else printf("%d",c3);}
00501             //{if(is_printable(c4)) printf("%c",c4);
00502             // else printf("%d",c4);}
00503             //printf("|\n");
00504 
00505             if((c1==CR())&&(c2==LF())&&(c3==CR())&&(c4==LF())){
00506               if(m_verbose) {
00507                 m_out << "inlib::http::get_data :" 
00508                       << " begin document found."
00509                       << std::endl;
00510               }
00511               found_start = true;
00512               a_beg_doc = pos;
00513               break;
00514             }
00515             c1 = c2;
00516             c2 = c3;
00517             c3 = c4;
00518             c4 = *pos;
00519           }
00520         }
00521       }
00522 
00523       if(found_start) {
00524 
00525         // have cl and found beg_doc !
00526         to_read = (a_beg_doc-a_buffer)+cl;
00527 
00528       }
00529 
00530       got += num_char;
00531     }
00532   }
00533 
00534 protected:
00535   std::string m_host;
00536 };
00537 
00538 }}
00539 
00540 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines