inlib
1.2.0
|
00001 // Copyright (C) 2010, Guy Barrand. All rights reserved. 00002 // See the file inlib.license for terms. 00003 00004 #ifndef inlib_http 00005 #define inlib_http 00006 00007 // WARNING : still experimental. 00008 00009 //inheritance : 00010 #include "inet_socket" 00011 00012 #include "../typedefs" 00013 #include "../file" 00014 #include "../cstr" 00015 00016 namespace inlib { 00017 namespace net { 00018 00019 class http : public inet_socket { 00020 public: 00021 static bool parse(const std::string& a_url,std::string& a_host,std::string& a_path) { 00022 if(a_url.substr(0,7)!="http://") return false; 00023 std::string s = a_url.substr(7,a_url.size()-7); 00024 std::string::size_type pos = s.find('/'); 00025 if(pos==std::string::npos) return false; 00026 a_host = s.substr(0,pos); 00027 a_path = s.substr(pos,s.size()-pos); 00028 return true; 00029 } 00030 public: 00031 http(std::ostream& a_out,bool a_verbose) 00032 : inet_socket(a_out,a_verbose) 00033 {} 00034 virtual ~http(){} 00035 protected: 00036 http(const http& a_from): inet_socket(a_from){} 00037 http& operator=(const http&){return *this;} 00038 public: 00039 bool start(const std::string& a_host) { 00040 int port = 80; 00041 if(!connect(a_host,port,10,1)) return false; 00042 m_host = a_host; 00043 return true; 00044 } 00045 00046 bool fetch(const std::string& a_req,const std::string& a_local) { 00047 if(m_host.empty()) return false; 00048 std::string answer; 00049 00050 if(m_verbose) { 00051 m_out << "inlib::net::http::fetch :" 00052 << " fetch " << sout(a_req) 00053 << " to be put in " << sout(a_local) << "." 00054 << std::endl; 00055 } 00056 00057 //NOTE : use C str. std::string don't like CR,LF. 00058 {char* cmd = inlib::str_new(); 00059 inlib::str_cat(cmd,"GET "); 00060 //inlib::str_cat(cmd,"HEAD "); 00061 inlib::str_cat(cmd,a_req.c_str()); 00062 inlib::str_cat(cmd," HTTP/1.1"); 00063 inlib::str_cat(cmd,CR()); 00064 inlib::str_cat(cmd,LF()); 00065 00066 inlib::str_cat(cmd,"Host: "); 00067 inlib::str_cat(cmd,m_host.c_str()); 00068 inlib::str_cat(cmd,CR()); 00069 inlib::str_cat(cmd,LF()); 00070 /* 00071 inlib::str_cat(cmd,"Accept: *"); 00072 inlib::str_cat(cmd,'/'); 00073 inlib::str_cat(cmd,"*"); 00074 inlib::str_cat(cmd,CR()); 00075 inlib::str_cat(cmd,LF()); 00076 00077 inlib::str_cat(cmd,"User-Agent: inlib"); 00078 inlib::str_cat(cmd,CR()); 00079 inlib::str_cat(cmd,LF()); 00080 */ 00081 //end : 00082 inlib::str_cat(cmd,CR()); 00083 inlib::str_cat(cmd,LF()); 00084 if(m_verbose) { 00085 m_out << "inlib::net::http::fetch :" 00086 << " send_command : " << sout(std::string(cmd)) 00087 << std::endl; 00088 } 00089 if(!send_buffer(cmd,::strlen(cmd))) { 00090 m_out << "inlib::net::http::fetch :" 00091 << " send_buffer failed." 00092 << std::endl; 00093 inlib::str_del(cmd); 00094 return false; 00095 } 00096 inlib::str_del(cmd);} 00097 00098 std::string first_line; 00099 char* buffer; 00100 inlib::uint64 length; 00101 char* beg; // begin of document. 00102 char* end; // *end not necessary 0. 00103 if(!get_answer(first_line,buffer,length,beg,end)) return false; 00104 //WARNING : we have to delete [] buffer. 00105 00106 if(m_verbose) { 00107 m_out << "inlib::net::http::fetch :" 00108 << " received first line " << sout(first_line) << "." 00109 << std::endl; 00110 } 00111 00112 if(first_line=="HTTP/1.1 200 OK"){ 00113 if(m_verbose) { 00114 m_out << "inlib::net::http::fetch :" 00115 << " OK." 00116 << std::endl; 00117 } 00118 00119 inlib::uint64 ldoc = end-beg; 00120 if(m_verbose) { 00121 m_out << "inlib::net::http::fetch :" 00122 << " document length " << ldoc 00123 << std::endl; 00124 } 00125 00126 if(!inlib::file::write_bytes(a_local,beg,(size_t)ldoc)) { 00127 m_out << "inlib::net::http::fetch :" 00128 << "can't write local file " << inlib::sout(a_local) 00129 << std::endl; 00130 delete [] buffer; 00131 return false; 00132 } 00133 00134 if(m_verbose) { 00135 m_out << "inlib::net::http::fetch :" 00136 << " write local file " << inlib::sout(a_local) << " ok." 00137 << std::endl; 00138 } 00139 00140 delete [] buffer; 00141 return true; 00142 } 00143 00144 // troubles : 00145 *end = 0; //we have added an extra char when detting data. 00146 std::string doc(beg); 00147 delete [] buffer; 00148 00149 if(first_line=="HTTP/1.1 301 Moved Permanently"){ 00150 m_out << "inlib::net::http::fetch :" 00151 << " received a 301 message." 00152 << " Document " << sout(a_req) << " moved." 00153 << std::endl; 00154 00155 m_out << "inlib::net::http::fetch :" 00156 << " full message :" << std::endl 00157 << doc 00158 << std::endl; 00159 00160 } else if(first_line=="HTTP/1.1 404 Not Found"){ 00161 m_out << "inlib::net::http::fetch :" 00162 << " received a 404 message." 00163 << " Document " << sout(a_req) << " not found." 00164 << std::endl; 00165 m_out << "inlib::net::http::fetch :" 00166 << " full message :" << std::endl 00167 << doc 00168 << std::endl; 00169 00170 } else if(first_line=="HTTP/1.1 400 Bad Request"){ 00171 m_out << "inlib::net::http::fetch :" 00172 << " received a 400 message." 00173 << " " << sout(a_req) << " is a bad request." 00174 << std::endl; 00175 m_out << "inlib::net::http::fetch :" 00176 << " full message :" << std::endl 00177 << doc 00178 << std::endl; 00179 00180 } else { 00181 m_out << "inlib::net::http::fetch :" 00182 << " first line " << sout(first_line) << " not treated." 00183 << std::endl; 00184 m_out << "inlib::net::http::fetch :" 00185 << " full message :" << std::endl 00186 << answer 00187 << std::endl; 00188 00189 } 00190 00191 return false; 00192 } 00193 protected: 00194 static char LF() {return 10;} 00195 static char CR() {return 13;} 00196 00197 bool get_answer(std::string& a_first_line,char*& a_buffer,inlib::uint64& a_length,char*& a_beg,char*& a_end) { 00198 char* buffer; 00199 inlib::uint64 length; 00200 char* beg_doc; 00201 if(!get_data(buffer,length,beg_doc)) { 00202 m_out << "inlib::net::http::get_answer :" 00203 << " get_data failed." 00204 << std::endl; 00205 a_first_line.clear(); 00206 a_buffer = 0; 00207 a_length = 0; 00208 a_beg = 0; 00209 a_end = 0; 00210 return false; 00211 } 00212 if(!length) { 00213 m_out << "inlib::net::http::get_answer :" 00214 << " get nothing from server." 00215 << std::endl; 00216 a_first_line.clear(); 00217 a_buffer = 0; 00218 a_length = 0; 00219 a_beg = 0; 00220 a_end = 0; 00221 return false; //expect anyway something. 00222 } 00223 00224 if(!beg_doc) { 00225 m_out << "inlib::net::http::get_answer :" 00226 << " no begin of document." 00227 << std::endl; 00228 a_first_line.clear(); 00229 a_buffer = 0; 00230 a_length = 0; 00231 a_beg = 0; 00232 a_end = 0; 00233 return false; //expect anyway something. 00234 } 00235 00236 if(m_verbose) { 00237 m_out << "inlib::net::http::get_answer : length " << length << std::endl; 00238 } 00239 00240 // get first line : 00241 {char* end = 0; 00242 char* pos = buffer; 00243 char* pend = buffer+length; 00244 for(;pos!=pend;pos++) { 00245 if((*pos)==CR()) { 00246 end = pos; 00247 break; 00248 } 00249 } 00250 if(!end) { 00251 m_out << "inlib::net::http::get_answer :" 00252 << " first line end not found." 00253 << std::endl; 00254 delete [] buffer; 00255 a_first_line.clear(); 00256 a_buffer = 0; 00257 a_length = 0; 00258 a_beg = 0; 00259 a_end = 0; 00260 return false; 00261 } 00262 char c_end = *end; 00263 *end = 0; 00264 a_first_line = buffer; 00265 *end = c_end;} 00266 00267 /* 00268 // get header : 00269 {char* bh = beg_doc-4; 00270 char c_bh = *bh; 00271 *bh = 0; 00272 std::string header = buffer; 00273 *bh = c_bh; 00274 m_out << "header :" << std::endl 00275 << header 00276 << std::endl;} 00277 */ 00278 00279 char* end = buffer+length; //*end not necessary 0. 00280 00281 if(end<beg_doc) { 00282 m_out << "inlib::http::fetch :" 00283 << " strange answer end<beg_doc." 00284 << std::endl; 00285 delete [] buffer; 00286 a_first_line.clear(); 00287 a_buffer = 0; 00288 a_length = 0; 00289 a_beg = 0; 00290 a_end = 0; 00291 return false; 00292 } 00293 00294 a_buffer = buffer; 00295 a_length = length; 00296 a_beg = beg_doc; 00297 a_end = end; 00298 00299 return true; 00300 } 00301 00302 bool get_data(char*& a_buffer,inlib::uint64& a_length,char*& a_beg_doc){ 00303 a_buffer = 0; 00304 a_length = 0; 00305 a_beg_doc = 0; 00306 if(m_socket==(-1)) return false; 00307 00308 unsigned int cl; 00309 bool found_cl = false; 00310 char* pos_search_cl = 0; 00311 00312 bool found_start = false; 00313 char* pos_search_start = 0; 00314 00315 unsigned int to_read = 4096; //sufficient to pass the header. 00316 unsigned int got = 0; 00317 00318 unsigned int BLOCK = 65536; 00319 char* buf = new char[BLOCK]; 00320 if(!buf) return false; 00321 00322 while(true) { 00323 inlib::uint64 to_get = mn(BLOCK,to_read-got); 00324 if(!to_get) { 00325 if(m_verbose) { 00326 m_out << "inlib::base_socket::get_data :" 00327 << " finish." 00328 << std::endl; 00329 } 00330 if(!found_start) { 00331 m_out << "inlib::base_socket::get_data :" 00332 << " finish but begin of documen not found." 00333 << std::endl; 00334 disconnect(); 00335 delete [] buf; 00336 delete [] a_buffer; 00337 a_buffer = 0; 00338 a_length = 0; 00339 a_beg_doc = 0; 00340 return false; 00341 } 00342 delete [] buf; 00343 return true; 00344 } 00345 00346 //m_out << "inlib::base_socket::get_data :" 00347 // << " recv... " 00348 // << std::endl; 00349 #ifdef WIN32 00350 int 00351 #else 00352 ssize_t 00353 #endif 00354 num_char = ::recv(m_socket,buf,(size_t)to_get,0); 00355 if(num_char<0) { 00356 m_out << "inlib::base_socket::get_data :" 00357 << " recv : " << serror() 00358 << std::endl; 00359 disconnect(); 00360 delete [] buf; 00361 delete [] a_buffer; 00362 a_buffer = 0; 00363 a_length = 0; 00364 a_beg_doc = 0; 00365 return false; 00366 } 00367 if(num_char==0) { 00368 m_out << "inlib::base_socket::get_data :" 00369 << " recv : returned 0." 00370 << std::endl; 00371 00372 /* 00373 *(a_buffer+a_length) = 0; 00374 //printf("debug : \n%s\n",a_buffer); 00375 printf("debug : xxx :\n"); 00376 {for(char* pos=a_buffer;*pos;pos++) { 00377 char c = *pos; 00378 if(is_printable(c)) printf("%c",c); 00379 else printf("%d",c); 00380 } 00381 printf("\n");} 00382 */ 00383 00384 disconnect(); 00385 delete [] buf; 00386 delete [] a_buffer; 00387 a_buffer = 0; 00388 a_length = 0; 00389 a_beg_doc = 0; 00390 return false; 00391 } 00392 00393 if(m_verbose) { 00394 m_out << "inlib::base_socket::get_data :" 00395 << " recv : " << (int)num_char 00396 << std::endl; 00397 } 00398 00399 if(!a_buffer) { 00400 //+1 so that "final end" points to something valid. 00401 a_buffer = new char[num_char+1]; 00402 if(!a_buffer) { 00403 delete [] buf; 00404 a_length = 0; 00405 a_beg_doc = 0; 00406 return false; 00407 } 00408 ::memcpy(a_buffer,buf,num_char); 00409 a_length = num_char; 00410 pos_search_cl = a_buffer; 00411 pos_search_start = a_buffer; 00412 } else { 00413 unsigned long x_cl = pos_search_cl-a_buffer; 00414 unsigned long x_start = pos_search_start-a_buffer; 00415 00416 unsigned long x_beg_doc = 0; 00417 if(a_beg_doc) x_beg_doc = a_beg_doc-a_buffer; 00418 00419 //+1 so that "final end" points to something valid. 00420 char* b = new char[(size_t)(a_length+num_char+1)]; 00421 if(!b) { 00422 delete [] buf; 00423 delete [] a_buffer; 00424 a_buffer = 0; 00425 a_length = 0; 00426 a_beg_doc = 0; 00427 return false; 00428 } 00429 ::memcpy(b,a_buffer,(size_t)a_length); 00430 ::memcpy(b+a_length,buf,num_char); 00431 delete [] a_buffer; 00432 a_buffer = b; 00433 a_length += num_char; 00434 00435 pos_search_cl = a_buffer + x_cl; 00436 pos_search_start = a_buffer + x_start; 00437 00438 if(a_beg_doc) a_beg_doc = a_buffer + x_beg_doc; 00439 } 00440 00441 if(!found_cl) { 00442 while(true) { 00443 char* end = 0; 00444 char* pos = pos_search_cl; 00445 char* pend = a_buffer+a_length; 00446 for(;pos!=pend;pos++) { 00447 if((*pos)==CR()) { 00448 end = pos; 00449 break; 00450 } 00451 } 00452 if(!end) break; 00453 char c_end = *end; 00454 *end = 0; //transform temporarily to C string. 00455 pos = ::strstr(pos_search_cl,"Content-Length: "); 00456 if(pos) { //found it ! 00457 if(::sscanf(pos+16,"%u",&cl)!=1) { 00458 m_out << "inlib::base_socket::get_data :" 00459 << " sscanf error." 00460 << std::endl; 00461 disconnect(); 00462 delete [] buf; 00463 delete [] a_buffer; 00464 a_buffer = 0; 00465 a_length = 0; 00466 return false; 00467 } 00468 if(m_verbose) { 00469 m_out << "inlib::base_socket::get_data :" 00470 << " found content-length " << cl 00471 << std::endl; 00472 } 00473 found_cl = true; 00474 *end = c_end; 00475 pos_search_start = end+1; 00476 break; 00477 } 00478 *end = c_end; 00479 pos_search_cl = end+1; 00480 } 00481 00482 } 00483 00484 if(found_cl && !found_start) { 00485 // We have the document length. Search the beginning : 00486 if((a_length-(pos_search_start-a_buffer))>=4) { 00487 char* pos = pos_search_start; 00488 //the +1 is needed to scan end of buffer. (case of empty document). 00489 char* pend = a_buffer+a_length+1; 00490 char c1 = *pos;pos++; 00491 char c2 = *pos;pos++; 00492 char c3 = *pos;pos++; 00493 char c4 = *pos;pos++; 00494 for(;pos!=pend;pos++) { 00495 //{if(is_printable(c1)) printf("%c",c1); 00496 // else printf("%d",c1);} 00497 //{if(is_printable(c2)) printf("%c",c2); 00498 // else printf("%d",c2);} 00499 //{if(is_printable(c3)) printf("%c",c3); 00500 // else printf("%d",c3);} 00501 //{if(is_printable(c4)) printf("%c",c4); 00502 // else printf("%d",c4);} 00503 //printf("|\n"); 00504 00505 if((c1==CR())&&(c2==LF())&&(c3==CR())&&(c4==LF())){ 00506 if(m_verbose) { 00507 m_out << "inlib::http::get_data :" 00508 << " begin document found." 00509 << std::endl; 00510 } 00511 found_start = true; 00512 a_beg_doc = pos; 00513 break; 00514 } 00515 c1 = c2; 00516 c2 = c3; 00517 c3 = c4; 00518 c4 = *pos; 00519 } 00520 } 00521 } 00522 00523 if(found_start) { 00524 00525 // have cl and found beg_doc ! 00526 to_read = (a_beg_doc-a_buffer)+cl; 00527 00528 } 00529 00530 got += num_char; 00531 } 00532 } 00533 00534 protected: 00535 std::string m_host; 00536 }; 00537 00538 }} 00539 00540 #endif