/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

/*
 * File:   main.cpp
 * Author: yangchao
 *
 * Minimal HTTP page fetcher: splits a URL into host and path, sends a plain
 * HTTP GET to port 80 of that host and prints the response body.
 *
 * NOTE(review): the copy of this file that was reviewed had every "<...>"
 * span stripped by an HTML extractor. The header names, the socket-address
 * setup, the request/connect/send code and the body of main() were lost and
 * have been reconstructed here; confirm them against the original source.
 */

#include <netdb.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <unistd.h>

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>

using namespace std;

/**
 * Removes `prefix` from the front of `text` when `text` starts with it.
 *
 * @param text   string to edit in place
 * @param prefix NUL-terminated prefix to look for
 * @return true if the prefix was present and has been removed
 */
static bool stripPrefix(string &text, const char *prefix) {
    const string::size_type prefixLen = strlen(prefix);
    if (text.compare(0, prefixLen, prefix) != 0) {
        return false;
    }
    text.erase(0, prefixLen);
    return true;
}

/**
 * Splits a URL into its host part and its page path.
 *
 * A leading "http://" or "https://" is dropped. Only a scheme at the very
 * start of the URL is removed, so a URL embedded later in the string (for
 * example inside a query parameter) is left untouched. Everything from the
 * first '/' onwards becomes the page path; when there is no '/', the path
 * defaults to "/".
 *
 * @param url      URL to split, with or without a scheme
 * @param hostUrl  receives the host part (no scheme, no path)
 * @param pagePath receives the path, always starting with '/'
 */
void parseHostAndPagePath(const string &url, string &hostUrl, string &pagePath) {
    // `url` is read exactly once, before either output is modified, so the
    // function stays correct even if an output argument aliases `url`.
    hostUrl = url;
    pagePath = "/";

    if (!stripPrefix(hostUrl, "http://")) {
        stripPrefix(hostUrl, "https://");
    }

    // Compare against npos in the unsigned size type rather than narrowing
    // the result of find() to int.
    const string::size_type slash = hostUrl.find('/');
    if (slash != string::npos) {
        pagePath = hostUrl.substr(slash);
        hostUrl.erase(slash);
    }
}

/**
 * Downloads a page over plain HTTP and returns the response body.
 *
 * The host is resolved with gethostbyname() and contacted on port 80. The
 * response headers are discarded; everything after the first empty line is
 * returned. On a resolution, socket, connect or send failure a message is
 * printed to standard output and the process exits with status 1.
 *
 * Limitations: the connection is always unencrypted and always to port 80,
 * so "https://" URLs and "host:port" URLs are not really supported.
 *
 * @param url URL of the page to fetch
 * @return the bytes of the response body (may be empty)
 */
string getPageContent(const string &url) {
    string hostUrl, pagePath;
    parseHostAndPagePath(url, hostUrl, pagePath);

    struct hostent *host = gethostbyname(hostUrl.c_str());
    if (host == nullptr || host->h_addrtype != AF_INET ||
        host->h_addr_list[0] == nullptr) {
        cout << "gethostbyname error\n" << endl;
        exit(1);
    }

    // NOTE(review): reconstructed -- the original address setup was lost in
    // extraction; only the final assignment from host->h_addr survived.
    struct sockaddr_in pin;
    memset(&pin, 0, sizeof(pin));
    pin.sin_family = AF_INET;
    pin.sin_port = htons(80);
    // Copy the address bytes instead of casting the char* to in_addr*, which
    // avoids relying on the alignment of the resolver's buffer.
    memcpy(&pin.sin_addr, host->h_addr_list[0], sizeof(pin.sin_addr));

    const int isock = socket(AF_INET, SOCK_STREAM, 0);
    if (isock == -1) {
        cout << "open socket error\n" << endl;
        exit(1);
    }

    // NOTE(review): everything from here to the header-skipping loop is a
    // reconstruction of code lost in extraction.

    // Best effort: bound each recv() so a stalled server cannot block forever.
    struct timeval timeout;
    timeout.tv_sec = 5;
    timeout.tv_usec = 0;
    (void)setsockopt(isock, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout));

    // HTTP/1.0 with "Connection: close" makes the server send an unchunked
    // body and close the connection, which is what ends the read loop below.
    string request = "GET " + pagePath + " HTTP/1.0\r\n";
    request += "Host: " + hostUrl + "\r\n";
    request += "Accept: */*\r\n";
    request += "User-Agent: Mozilla/4.0 (compatible)\r\n";
    request += "Connection: close\r\n";
    request += "\r\n";

    if (connect(isock, reinterpret_cast<const struct sockaddr *>(&pin),
                sizeof(pin)) == -1) {
        cout << "connect error\n" << endl;
        close(isock);
        exit(1);
    }

    // send() may accept fewer bytes than requested, so loop until done.
    string::size_type sent = 0;
    while (sent < request.size()) {
        const ssize_t written =
            send(isock, request.data() + sent, request.size() - sent, 0);
        if (written <= 0) {
            cout << "send error\n" << endl;
            close(isock);
            exit(1);
        }
        sent += static_cast<string::size_type>(written);
    }

    // Skip the response headers one byte at a time: stop after the first
    // line that contains nothing but its line terminator.
    char c;
    bool lineHasContent = true;
    while (recv(isock, &c, 1, 0) > 0) {
        if (c == '\r') {
            continue;
        }
        if (c == '\n') {
            if (!lineHasContent) {
                break;
            }
            lineHasContent = false;
        } else {
            lineHasContent = true;
        }
    }

    // Read the body until the peer closes the connection, recv() fails or
    // the receive timeout expires.
    const size_t kBufferSize = 512;
    char buffer[kBufferSize];
    string pageContent;
    ssize_t len;
    while ((len = recv(isock, buffer, sizeof(buffer), 0)) > 0) {
        // Append by length so embedded NUL bytes do not truncate the content.
        pageContent.append(buffer, static_cast<string::size_type>(len));
    }

    close(isock);
    return pageContent;
}

/**
 * Fetches one page and writes its body to standard output.
 *
 * NOTE(review): the original body of main() was lost in extraction, including
 * the URL it fetched. This version takes the URL from the first command-line
 * argument and falls back to a placeholder address.
 */
int main(int argc, char **argv) {
    const string url = (argc > 1) ? argv[1] : "http://example.com/";
    cout << getPageContent(url) << endl;
    return 0;
}
以上这篇 Linux C++ 模拟简易网络爬虫实例就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持创新互联。
