Linux下C语言用socket获取网页源码

xiaoxiao2021-02-28  75

// Approach 1: fetch the page source with a command-line tool available on
// Linux. wget is used here; curl would work too and is more flexible because
// it exposes many more options.

// Wraps `text` in single quotes for /bin/sh, escaping embedded single quotes,
// so the shell passes it to the command as exactly one literal argument.
static string ShellQuote(const string &text)
{
    string quoted = "'";
    for (string::size_type i = 0; i < text.size(); ++i)
    {
        if (text[i] == '\'')
        {
            quoted.append("'\\''"); // close quote, escaped quote, reopen quote
        }
        else
        {
            quoted.push_back(text[i]);
        }
    }
    quoted.push_back('\'');
    return quoted;
}

// Fetch a web page by running wget and reading back the file it saved.
//
// url: address of the page; the text after the last '/' is taken as the
//      name of the file wget writes into the current directory.
// Returns the file's lines concatenated without line terminators, or an
// empty string when the URL has no file-name part, wget reports failure,
// or the downloaded file cannot be opened.
string GetHtmlByWget(string url)
{
    // find_last_of yields npos when there is no '/'; npos + 1 wraps to 0, so
    // the whole URL is then used as the file name.
    string fileName = url.substr(url.find_last_of("/") + 1);
    if (fileName == "")
    {
        return "";
    }

    // -q silences progress output; "--" ends option parsing so a URL that
    // starts with '-' is not mistaken for a switch.
    string strCom = "wget -q -- ";
    strCom.append(ShellQuote(url));
    const int wgetStatus = system(strCom.c_str());

    string strHtml = "";
    if (wgetStatus == 0)
    {
        ifstream fin(fileName.c_str());
        if (!fin)
        {
            return "";
        }
        // std::getline grows the string as needed, so long lines (common in
        // minified HTML) no longer abort the read the way a fixed 1024-byte
        // buffer did.
        string line;
        while (getline(fin, line))
        {
            strHtml.append(line);
        }
        fin.close();
    }

    // Remove the downloaded file (or any partial download); -f suppresses
    // prompts and errors for a missing file.
    strCom = "rm -f -- ";
    strCom.append(ShellQuote(fileName));
    system(strCom.c_str());

    return strHtml;
}

第二个是用的socket的来获取源码 C++代码  //通过GET获取网页源码   string GetHtmlByGet(string url)   {       string strHtmlContent = "";       int sockfd;       struct sockaddr_in addr;       struct hostent *pURL;       char text[RECVBUF];         //分析链接       UrlInfo urlInfo = ParseURL(url);       string sAccept = "Accept: **\r\nAccept-Language: zh-cn\r\nAccept-Encoding: gzip, deflate";     //不同的主机UserAgent不同     string sUserAgent = "Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";     //将端口转换为字符串     char t[6];     string  strPort;     sprintf(t,"%d", urlInfo.Port);     strPort = t;     //构造发送字符串     string strRequest = "";     strRequest.append("GET ");     strRequest.append(urlInfo.File);     strRequest.append("?");     strRequest.append(urlInfo.Body);     strRequest.append(" HTTP/1.1\r\n");     strRequest.append(sAccept);     strRequest.append("\r\nUser-Agent:");     strRequest.append(sUserAgent);     strRequest.append("\r\nHost:");     strRequest.append(urlInfo.Host);     strRequest.append(":");     strRequest.append(strPort);     strRequest.append("\r\nConnection: Keep-Alive\r\n\r\n");

    char* host = const_cast<char*>(urlInfo.Host.c_str());     sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); //TCP方式发送     pURL = gethostbyname(host);     addr.sin_family = AF_INET;     addr.sin_addr.s_addr = *((unsigned long*)pURL->h_addr);     addr.sin_port = htons(80);

    //连接     connect(sockfd,(struct sockaddr *)&addr,sizeof(addr));     //发送     send(sockfd, const_cast<char*>(strRequest.c_str()), strRequest.length(), 0);     //接受     while(recv(sockfd, text, RECVBUF, 0) > 0)     {         strHtmlContent.append(text);         bzero(text,RECVBUF);     }     //关闭socket     close(sockfd);     //返回接受结果     return strHtmlContent; }

使用libcurl Java代码   #include <stdio.h>     #include <string.h>     #include <curl/curl.h>       #define MAX_BUF     65536       char wr_buf[MAX_BUF+1];     int  wr_index;            size_t write_data( void *buffer, size_t size, size_t nmemb, void *userp )     {      int segsize = size * nmemb;              if ( wr_index + segsize > MAX_BUF ) {        *(int *)userp = 1;        return 0;      }              memcpy( (void *)&wr_buf[wr_index], buffer, (size_t)segsize );              wr_index += segsize;              wr_buf[wr_index] = 0;              return segsize;     }              int main( void )     {      CURL *curl;      CURLcode ret;      int  wr_error;        wr_error = 0;      wr_index = 0;              curl = curl_easy_init();      if (!curl) {        printf("couldn't init curl\n");        return 0;      }              curl_easy_setopt( curl, CURLOPT_URL, "www.exampledomain.com" );              curl_easy_setopt( curl, CURLOPT_WRITEDATA, (void *)&wr_error );      curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, write_data );              ret = curl_easy_perform( curl );        printf( "ret = %d (write_error = %d)\n", ret, wr_error );              if ( ret == 0 ) printf( "%s\n", wr_buf );        curl_easy_cleanup( curl );        return 0;     }   
转载请注明原文地址: https://www.6miu.com/read-81524.html

最新回复(0)