第二个是用的socket的来获取源码 C++代码 //通过GET获取网页源码 string GetHtmlByGet(string url) { string strHtmlContent = ""; int sockfd; struct sockaddr_in addr; struct hostent *pURL; char text[RECVBUF]; //分析链接 UrlInfo urlInfo = ParseURL(url); string sAccept = "Accept: **\r\nAccept-Language: zh-cn\r\nAccept-Encoding: gzip, deflate"; //不同的主机UserAgent不同 string sUserAgent = "Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10"; //将端口转换为字符串 char t[6]; string strPort; sprintf(t,"%d", urlInfo.Port); strPort = t; //构造发送字符串 string strRequest = ""; strRequest.append("GET "); strRequest.append(urlInfo.File); strRequest.append("?"); strRequest.append(urlInfo.Body); strRequest.append(" HTTP/1.1\r\n"); strRequest.append(sAccept); strRequest.append("\r\nUser-Agent:"); strRequest.append(sUserAgent); strRequest.append("\r\nHost:"); strRequest.append(urlInfo.Host); strRequest.append(":"); strRequest.append(strPort); strRequest.append("\r\nConnection: Keep-Alive\r\n\r\n");
char* host = const_cast<char*>(urlInfo.Host.c_str()); sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); //TCP方式发送 pURL = gethostbyname(host); addr.sin_family = AF_INET; addr.sin_addr.s_addr = *((unsigned long*)pURL->h_addr); addr.sin_port = htons(80);
//连接 connect(sockfd,(struct sockaddr *)&addr,sizeof(addr)); //发送 send(sockfd, const_cast<char*>(strRequest.c_str()), strRequest.length(), 0); //接受 while(recv(sockfd, text, RECVBUF, 0) > 0) { strHtmlContent.append(text); bzero(text,RECVBUF); } //关闭socket close(sockfd); //返回接受结果 return strHtmlContent; }
使用libcurl Java代码 #include <stdio.h> #include <string.h> #include <curl/curl.h> #define MAX_BUF 65536 char wr_buf[MAX_BUF+1]; int wr_index; size_t write_data( void *buffer, size_t size, size_t nmemb, void *userp ) { int segsize = size * nmemb; if ( wr_index + segsize > MAX_BUF ) { *(int *)userp = 1; return 0; } memcpy( (void *)&wr_buf[wr_index], buffer, (size_t)segsize ); wr_index += segsize; wr_buf[wr_index] = 0; return segsize; } int main( void ) { CURL *curl; CURLcode ret; int wr_error; wr_error = 0; wr_index = 0; curl = curl_easy_init(); if (!curl) { printf("couldn't init curl\n"); return 0; } curl_easy_setopt( curl, CURLOPT_URL, "www.exampledomain.com" ); curl_easy_setopt( curl, CURLOPT_WRITEDATA, (void *)&wr_error ); curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, write_data ); ret = curl_easy_perform( curl ); printf( "ret = %d (write_error = %d)\n", ret, wr_error ); if ( ret == 0 ) printf( "%s\n", wr_buf ); curl_easy_cleanup( curl ); return 0; }