您的位置:首页 > 理论基础 > 计算机网络

Linux下使用Socket实现http文件下载

2017-08-17 17:28 645 查看
//test.cpp
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <unistd.h>
#include <netdb.h>

// Absolute URL of the file to fetch; sent verbatim as the request target of the GET line.
#define TARGET_URL "http://seopic.699pic.com/photo/50010/8515.jpg_wh1200.jpg"
// Host name used both for the DNS lookup and for the HTTP "Host" header.
#define TARGET_HOST "seopic.699pic.com"
// TCP port the client connects to.
#define TARGET_PORT 80 //the default port 80

// Forward declarations of the helpers defined after main().
// Resolves TARGET_HOST and writes its dotted-decimal IPv4 address into ip_addr.
static void GetIPfromDNS(char* ip_addr);
// Parses the status code, Content-Type and Content-Length out of a raw response header.
static void get_resp_header(const char *response, int *status_code, char*content_type, long* content_length);

/* Upper bound (including the terminating NUL) for the response header we accept. */
enum { HEADER_CAP = 8192 };

/*
 * Send exactly `len` bytes from `data` on `sock`.
 * send() may transmit fewer bytes than requested, so loop until done.
 * Returns 0 on success, -1 on error (after reporting it on stderr).
 */
static int send_all(int sock, const char *data, size_t len)
{
    size_t sent = 0;

    while (sent < len) {
        ssize_t n = send(sock, data + sent, len - sent, 0);
        if (n <= 0) {
            perror("send");
            return -1;
        }
        sent += (size_t)n;
    }
    return 0;
}

/*
 * Read the HTTP response header from `sock` into `buf` (capacity `cap`),
 * stopping right after the blank line ("\r\n\r\n") that ends the header.
 * Bytes are read one at a time so that no body data is consumed.
 * `buf` is always NUL-terminated.
 * Returns the header length in bytes, or -1 if the connection failed, closed
 * early, or the header does not fit into `buf`.
 */
static long read_response_header(int sock, char *buf, size_t cap)
{
    size_t used = 0;

    while (used + 1 < cap) {
        ssize_t n = recv(sock, buf + used, 1, 0);
        if (n < 0) {
            perror("recv");
            buf[used] = '\0';
            return -1;
        }
        if (n == 0)
            break; /* peer closed before the header was complete */
        used++;
        /* Checking the last four bytes also handles inputs such as
         * "\r\r\n\r\n", which a chain of nested comparisons would miss. */
        if (used >= 4 && memcmp(buf + used - 4, "\r\n\r\n", 4) == 0) {
            buf[used] = '\0';
            return (long)used;
        }
    }
    buf[used] = '\0';
    fprintf(stderr, "response header incomplete or larger than %zu bytes\n", cap - 1);
    return -1;
}

/*
 * Copy the response body from `sock` into the file `path`.
 * If `content_length` is >= 0 exactly that many bytes are expected; otherwise
 * data is read until the peer closes the connection.
 * Returns 0 on success, -1 on any I/O error or short body.
 */
static int save_body(int sock, const char *path, long content_length)
{
    /* O_TRUNC: without it a longer pre-existing file keeps its old tail. */
    int fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd < 0) {
        perror("open");
        return -1;
    }

    unsigned char chunk[4096];
    long total = 0;
    int rc = 0;

    while (rc == 0 && (content_length < 0 || total < content_length)) {
        size_t want = sizeof(chunk);
        /* Never read past the announced body length. */
        if (content_length >= 0 && (long)want > content_length - total)
            want = (size_t)(content_length - total);

        ssize_t got = recv(sock, chunk, want, 0);
        if (got < 0) {
            perror("recv");
            rc = -1;
            break;
        }
        if (got == 0)
            break; /* peer closed the connection */

        /* write() may be partial; loop until the whole chunk is on disk. */
        size_t off = 0;
        while (off < (size_t)got) {
            ssize_t w = write(fd, chunk + off, (size_t)got - off);
            if (w <= 0) {
                perror("write");
                rc = -1;
                break;
            }
            off += (size_t)w;
        }
        if (rc == 0)
            total += (long)got;
    }

    if (rc == 0 && content_length >= 0 && total < content_length) {
        fprintf(stderr, "connection closed after %ld of %ld bytes\n", total, content_length);
        rc = -1;
    }
    if (close(fd) != 0) {
        perror("close");
        rc = -1;
    }
    return rc;
}

/*
 * Run the whole download over the already created socket `sock`:
 * resolve and connect, send the GET request, read and print the header,
 * then store the body in "mydownload.jpg".
 * Returns 0 on success, -1 on failure. The caller owns and closes `sock`.
 */
static int download(int sock)
{
    /* Start with an empty string so a failed lookup is detectable even if the
     * resolver leaves the buffer untouched. */
    char ip_addr[64] = "";
    GetIPfromDNS(ip_addr);
    if (ip_addr[0] == '\0') {
        fprintf(stderr, "cannot resolve host %s\n", TARGET_HOST);
        return -1;
    }

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(TARGET_PORT);
    if (inet_pton(AF_INET, ip_addr, &addr.sin_addr) != 1) {
        fprintf(stderr, "invalid IPv4 address: %s\n", ip_addr);
        return -1;
    }
    if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) == -1) {
        perror("connect");
        return -1;
    }

    /* "Connection: close" makes end-of-stream a reliable end-of-body marker
     * when the server sends no Content-Length. Chunked transfer encoding is
     * not decoded by this example. */
    char request[1024];
    int req_len = snprintf(request, sizeof(request),
        "GET %s HTTP/1.1\r\n"
        "User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n"
        "Accept: */*\r\n"
        "Host:%s\r\n"
        "Connection: close\r\n"
        "\r\n",
        TARGET_URL, TARGET_HOST);
    if (req_len < 0 || (size_t)req_len >= sizeof(request)) {
        fprintf(stderr, "request does not fit into %zu bytes\n", sizeof(request));
        return -1;
    }
    if (send_all(sock, request, (size_t)req_len) != 0)
        return -1;

    char header[HEADER_CAP];
    long header_len = read_response_header(sock, header, sizeof(header));
    if (header_len < 0)
        return -1;

    printf("\n\nSUCCESS GET HEAD\n\n");
    printf("---------------\n");
    printf("#### %ld ####\n\n", header_len);
    printf("%s", header);
    printf("---------------\n\n\n");
    fflush(stdout);

    /* Defaults cover headers that are absent from the response. The
     * content-type buffer is as large as the header, so a copied value
     * always fits. */
    int status_code = 0;
    char content_type[HEADER_CAP] = "";
    long content_length = -1;
    get_resp_header(header, &status_code, content_type, &content_length);
    printf("%d, %s, %ld\n",  status_code, content_type, content_length);

    if (status_code != 200) {
        /* Do not store an error page or redirect body as the image. */
        fprintf(stderr, "unexpected HTTP status %d, nothing saved\n", status_code);
        return -1;
    }

    printf("Start write file to local disk ... .... \n");
    fflush(stdout);
    if (save_body(sock, "mydownload.jpg", content_length) != 0)
        return -1;

    printf("\n\nTHE END\n\n");
    return 0;
}

/*
 * Download TARGET_URL over a plain TCP socket and save it as "mydownload.jpg".
 * Exit status is EXIT_SUCCESS when the file was written completely,
 * EXIT_FAILURE otherwise.
 */
int main(){
    int client_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (client_socket < 0) {
        perror("socket");
        return EXIT_FAILURE;
    }

    int rc = download(client_socket);

    /* Single exit point so the socket is closed on every path. */
    close(client_socket);
    return rc == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

/*
 * Resolve TARGET_HOST and store its first IPv4 address, in dotted-decimal
 * notation, in `ip_addr`.
 *
 * ip_addr: caller-provided buffer of at least 16 bytes (the longest result
 *          is "255.255.255.255" plus the terminating NUL).
 *
 * On failure (lookup error, no address, or a non-IPv4 result) `ip_addr` is
 * set to the empty string so the caller can detect the problem. Assigning
 * NULL to the parameter would only change the local copy of the pointer.
 *
 * Note: gethostbyname() is obsolete; getaddrinfo(3) is the recommended
 * replacement for new code.
 */
static void GetIPfromDNS(char* ip_addr){
    if (!ip_addr)
        return;
    ip_addr[0] = '\0';

    struct hostent *host = gethostbyname(TARGET_HOST);
    if (!host || host->h_addrtype != AF_INET || !host->h_addr_list[0])
        return;

    /* Copy instead of casting: h_addr_list entries are plain char pointers
     * with no alignment guarantee for struct in_addr. */
    struct in_addr first;
    memcpy(&first, host->h_addr_list[0], sizeof(first));
    strcpy(ip_addr, inet_ntoa(first));
}

/*
 * Extract selected fields from a raw, NUL-terminated HTTP response header.
 *
 * response       : header text, lines separated by "\r\n"
 * status_code    : receives the numeric status (200, 503, ...)
 * content_type   : receives the media type up to the first whitespace
 *                  (e.g. "image/jpeg"); the buffer must be able to hold the
 *                  longest possible value, i.e. strlen(response) + 1 bytes
 * content_length : receives the body size in bytes (e.g. 560437)
 *
 * An output is left untouched when its field is missing or has no value, so
 * callers should preset defaults. Any output pointer may be NULL to skip it.
 * Header names are matched case-insensitively and only at the start of a
 * line, with optional whitespace after the colon.
 */

/* Lower-case an ASCII letter; every other value is returned unchanged. */
static int ascii_lower(int c)
{
    return (c >= 'A' && c <= 'Z') ? c - 'A' + 'a' : c;
}

/*
 * Find the header line named `name` (given without the colon) in `response`.
 * Returns a pointer to the first character of its value, with leading spaces
 * and tabs skipped, or NULL when no such header line exists.
 */
static const char *find_header_value(const char *response, const char *name)
{
    size_t name_len = strlen(name);
    const char *line = response;

    while (line && *line) {
        size_t i = 0;
        /* The comparison stops at the first mismatch, which includes the
         * NUL terminator, so this never reads past the end of the string. */
        while (i < name_len && ascii_lower(line[i]) == ascii_lower(name[i]))
            i++;
        if (i == name_len && line[i] == ':') {
            const char *value = line + i + 1;
            while (*value == ' ' || *value == '\t')
                value++;
            return value;
        }
        line = strchr(line, '\n');
        if (line)
            line++;
    }
    return NULL;
}

void get_resp_header(const char *response, int *status_code, char*content_type, long* content_length){
    if (!response)
        return;

    const char *pos = strstr(response, "HTTP/");
    if (pos && status_code)
        sscanf(pos, "%*s %d", status_code);

    pos = find_header_value(response, "Content-Type");
    if (pos && content_type) {
        /* Stop at whitespace or line end so an empty value cannot spill
         * over into the next header line. */
        size_t n = strcspn(pos, " \t\r\n");
        if (n > 0) {
            memcpy(content_type, pos, n);
            content_type[n] = '\0';
        }
    }

    pos = find_header_value(response, "Content-Length");
    if (pos && content_length && *pos >= '0' && *pos <= '9')
        *content_length = strtol(pos, NULL, 10);
}


运行结果

$ g++ test.cpp
$ ./a.out
SUCCESS GET HEAD

---------------
#### 563 ####

HTTP/1.1 200 OK
Server: marco/1.6
Date: Fri, 18 Aug 2017 03:06:52 GMT
Content-Type: image/jpeg
Content-Length: 560437
Connection: keep-alive
X-Request-Id: 96aaeffc8c0ece839ba5495988d22dc5; 54f5daf9ca0effd3deacdcc88a5e54da
X-Source: U/304
ETag: "82e871eb8d245fef907c9e5ef8cd8809"
X-Slice-Complete-Length: 560437
Last-Modified: Thu, 06 Apr 2017 12:59:05 GMT
X-Slice-Size: 65536
Expires: Wed, 23 Aug 2017 17:02:27 GMT
Cache-Control: max-age=691200
Accept-Ranges: bytes
Age: 458936
Via: T.2424.H.1, V.mix-gd-can-008, T.141134.R.1, M.cun-gd-zhs-131

---------------

200, image/jpeg, 560437
Start write file to local disk ... ....

THE END


主要注意两个点。

1 组织HTTP协议的应用层数据包发起请求。



2 利用服务器返回的数据格式中连续两次\r\n解析出头部信息(包含文件大小)和文件原始数据(字节流);

(例程中的51行到70行连续用了4个判断语句,逐字节读取并直接找出连续的\r\n\r\n)



补充一点

这里使用了gethostbyname系统函数向DNS服务器查询IP地址,但是帮助文档中已经说明这个函数不再推荐使用了:

The gethostbyname*() and gethostbyaddr*() functions are obsolete.
Applications should use getaddrinfo(3) and getnameinfo(3) instead.


HTTP使用了TCP作为传输层协议,所以会有3次握手的过程,标准的3次握手过程:

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: