您的位置:首页 > 理论基础 > 计算机网络

C code for a simple HTTP client

2011-04-07 15:43 302 查看
Linux provides a high-level socket API that allows programmers to easily connect to any TCP or UDP service.

In this tutorial, we will see how this works by implementing a simple HTTP client that requests a web page given the hostname and the page name, then reads the server's answer and outputs the HTML content of the reply.

To be able to connect to a service built on top of TCP, we first need to create a socket for the TCP protocol, fill in a network address structure representing our destination and the port to connect to and use the latter to connect to the remote server.

From there, we will be able to send and receive data over the network. Once we are done, we will close the connection.

Below is the C code for a simple HTTP client that takes the host and the page to request from the command-line arguments, resolves the hostname to an IP address, connects to this IP on port 80, builds the HTTP query, sends it, and then retrieves the page content.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <arpa/inet.h>
#include <netdb.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

/* Creates an IPv4 TCP socket and returns its descriptor; the definition in
 * this file exits the program if the socket cannot be created. */
int create_tcp_socket();

/* Resolves `host` to a dotted-quad IPv4 string stored in malloc'd memory.
 * main() frees the returned pointer. */
char *get_ip(char *host);

/* Builds the HTTP GET request text for `page` on `host` in malloc'd memory.
 * main() frees the returned pointer. */
char *build_get_query(char *host, char *page);

/* Prints the command-line usage text to stderr. */
void usage();

/* Example hostname; not referenced by the code shown in this file. */
#define HOST "coding.debuntu.org"

/* Page requested when no second command-line argument is given. */
#define PAGE "/"

/* TCP port main() connects to. */
#define PORT 80

/* Value substituted into the User-Agent header of the request. */
#define USERAGENT "HTMLGET 1.0"

/*
 * Scan one received chunk for the blank line ("\r\n\r\n") that ends the HTTP
 * response headers.
 *
 * chunk       - bytes just received
 * len         - number of valid bytes in chunk
 * matched     - in/out: how many terminator bytes have been matched so far;
 *               carried between calls so a terminator split across two
 *               recv() calls is still detected
 * body_offset - out: index in chunk of the first body byte (set only when
 *               the function returns 1; may equal len)
 *
 * Returns 1 when the end of the headers lies inside this chunk, 0 otherwise.
 */
static int find_body_start(const char *chunk, size_t len,
                           size_t *matched, size_t *body_offset)
{
    static const char terminator[] = "\r\n\r\n";
    const size_t terminator_len = sizeof terminator - 1;

    for (size_t i = 0; i < len; i++) {
        if (chunk[i] == terminator[*matched]) {
            if (++*matched == terminator_len) {
                *body_offset = i + 1;
                return 1;
            }
        } else {
            /* A mismatching '\r' can itself start a new terminator. */
            *matched = (chunk[i] == '\r') ? 1 : 0;
        }
    }
    return 0;
}

/*
 * htmlget host [page]
 *
 * Resolves `host`, connects to it on PORT, sends an HTTP/1.0 GET request for
 * `page` (PAGE when omitted) and writes the response body to stdout.
 * Diagnostics go to stderr.
 *
 * Exit status: 2 when no host is given, 1 on a fatal error, 0 otherwise.
 */
int main(int argc, char **argv)
{
    if (argc == 1) {
        usage();
        exit(2);
    }

    char *host = argv[1];
    char *page = (argc > 2) ? argv[2] : PAGE;

    int sock = create_tcp_socket();
    char *ip = get_ip(host);
    fprintf(stderr, "IP is %s\n", ip);

    /* The address lives on the stack: no allocation to size or free. */
    struct sockaddr_in remote;
    memset(&remote, 0, sizeof remote);
    remote.sin_family = AF_INET;
    remote.sin_port = htons(PORT);

    int tmpres = inet_pton(AF_INET, ip, &remote.sin_addr);
    if (tmpres < 0) {
        perror("Can't set remote->sin_addr.s_addr");
        exit(1);
    } else if (tmpres == 0) {
        fprintf(stderr, "%s is not a valid IP address\n", ip);
        exit(1);
    }

    if (connect(sock, (struct sockaddr *)&remote, sizeof remote) < 0) {
        perror("Could not connect");
        exit(1);
    }

    char *get = build_get_query(host, page);
    fprintf(stderr, "Query is:\n<<START>>\n%s<<END>>\n", get);

    /* Send the query; send() may accept fewer bytes than requested. */
    size_t query_len = strlen(get);
    size_t sent = 0;
    while (sent < query_len) {
        ssize_t n = send(sock, get + sent, query_len - sent, 0);
        if (n == -1) {
            perror("Can't send query");
            exit(1);
        }
        sent += (size_t)n;
    }

    /* Receive the reply, dropping everything up to the end of the headers. */
    char buf[BUFSIZ];
    size_t matched = 0;
    int in_body = 0;
    ssize_t received;

    while ((received = recv(sock, buf, sizeof buf, 0)) > 0) {
        const char *out = buf;
        size_t out_len = (size_t)received;

        if (!in_body) {
            size_t body_offset = 0;
            if (!find_body_start(buf, out_len, &matched, &body_offset)) {
                continue; /* still inside the headers */
            }
            in_body = 1;
            out += body_offset;
            out_len -= body_offset;
        }

        /* Write by length: the body is untrusted data, never a format string,
         * and may contain NUL bytes. */
        if (out_len > 0 && fwrite(out, 1, out_len, stdout) != out_len) {
            perror("Can't write page content");
            exit(1);
        }
    }

    if (received < 0) {
        perror("Error receiving data");
    }

    free(get);
    free(ip);
    close(sock);
    return 0;
}

/*
 * Print the command-line synopsis to stderr.
 *
 * The text is split into adjacent string literals, which the compiler
 * concatenates, so no backslash line continuations are needed.
 */
void usage()
{
    fprintf(stderr, "USAGE: htmlget host [page]\n"
                    "\thost: the website hostname. ex: coding.debuntu.org\n"
                    "\tpage: the page to retrieve. ex: index.html, default: /\n");
}

/*
 * Open an IPv4 stream socket for TCP.
 *
 * Returns the new socket descriptor. If the socket cannot be created, the
 * reason is reported with perror() and the program exits with status 1.
 */
int create_tcp_socket()
{
    int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

    if (fd >= 0) {
        return fd;
    }

    /* No descriptor was obtained: report why and terminate. */
    perror("Can't create TCP socket");
    exit(1);
}

/*
 * Resolve `host` to its first IPv4 address in dotted-quad text form.
 *
 * host - hostname or numeric IPv4 address to look up
 *
 * Returns a NUL-terminated string in malloc'd memory; the caller must free()
 * it. On any failure (lookup error, no IPv4 address, out of memory,
 * conversion error) a message is printed to stderr and the program exits
 * with status 1.
 */
char *get_ip(char *host)
{
    struct hostent *hent = gethostbyname(host);
    if (hent == NULL) {
        herror("Can't get IP");
        exit(1);
    }

    /* Only a 4-byte IPv4 address may be handed to inet_ntop(AF_INET, ...). */
    if (hent->h_addrtype != AF_INET || hent->h_addr_list[0] == NULL) {
        fprintf(stderr, "No IPv4 address found for %s\n", host);
        exit(1);
    }

    /* INET_ADDRSTRLEN covers "XXX.XXX.XXX.XXX" plus the terminating NUL. */
    char *ip = calloc(INET_ADDRSTRLEN, 1);
    if (ip == NULL) {
        perror("Can't allocate IP buffer");
        exit(1);
    }

    /* Pass the full buffer size: a size of 15 would reject 15-character
     * addresses because no room is left for the terminator. */
    if (inet_ntop(AF_INET, hent->h_addr_list[0], ip, INET_ADDRSTRLEN) == NULL) {
        perror("Can't resolve host");
        exit(1);
    }

    return ip;
}

/*
 * Build the text of an HTTP/1.0 GET request.
 *
 * host - value for the Host header
 * page - path to request; one leading '/' is dropped (and reported on
 *        stderr) because the request template already supplies it
 *
 * Returns the NUL-terminated request in malloc'd memory; the caller must
 * free() it. On formatting or allocation failure a message is printed to
 * stderr and the program exits with status 1.
 */
char *build_get_query(char *host, char *page)
{
    static const char tpl[] =
        "GET /%s HTTP/1.0\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n";
    char *getpage = page;

    if (getpage[0] == '/') {
        getpage = getpage + 1;
        fprintf(stderr, "Removing leading \"/\", converting %s to %s\n", page, getpage);
    }

    /* Let snprintf compute the exact length instead of hand-counting the
     * template's conversion specifiers. */
    int needed = snprintf(NULL, 0, tpl, getpage, host, USERAGENT);
    if (needed < 0) {
        fprintf(stderr, "Can't format query\n");
        exit(1);
    }

    size_t size = (size_t)needed + 1; /* +1 for the terminating NUL */
    char *query = malloc(size);
    if (query == NULL) {
        perror("Can't allocate query");
        exit(1);
    }

    snprintf(query, size, tpl, getpage, host, USERAGENT);
    return query;
}

To compile it, run:

$ gcc -o htmlget htmlget.c
$ ./htmlget
USAGE: htmlget host [page]
host: the website hostname. ex: coding.debuntu.org
page: the page to retrieve. ex: index.html, default: /

Informative messages and errors are printed to stderr. The content of the page is printed to stdout. Thus, to save the HTML content of a page to a file, you will need to run:
$ ./htmlget coding.debuntu.org category > /tmp/page.html
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: