解析分块HTTP / 1.1响应

时间:2015-02-06 06:00:55

标签: c linux sockets http chunked-encoding

我刚刚实现了一个HTTP / 1.1客户端来解析分块传输编码。但是,它适用于某些网站但不适用于其他网站。我假设我需要为每个块数据读取chunkSize + 2个字节,包括\r\n,我是对的吗?

这是我的代码:

while(chunked)//if detecting chunked in the header before, this is true
{
    //getLine is a function can read a line separated by \r\n
    //sockfd is a socket created before and file position is at the start of HTTP body (after that blank line between header and body)
    line = getLine(sockfd);
    printf("%s", line);//print the chunk size line in hex
    int chunkSize = strtol(line, NULL, 16);
    if(chunkSize == 0)
    {
        printf("##### Read chunk size of 0, reading until we hit end of stream.\n");
        break;
    }
    printf("##### Chunk size (in hex above) is %d in decimal and is printed here:\n", chunkSize);
    char* chunkBuf = (char *)malloc(chunkSize + 2 + 1);//2 for \r\n, 1 for \0
    bzero(chunkBuf, chunkSize + 3);
    if(read(sockfd, chunkBuf, chunkSize + 2) == 0)//sockfd is a socket created before
    {
        perror("Read Error: ");
        exit(EXIT_FAILURE);
    }
    printf("%s", chunkBuf);//print the chunk content 
    free(chunkBuf);

}

实际上我可以打印出整个内容而无需解析,即逐行打印,所以我想我可能会在上面的代码中犯一些错误,有人能给我一些提示吗?

以下是供您参考的完整代码:

#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdlib.h>
#include <netdb.h>
#include <netinet/in.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>

#define HTTP_VERSION "HTTP/1.1"
#define PAGE "/"

int createSokect()
{
    int socketfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if(socketfd < 0)
    {
        perror("Cannot create socket\n");
        exit(EXIT_FAILURE);
    }
    return socketfd;
}

char* getIP(char* host)
{
    struct hostent* hent;
    int len = 15;//xxx.xxx.xxx.xxx
    char *ipaddr = (char *)malloc(len + 1);//one more \0
    bzero(ipaddr, len + 1);
    if((hent = gethostbyname(host)) == NULL)
    {
        printf("Cannot get IP for this host: %s\n", host);
        exit(EXIT_FAILURE);
    }
    if(inet_ntop(AF_INET, (void*)hent->h_addr_list[0], ipaddr, len) == NULL)
    {
        printf("Cannot resolve IP for this host: %s\n", host);
        exit(EXIT_FAILURE);
    }
    return ipaddr;
}

char* createQuery(char* host, char* page)
{
    char* msg = "GET %s %s\r\nHost: %s\r\nConnection: close\r\n\r\n";
    char* query = (char *)malloc(strlen(host) + strlen(page) + strlen(msg) + strlen(HTTP_VERSION) - 6 + 1);//-6: %s %s %s
    sprintf(query, msg, page, HTTP_VERSION, host);
    return query;
}

char* getLine(int fd)
{
    char c = 0, pre = 0;
    char* line = 0;
    int size = 1;
    int pos = 0;
    while(read(fd, &c, 1)!=0)
    {
        if(pos + 1 == size)
        {
            size *= 2;
            line = realloc(line, size);
        }
        line[pos++] = c;
        //printf("%c", c);

        if(pre == '\r' && c == '\n')//this is a new line
        {
            break;
        }
        pre = c;

    }
    if(line)
    {
        line[pos++] = 0;
    }
    return line;
}

int main(int argc, char** argv)
{
    if(argc < 3)
    {
        perror("Need more arguments");
        exit(EXIT_FAILURE);
    }
    int sockfd = createSokect();
    char* ip = getIP(argv[1]);
    printf("Host: %s\n", argv[1]);
    printf("IP: %s\n", ip);
    struct sockaddr_in server;
    server.sin_family = AF_INET;
    int err = inet_pton(server.sin_family, ip, (void *)(&(server.sin_addr.s_addr)));
    if(err != 1)
    {
        perror("Cannot convert IP to binary address\n");
        exit(EXIT_FAILURE);
    }
    server.sin_port = htons(atoi(argv[2]));
    printf("port: %d\n", server.sin_port);

    //connect to the server
    if(connect(sockfd, (struct sockaddr *)&server, sizeof(server)) < 0)
    {
        printf("Cannot connect: %d\n", err);
        exit(EXIT_FAILURE);
    }

    char* query = createQuery(argv[1], PAGE);
    printf("##### CLIENT IS SENDING THE FOLLOWING TO SERVER:\n");
    printf("%s", query);

    int offset = 0;
    //send query to the server
    err = send(sockfd, query + offset, strlen(query) - offset, 0);
    if(err < 0)
    {
        perror("Cannot send query");
        exit(EXIT_FAILURE);
    }


    printf("##### CLIENT RECEIVED THE FOLLOWING FROM SERVER:\n");
    //receive message line by line
    bool chunked = false;
    char* line;
    while((line = getLine(sockfd)) != NULL)
    {
        printf("%s", line);
        if(!strcasecmp(line, "transfer-encoding: chunked\r\n"))
        {
            chunked = true;
            //printf("Chunked here\n");
        }
        if(!strcmp(line, "\r\n"))
        {
            printf("##### Just read blank line, now reading body.\n");
            if(chunked)//chunked, we print those in another way, otherwise line by line
            {
                free(line);
                break;
            }
        }


        free(line);
    }

    while(chunked)
    {
        line = getLine(sockfd);
        printf("%s", line);
        int chunkSize = strtol(line, NULL, 16);
        if(chunkSize == 0)
        {
            printf("##### Read chunk size of 0, reading until we hit end of stream.\n");
            break;
        }
        printf("##### Chunk size (in hex above) is %d in decimal and is printed here:\n", chunkSize);
        char* chunkBuf = (char *)malloc(chunkSize + 2 + 1);//2 for \r\n, 1 for \0
        bzero(chunkBuf, chunkSize + 3);
        if(read(sockfd, chunkBuf, chunkSize + 2) == 0)
        {
            perror("Read Error: ");
            exit(EXIT_FAILURE);
        }
        printf("%s", chunkBuf);
        free(chunkBuf);

    }

    //receive message from the server
    /*
     char buf[2048];
     bzero(buf, sizeof(buf));
     err = recv(sockfd, buf, sizeof(buf), 0);
     if(err < 0)
     {
     perror("Receive error");
     exit(EXIT_FAILURE);
     }
     char *content = buf;
     fprintf(stdout, content);*/

    free(query);
    free(ip);
    close(sockfd);
    printf("##### Connection closed by server.\n");
    exit(EXIT_SUCCESS);
} 

2 个答案:

答案 0 :(得分:5)

该行:

if(read(sockfd, chunkBuf, chunkSize + 2) == 0) ...

将读取chunkSize + 2,即它可以读取更少。请参阅read的手册页。您的代码应如下所示:

int n = 0;
while (n<chunkSize) {
  r = read(sockfd, chunkBuf+n, chunkSize - n);
  if (r <= 0) { error or closed conection ... }
  n += r;
}

答案 1 :(得分:0)

因为我知道块大小,所以我一个接一个地读取字符,直到块大小。这种方式可以工作。但我仍然不明白为什么我在尝试一次使用整个块大小的read或recv时失败了。