学习http协议,首先要了解的是url,即我们在浏览器地址栏中输入的部分,由host、port、path组成,格式如:
http://host[:port]/path/
针对这个格式,我们需要进行拆分解析。以下使用c代码解析各种格式的url。
假设有如下格式类型:
char *url_1 = "http://www.baidu.com/";
char *url_2 = "https://www.baidu.com/";
char *url_3 = "http://www.baidu.com/path/hello/text.txt/";
char *url_4 = "http://www.baidu.com:56789/path/hello/text.txt/";
char *url_5 = "hxxp://www.baidu.com/";
char *url_6 = "https://www.baidu.com:1234/path/hello/text.txt/";
也就是,有端口号的,没端口号的,有加密https的,有url path的,以及协议头非法的(如url_5的hxxp)。
如代码:
上述即为代码,进行编译运行如下
如上。
代码完整如下,
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Project logging header (provides XTW_LOG_D / XTW_LOG_DEBUG).
 * When the compiler can prove the header is absent, fall back to a minimal
 * stderr logger so this listing still builds on its own. The fallback is
 * never used when the real header is present.
 * NOTE(review): whether the real XTW_LOG_D appends a newline is not visible
 * here; the fallback does.
 */
#if defined(__has_include)
#if __has_include("xtw_log_comm.h")
#include "xtw_log_comm.h"
#else
#define XTW_LOG_DEBUG 0
#define XTW_LOG_D(level, ...)             \
    do {                                  \
        (void)(level);                    \
        fprintf(stderr, __VA_ARGS__);     \
        fputc('\n', stderr);              \
    } while (0)
#endif
#else
#include "xtw_log_comm.h"
#endif

/*
 * Local boolean shim. Skipped when <stdbool.h> has already been pulled in or
 * when the compiler is C23 or later, where bool/true/false are keywords.
 */
#if !defined(__bool_true_false_are_defined) && \
    !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
typedef int bool;
#define false 0
#define true 1
#endif

/***************************************************************
 * Supported URL formats (plain http and https):
 *   http://www.baidu.com/        https://www.baidu.com/
 *   http://www.baidu.com/path/hello/text.txt/
 *   http://www.baidu.com:56789/
 *   http://www.baidu.com:56789/path/hello/text.txt/
 ***************************************************************/
#define URL_HOST_NAME_MAX (100) /* length only for debug */
#define URL_IP_NAME_MAX (100)   /* length only for debug */
#define URL_PATH_NAME_MAX (100) /* length only for debug */
#define URL_HTTP_PORT (80)
#define URL_HTTPS_PORT (443)
#define URL_PORT_MAX (65535)    /* largest valid TCP port number */

/* Holds the pieces extracted from a decoded URL. */
typedef struct http_url_struct
{
    char hostname[URL_HOST_NAME_MAX + 1]; /* NUL-terminated host name */
    char hostip[URL_IP_NAME_MAX + 1];     /* not written by the decoder */
    char url_path[URL_PATH_NAME_MAX + 1]; /* NUL-terminated path, "" when absent */
    int port;                             /* explicit port, else scheme default */
    bool is_https;                        /* true when the scheme is https */
} http_url_t;

/**
 * Split an http/https URL into host name, port and path.
 *
 * Only the lower-case schemes "http://" and "https://" are recognised.
 * The host ends at the first ':' or '/'. An optional ":port" must consist of
 * decimal digits not exceeding URL_PORT_MAX; an empty port (as in "host:/")
 * keeps the scheme default. Everything after the authority is stored
 * verbatim as the path.
 *
 * @param url          URL text; it is not modified.
 * @param url_length   Number of bytes of url to consider. Parsing also stops
 *                     at the first '\0', so an overstated length is safe for
 *                     NUL-terminated input.
 * @param http_url_ctx Receives the result. hostname and url_path are always
 *                     NUL-terminated on return; hostip is left untouched.
 *                     port is written only once a scheme has been recognised.
 * @return true on success; false for NULL arguments, an unknown scheme, an
 *         empty or over-long host, an invalid port or an over-long path.
 */
int xtw_http_url_decode(char *url, int url_length, http_url_t *http_url_ctx)
{
    static const char http_tag[] = "http://";
    static const char https_tag[] = "https://";
    const size_t http_tag_len = sizeof(http_tag) - 1;
    const size_t https_tag_len = sizeof(https_tag) - 1;
    const char *cursor = NULL;
    const char *end = NULL;
    const char *host = NULL;
    size_t total = 0;
    size_t host_len = 0;
    size_t path_len = 0;

    if (http_url_ctx == NULL) {
        return false;
    }

    /* Start from well-defined, terminated outputs even on failure. */
    http_url_ctx->hostname[0] = '\0';
    http_url_ctx->url_path[0] = '\0';
    http_url_ctx->is_https = false;

    /* Effective length: bounded by url_length and by the first '\0'. */
    if (url != NULL && url_length > 0) {
        while (total < (size_t)url_length && url[total] != '\0') {
            total++;
        }
    }

    if (total < http_tag_len) {
        XTW_LOG_D(XTW_LOG_DEBUG, "[http] url format is wrong");
        return false;
    }

    /* Here we assume the scheme is lower case: "http", not "HTTP". */
    if (memcmp(url, http_tag, http_tag_len) == 0) {
        cursor = url + http_tag_len;
        http_url_ctx->port = URL_HTTP_PORT;
    } else if (total >= https_tag_len &&
               memcmp(url, https_tag, https_tag_len) == 0) {
        cursor = url + https_tag_len;
        http_url_ctx->port = URL_HTTPS_PORT;
        http_url_ctx->is_https = true;
    } else {
        return false;
    }
    end = url + total;

    /* Host name: everything up to the first ':' or '/'. */
    host = cursor;
    while (cursor < end && *cursor != '/' && *cursor != ':') {
        cursor++;
    }
    host_len = (size_t)(cursor - host);
    if (host_len == 0) {
        XTW_LOG_D(XTW_LOG_DEBUG, "[http] The hostname is empty");
        return false;
    }
    if (host_len > URL_HOST_NAME_MAX) {
        XTW_LOG_D(XTW_LOG_DEBUG, "[http] The hostname is too long");
        return false;
    }
    /* The IP format is not checked, e.g. www.baidu.com ---> 118.230.21.172 */
    memcpy(http_url_ctx->hostname, host, host_len);
    http_url_ctx->hostname[host_len] = '\0';

    /* Optional ":port", e.g. http://www.baidu.com:56789/ */
    if (cursor < end && *cursor == ':') {
        long port = 0;
        bool has_digit = false;

        cursor++; /* skip ':' so it is never counted as part of the port */
        while (cursor < end && *cursor != '/') {
            if (*cursor < '0' || *cursor > '9') {
                return false;
            }
            port = port * 10 + (*cursor - '0');
            if (port > URL_PORT_MAX) {
                return false; /* also keeps the accumulator from overflowing */
            }
            has_digit = true;
            cursor++;
        }
        if (has_digit) {
            http_url_ctx->port = (int)port;
        }
    }

    /* Path: the remainder of the URL, copied only if it fits. */
    path_len = (size_t)(end - cursor);
    if (path_len > URL_PATH_NAME_MAX) {
        XTW_LOG_D(XTW_LOG_DEBUG, "[http] The url path is too long");
        return false;
    }
    memcpy(http_url_ctx->url_path, cursor, path_len);
    http_url_ctx->url_path[path_len] = '\0';

    return true;
}

/**
 * Log every field of a decoded URL at debug level.
 *
 * @param http_url_ctx Decoded URL to print; NULL is ignored.
 */
void xtw_http_url_debug_print_info(http_url_t *http_url_ctx)
{
    if (http_url_ctx == NULL) {
        return;
    }

    XTW_LOG_D(XTW_LOG_DEBUG, "[http] hostname: %s", http_url_ctx->hostname);
    XTW_LOG_D(XTW_LOG_DEBUG, "[http] hostip: %s", http_url_ctx->hostip);
    XTW_LOG_D(XTW_LOG_DEBUG, "[http] port: %d", http_url_ctx->port);
    XTW_LOG_D(XTW_LOG_DEBUG, "[http] path: %s", http_url_ctx->url_path);
    XTW_LOG_D(XTW_LOG_DEBUG, "[http] is https: %d", http_url_ctx->is_https);
}