C/C++实现URL路径拆分

URL路径拆分: 例如我们传入 http://www.baidu.com/index.php 拆分为 www.baidu.com 和 /index.php

#include <Windows.h>
#include <iostream>

int ParseUrl(char szUrl[], char szHost[], char szPath[])
{
int iStart = 0;
int iEnd = 0;
int iLen = 0;

if (strncmp(szUrl, "http://", 7) == 0)
iStart = 7;
else if (strncmp(szUrl, "https://", 8) == 0)
iStart = 8;

while (szUrl[iStart + iLen] != '\0' && szUrl[iStart + iLen] != '/')
{ iLen++; }

memcpy(szHost, szUrl + iStart, iLen);
if (strlen(szUrl) - iStart - iLen == 0)
szPath[0] = '/';
else
memcpy(szPath, szUrl + iStart + iLen, strlen(szUrl) - iStart - iLen);
return 0;
}

int main(int argc,char *argv [])
{
char szUrl[] = "http://www.baidu.com/index.html";
char szHost[1024] = { 0 };
char szPath[2048] = { 0 };

int ret = ParseUrl(szUrl,szHost,szPath);

if (ret == 0)
{
printf("主机: %s \n", szHost);
printf("路径: %s \n", szPath);
}

system("pause");
return 0;
}

http 文件下载

#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#include <iostream>
#include <winsock.h>

#pragma comment(lib,"ws2_32.lib")

int Spide(const char *pszUrl, const char *pszFile)
{
char szHost[256] = {0};
char *ptr = (char *)pszUrl;

// 判断开头是否为http://如果不是则返回-1
if (_strnicmp(ptr, "http://", 7) != 0) { return -1; }

ptr = ptr + 7;
int index = 0;

while (index < 255 && *ptr && *ptr != '/')
{
szHost[index++] = *ptr++;
}
szHost[index] = '\0';

//printf("去掉http后的域名地址: %s \n", szHost);

char *buffer = new char[1024 * 8];
index = sprintf(buffer,
"GET %s HTTP/1.1\r\n"
"Host: %s\r\n"
"User-Agent: IE or Chrome\r\n"
"Accept-Type: */*\r\n"
"Connection: Close\r\n\r\n",
ptr, szHost);

//printf("构建好的请求头:\n %s \n", buffer);

// ------------------------------------------------------------

SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);

SOCKADDR_IN addr;
addr.sin_addr.S_un.S_addr = 0;
addr.sin_port = htons(0);
addr.sin_family = AF_INET;

index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
hostent *p = ::gethostbyname(szHost);

if (p) {
ULONG ai = *(ULONG*)p->h_addr_list[0];
addr.sin_addr.S_un.S_addr = ai;
addr.sin_port = htons(80);
index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
if (index == NOERROR) {
index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
FILE *pf = fopen(pszFile, "wb");
do {
index = recv(fd, buffer, 8191, 0);
if (index <= 0) {
break;
}
buffer[index] = '\0';
fwrite(buffer, 1, index, pf);
printf("%s", buffer);
} while (TRUE);
fclose(pf);
}
}
closesocket(fd);
delete[] buffer;
return 0;
}


int main(int argc,char *argv[])
{
WSADATA wsaData;
WSAStartup(0x0202, &wsaData);

Spide("http://cn.bing.com/","index.html");

system("pause");
return 0;
}

实现HTTP页面下载功能

#include <Windows.h>
#include <iostream>
#include <winsock.h>

#pragma comment(lib,"ws2_32.lib")

int Curl_Get(const char *pszUrl)
{
char szHost[256] = { 0 };
char *ptr = (char *)pszUrl;

// 判断开头是否为http:// 或者 https:// 如果不是则返回-1
if (_strnicmp(ptr, "http://", 7) == 0)
ptr = ptr + 7;
else if (_strnicmp(ptr, "https://", 8) == 0)
ptr = ptr + 8;
else
return -1;

int index = 0;
while (index < 255 && *ptr && *ptr != '/')
szHost[index++] = *ptr++;
szHost[index] = '\0';

char *buffer = new char[1024 * 8];
index = sprintf(buffer,
"GET %s HTTP/1.1 \r\n"
"Host: %s \r\n"
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0 \r\n"
"Accept-Type: */* \r\n"
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 \r\n"
"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 \r\n"
"Connection: Close \r\n\r\n",
ptr, szHost);
printf("%s \n", buffer);

SOCKADDR_IN addr;
SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);
addr.sin_addr.S_un.S_addr = 0;
addr.sin_port = htons(0);
addr.sin_family = AF_INET;
index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
hostent *p = gethostbyname(szHost);

if (p)
{
ULONG ai = *(ULONG*)p->h_addr_list[0];
addr.sin_addr.S_un.S_addr = ai;
addr.sin_port = htons(80);

index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
if (index == NOERROR)
{
index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
do
{
index = recv(fd, buffer, 8191, 0);
if (index <= 0) { break; }
buffer[index] = '\0';
printf("%s \n", buffer);
} while (TRUE);
}
}
closesocket(fd);
return 0;
}

int main(int argc, char *argv[])
{
WSADATA wsaData;
WSAStartup(0x0202, &wsaData);
Curl_Get("http://cn.bing.com/");

WSACleanup();

system("pause");
return 0;
}