如何解析Web URL?

时间:2013-05-22 23:13:18

标签: delphi parsing url delphi-xe2

我需要能够将URL分解为各个组成部分。以下面的URL为例:

http://login:password@somehost.somedomain.com:8080/some_path/something_else.html?param1=val&param2=val#nose
\__/   \___/ \______/ \_____________________/ \__/\____________________________/ \___________________/ \__/
 |       |      |               |               |                |                        |              |
Scheme Username Password       Host            Port             Path                    Query         Fragment

这应该分解如下:

Protocol: HTTP
Username: login
Password: password
Host: somehost.somedomain.com
Port: 8080
Path Info: /some_path/something_else.html
Query String: param1=val&param2=val

如何在Delphi中实现这一点?有没有现成的东西可以帮我拆分?如果没有,我该如何解析各种不同的格式?另外还要考虑到URL可能使用其他协议,例如HTTPS或RTSP。

2 个答案:

答案 0 :(得分:31)

XE2附带了Indy,其中的TIdURI类正是为此而设计的,例如:

uses
  ..., IdURI;

var
  URI: TIdURI;

// Build a TIdURI from the complete URL string; the components are then read
// from the object's properties listed below.
URI := TIdURI.Create('http://login:password@somehost.somedomain.com:8080/some_path/something_else.html?param1=val&param2=val');
try
  // Protocol = URI.Protocol
  // Username = URI.Username
  // Password = URI.Password
  // Host = URI.Host
  // Port = URI.Port
  // Path = URI.Path
  // NOTE(review): TIdURI appears to keep the last path segment
  //   (something_else.html) in URI.Document, separate from URI.Path, so the
  //   full "Path Info" may be URI.Path + URI.Document -- confirm against the
  //   Indy documentation.
  // Query = URI.Params
finally
  // The instance was created by this snippet, so it is released here even if
  // reading the properties raises.
  URI.Free;
end;

答案 1 :(得分:21)

您可以使用WinInet的InternetCrackUrl函数。

试试这个简单的示例:

{$APPTYPE CONSOLE}

uses
  Windows,
  SysUtils,
  WinInet;

{ Splits lpszUrl into its components with WinInet's InternetCrackUrl and
  writes the scheme, host name, user name, password, URL path and extra info
  (the '?...' part of the URL) to standard output, one component per line.

  Parameters:
    lpszUrl - the complete URL to split.

  Raises:
    EOSError (through Win32Check) when InternetCrackUrl reports failure, for
    example because the URL is malformed or a component does not fit into its
    buffer. Previously the result was ignored and empty fields were printed. }
procedure ParseURL(const lpszUrl: string);
var
  lpszScheme      : array[0..INTERNET_MAX_SCHEME_LENGTH - 1] of Char;
  lpszHostName    : array[0..INTERNET_MAX_HOST_NAME_LENGTH - 1] of Char;
  lpszUserName    : array[0..INTERNET_MAX_USER_NAME_LENGTH - 1] of Char;
  lpszPassword    : array[0..INTERNET_MAX_PASSWORD_LENGTH - 1] of Char;
  lpszUrlPath     : array[0..INTERNET_MAX_PATH_LENGTH - 1] of Char;
  lpszExtraInfo   : array[0..1024 - 1] of Char;
  lpUrlComponents : TURLComponents;
begin
  // Clear every output buffer so each one is a valid empty string even when
  // the corresponding component is absent from the URL.
  ZeroMemory(@lpszScheme, SizeOf(lpszScheme));
  ZeroMemory(@lpszHostName, SizeOf(lpszHostName));
  ZeroMemory(@lpszUserName, SizeOf(lpszUserName));
  ZeroMemory(@lpszPassword, SizeOf(lpszPassword));
  ZeroMemory(@lpszUrlPath, SizeOf(lpszUrlPath));
  ZeroMemory(@lpszExtraInfo, SizeOf(lpszExtraInfo));
  ZeroMemory(@lpUrlComponents, SizeOf(TURLComponents));

  // The dw*Length fields are capacities in characters, not bytes. Length()
  // on a static array yields the element count; SizeOf() would report twice
  // the real capacity when Char is a 2-byte WideChar and let the API write
  // past the end of the buffers.
  lpUrlComponents.dwStructSize      := SizeOf(TURLComponents);
  lpUrlComponents.lpszScheme        := lpszScheme;
  lpUrlComponents.dwSchemeLength    := Length(lpszScheme);
  lpUrlComponents.lpszHostName      := lpszHostName;
  lpUrlComponents.dwHostNameLength  := Length(lpszHostName);
  lpUrlComponents.lpszUserName      := lpszUserName;
  lpUrlComponents.dwUserNameLength  := Length(lpszUserName);
  lpUrlComponents.lpszPassword      := lpszPassword;
  lpUrlComponents.dwPasswordLength  := Length(lpszPassword);
  lpUrlComponents.lpszUrlPath       := lpszUrlPath;
  lpUrlComponents.dwUrlPathLength   := Length(lpszUrlPath);
  lpUrlComponents.lpszExtraInfo     := lpszExtraInfo;
  lpUrlComponents.dwExtraInfoLength := Length(lpszExtraInfo);

  // Surface API failures as an exception instead of printing blank fields.
  Win32Check(InternetCrackUrl(PChar(lpszUrl), Length(lpszUrl), ICU_DECODE or ICU_ESCAPE, lpUrlComponents));

  Writeln(Format('Protocol : %s',[lpszScheme]));
  Writeln(Format('Host     : %s',[lpszHostName]));
  Writeln(Format('User     : %s',[lpszUserName]));
  Writeln(Format('Password : %s',[lpszPassword]));
  Writeln(Format('Path     : %s',[lpszUrlPath]));
  Writeln(Format('ExtraInfo: %s',[lpszExtraInfo]));
end;

const
  // Sample URL that the demo program hands to ParseURL.
  SampleUrl = 'http://login:password@somehost.somedomain.com/some_path/something_else.html?param1=val&param2=val';

// Program entry point: split the sample URL, report any exception on the
// console, then wait for Enter so the console window stays open.
begin
  try
    ParseURL(SampleUrl);
  except
    on Err: Exception do
      Writeln(Err.ClassName, ': ', Err.Message);
  end;
  Readln;
end.

这将输出:

Protocol : http
Host     : somehost.somedomain.com
User     : login
Password : password
Path     : /some_path/something_else.html
ExtraInfo: ?param1=val&param2=val