您的位置:首页 > 其它

URL functions and classes

2004-12-03 09:22 239 查看
$INCLUDE ../cDefines.inc}
unit cURL;

{                                                                              }
{                              URL Utilities 3.07                              }
{                                                                              }
{             This unit is copyright ?2000-2004 by David J Butler             }
{                                                                              }
{                  This unit is part of Delphi Fundamentals.                   }
{                     Its original file name is cURL.pas                       }
{       The latest version is available from the Fundamentals home page        }
{                     http://fundementals.sourceforge.net/                     }
{                                                                              }
{                I invite you to use this unit, free of charge.                }
{        I invite you to distibute this unit, but it must be for free.         }
{             I also invite you to contribute to its development,              }
{             but do not distribute a modified copy of this file.              }
{                                                                              }
{          A forum is available on SourceForge for general discussion          }
{             http://sourceforge.net/forum/forum.php?forum_id=2117             }
{                                                                              }
{ Revision history:                                                            }
{   17/10/2000  1.01  Unit cInternetStandards.                                 }
{   22/12/2001  1.02  Unit cMIME.                                              }
{   12/12/2002  3.03  Unit cInternetUtils.                                     }
{   21/02/2004  3.04  Added URL protocol base class.                           }
{   22/02/2004  3.05  Added URL File Protocol implementation class.            }
{   05/03/2004  3.06  Unit cURL.                                               }
{   12/03/2004  3.07  Added asynchronous URL content functions.                }
{                                                                              }

interface

uses
  { Delphi }
  SysUtils,

  { Fundamentals }
  cReaders,
  cThreads;

{                                                                              }
{ URL protocol                                                                 }
{   URL protocol implementations must use AURLProtocol as their base class.    }
{   URL protocol implementations must call RegisterURLProtocol to register     }
{   the implementation object.                                                 }
{                                                                              }
type
  AURLProtocol = class
  public
    { URL }
    function  DecodeURL(const URL: String; var Protocol, Host, Path: String): Boolean; virtual;

    { Content }
    function  IsContentSupported(const Protocol, Host, Path: String): Boolean; virtual;
    function  GetContentReader(const Protocol, Host, Path: String;
              var ContentType: String): AReaderEx; virtual;
    function  GetContentString(const Protocol, Host, Path: String;
              var Content, ContentType: String): Boolean; virtual;
  end;
  EURLProtocol = class(Exception);

procedure RegisterURLProtocol(const Handler: AURLProtocol);

{                                                                              }
{ URL string                                                                   }
{                                                                              }
const
  protoHTTP   = 'http';
  protoNNTP   = 'news';
  protoFTP    = 'ftp';
  protoGopher = 'gopher';
  protoEMail  = 'mailto';
  protoHTTPS  = 'https';
  protoIRC    = 'irc';
  protoFile   = 'file';
  protoTelnet = 'telnet';

procedure DecodeURL(const URL: String; var Protocol, Host, Path: String);
function  EncodeURL(const Protocol, Host, Path: String): String;

{                                                                              }
{ URL content (blocking functions)                                             }
{                                                                              }
function  GetURLProtocolContentReader(const Protocol, Host, Path: String;
          var ContentType: String): AReaderEx;
function  GetURLProtocolContentString(const Protocol, Host, Path: String;
          var Content, ContentType: String): Boolean;

function  GetURLContentReader(const URL: String;
          var ContentType: String): AReaderEx;
function  GetURLContentString(const URL: String;
          var Content, ContentType: String): Boolean;

function  RequireURLProtocolContentReader(const Protocol, Host, Path: String;
          var ContentType: String): AReaderEx;
function  RequireURLProtocolContentString(const Protocol, Host, Path: String;
          var ContentType: String): String;

function  RequireURLContentReader(const URL: String; var ContentType: String): AReaderEx;
function  RequireURLContentString(const URL: String; var ContentType: String): String;

{                                                                              }
{ URL content (asynchronous functions)                                         }
{   Call GetURLContentAsync to retrieve URL content asynchronously.            }
{   Caller must free the returned TURLContentAsync object.                     }
{                                                                              }
type
  TURLContentAsync = class;
  TURLContentNotifyEvent = procedure (const URLContent: TURLContentAsync;
      const Data: Pointer) of object;
  TURLContentProgressEvent = procedure (const URLContent: TURLContentAsync;
      const Data: Pointer; const Buffer; const Size: Integer;
      var Abort: Boolean) of object;
  TURLContentMode = (
      ucGetContentString,        // Return content in ContentString property
      ucSaveContentFile,         // Save content to ContentFileName
      ucNotifyContentBlocks);    // Call OnProgress with content blocks
  TURLContentAsync = class(TThreadEx)
  private
    FProtocol        : String;
    FHost            : String;
    FPath            : String;
    FContentMode     : TURLContentMode;
    FContentFileName : String;
    FData            : Pointer;
    FOnProgress      : TURLContentProgressEvent;
    FOnFinished      : TURLContentNotifyEvent;
    FFinished        : Boolean;
    FSuccess         : Boolean;
    FErrorMessage    : String;
    FContentSize     : Integer;
    FContentProgress : Integer;
    FContentType     : String;
    FContentString   : String;

  protected
    procedure TriggerProgress(const Buffer; const Size: Integer;
              var Abort: Boolean); virtual;
    procedure TriggerFinished; virtual;
   
    procedure Execute; override;

  public
    constructor Create(
                const Protocol, Host, Path: String;
                const ContentMode: TURLContentMode = ucGetContentString;
                const ContentFileName: String = '';
                const Data: Pointer = nil;
                const OnProgress: TURLContentProgressEvent = nil;
                const OnFinished: TURLContentNotifyEvent = nil);

    property  Protocol: String read FProtocol;
    property  Host: String read FHost;
    property  Path: String read FPath;
    property  ContentMode: TURLContentMode read FContentMode;
    property  ContentFileName: String read FContentFileName;
    property  Data: Pointer read FData;

    property  Finished: Boolean read FFinished;
    property  Success: Boolean read FSuccess;
    property  ErrorMessage: String read FErrorMessage;

    property  ContentSize: Integer read FContentSize;
    property  ContentProgress: Integer read FContentProgress;
    property  ContentType: String read FContentType;
    property  ContentString: String read FContentString;
  end;

function  GetURLProtocolContentAsync(
          const Protocol, Host, Path: String;
          const ContentMode: TURLContentMode = ucGetContentString;
          const ContentFileName: String = '';
          const Data: Pointer = nil;
          const OnProgress: TURLContentProgressEvent = nil;
          const OnFinished: TURLContentNotifyEvent = nil): TURLContentAsync;
function  GetURLContentAsync(
          const URL: String;
          const ContentMode: TURLContentMode = ucGetContentString;
          const ContentFileName: String = '';
          const Data: Pointer = nil;
          const OnProgress: TURLContentProgressEvent = nil;
          const OnFinished: TURLContentNotifyEvent = nil): TURLContentAsync;

{                                                                              }
{ URL file protocol                                                            }
{                                                                              }
type
  TURLFileProtocol = class(AURLProtocol)
  public
    function  IsContentSupported(const Protocol, Host, Path: String): Boolean; override;
    function  GetContentReader(const Protocol, Host, Path: String;
              var ContentType: String): AReaderEx; override;
    function  GetContentString(const Protocol, Host, Path: String;
              var Content, ContentType: String): Boolean; override;
  end;

procedure RegisterURLFileProtocol;

{                                                                              }
{ Self-testing code                                                            }
{                                                                              }
procedure SelfTest;

implementation

uses
  { Fundamentals }
  cUtils,
  cStrings,
  cWriters,
  cStreams,
  cFileUtils,
  cInternetUtils;

{                                                                              }
{ AURLProtocol                                                                 }
{                                                                              }
function AURLProtocol.DecodeURL(const URL: String; var Protocol, Host, Path: String): Boolean;
begin
  Protocol := '';
  Host := '';
  Path := '';
  Result := False;
end;

function AURLProtocol.IsContentSupported(const Protocol, Host, Path: String): Boolean;
begin
  Result := False;
end;

function AURLProtocol.GetContentReader(const Protocol, Host, Path: String;
    var ContentType: String): AReaderEx;
begin
  ContentType := '';
  Result := nil;
end;

function AURLProtocol.GetContentString(const Protocol, Host, Path: String;
    var Content, ContentType: String): Boolean;
begin
  Content := '';
  ContentType := '';
  Result := False;
end;

{                                                                              }
{ URL Protocol implementations                                                 }
{                                                                              }
var
  URLProtocols : Array of AURLProtocol = nil;

procedure RegisterURLProtocol(const Handler: AURLProtocol);
begin
  if not Assigned(Handler) then
    raise EURLProtocol.Create('URL protocol handler required');
  Append(ObjectArray(URLProtocols), Handler);
end;

{                                                                              }
{ URL string                                                                   }
{                                                                              }
function urlDecodeHTTP(const S: String; var Protocol, Host, Path: String): Boolean;
var I, J: Integer;
begin
  Protocol := '';
  Host := '';
  Path := '';
  if StrMatchLeft(S, 'http:', False) then
    Protocol := protoHTTP else
  if StrMatchLeft(S, 'https:', False) then
    Protocol := protoHTTPS;
  Result := Protocol <> '';
  if not Result then
    exit;
  I := PosChar(':', S);
  Assert(I > 0, 'I > 0');
  if StrMatch(S, '//', I + 1) then
    Inc(I, 2);
  J := PosChar('/', S, I + 1);
  if J = 0 then
    Host := CopyFrom(S, I + 1) else
    begin
      Host := CopyRange(S, I + 1, J - 1);
      Path := CopyFrom(S, J);
    end;
end;

function urlDecodeEMail(const S: String; var Protocol, Host, Path: String): Boolean;
begin
  Protocol := '';
  Host := '';
  Path := '';
  if StrMatchLeft(S, 'mailto:', False) then
    begin
      Protocol := protoEMail;
      Host := CopyFrom(S, 8);
    end else
  if (PosChar([':', '/', '/'], S) = 0) and
     (PosChar('@', S) > 1) then
    begin
      Protocol := protoEMail;
      Host := S;
    end;
  Result := Protocol <> '';
  if not Result then
    exit;
  TrimInPlace(Host, SPACE);
end;

function urlDecodeFile(const S: String; var Protocol, Host, Path: String): Boolean;
begin
  Protocol := '';
  Host := '';
  Path := '';
  if S <> '' then
    if StrMatchLeft(S, 'file:', False) then
      begin
        Protocol := protoFile;
        Path := CopyFrom(S, 6);
      end else
    if (PChar(S)^ = '/') or
       (PathHasDriveLetter(S) and StrMatch(S, '/', 3))  then
      begin
        Protocol := protoFile;
        Path := S;
      end;
  Result := Protocol <> '';
end;

function urlDecodeKnownProtocol(const S: String; var Protocol, Host, Path: String): Boolean;
begin
  Result := urlDecodeHTTP(S, Protocol, Host, Path);
  if Result then
    exit;
  Result := urlDecodeEMail(S, Protocol, Host, Path);
  if Result then
    exit;
  Result := urlDecodeFile(S, Protocol, Host, Path);
  if Result then
    exit;
end;

function urlDecodePath(const S: String; var Protocol, Host, Path: String): Boolean;
var I: Integer;
begin
  Protocol := '';
  Host := '';
  Path := '';
  Result := False;
  // special cases
  if (S = '') or (S = '*') or (S = '/') then
    begin
      Path := S;
      Result := True;
    end else
  // relative path
  if StrMatchLeft(S, '../') or StrMatchLeft(S, './') then
    begin
      Path := S;
      Result := True;
    end else
  // "/" prefix
  if PChar(S)^ = '/' then
    begin
      if StrMatchLeft(S, '//') then
        begin
          // "//"host["/"path]
          I := PosChar('/', S, 3);
          if I = 0 then
            // "//"host
            Host := CopyFrom(S, 3) else
            begin
              // "//"host"/"path
              Host := CopyRange(S, 3, I - 1);
              Path := CopyFrom(S, I);
            end;
        end else
        // "/"path
        Path := S;
      Result := True;
    end;
end;

procedure urlDecodeGeneral(const S: String; var Protocol, Host, Path: String);
var I, J : Integer;
    T    : String;
begin
  Protocol := '';
  Host := '';
  Path := '';
  I := PosStr('://', S);
  J := PosChar('/', S);
  if (I > 0) and (J = I + 1) then
    begin
      // protocol"://"
      Protocol := Trim(CopyLeft(S, I - 1), SPACE);
      J := PosChar('/', S, I + 3);
      if J = 0 then
        begin
          Host := Trim(CopyFrom(S, I + 3), SPACE);
          Path := '';
        end else
        begin
          Host := Trim(CopyRange(S, I + 3, J - 1), SPACE);
          Path := Trim(CopyFrom(S, J), SPACE);
        end;
      exit;
    end;
  I := PosChar(':', S);
  if (I = 0) or ((I > 0) and (J > 0) and (J < I)) then
    begin
      // no protocol
      Path := S;
      exit;
    end;
  // check text between ":" and "/"
  if J > 0 then
    T := CopyRange(S, I + 1, J - 1) else
    T := CopyFrom(S, I + 1);
  if StrIsNumeric(T) then
    begin
      // address":"port/path
      if J = 0 then
        Host := S else
        begin
          Host := CopyLeft(S, J - 1);
          Path := CopyFrom(S, J);
        end;
      exit;
    end;
  // protocol":"host"/"path
  Protocol := Trim(CopyLeft(S, I - 1), SPACE);
  if J = 0 then
    Host := CopyFrom(S, I + 1) else
    begin
      Host := CopyRange(S, I + 1, J - 1);
      Path := CopyFrom(S, J);
    end;
end;

procedure DecodeURL(const URL: String; var Protocol, Host, Path: String);
const KnownProtocols = 3;
      KnownProtocol: Array [1..KnownProtocols] of String = (protoEMail,
                    protoNNTP, protoFile);
var S : String;
    I : Integer;
begin
  Protocol := '';
  Host := '';
  Path := '';
  // clean URL
  S := Trim(URL, SPACE);
  if S = '' then
    exit;
  // check if url is a path only
  if urlDecodePath(S, Protocol, Host, Path) then
    exit;
  // check url protocol handlers
  For I := 0 to Length(URLProtocols) - 1 do
    if URLProtocols[I].DecodeURL(URL, Protocol, Host, Path) then
      exit;
  // check known protocol
  if urlDecodeKnownProtocol(S, Protocol, Host, Path) then
    exit;
  // check general format
  urlDecodeGeneral(S, Protocol, Host, Path);
end;

function EncodeURL(const Protocol, Host, Path: String): String;
begin
  Result := '';
  if Protocol <> '' then
    if StrEqualNoCase(protoHTTP, Protocol) or
       StrEqualNoCase(protoHTTPS, Protocol) then
      Result := Protocol + '://' else
      Result := Protocol + ':';
  Result := Result + Host;
  if Path <> '' then
    if not (Path [1] in [':', '/', '/', '@', ',']) then
      Result := Result + '/' + Path else
      Result := Result + Path;
end;

{                                                                              }
{ URL content (blocking functions)                                             }
{                                                                              }
function GetURLProtocolContentReader(const Protocol, Host, Path: String;
    var ContentType: String): AReaderEx;
var I : Integer;
    P : AURLProtocol;
    S : String;
begin
  For I := 0 to Length(URLProtocols) - 1 do
    begin
      P := URLProtocols[I];
      if P.IsContentSupported(Protocol, Host, Path) then
        begin
          Result := P.GetContentReader(Protocol, Host, Path, ContentType);
          if Assigned(Result) then
            exit;
          if P.GetContentString(Protocol, Host, Path, S, ContentType) then
            begin
              Result := TStringReader.Create(S);
              exit;
            end;
        end;
    end;
  ContentType := '';
  Result := nil;
end;

function GetURLProtocolContentString(const Protocol, Host, Path: String;
    var Content, ContentType: String): Boolean;
var I : Integer;
    P : AURLProtocol;
    R : AReaderEx;
begin
  For I := 0 to Length(URLProtocols) - 1 do
    begin
      P := URLProtocols[I];
      if P.IsContentSupported(Protocol, Host, Path) then
        begin
          Result := P.GetContentString(Protocol, Host, Path, Content, ContentType);
          if Result then
            exit;
          R := P.GetContentReader(Protocol, Host, Path, ContentType);
          if Assigned(R) then
            begin
              try
                Content := R.GetToEOF;
              finally
                R.Free;
              end;
              Result := True;
              exit;
            end;
        end;
    end;
  Content := '';
  ContentType := '';
  Result := False;
end;

function GetURLContentReader(const URL: String; var ContentType: String): AReaderEx;
var Protocol, Host, Path : String;
begin
  if URL = '' then
    raise EURLProtocol.Create('URL required');
  DecodeURL(URL, Protocol, Host, Path);
  Result := GetURLProtocolContentReader(Protocol, Host, Path, ContentType);
end;

function GetURLContentString(const URL: String;
    var Content, ContentType: String): Boolean;
var Protocol, Host, Path : String;
begin
  if URL = '' then
    raise EURLProtocol.Create('URL required');
  DecodeURL(URL, Protocol, Host, Path);
  Result := GetURLProtocolContentString(Protocol, Host, Path, Content, ContentType);
end;

function RequireURLProtocolContentReader(const Protocol, Host, Path: String;
    var ContentType: String): AReaderEx;
begin
  Result := GetURLProtocolContentReader(Protocol, Host, Path, ContentType);
  if not Assigned(Result) then
    raise EURLProtocol.Create('URL not supported');
end;

function RequireURLProtocolContentString(const Protocol, Host, Path: String;
    var ContentType: String): String;
begin
  if not GetURLProtocolContentString(Protocol, Host, Path, Result, ContentType) then
    raise EURLProtocol.Create('URL not supported');
end;

function RequireURLContentReader(const URL: String; var ContentType: String): AReaderEx;
begin
  Result := GetURLContentReader(URL, ContentType);
  if not Assigned(Result) then
    raise EURLProtocol.Create('URL not supported');
end;

function RequireURLContentString(const URL: String; var ContentType: String): String;
begin
  if not GetURLContentString(URL, Result, ContentType) then
    raise EURLProtocol.Create('URL not supported');
end;

{                                                                              }
{ URL content (asynchronous functions)                                         }
{                                                                              }
const
  ProgressBlockSize = 4096;

constructor TURLContentAsync.Create(
    const Protocol, Host, Path: String;
    const ContentMode: TURLContentMode;
    const ContentFileName: String;
    const Data: Pointer;
    const OnProgress: TURLContentProgressEvent;
    const OnFinished: TURLContentNotifyEvent);
begin
  FProtocol := Protocol;
  FHost := Host;
  FPath := Path;
  FContentMode := ContentMode;
  FContentFileName := ContentFileName;
  FData := Data;
  FOnProgress := OnProgress;
  FOnFinished := OnFinished;
  FreeOnTerminate := False;
  inherited Create(False);
end;

procedure TURLContentAsync.TriggerProgress(const Buffer; const Size: Integer;
    var Abort: Boolean);
begin
  if Assigned(FOnProgress) then
    FOnProgress(self, FData, Buffer, Size, Abort);
end;

procedure TURLContentAsync.TriggerFinished;
begin
  if Assigned(FOnFinished) then
    FOnFinished(self, FData);
end;

procedure TURLContentAsync.Execute;
var Reader : AReaderEx;
    Writer : TFileWriter;
    Buf    : Array[0..ProgressBlockSize - 1] of Byte;
    I      : Integer;
    A      : Boolean;
begin
  FErrorMessage := '';
  try try
    if FContentMode = ucGetContentString then
      begin
        FContentString := RequireURLProtocolContentString(FProtocol, FHost, FPath, FContentType);
        FContentSize := Length(FContentString);
        FSuccess := True;
      end else
    if FContentMode in [ucNotifyContentBlocks, ucSaveContentFile] then
      begin
        Reader := RequireURLProtocolContentReader(FProtocol, FHost, FPath, FContentType);
        try
          FContentSize := Reader.Size;
          if FContentMode = ucSaveContentFile then
            begin
              if FContentFileName = '' then
                raise EURLProtocol.Create('Filename required');
              Writer := TFileWriter.Create(FContentFileName, fwomCreate)
            end
          else
            Writer := nil;
          try
            A := False;
            While not Reader.EOF and not Terminated do
              begin
                I := Reader.Read(Buf[0], ProgressBlockSize);
                if (I = 0) and not Reader.EOF then
                  raise EURLProtocol.Create('Read error');
                Inc(FContentProgress, I);
                if Terminated then
                  exit;
                TriggerProgress(Buf[0], I, A);
                if A then
                  raise EURLProtocol.Create('Aborted');
                if Assigned(Writer) then
                  Writer.WriteBuffer(Buf[0], I);
              end;
          finally
            Writer.Free;
          end;
        finally
          Reader.Free;
        end;
        FContentSize := FContentProgress;
        FSuccess := True;
      end;
  except
    on E : Exception do
      FErrorMessage := E.Message;
  end;
  finally
    FFinished := True;
    TriggerFinished;
  end;
end;

function GetURLProtocolContentAsync(const Protocol, Host, Path: String;
    const ContentMode: TURLContentMode;
    const ContentFileName: String;
    const Data: Pointer;
    const OnProgress: TURLContentProgressEvent;
    const OnFinished: TURLContentNotifyEvent): TURLContentAsync;
begin
  Result := TURLContentAsync.Create(Protocol, Host, Path,
      ContentMode, ContentFileName, Data, OnProgress, OnFinished);
end;

function GetURLContentAsync(
    const URL: String;
    const ContentMode: TURLContentMode;
    const ContentFileName: String;
    const Data: Pointer;
    const OnProgress: TURLContentProgressEvent;
    const OnFinished: TURLContentNotifyEvent): TURLContentAsync;
var Protocol, Host, Path : String;
begin
  DecodeURL(URL, Protocol, Host, Path);
  Result := GetURLProtocolContentAsync(Protocol, Host, Path,
      ContentMode, ContentFileName, Data, OnProgress, OnFinished);
end;

{                                                                              }
{ URL File Protocol                                                            }
{                                                                              }
function TURLFileProtocol.IsContentSupported(const Protocol, Host, Path: String): Boolean;
begin
  Result := StrEqualNoCase(Protocol, protoFile) and (Host = '') and (Path <> '');
end;

function TURLFileProtocol.GetContentReader(const Protocol, Host, Path: String;
    var ContentType: String): AReaderEx;
begin
  ContentType := MIMEContentTypeFromExtention(ExtractFileExt(Path));
  Result := TFileReader.Create(Path);
end;

function TURLFileProtocol.GetContentString(const Protocol, Host, Path: String;
    var Content, ContentType: String): Boolean;
begin
  Content := ReadFileToStr(Path);
  ContentType := MIMEContentTypeFromExtention(ExtractFileExt(Path));
  Result := True;
end;

var
  URLFileProtocol : AURLProtocol = nil;

procedure RegisterURLFileProtocol;
begin
  if Assigned(URLFileProtocol) then
    exit;
  URLFileProtocol := TURLFileProtocol.Create;
  RegisterURLProtocol(URLFileProtocol);
end;

{                                                                              }
{ Self-testing code                                                            }
{                                                                              }
{$ASSERTIONS ON}
procedure SelfTest;
var P, M, U : String;
begin
  { DecodeURL                                                                  }
  DecodeURL('http://abc.com/index.html', P, M, U);
  Assert((P = protoHTTP) and (M = 'abc.com') and (U = '/index.html'), 'DecodeURL');
  DecodeURL('a://b.c/1/2/3', P, M, U);
  Assert((P = 'a') and (M = 'b.c') and (U = '/1/2/3'), 'DecodeURL');
  DecodeURL('http://b:80/i.html', P, M, U);
  Assert((P = protoHTTP) and (M = 'b:80') and (U = '/i.html'), 'DecodeURL');
  DecodeURL('mailto:a@b', P, M, U);
  Assert((P = protoEMail) and (M = 'a@b') and (U = ''), 'DecodeURL');

  { EncodeURL                                                                  }
  Assert(EncodeURL('http', 'abc.com', '/') = 'http://abc.com/', 'EncodeURL');
  Assert(EncodeURL('news', 'a.b', '') = 'news:a.b', 'EncodeURL');
  Assert(EncodeURL('https', 'abc.com', '/') = 'https://abc.com/', 'EncodeURL');
end;

initialization
finalization
  FreeAndNil(URLFileProtocol);
end.
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息