This tip shows how to extract the whole text of a PDF document.

You will need:
– 1 TMemo, 5 TLabel, 1 TButton, 1 TGroupBox and 1 TOpenDialog
– to import the type library from Adobe Acrobat (look for Acrobat.tlb)

unit Unit1;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, OleCtrls, acrobat_tlb;

type
  { Main form of the PDF text extraction demo. The component fields below are
    the controls referenced by this unit; Button1Click is the only event
    handler declared here. }
  TForm1 = class(TForm)
    Button1: TButton;
    Memo1: TMemo;
    OpenDialog1: TOpenDialog;
    GroupBox1: TGroupBox;
    Label1: TLabel;
    Label2: TLabel;
    Label3: TLabel;
    Label4: TLabel;
    Label5: TLabel;
    { Handler for Button1's click event (see implementation section). }
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;


var
  { Unit-level reference to the form instance. It is not assigned anywhere in
    this unit; presumably the project's startup code creates it - TODO confirm. }
  Form1: TForm1;

implementation

uses ComObj;

{$R *.dfm}
{$TYPEDADDRESS OFF} // required: this directive must stay switched off
{ Click handler for Button1.

  Lets the user pick a PDF through OpenDialog1, opens it with Acrobat's OLE
  automation object 'AcroExch.pdDoc', and for every page requests a text
  selection covering up to 4096 entries. Each text chunk is added to Memo1
  (CR/LF replaced by spaces); Label2 shows the number of chunks read and
  Label4 the accumulated text size. On success the memo content is written
  to debug.txt next to the executable.

  Sender is not used.

  Errors: any exception raised while talking to Acrobat is caught and shown
  in a message dialog; in that case (and when the document cannot be opened)
  nothing is saved.

  All working state is local to this procedure. In particular the loop
  counters must be local variables: Delphi does not accept a unit-level
  variable as the control variable of a for loop inside a routine. }
procedure TForm1.Button1Click(Sender: TObject);
var
  Doc: Variant;         // late-bound AcroExch.PDDoc
  Page: Variant;        // page object returned by Doc.AcquirePage
  Hilite: Variant;      // AcroExch.HiliteList describing the range to select
  TextSel: Variant;     // text selection returned by Page.CreatePageHilite
  Opened: Boolean;
  PageIdx, PageCount: Integer;
  ChunkIdx, ChunkCount: Integer;
  Chunk: string;
  ChunkTotal: Integer;  // number of text chunks read so far
  ByteTotal: Integer;   // accumulated size of the extracted text in bytes
  KBytes: Real;
  SizeText: string;

  { Returns a copy of S in which every carriage return (#13) and line feed
    (#10) is replaced by a single space. Delphi strings are 1-based, so the
    scan runs from 1 to Length(S); an empty string is returned unchanged. }
  function RemoveCrLf(const S: string): string;
  var
    k: Integer;
  begin
    Result := S;
    for k := 1 to Length(Result) do
      if (Result[k] = #13) or (Result[k] = #10) then
        Result[k] := ' ';
  end;

begin
  if not OpenDialog1.Execute then
    Exit;

  Memo1.Clear;
  ChunkTotal := 0;
  ByteTotal := 0;

  try
    Doc := CreateOleObject('AcroExch.pdDoc');

    Opened := Doc.Open(OpenDialog1.FileName);
    if not Opened then
    begin
      MessageDlg('Kann Datei nicht öffnen', mtWarning, [mbOK], 0);
      Exit;
    end;

    try
      PageCount := Doc.GetNumPages;
      for PageIdx := 0 to PageCount - 1 do
      begin
        Memo1.Lines.Add('----------------------------------------------');

        Page := Doc.AcquirePage(PageIdx);

        // Select entries 0..4095 of the page.
        Hilite := CreateOleObject('AcroExch.HiliteList');
        Hilite.Add(0, 4096);

        TextSel := Page.CreatePageHilite(Hilite);

        // CreatePageHilite yields an empty dispatch when no selection could
        // be created (e.g. a page without text); skip such pages instead of
        // aborting the whole extraction.
        if not VarIsClear(TextSel) then
        begin
          ChunkCount := TextSel.GetNumText;
          for ChunkIdx := 0 to ChunkCount - 1 do
          begin
            Chunk := TextSel.GetText(ChunkIdx);
            if Chunk <> '' then
              Memo1.Lines.Add(RemoveCrLf(Chunk));

            // Size of the text itself; SizeOf(Chunk) would only be the size
            // of the string reference.
            ByteTotal := ByteTotal + Length(Chunk) * SizeOf(Char);
            Inc(ChunkTotal);

            // Keep the UI responsive while long documents are processed.
            Application.ProcessMessages;
          end;
        end;

        // Release the per-page COM objects before moving on.
        Hilite := Unassigned;
        TextSel := Unassigned;
        Page := Unassigned;

        Label2.Caption := IntToStr(ChunkTotal);
        Label4.Caption := IntToStr(ByteTotal);
        Label2.Refresh;
        Label4.Refresh;
      end;
    finally
      // Drop any page-level references first, then close the document so
      // Acrobat does not keep the file open.
      TextSel := Unassigned;
      Hilite := Unassigned;
      Page := Unassigned;
      Doc.Close;
    end;
  except
    on E: Exception do
    begin
      MessageDlg('Fehler: ' + E.Message, mtError, [mbOK], 0);
      Exit;
    end;
  end;

  // NOTE(review): Label5 is only ever set to 'KB' and never reset, so after a
  // large document a later small one still shows 'KB' - confirm the intended
  // default caption before changing this.
  if ByteTotal > 1024 then
  begin
    KBytes := ByteTotal / 1024;
    Str(KBytes: 2: 1, SizeText);
    Label4.Caption := SizeText;
    Label5.Caption := 'KB';
  end;

  // ExtractFilePath already ends with a path delimiter.
  Memo1.Lines.SaveToFile(ExtractFilePath(Application.ExeName) + 'debug.txt');
end;

end.