// www.pudn.com > DriveRescuev1.8.zip > filedet.pas


// file detection unit 
 
unit filedet; 
 
interface 
 
const 
  { file data types } 
  // value IsFile stores in "datatype" on entry; it stays for every format except MP3
  data_unknown = 1; 
  // value IsFile stores in "datatype" when the buffer is recognised as MP3 (MPEG audio)
  data_stream  = 2; 
 
 
// Inspects the first "bufsize" bytes at "abuf" for known file signatures.
// Returns TRUE and stores an upper-case extension (e.g. 'JPG') in "fileext"
// when a format is recognised; returns FALSE otherwise.
// "datatype" receives one of the data_xxx constants above.
function IsFile(abuf: pointer; bufsize: longword; var fileext: string; var datatype: byte): boolean; 
 
 
implementation 
 
uses common, helpers, sysutils; 
 
 
// IsFile - guess the file type of a raw data buffer from its content.
//
//   abuf     : pointer to the first byte of the data to inspect
//   bufsize  : number of valid bytes at abuf
//   fileext  : receives the upper-case extension ('JPG', 'DOC', ...) when a
//              format is recognised; left untouched otherwise
//   datatype : receives data_stream for MP3 data, data_unknown in every other case
//   result   : TRUE if a known format was recognised, FALSE otherwise
//
// The checks run in a fixed order (binary signatures, then text formats, then
// MPEG audio) and the first match wins. Any exception raised while inspecting
// the buffer is swallowed and reported as "not recognised" (best-effort scan).
// All compound conditions rely on short-circuit boolean evaluation ({$B-},
// the Delphi default) so that the size guards protect the byte accesses.
function IsFile(abuf: pointer; bufsize: longword; var fileext: string; var datatype: byte): boolean;
var
  CRC: word;

   // Feeds the lowest "len" bits of "bitstring" (most significant bit first)
   // into the running CRC-16 (polynomial $8005) held in the outer variable CRC.
   procedure crc16_addbits(bitstring: longword; len: byte);
   const
     polynomial = $8005;
   var
     bitmask: longword;
     feedback: boolean;
   begin
     if len = 0 then exit;
     bitmask:=longword(1) SHL (len-1);
     repeat
       // The C reference uses logical NOT ("!a ^ !b"); in Pascal this must be
       // expressed with boolean operands, not bitwise NOT on integers.
       feedback:=((crc AND $8000) <> 0) XOR ((bitstring AND bitmask) <> 0);
       crc:=(crc SHL 1) AND $FFFF;
       if feedback then crc:=crc XOR polynomial;
       bitmask:=bitmask SHR 1;
     until (bitmask = 0);
   end;

const
  BitRates: array[0..5, 0..15] of word=
    { Version 2 and V2.5: L3,L2,L1 }
   ((0,  8, 16, 24, 32, 40, 48, 56, 64, 80, 96,112,128,144,160,0),
    (0,  8, 16, 24, 32, 40, 48, 56, 64, 80, 96,112,128,144,160,0),
    (0, 32, 48, 56, 64, 80, 96,112,128,144,160,176,192,224,256,0),

    { Version 1:  L3,L2,L1 }
    (0, 32, 40, 48, 56, 64, 80, 96,112,128,160,192,224,256,320,0),
    (0, 32, 48, 56, 64, 80, 96,112,128,160,192,224,256,320,384,0),
    (0, 32, 64, 96,128,160,192,224,256,288,320,352,384,416,448,0));

  // Sampling rate frequency index (values are in Hz)
  // for MPEG2.5, Reserved, MPEG2, MPEG1
  SampleRates: array[0..3, 0..3] of word =
     ((11025, 12000,  8000, 0),
      (    0,     0,     0, 0),
      (22050, 24000, 16000, 0),
      (44100, 48000, 32000, 0));

  // MPEG Audio version IDs
  MPEGIDVer25     =  0;
  MPEGIDReserved  =  1;
  MPEGIDVer2      =  2;
  MPEGIDVer1      =  3;

  // Layer descriptions
  LayerReserved   = 0;
  LayerIII        = 1;
  LayerII         = 2;
  LayerI          = 3;

  ChannelStereo    = 0;
  ChannelJointStereo = 1;
  ChannelDual        = 2;
  ChannelSingle      = 3;

var
  buf: pbytearray;
  bufpos: pbytearray;
  maxbufpos: pbytearray;
  i, j, row: integer;
  textlimit: integer;
  sideinfolen: integer;
  framecount: integer;
  MPEGAudioVer: byte;
  Layer       : byte;
  ChannelMode : byte;
  SampleRate  : word;
  Padding     : byte;
  BitRateIdx, SampleRateIdx: byte;
  BitRate     : word;
  CRCProtected: boolean;
  MPEG_CRC: word;
  Emphasis    : byte;
  FrameLengthInBytes: word;
  invalid: boolean;
  s: shortstring;
  recordno: word;
  vers: byte;
begin
  result:=FALSE; datatype:=data_unknown;

  // every signature below needs at least two bytes
  if (abuf = nil) OR (bufsize < 2) then exit;

  try
    buf:=pbytearray(abuf);
    // -------------------- BINARY coded files ------------------------------------
    // check for JPG...
    // SOI = Start Of Image = 'FFD8': any JPG file starts with the sequence FFD8.
    // EOI = End Of Image   = 'FFD9': any JPG file ends with FFD9 (not checked here).
    if (buf^[0] = $FF) AND (buf^[1]=$D8) then
    begin
      result:=TRUE; fileext:='JPG';
      exit;
    end;

    // check for MS-WORD...
    // Word's File-Information-Block (FIB) starts with a 16 bit magic number:
    //   Word 6.0: 0xA5DC, Word 7.0 (95): 0xA5DC, Word 8.0 (97): 0xA5EC
    // followed by a 16 bit version (>= 101 for Word 6.0 and higher).
    // D0 CF 11 E0 is the start of the OLE2 compound document signature.
    if (pword(@buf^[0])^ = $A5DC)
      OR (pword(@buf^[0])^ = $A5DB)
      OR (pword(@buf^[0])^ = $A5EC)
      OR ((bufsize >= 4) AND (buf^[0] = $D0) AND (buf^[1]= $CF) AND (buf^[2]=$11) AND (buf^[3]=$E0)) then
    begin
      result:=TRUE; fileext:='DOC';
      exit;
    end;

    // check for BIFF...
    recordno:=pword(@buf^[0])^;
    if (recordno AND $00FF) = $0009 then   // BOF record (Beginning of file) ?
    begin
      vers:=recordno SHR 8;
      if (vers = $04) OR (vers = $02) OR (vers = $00) OR  (vers = $08) then  // BIFF 3,4,5,7,8 ?
      begin
        result:=TRUE; fileext:='XLS';               // BIFF => XLS file
        exit;
      end;
    end;

    // check for BMP...
    if (buf^[0] = $42) AND (buf^[1]= $4D) then // the header is "BM"
    begin
      result:=TRUE; fileext:='BMP';
      exit;
    end;

    // check for GIF...
    // the header is "GIF89a" or "GIF87a": the digit is byte 4, the 'a' is byte 5
    if (bufsize >= 6)
      AND (buf^[0] = $47) AND (buf^[1]= $49) AND (buf^[2]=$46) AND (buf^[3]=$38)
      AND ((buf^[4]=$39) OR (buf^[4]=$37)) AND (buf^[5]=$61) then
    begin
      result:=TRUE; fileext:='GIF';
      exit;
    end;

    // check for TIFF...
    if ((buf^[0] = $49) AND (buf^[1]= $49))
      OR ((buf^[0]=$4D) AND (buf^[1]=$4D)) then       // the header is "4949" or "4D4D"
    begin
      result:=TRUE; fileext:='TIF';
      exit;
    end;

    // check for PNG...
    // magic: 137 80 78 71 13 10 26 10
    if (bufsize >= 8)
      AND (buf^[0] = 137) AND (buf^[1]= 80) AND (buf^[2]=78) AND (buf^[3]=71)
      AND (buf^[4]=13) AND (buf^[5]=10) AND (buf^[6]=26) AND (buf^[7]=10) then
    begin
      result:=TRUE; fileext:='PNG';
      exit;
    end;

    // check for RIFF...
    if (bufsize >= 4) AND IsDataEqual(@buf^[0], 'RIFF') then             // RIFF ID
    begin
      // RIFF file found; the form type sits at offset 8 (needs 12 bytes)
      if bufsize >= 12 then
      begin
        // check for CDR...
        if IsDataEqual(@buf^[8], 'CDR') then           // the format is "CDR"
        begin
          result:=TRUE; fileext:='CDR';
          exit;
        end;
        // check for WAVE...
        if IsDataEqual(@buf^[8], 'WAVE') then           // the format is "WAVE"
        begin
          result:=TRUE; fileext:='WAV';
          exit;
        end;
        // check for AVI...
        if IsDataEqual(@buf^[8], 'AVI ') then           // the format is "AVI "
        begin
          result:=TRUE; fileext:='AVI';
          exit;
        end;
      end;

      // else unknown RIFF...
      result:=TRUE; fileext:='RIFF';
      exit;
    end;

    // check for EXE...
    if (buf^[0] = $4D) AND (buf^[1]= $5A) then           // the header is "MZ"
    begin
      result:=TRUE; fileext:='EXE';
      exit;
    end;

    // check for DBF...
    if (bufsize >= 32)
      AND ((buf^[0] = $03) OR (buf^[0]= $83) OR (buf^[0]= $F5) OR (buf^[0]= $8B) OR (buf^[0]= $8E))  // dBase/FoxBase/FoxPro ?
      AND (buf^[12] = 0) AND (buf^[13] = 0)                     // res=0 ?
      AND (buf^[30] = 0) AND (buf^[31] = 0) then                // res=0 ?
    begin
      result:=TRUE; fileext:='DBF';
      exit;
    end;

    // check for HLP...
    if (bufsize >= 4) AND (plong(@buf^[0])^ = $00035f3f) then  // the magic is 0x00035f3f
    begin
      result:=TRUE; fileext:='HLP';
      exit;
    end;

    // check for TAR...
    // the magic "ustar"+0 occupies bytes 257..262
    if (bufsize >= 263) AND IsDataEqual(@buf^[257], 'ustar'+#0) then
    begin
      result:=TRUE; fileext:='TAR';
      exit;
    end;

    // check for LZH...
    if (bufsize >= 5)
      AND (IsDataEqual(@buf^[2], '-lh') OR IsDataEqual(@buf^[2], '-lz')) then  // the magic is "-lh" or "-lz"
    begin
      result:=TRUE; fileext:='LZH';
      exit;
    end;

    // check for ZIP...
    if (bufsize >= 4) AND (plong(@buf^[0])^ = $04034b50) then  // the local file header is (0x04034b50)
    begin
      result:=TRUE; fileext:='ZIP';
      exit;
    end;

    // check for ARJ...
    if pword(@buf^[0])^ = $EA60 then  // ARJ ID=EA60h
    begin
      result:=TRUE; fileext:='ARJ';
      exit;
    end;

    // check for RAR...
    // The marker block is actually considered as a fixed byte
    // sequence: 0x52 0x61 0x72 0x21 0x1a 0x07 0x00
    if (bufsize >= 7)
      AND (buf^[0] = $52) AND (buf^[1]= $61) AND (buf^[2]=$72) AND (buf^[3]=$21)
      AND (buf^[4]=$1a) AND (buf^[5]=$07) AND (buf^[6]=$00) then
    begin
      result:=TRUE; fileext:='RAR';
      exit;
    end;

    // check for PDF...
    // The first line of a PDF file is "%PDF-" followed by the version of the
    // specification the file adheres to, written as digit '.' digit.
    // Any version is accepted, not only 1.0 to 1.2.
    if (bufsize >= 8)
      AND (buf^[0] = $25) AND (buf^[1]= $50) AND (buf^[2]=$44) AND (buf^[3]=$46)
      AND (buf^[4]=$2D) AND (buf^[5] IN [$30..$39]) AND (buf^[6]=$2E)
      AND (buf^[7] IN [$30..$39]) then
    begin
      result:=TRUE; fileext:='PDF';
      exit;
    end;

    // check for MIDI...
    if (bufsize >= 4) AND IsDataEqual(@buf^[0], 'MThd') then             // head chunk
    begin
      result:=TRUE; fileext:='MID';
      exit;
    end;

    // check for QuickTime MOV...
    if (bufsize >= 8) AND IsDataEqual(@buf^[4], 'moov') then           // is basic atom type?
    begin
      result:=TRUE; fileext:='MOV';
      exit;
    end;

    // -------------------- TEXT coded files ------------------------------------
    // check for HTML...
    // NOTE(review): the tag literal was empty in the source this was taken
    // from (most likely stripped as markup); '<HTML>' matches the 6-byte
    // window of the loop. Confirm the exact spelling / case handling of
    // IsDataEqual against the original project.
    i:=0;
    while longword(i) + 6 <= bufsize do
    begin
      if IsDataEqual(@buf^[i], '<HTML>') then   // HTML Tag found?
      begin
        result:=TRUE; fileext:='HTML';
        exit;
      end;
      inc(i);
    end;

    // Only the first 32 bytes (or fewer for short buffers) are scanned as text lines.
    bufpos:=buf;
    if bufsize < 32 then textlimit:=integer(bufsize) else textlimit:=32;
    maxbufpos:=pbytearray(@buf^[textlimit]);
    repeat
      s:=GetNextTextLine(pointer(bufpos), pointer(maxbufpos));

      // check for RTF...
      // This is tested before the DXF '0' test: RTF headers usually contain
      // a '0' (e.g. "\deff0") and would otherwise never reach this check.
      if pos('{\rtf', s) <> 0 then
      begin
        result:=TRUE; fileext:='RTF';
        exit;
      end;

      // check for DXF...
      // (find entries 0, SECTION, 2, HEADER)
      if pos('0', s) <> 0 then
      begin
        s:=GetNextTextLine(pointer(bufpos), pointer(maxbufpos));
        if s = 'SECTION' then
        begin
          s:=GetNextTextLine(pointer(bufpos), pointer(maxbufpos));
          if pos('2', s) <> 0 then
          begin
            s:=GetNextTextLine(pointer(bufpos), pointer(maxbufpos));
            if s = 'HEADER' then
            begin
              result:=TRUE; fileext:='DXF';
              exit;
            end;
          end;
        end;
      end;
    until (s ='');

    // check for MPEG...
    // A frame sync alone is not enough: it can easily (and very frequently) be
    // found in any binary file. Therefore each header is validated and frames
    // are followed one after another using the computed frame length.
    framecount:=0;
    i:=0;
    while longword(i) + 4 <= bufsize do
    begin
      if (buf^[i]=$FF) AND ((buf^[i+1] AND $E0)=$E0) then
      begin
        // sync found, decode the remaining header fields...
        invalid:=TRUE;
        FrameLengthInBytes:=0;
        MPEGAudioVer :=(buf^[i+1] SHR 3) AND 3;
        Layer        :=(buf^[i+1] SHR 1) AND 3;
        BitRateIdx   :=(buf^[i+2] SHR 4) AND $F;
        SampleRateIdx:=(buf^[i+2] SHR 2) AND 3;
        Emphasis     :=(buf^[i+3] AND 3);

        // reject reserved version/layer/sample rate/emphasis and the
        // "free" (0) and "bad" ($F) bit rate indexes
        if (MPEGAudioVer <> MPEGIDReserved) AND (Layer <> LayerReserved)
          AND (BitRateIdx <> $F) AND (BitRateIdx <> 0)
          AND (SampleRateIdx <> 3) AND (Emphasis <> 2) then
        begin
          // protection bit: 0 = a 16 bit CRC follows the header, 1 = no CRC
          CRCProtected:=((buf^[i+1]) AND 1) = 0;
          if MPEGAudioVer = MPEGIDVer1 then row:=3 else row:=0;
          BitRate:=BitRates[row + (Layer-1), BitRateIdx];
          SampleRate:=SampleRates[MPEGAudioVer, SampleRateIdx];
          Padding :=((buf^[i+2]) SHR 1) AND 1;
          ChannelMode:=(buf^[i+3] SHR 6) AND 3;

          // calculate frame length (length of compressed frame)...
          if Layer = LayerI then
            FrameLengthInBytes := (12 * BitRate*1000 div SampleRate + Padding) * 4
          else if (Layer = LayerIII) AND (MPEGAudioVer <> MPEGIDVer1) then
            // MPEG2 / MPEG2.5 Layer III frames carry 576 samples, not 1152
            FrameLengthInBytes := 72 * BitRate*1000 div SampleRate + Padding
          else
            FrameLengthInBytes := 144 * BitRate*1000 div SampleRate + Padding;

          if FrameLengthInBytes >= 32 then
          begin
            invalid:=FALSE;
            // The CRC is only verified for Layer III, where the protected
            // area is the fixed-size side info. Layer I/II frames are
            // accepted on header consistency alone.
            if CRCProtected AND (Layer = LayerIII) then
            begin
              if MPEGAudioVer = MPEGIDVer1 then
              begin
                if ChannelMode = ChannelSingle then sideinfolen:=17 else sideinfolen:=32;
              end else
              begin
                if ChannelMode = ChannelSingle then sideinfolen:=9 else sideinfolen:=17;
              end;
              // verify only if header + CRC + side info are inside the buffer
              if longword(i) + 6 + longword(sideinfolen) <= bufsize then
              begin
                // CRC is stored big-endian right after the 4 header bytes
                MPEG_CRC:=(word(buf^[i+4]) SHL 8) OR buf^[i+5];
                // CRC-16 seeded with $FFFF over header bytes 2..3 and the side info
                crc:=$FFFF;
                crc16_addbits(buf^[i+2], 8);
                crc16_addbits(buf^[i+3], 8);
                for j:=i+6 to i+6+sideinfolen-1 do
                  crc16_addbits(buf^[j], 8);
                invalid:=(crc <> MPEG_CRC);
              end;
            end;
          end;
        end;

        if invalid then
        begin
          // NOTE(review): scanning stops at the first invalid sync, so leading
          // garbage that happens to contain a sync pattern defeats detection.
          framecount:=0; // reset frame count to zero
          break;
        end else
        begin
          // MP3 header is valid, continue at the next expected frame...
          inc(i, FrameLengthInBytes);
          inc(framecount);
        end;
      end else inc(i);
    end;
    // At least one frame is required: without that test every buffer smaller
    // than 512 bytes (bufsize DIV 512 = 0) would be reported as MP3.
    // NOTE(review): the DIV 512 threshold is a heuristic kept from the original.
    if (framecount > 0) AND (longword(framecount) >= bufsize DIV 512) then
    begin
      result:=TRUE; fileext:='MP3'; datatype:=data_stream;
      exit;
    end;

  except
    // best-effort scan: any failure while reading the buffer means "not recognised"
    on E : Exception do
    begin
      result:=FALSE;
    end;
  end;
end;
 
 
end.