My TAR extraction routines

Share your advanced PureBasic knowledge/code with the community.
PrincieD
Addict
Addict
Posts: 858
Joined: Wed Aug 10, 2005 2:08 pm
Location: Yorkshire, England
Contact:

My TAR extraction routines

Post by PrincieD »

Hi guys!

Thought you might find my TAR extraction routines useful; I use them in DCC Manager (for extracting in real time as the files are downloaded). At the moment the routines can't handle "long links", but this is trivial to implement (I'll do this myself at a later date); however, the routines are thread safe! :)

Code: Select all

; --- constants shared by the TAR_* routines ---
#TARBUFFERSIZE = 4194304 ;4 meg memory Buffer For file extraction routines
#TARBLOCK = 512 ; block size in bytes; header and data positions advance in multiples of this
#TARFILESIZEOFFSET = 124 ; byte offset of the size field inside a header block
#TARFILENAMESIZE = 100 ; number of bytes read for the entry name at the start of a header block
; State of one opened archive: filled in by TAR_Open and passed to the other TAR_* routines.
Structure TAR_Archive
  Handle.l ; Win32 file handle returned by CreateFile_ in TAR_Open
  pos.l ; NOTE(review): not referenced by any routine visible here - confirm whether it is still needed
EndStructure

; Opens an existing TAR archive and stores its Win32 handle in *Tar\Handle.
;
; filename - full path of the (uncompressed) .tar file
; *Tar     - address of a TAR_Archive structure that receives the handle
;
; Returns #True on success, #False (0) if *Tar is null or the file cannot be opened.
; On failure *Tar\Handle is set to #INVALID_HANDLE_VALUE so that later calls never
; operate on an uninitialised handle.
Procedure TAR_Open(filename.s, *Tar.TAR_Archive)
  
  ; a null structure pointer would crash on the *Tar\Handle write below
  If *Tar = 0
    ProcedureReturn #False
  EndIf
  
  Debug "archive: "+filename
  
  ; The archive is only ever read, so request read access only (this also allows
  ; read-only files). Sharing stays read+write so a download that is still in
  ; progress can keep appending to the file.
  Handle = CreateFile_(filename, #GENERIC_READ, #FILE_SHARE_READ|#FILE_SHARE_WRITE, 0, #OPEN_EXISTING, #FILE_ATTRIBUTE_NORMAL|#FILE_FLAG_SEQUENTIAL_SCAN, 0)
  
  ; store the result in every case: on failure this is #INVALID_HANDLE_VALUE
  *Tar\Handle = Handle
  
  If Handle <> #INVALID_HANDLE_VALUE  ; if opened file successfully
    ProcedureReturn #True
  EndIf
  
  ProcedureReturn #False
EndProcedure

; Closes the archive handle stored in *Tar.
;
; Returns the non-zero result of CloseHandle_ on success, or #False (0) when *Tar is
; null, the structure holds no open handle, or CloseHandle_ fails.
; After closing, the handle is reset so that a second call is a harmless no-op.
Procedure TAR_Close(*Tar.TAR_Archive)
  
  If *Tar = 0
    ProcedureReturn #False
  EndIf
  
  ; 0 = structure never opened, #INVALID_HANDLE_VALUE = open failed or already closed
  If *Tar\Handle = 0 Or *Tar\Handle = #INVALID_HANDLE_VALUE
    ProcedureReturn #False
  EndIf
  
  result = CloseHandle_(*Tar\Handle)
  *Tar\Handle = #INVALID_HANDLE_VALUE
  
  ProcedureReturn result
EndProcedure

; Counts the entries (header blocks) stored in an opened TAR archive.
;
; *Tar - archive previously opened with TAR_Open
;
; Returns the number of entries found, or -1 if *Tar is null or the header buffer
; cannot be allocated. Every header with a non-empty name is counted, so directory
; entries are included. Counting stops at the first header whose name is empty
; (end-of-archive padding), at a header that is not completely present (archive
; still downloading / truncated), or on a seek/read error.
;
; Limitation: positions are handled with the default integer type and the low
; part of GetFileSize_ only, exactly like the other routines in this file.
Procedure TAR_CountFiles(*Tar.TAR_Archive)
  
  If *Tar = 0
    ProcedureReturn -1
  EndIf
  
  *header = AllocateMemory(#TARBLOCK)
  If *header = 0
    ProcedureReturn -1
  EndIf
  
  pos = 0        ; offset of the header block currently examined
  filecount = 0
  
  ; the file size is queried on every pass because the archive may still be growing
  While pos + #TARBLOCK <= GetFileSize_(*Tar\Handle, 0)
    
    ; go to the next header block
    If SetFilePointer_(*Tar\Handle, pos, 0, #FILE_BEGIN) = $FFFFFFFF
      Debug "INVALID_SET_FILE_POINTER"
      Break
    EndIf
    
    ; read the complete header; a short or failed read means there is nothing reliable to count
    BytesRead = 0
    If ReadFile_(*Tar\Handle, *header, #TARBLOCK, @BytesRead, 0) = 0 Or BytesRead < #TARBLOCK
      Break
    EndIf
    
    ; an empty name means we reached the zero-filled end-of-archive blocks
    If PeekB(*header) = 0
      Break
    EndIf
    
    ; decode the size field: octal ASCII digits, optionally preceded by spaces and
    ; terminated by the first non-octal character (space or NUL)
    filesize = 0
    started = #False
    For n = 0 To 10
      digit = (PeekB(*header+#TARFILESIZEOFFSET+n) & $FF) - '0'
      If digit >= 0 And digit <= 7
        filesize = filesize * 8 + digit
        started = #True
      ElseIf started
        Break ; leaves the For loop only
      EndIf
    Next
    
    filecount = filecount + 1
    
    ; skip the header plus the data rounded up to whole 512 byte blocks
    pos = pos + #TARBLOCK + ((filesize + #TARBLOCK - 1) & ~(#TARBLOCK - 1))
    
  Wend
  
  FreeMemory(*header)
  ProcedureReturn filecount
  
EndProcedure

; Creates a new directory structure by appending newPath to Path.
;
; Path    - existing base directory (a trailing "\" is added if missing)
; newPath - relative path using "\" separators; every component that is followed
;           by a "\" is created below Path. A final component without a trailing
;           "\" is treated as a file name and is NOT created.
;
; Returns #True once all components have been processed, #False (0) if Path is not
; an existing directory. Individual CreateDirectory failures (for example when the
; directory already exists) are ignored on purpose.
Procedure TAR_CreateDirectories(Path.s, newPath.s)
  
  ; make sure path is a path
  If Right(Path, 1) <> "\"
    Path = Path + "\"
  EndIf
  If FileSize(Path) <> -2
    ProcedureReturn #False
  EndIf

  ; walk newPath separator by separator and create each prefix in turn
  p = 0
  Repeat
    p = FindString(newPath, "\", p+1)
    If p > 0
      newdir.s = Path+Left(newPath, p-1)
      CreateDirectory(newdir)
    EndIf
  Until p = 0 Or p = Len(newPath)
  
  ProcedureReturn #True
  
EndProcedure


; Private helper for TAR_ExtractAll: copies the data of one archive entry to destName.
;
; *Tar     - opened archive
; *Buffer  - transfer buffer of #TARBUFFERSIZE bytes
; destName - full path of the file to create or resume
; dataPos  - archive offset of the first data byte of the entry
; filesize - size of the entry as stated in its header
;
; An existing, shorter destination file is treated as a partial extraction and is
; resumed at its current end. An existing file that is LARGER than the entry cannot
; be a partial extraction and is truncated and rewritten.
;
; Returns  1 when the copy ran (it may stop early if the archive is still incomplete),
;          0 when the destination file could not be opened (caller may skip the entry),
;         -1 on a seek or write error (caller should stop extracting).
Procedure TAR_ExtractEntryData(*Tar.TAR_Archive, *Buffer, destName.s, dataPos, filesize)
  
  newFile = CreateFile_(destName, #FILE_WRITE_DATA|#FILE_READ_DATA, #FILE_SHARE_READ, 0, #OPEN_ALWAYS, #FILE_ATTRIBUTE_NORMAL|#FILE_FLAG_SEQUENTIAL_SCAN, 0)
  If newFile = #INVALID_HANDLE_VALUE
    ProcedureReturn 0
  EndIf
  
  result = 1
  
  ; how much of the entry has already been extracted
  totalRead = GetFileSize_(newFile, 0)
  If totalRead < 0
    totalRead = 0 ; size query failed, start from the beginning
  EndIf
  If totalRead > filesize
    ; never let (filesize - totalRead) become negative: that would be passed to
    ; ReadFile_ as a huge unsigned count and overrun *Buffer
    SetFilePointer_(newFile, 0, 0, #FILE_BEGIN)
    SetEndOfFile_(newFile)
    totalRead = 0
  EndIf
  
  ; go to end of newfile and to the matching position inside the archive
  If SetFilePointer_(newFile, 0, 0, #FILE_END) = $FFFFFFFF
    Debug "INVALID_SET_FILE_POINTER"
    result = -1
  ElseIf SetFilePointer_(*Tar\Handle, dataPos+totalRead, 0, #FILE_BEGIN) = $FFFFFFFF
    Debug "INVALID_SET_FILE_POINTER"
    result = -1
  Else
    
    ; copy in chunks of at most #TARBUFFERSIZE bytes
    While totalRead < filesize
      Debug "Reading..."
      
      sizeToRead = filesize - totalRead
      If sizeToRead > #TARBUFFERSIZE
        sizeToRead = #TARBUFFERSIZE
      EndIf
      
      ; a failed or empty read means the archive ends here (e.g. still downloading)
      dataRead = 0
      If ReadFile_(*Tar\Handle, *Buffer, sizeToRead, @dataRead, 0) = 0 Or dataRead = 0
        Break
      EndIf
      
      Debug "Writing "+Str(dataRead)+" bytes..."
      bytesWritten = 0
      If WriteFile_(newFile, *Buffer, dataRead, @bytesWritten, 0) = 0 Or bytesWritten <> dataRead
        result = -1 ; e.g. disk full
        Break
      EndIf
      
      totalRead = totalRead + dataRead
    Wend
    
  EndIf
  
  ; single exit point, so the destination handle is closed on every path
  CloseHandle_(newFile)
  ProcedureReturn result
  
EndProcedure

; Extracts the contents of an opened TAR archive below the directory Path.
;
; *Tar - archive previously opened with TAR_Open
; Path - existing destination directory (a trailing "\" is added if missing)
;
; Returns #True when the archive was walked (also when the walk stopped early on an
; incomplete archive or an I/O error), 0 when *Tar is null, Path is not an existing
; directory, the archive is empty or a buffer cannot be allocated.
;
; Behaviour:
; - directory entries (name ending in "\") are created via TAR_CreateDirectories
; - entries with a non-zero size are written as files; missing parent directories
;   are created first; files that already have the expected size are skipped and
;   shorter files are resumed, so the routine can be called repeatedly while the
;   archive is still being downloaded
; - entries with an empty name, an absolute path, a drive letter or a ".."
;   component are skipped so an archive cannot write outside Path
;
; Limitations: entries with size 0 are not created as files, "long link" entries
; are not interpreted, and sizes/positions use the default integer type together
; with the low part of GetFileSize_ only.
Procedure TAR_ExtractAll(*Tar.TAR_Archive, Path.s)

  If *Tar = 0
    ProcedureReturn 0
  EndIf

  ; make sure path is a path
  If Right(Path, 1) <> "\"
    Path = Path + "\"
  EndIf
  If FileSize(Path) <> -2
    ProcedureReturn 0
  EndIf
  
  ; makesure archive isn't empty
  If GetFileSize_(*Tar\Handle, 0) = 0
    ProcedureReturn 0
  EndIf
  
  *Buffer = AllocateMemory(#TARBUFFERSIZE)
  If *Buffer = 0
    ProcedureReturn 0
  EndIf
  
  *header = AllocateMemory(#TARBLOCK)
  If *header = 0
    FreeMemory(*Buffer) ; do not leak the transfer buffer
    ProcedureReturn 0
  EndIf
  
  pos = 0 ; archive offset of the header block currently processed
  
  Repeat
    
    If SetFilePointer_(*Tar\Handle, pos, 0, #FILE_BEGIN) = $FFFFFFFF
      Debug "INVALID_SET_FILE_POINTER"
      Break
    EndIf
    
    ; read the complete header block; stop if it is not fully present yet
    BytesRead = 0
    If ReadFile_(*Tar\Handle, *header, #TARBLOCK, @BytesRead, 0) = 0 Or BytesRead < #TARBLOCK
      Break
    EndIf
    
    ; get file name: the field is ASCII, so convert it explicitly instead of
    ; reading raw bytes into a PB string (which breaks in unicode builds)
    filename.s = PeekS(*header, #TARFILENAMESIZE, #PB_Ascii)
    ReplaceString(filename, "/", "\", #PB_String_InPlace)
    Debug filename
    
    ; reject names that are empty or would leave the destination directory
    skipEntry = #False
    If filename = ""
      skipEntry = #True
    ElseIf Left(filename, 1) = "\" Or Mid(filename, 2, 1) = ":" Or FindString("\"+filename+"\", "\..\", 1) > 0
      Debug "Skipping unsafe entry name"
      skipEntry = #True
    EndIf
    
    ; is the filename a directory?
    If skipEntry = #False
      If Right(filename, 1) = "\"
        ; check if it already exists
        If FileSize(Path+filename) = -1
          ; create the directory structure
          TAR_CreateDirectories(Path, filename)
        EndIf
      EndIf
    EndIf
    
    ; decode the size field: octal ASCII digits, optionally preceded by spaces and
    ; terminated by the first non-octal character (space or NUL)
    filesize = 0
    started = #False
    For n = 0 To 10
      digit = (PeekB(*header+#TARFILESIZEOFFSET+n) & $FF) - '0'
      If digit >= 0 And digit <= 7
        filesize = filesize * 8 + digit
        started = #True
      ElseIf started
        Break ; leaves the For loop only
      EndIf
    Next
    
    pos = pos + #TARBLOCK ; move postion to next block
    If pos >= GetFileSize_(*Tar\Handle, 0)
      Break
    EndIf
    
    If filesize <> 0
      
      If skipEntry = #False
        
        ; calc real size of file in archive if archive is not complete
        If pos+filesize > GetFileSize_(*Tar\Handle, 0)
          actualSize = GetFileSize_(*Tar\Handle, 0)-pos
        Else
          actualSize = filesize
        EndIf
        
        ; check if the file already exists and has been extracted fully
        If FileSize(Path+filename) <> actualSize
          
          ; archives do not always contain separate entries for parent directories
          If FindString(filename, "\", 1) > 0
            TAR_CreateDirectories(Path, filename)
          EndIf
          
          ; extract file; -1 signals a seek/write error, so stop the whole extraction
          If TAR_ExtractEntryData(*Tar, *Buffer, Path+filename, pos, filesize) = -1
            Break
          EndIf
          
        EndIf
        
      EndIf
      
      ; round filesize into 512 byte blocks and skip the data
      pos = pos + ((filesize + #TARBLOCK - 1) & ~(#TARBLOCK - 1))
    EndIf

  Until pos >= GetFileSize_(*Tar\Handle, 0)
  
  Debug "Extraction complete."
  
  FreeMemory(*Buffer)
  FreeMemory(*header)
  
  ProcedureReturn #True
  
EndProcedure
Cheers!

Chris.
ProGUI - Professional Graphical User Interface Library - http://www.progui.co.uk
eesau
Enthusiast
Enthusiast
Posts: 589
Joined: Fri Apr 27, 2007 12:38 pm
Location: Finland

Re: My TAR extraction routines

Post by eesau »

Thanks a lot, this will come in handy :)
PrincieD
Addict
Addict
Posts: 858
Joined: Wed Aug 10, 2005 2:08 pm
Location: Yorkshire, England
Contact:

Re: My TAR extraction routines

Post by PrincieD »

eesau wrote:Thanks a lot, this will come in handy :)
No worries eesau :) I'm happy you'll find it useful :)
ProGUI - Professional Graphical User Interface Library - http://www.progui.co.uk
SeregaZ
Enthusiast
Enthusiast
Posts: 628
Joined: Fri Feb 20, 2009 9:24 am
Location: Almaty (Kazakhstan. not Borat, but Triple G)
Contact:

Re: My TAR extraction routines

Post by SeregaZ »

Is there any example of how to use this code? I tried this one:

Code: Select all

; Usage attempt: open the archive into a TAR_Archive variable, print the open result
; and the entry count, then close the archive again.
; Note: as explained in the replies in this thread, a .tar.bz2 file is bzip2-compressed
; and has to be decompressed to a plain .tar before these routines can read it.
Define Pointer.TAR_Archive
Debug TAR_Open("C:\file.tar.bz2", @Pointer)
Debug TAR_CountFiles(@Pointer)
TAR_Close(@Pointer)
but the check If SetFilePointer_(*Tar\Handle, pos, 0, #FILE_BEGIN) = $FFFFFFFF fails and stops the whole function.

Then I tried this one:

Code: Select all

; Same attempt, but with a pointer variable instead of a structure variable.
; *Pointer is only declared here and is never assigned an address, so TAR_Open
; receives a null pointer (the poster reports the resulting "pointer is null"
; error at the line *Tar\Handle = Handle).
Define *Pointer.TAR_Archive
Debug TAR_Open("C:\file.tar.bz2", *Pointer)
Debug TAR_CountFiles(*Pointer)
TAR_Close(*Pointer)
but it says the pointer is null at *Tar\Handle = Handle in the TAR_Open function...

and probably my archive is not really a tar archive... Can you attach a small, correct tar archive to this topic for my test? (The tar must contain a few files plus a folder with a few files inside it.)
infratec
Always Here
Always Here
Posts: 7582
Joined: Sun Sep 07, 2008 12:45 pm
Location: Germany

Re: My TAR extraction routines

Post by infratec »

@SeregaZ

tar.bz2 is not a tar file, it is a bz2 file.
When you unbzip it, then you'll get a .tar file,
and this should be handled by these procedures.

Bernd
User avatar
luis
Addict
Addict
Posts: 3893
Joined: Wed Aug 31, 2005 11:09 pm
Location: Italy

Re: My TAR extraction routines

Post by luis »

infratec wrote: When you unbzip it, than you'll get a .tar file
I hoped this was already clear :P

http://www.purebasic.fr/english/viewtop ... 34#p429034

@seregaz you just need to find/write/port some code/link some lib/spawn some exe to uncompress the file and then use (hopefully) this one to extract from the tar. To test this code just look around with google for some .tar files and try with them or decompress the ones you already have.

http://gnuwin32.sourceforge.net/packages/bzip2.htm
"Have you tried turning it off and on again ?"
A little PureBasic review
SeregaZ
Enthusiast
Enthusiast
Posts: 628
Joined: Fri Feb 20, 2009 9:24 am
Location: Almaty (Kazakhstan. not Borat, but Triple G)
Contact:

Re: My TAR extraction routines

Post by SeregaZ »

This source is a great mystery to me...

So I tried to understand this by studying the PB examples. As far as I understand, my archive must first be unpacked with BZIP2, and then the result of that must be unpacked with TAR.

It is not my archive file, and it is not even a standard archive: in its original form the file can't be opened by bzip, because the file signature was deliberately changed to cause confusion. Originally it looks like the image below, so I have to edit the file a little; only after that can BZIP2 work with it.
Image

And I want to ask a question: I have seen some code showing how to hide a DLL file inside the main exe file. Is it possible to do the same with a LIB file?
Last edited by SeregaZ on Fri Oct 25, 2013 10:51 pm, edited 1 time in total.
SeregaZ
Enthusiast
Enthusiast
Posts: 628
Joined: Fri Feb 20, 2009 9:24 am
Location: Almaty (Kazakhstan. not Borat, but Triple G)
Contact:

Re: My TAR extraction routines

Post by SeregaZ »

I read some GnuWin32 PDF materials and did not understand them very well, but I tried changing my code to this:

Code: Select all

; Decompresses C:\file.tar.bz2 into C:\tarpak.tar using BZ2_bzBuffToBuffDecompress
; from bzip2.dll. The whole compressed file is loaded into memory and decompressed
; in one call; the destination buffer is doubled and the call repeated for as long
; as the library reports that the output buffer is too small.

; The C function returns an int, so the prototype is declared as .l (32 bit) to
; keep the comparisons with the result codes reliable on 64 bit builds as well.
Prototype.l ProtoBZ2_bzBuffToBuffDecompress(*DestBuff, *DestSize, *SourceBuff, iSourceSize, small, verbosity)
Global BZ2_bzBuffToBuffDecompress.ProtoBZ2_bzBuffToBuffDecompress

hdll = OpenLibrary(#PB_Any,"C:\bzip2.dll")
If hdll
  BZ2_bzBuffToBuffDecompress = GetFunction(hdll,"BZ2_bzBuffToBuffDecompress")
  
  If BZ2_bzBuffToBuffDecompress
    
    ; load the complete compressed file into memory
    FileName$ = "C:\file.tar.bz2"
    nFileIn = ReadFile(#PB_Any, FileName$)
    If nFileIn
      ; read packed data
      iSourceSize = Lof(nFileIn)  
      *SourceBuff = AllocateMemory(iSourceSize)
      If *SourceBuff
        ReadData(nFileIn, *SourceBuff, iSourceSize)
        
        ; The unpacked size is not known in advance: start with 100000 bytes and
        ; double the buffer while the library answers BZ_OUTBUFF_FULL (-8).
        iDestCapacity = 100000
        *DestBuff = 0
        Repeat
          result = -8
          *DestBuff = AllocateMemory(iDestCapacity)
          If *DestBuff = 0
            Break ; out of memory, give up
          EndIf
          
          ; in: capacity of *DestBuff, out: number of bytes actually written
          DestSize.l = iDestCapacity
          result = BZ2_bzBuffToBuffDecompress(*DestBuff, @DestSize, *SourceBuff, iSourceSize, 0, 0)
          
          If result = -8
            FreeMemory(*DestBuff)
            *DestBuff = 0
            iDestCapacity = iDestCapacity * 2
          EndIf
        Until result <> -8
        
        If *DestBuff
          If result = 0 ; BZ_OK
            
            nFileOut = CreateFile(#PB_Any, "C:\tarpak.tar")   ; create the unpacked .tar file
            If nFileOut
              WriteData(nFileOut, *DestBuff, DestSize)        ; write exactly the decompressed bytes
              CloseFile(nFileOut)                             ; close the file so the data is stored
            Else
              Debug "may not create the file!"
            EndIf
            
          EndIf
          FreeMemory(*DestBuff)
        EndIf
        
        FreeMemory(*SourceBuff) 
        
      EndIf
      CloseFile(nFileIn)
      
    EndIf
    
  EndIf
  
  ; release the DLL again once decompression is finished
  CloseLibrary(hdll)
  
EndIf
Done :) Now I must mix the bzip code with the tar code.

The resulting file is unpacked nicely by the tar code, but the tar code does not work in Unicode mode, and it works from a file :( How can I do this in memory, without any temporary files?
Deluxe0321
User
User
Posts: 69
Joined: Tue Sep 16, 2008 6:11 am
Location: ger

Re: My TAR extraction routines

Post by Deluxe0321 »

Maybe this will help you:
http://www.purebasic.fr/english/viewtop ... 0&p=419000

It's tar.gz but the tar part (inside OpenTarGz(File.s) procedure) should explain how the tar format works.
SeregaZ
Enthusiast
Enthusiast
Posts: 628
Joined: Fri Feb 20, 2009 9:24 am
Location: Almaty (Kazakhstan. not Borat, but Triple G)
Contact:

Re: My TAR extraction routines

Post by SeregaZ »

Easy to say :) I can't understand it... and it is for 5.20, not Unicode again, works with a file rather than memory, and uses an additional library.

The code in this topic doesn't use a library - only PB code - which is nice :) I want to find which part of the code does not work with Unicode, and how to change the file handle to a memory pointer.
SeregaZ
Enthusiast
Enthusiast
Posts: 628
Joined: Fri Feb 20, 2009 9:24 am
Location: Almaty (Kazakhstan. not Borat, but Triple G)
Contact:

Re: My TAR extraction routines

Post by SeregaZ »

As I see at http://www.gamedev.net/topic/273143-tar ... r-windows/ I must follow a layout like this:
512 b + size of file + 512 b + size of file... but I can't read it correctly :)

filename$ = PeekS(*DestBuff, 100, #PB_Ascii)
filecontext$ = PeekS(*DestBuff+512, -1, #PB_Ascii)

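size = 0045674 1223 - what is this? After unpacking, the size of the file null.xml is 18.9 KB (19 388 b). [Note: 0045674 read as an octal number is exactly 19 388 in decimal, i.e. the size field stores the size as octal digits.]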
SeregaZ
Enthusiast
Enthusiast
Posts: 628
Joined: Fri Feb 20, 2009 9:24 am
Location: Almaty (Kazakhstan. not Borat, but Triple G)
Contact:

TAR for teapot

Post by SeregaZ »

TAR for a teapot (a dummy), like me :)
I can't understand many things, but I'll try to explain what a TAR file is. A tar is a container file that holds all the files that were chosen when the archive was created. The whole file is divided into blocks of 512 bytes. The first 512 bytes hold all the information about the first file in the container - this header has the following structure:
name[100];
mode[8];
uid[8];
gid[8];
size[12];
mtime[12];
chksum[8];
typeflag;
linkname[100];
magic[6];
version[2];
uname[32];
gname[32];
devmajor[8];
devminor[8];
prefix[155];
padding[12];
*gnu_longname;
*gnu_longlink;

The name of the file is the first 100 bytes inside the container; I can get it with PeekS(*DestBuff, 100, #PB_Ascii)

Then we need to get the size of the file. The size field starts at byte 124 of the header: (name - 100, + mode - 8, + uid - 8, + gid - 8 )
and the size field is 12 bytes long. The value is written in a special format (octal ASCII digits, as the comment in PrincieD's code says); we can decode it with PrincieD's routine:

Code: Select all

              ; decode the 11 digits of the size field (header offset 124), most significant
              ; digit first: multiplying the running value by 8 per digit gives the same
              ; result as weighting each digit with its power of 8
              filesize = 0
              For n = 0 To 10
                filesize = filesize * 8 + Val(Chr(PeekB(*DestBuff+124+n)))
              Next
    
              Debug filesize
Do you remember that our archive has 512-byte blocks? The size of a file can exceed 512 bytes, so we must know how many blocks our file fills. In my case the file is 19 KB: 19 388 / 512 ≈ 37.87, i.e. 37 full blocks plus a 38th block that is filled only at the beginning; the rest of the 38th block is filled with nulls. So we must round this number of blocks UP to 38. The second file starts only after the 38th data block, not in the middle of it. For this we can use this line from PrincieD's code:

Code: Select all

; size of the file data inside the archive: filesize expressed in 512 byte blocks,
; with the block count passed through Round(..., #PB_Round_Up) as described above
pos = Round(filesize/512, #PB_Round_Up)*512
So now we have the name, size and content of the file in memory. We can create the file, or continue to work with the data in memory.

To get the second file we must repeat the whole operation from the beginning:
get the name, size and content of the file, but *DestBuff must be changed to:
*DestBuff + 512 (to skip the first file's header) + 512*38 (to skip the content of the first file, where 38 is the number of blocks occupied by the first file's content)
By my theory, if the name of the second file is empty, it means the end of the container file. But I am not sure... because when I look at this container file in Notepad, I see more data after this expected end of file... What is it? Garbage? Or some kind of data for restoring the files when they are damaged?
Image



Now I have a question: how do I get the length of memory? I don't mean the size passed to AllocateMemory, but the length of the data that was actually written into the allocated block. For example, AllocateMemory creates 100 bytes but I fill only 15 of them - how can I get the length of the used memory inside this allocation?
Post Reply