I needed a function to compare two files (are the identical or not, regarding content) and also one to find duplicates. Don't know if anyone already posted something similar, but here is my attempt (which surely can be improved):
Code: Select all
;Enable the MD5 fingerprint plugin; CompareFileList calls FileFingerprint() with #PB_Cipher_MD5
UseMD5Fingerprint()
#FC_BufferSize = 10485760;Default read-buffer size in bytes (10 MB) used by CompareFilesSimple
;Option flags for the Flags parameter of CreateFileList.
;EnumerationBinary assigns power-of-two values, so flags can be tested with a bitwise AND.
EnumerationBinary FileCompare_CreateFileList
#FC_CFL_SubDirectories;Also descend into sub-directories
EndEnumeration
;A plain list of file paths; used as the value type of both maps in FC_Compare
Structure FC_Files
List Files.s()
EndStructure
;Working data of CompareFileList: files bucketed first by a size key, then by fingerprint
Structure FC_Compare
Map SizeList.FC_Files();Buckets of file paths, indexed by the size key string built in CompareFileList
Map MD5List.FC_Files();Buckets of file paths, indexed by their MD5 fingerprint string
EndStructure
;One result group: CompareFileList adds one FC_Result element per set of files it reports together
Structure FC_Result
List Files.s();Paths of the files belonging to this group
EndStructure
Procedure CreateFileList(List MyList.s(),Directory$,Flags=#False);Flags:#FC_CFL_SubDirectories -> also list subdirectories
  ;Appends the full path of every file found in Directory$ to MyList().
  ;  MyList()   - list receiving the file paths (existing elements are kept)
  ;  Directory$ - directory to scan; a trailing path separator is added if missing
  ;  Flags      - #FC_CFL_SubDirectories to descend into sub-directories as well
  ;Returns the number of files that were added to MyList().
  Protected NewList PendingDirs.s()
  Protected exa,CurrentDir$,EntryName$,OrigSize
  ;Use the native path separator so the procedure also works outside Windows
  CompilerIf #PB_Compiler_OS=#PB_OS_Windows
    Protected Separator$="\"
  CompilerElse
    Protected Separator$="/"
  CompilerEndIf

  OrigSize=ListSize(MyList())
  If Right(Directory$,1)<>Separator$
    Directory$+Separator$
  EndIf

  ;PendingDirs() holds the directories still to be scanned; the first element is always the next one
  AddElement(PendingDirs())
  PendingDirs()=Directory$
  While ListSize(PendingDirs())
    FirstElement(PendingDirs())
    CurrentDir$=PendingDirs()
    exa=ExamineDirectory(#PB_Any,CurrentDir$,"*.*")
    If exa
      While NextDirectoryEntry(exa)
        EntryName$=DirectoryEntryName(exa)
        Select DirectoryEntryType(exa)
          Case #PB_DirectoryEntry_Directory
            ;Skip only the "." and ".." pseudo entries; any other name is a real directory
            If (Flags&#FC_CFL_SubDirectories) And EntryName$<>"." And EntryName$<>".."
              ;AddElement inserts after the current element, so sub-directories are
              ;queued right behind the directory being scanned, in discovery order
              AddElement(PendingDirs())
              PendingDirs()=CurrentDir$+EntryName$+Separator$
            EndIf
          Case #PB_DirectoryEntry_File
            AddElement(MyList())
            MyList()=CurrentDir$+EntryName$
        EndSelect
      Wend
      FinishDirectory(exa)
    EndIf
    ;The directory just scanned is still the first element: remove it
    FirstElement(PendingDirs())
    DeleteElement(PendingDirs())
  Wend
  ProcedureReturn ListSize(MyList())-OrigSize;Returns how many files were found
EndProcedure
Procedure CompareFilesSimple(File1$,File2$,BufferSize=#FC_BufferSize)
  ;Compares the content of two files byte by byte.
  ;  File1$, File2$ - paths of the files to compare
  ;  BufferSize     - size in bytes of each of the two read buffers;
  ;                   values <= 0 fall back to #FC_BufferSize
  ;Returns #True if both files could be read and have identical content.
  ;Returns #False if they differ, or if a file is missing, cannot be opened,
  ;cannot be read, or a buffer cannot be allocated.
  Protected file1,file2,*Buffer1,*Buffer2,BufRead1,BufRead2,Result=#False
  Protected Size1.q,Size2.q

  Size1=FileSize(File1$)
  Size2=FileSize(File2$)
  ;Negative sizes mean "not found" or "is a directory"; different sizes can never be identical
  If Size1<0 Or Size1<>Size2
    ProcedureReturn #False
  EndIf

  file1=ReadFile(#PB_Any,File1$,#PB_File_SharedRead)
  If file1
    file2=ReadFile(#PB_Any,File2$,#PB_File_SharedRead)
    If file2
      If Size1=0
        ;Two readable empty files are identical; no buffers needed
        Result=#True
      Else
        If BufferSize<=0
          BufferSize=#FC_BufferSize
        EndIf
        ;No point in allocating more than the files can fill
        If BufferSize>Size1
          BufferSize=Size1
        EndIf
        *Buffer1=AllocateMemory(BufferSize,#PB_Memory_NoClear)
        *Buffer2=AllocateMemory(BufferSize,#PB_Memory_NoClear)
        If *Buffer1 And *Buffer2
          Result=#True
          While Not Eof(file1)
            BufRead1=ReadData(file1,*Buffer1,BufferSize)
            BufRead2=ReadData(file2,*Buffer2,BufferSize)
            ;CompareMemory returns non-zero when the areas are EQUAL, so 0 means a mismatch.
            ;A read of 0 bytes before EOF is treated as a failure to avoid looping forever.
            If BufRead1<=0 Or BufRead1<>BufRead2 Or CompareMemory(*Buffer1,*Buffer2,BufRead1)=0
              Result=#False
              Break
            EndIf
          Wend
        EndIf
        ;Release whatever was allocated, on success and on failure alike
        If *Buffer1:FreeMemory(*Buffer1):EndIf
        If *Buffer2:FreeMemory(*Buffer2):EndIf
      EndIf
      CloseFile(file2)
    EndIf
    CloseFile(file1)
  EndIf
  ProcedureReturn Result
EndProcedure
Procedure CompareFileList(List MyList.s(),List ResultList.FC_Result())
  ;Finds groups of files with identical content among the paths in MyList().
  ;  MyList()     - paths of the files to examine; entries that are not existing files are ignored
  ;  ResultList() - receives one element per group of duplicates, each holding
  ;                 the paths of the files in that group (existing elements are kept)
  ;Files are first bucketed by size; only buckets with more than one file are
  ;fingerprinted with MD5, and files sharing a fingerprint form a group.
  Protected Temp.FC_Compare
  Protected Size.q,FileSize$,fp$

  ;Pass 1: bucket by file size (quad, so files above 2 GB work on 32-bit builds too)
  ForEach MyList()
    Size=FileSize(MyList())
    If Size>=0
      FileSize$=Str(Size);The map key must actually carry the size, otherwise all files share one bucket
      AddElement(Temp\SizeList(FileSize$)\Files())
      Temp\SizeList(FileSize$)\Files()=MyList()
    EndIf
  Next

  ;Pass 2: only sizes shared by several files can contain duplicates
  ForEach Temp\SizeList()
    If ListSize(Temp\SizeList()\Files())>1
      If MapKey(Temp\SizeList())="0"
        ;All empty files are identical by definition: report them without hashing
        AddElement(ResultList())
        ForEach Temp\SizeList()\Files()
          AddElement(ResultList()\Files())
          ResultList()\Files()=Temp\SizeList()\Files()
        Next
      Else
        ForEach Temp\SizeList()\Files()
          fp$=FileFingerprint(Temp\SizeList()\Files(),#PB_Cipher_MD5)
          If fp$<>"";An empty fingerprint means the file could not be read: skip it
            AddElement(Temp\MD5List(fp$)\Files())
            Temp\MD5List(fp$)\Files()=Temp\SizeList()\Files()
          EndIf
        Next
      EndIf
    EndIf
  Next

  ;Pass 3: every fingerprint shared by several files is a group of duplicates
  ForEach Temp\MD5List()
    If ListSize(Temp\MD5List()\Files())>1
      AddElement(ResultList())
      ForEach Temp\MD5List()\Files()
        AddElement(ResultList()\Files())
        ResultList()\Files()=Temp\MD5List()\Files()
      Next
    EndIf
  Next
EndProcedure
;Demo: ask for a directory, scan it recursively and print every group of duplicate files
Define Dir$=PathRequester("Choose Directory","")
If Len(Dir$)>0
  ;Nothing is done when the requester returned an empty path
  NewList FileList.s()
  NewList ResultList.FC_Result()

  CreateFileList(FileList(),Dir$,#FC_CFL_SubDirectories)
  CompareFileList(FileList(),ResultList())

  Debug Str(ListSize(ResultList()))+" group(s) found"
  ForEach ResultList()
    ;Groups are numbered by their zero-based list index
    Debug "Group "+Str(ListIndex(ResultList()))+":"
    ForEach ResultList()\Files()
      Debug ResultList()\Files()
    Next
  Next
EndIf
