XORC32 runs about ten times faster than PureBasic's CRC32.
Then again, it is a much simpler form of checksum, since it is not a Cyclic Redundancy Check (CRC);
XORC32 has more in common with a Longitudinal Redundancy Check (LRC).
The ASM can probably be optimized some more too.
Please note that I have not done any collision tests or entropy spread tests.
In fact, I don't really care about the typical spread that other checksums or hashes have.
This is just intended to quickly check if the data is changed or not.
A small change will cause a small difference, and a large change a large difference.
This checksum is best used in combination with filename + filesize + XORC32 and where possible file modified date as well.
If anyone has some links for collision test tools handy please post'em,
I will most likely tweak this one in the future if there are any particular issues and update this post.
I ended up making this after a tired night of surfing around looking for a simple XOR-based checksum
(as opposed to the several dozen cyclic redundancy checks (CRC) out there),
so if anyone knows of some XOR checksum code, please shout out.
Remember! ANY checksum or hash algorithm has collisions: if a checksum or hash matches, there is still a chance the files are different, even with SHA512 etc.
The only way to be 100% sure whether a file's content is different or not is to do a byte-for-byte comparison.
That being said, there is one thing you can always count on: if a checksum/hash does NOT match, then it is 100% certain the file is NOT identical.
In other words, you may get false positives (a match on different data), but you will never get false negatives; a lot of people don't always remember that fact!

Code: Select all
;XORC32 v1.1
;Copyright (c) 2009 Roger Hågensen, EmSai.
;http://EmSai.net/
;
;This software is provided 'as-is', without any express or implied
;warranty. In no event will the authors be held liable for any damages
;arising from the use of this software.
;
;Permission is granted to anyone to use this software for any purpose,
;including commercial applications, and to alter it and redistribute it
;freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source
; distribution.
EnableExplicit
;Trivia: $43524F58=CROX in other words "XORC" but in little endian order.
;Note!
;We will use $43524F58 to make sure $0 or $FFFFFFFF when XOR'ed with
;$0 or $FFFFFFFF does not result in a checksum of $0 and similar issues,
;which is important if a file starts with or contains a long series of these,
;which is not that uncommon in data files, for example.
;Obviously this means we move this XOR behaviour to someplace else,
;but we're much less likely to find a series of $BB (example) than $0 or $FF in files.
;The ROL (rotate left) and BSWAP (byteswap) are done to ensure that repeats of the same data in a row
;produce different checksums for the next 4 bytes read.
Procedure.l XORC32(*mem,len.i,crc.l=$43524F58)
  ;Computes the 32-bit XORC32 checksum of the len bytes starting at *mem.
  ;
  ; *mem : address of the data. If it is 0 the start value crc is returned unchanged.
  ; len  : number of bytes to process. If it is 0 or negative the start value crc
  ;        is returned unchanged and no memory is read.
  ; crc  : start value; pass the result of a previous call to chain several buffers.
  ;
  ;Result: the checksum, left in eax and handed back by the bare ProcedureReturn.
  ;
  ;For every full 4-byte group:  crc = (ROL(BSWAP(crc),1) + $43524F58) XOR dword
  ;The 1 to 3 bytes left over at the end are packed into one dword (first byte in
  ;the lowest 8 bits, upper bits zero) and mixed in with the same step.
  ;
  ;Register use: only eax (checksum), ecx/rcx (bytes left, then the packed tail)
  ;and edx/rdx (read pointer) are touched. The PureBasic inline ASM rules name
  ;these as volatile; ebx and edi are not on that list, so they are not used here.

  ;Fetch the parameters before anything else is done.
  CompilerIf #PB_Compiler_Processor=#PB_Processor_x64
    !MOV rcx,qword[p.v_len]
    !MOV rdx,qword[p.p_mem]
  CompilerElse
    !MOV ecx,dword[p.v_len]
    !MOV edx,dword[p.p_mem]
  CompilerEndIf
  !MOV eax,dword[p.v_crc]

  ;Nothing to do for a null pointer or a length <= 0 (signed compare):
  ;eax already holds the start value.
  CompilerIf #PB_Compiler_Processor=#PB_Processor_x64
    !TEST rdx,rdx
    !JZ XORC32_END
    !CMP rcx,0
    !JLE XORC32_END
    !CMP rcx,3
  CompilerElse
    !TEST edx,edx
    !JZ XORC32_END
    !CMP ecx,0
    !JLE XORC32_END
    !CMP ecx,3
  CompilerEndIf
  !JLE XORC32_LOOPEND ;fewer than 4 bytes in total, only a tail to handle

  ;Main loop, entered only while at least 4 bytes are left.
  !XORC32_LOOP:
  !BSWAP eax
  !ROL eax,1
  !ADD eax,$43524F58
  CompilerIf #PB_Compiler_Processor=#PB_Processor_x64
    !XOR eax,dword[rdx]
    !ADD rdx,4
    !SUB rcx,4
    !CMP rcx,3
  CompilerElse
    !XOR eax,dword[edx]
    !ADD edx,4
    !SUB ecx,4
    !CMP ecx,3
  CompilerEndIf
  !JG XORC32_LOOP

  ;Here 0 to 3 bytes are left, so the count fits in ecx on both processors.
  !XORC32_LOOPEND:
  !TEST ecx,ecx
  !JZ XORC32_END
  !CMP ecx,2
  !JB XORC32_TAIL1
  !JA XORC32_TAIL3

  ;2 bytes left: tail = first byte + second byte*$100
  CompilerIf #PB_Compiler_Processor=#PB_Processor_x64
    !MOVZX ecx,word[rdx]
  CompilerElse
    !MOVZX ecx,word[edx]
  CompilerEndIf
  !JMP XORC32_ENDXOR

  ;3 bytes left: load bytes two and three, shift them up, then put the first byte below them.
  !XORC32_TAIL3:
  CompilerIf #PB_Compiler_Processor=#PB_Processor_x64
    !MOVZX ecx,word[rdx+1]
    !SHL ecx,8
    !MOV cl,byte[rdx]
  CompilerElse
    !MOVZX ecx,word[edx+1]
    !SHL ecx,8
    !MOV cl,byte[edx]
  CompilerEndIf
  !JMP XORC32_ENDXOR

  ;1 byte left: tail = that byte
  !XORC32_TAIL1:
  CompilerIf #PB_Compiler_Processor=#PB_Processor_x64
    !MOVZX ecx,byte[rdx]
  CompilerElse
    !MOVZX ecx,byte[edx]
  CompilerEndIf

  ;Mix the packed tail in exactly like a full dword.
  !XORC32_ENDXOR:
  !BSWAP eax
  !ROL eax,1
  !ADD eax,$43524F58
  !XOR eax,ecx

  !XORC32_END:
  ProcedureReturn
EndProcedure
Code: Select all
;Speed comparison of CRC32Fingerprint() and XORC32() over the contents of PureBasic.exe.
;The file is read into memory first so that both loops checksum the real file data;
;the two elapsed times (in milliseconds, CRC32 first) are shown in a requester.
Define dstl.l
Define start.l,stop.l,n.l,i.l,*src
Define a.l,b.l,len.l
If ReadFile(1,#PB_Compiler_Home+"PureBasic.exe")
  len=Lof(1)
  If len>0
    *src=AllocateMemory(len)
  EndIf
  If *src
    If ReadData(1,*src,len)=len
      timeBeginPeriod_(1) ;ask for 1 ms timer resolution while measuring
      i=1 ;change the loop count to something higher for speed tests.
      dstl=$43524F58 ;XORC
      start=timeGetTime_()
      For n=1 To i
        dstl=CRC32Fingerprint(*src,len)
      Next
      stop=timeGetTime_()
      Debug Hex(dstl,#PB_Long)
      Debug Bin(dstl,#PB_Long)
      a=stop-start
      dstl=$43524F58 ;XORC
      start=timeGetTime_()
      For n=1 To i
        dstl=XORC32(*src,len)
      Next
      stop=timeGetTime_()
      Debug Hex(dstl,#PB_Long)
      Debug Bin(dstl,#PB_Long)
      b=stop-start
      timeEndPeriod_(1)
      MessageRequester("",Str(a)+#LF$+Str(b))
    Else
      MessageRequester("","Could not read the whole test file.")
    EndIf
    FreeMemory(*src)
  Else
    MessageRequester("","Test file is empty or the buffer could not be allocated.")
  EndIf
  CloseFile(1)
Else
  MessageRequester("","Could not open "+#PB_Compiler_Home+"PureBasic.exe")
EndIf
Code: Select all
;Demo: print XORC32 and CRC32 checksums for pairs of strings.
;Turn off "Create unicode executable" in the compiler options first,
;otherwise the CRC32 collision matches will not show up.
;XORC32 has collisions too, obviously, but I'm too lazy to search for them! :)
;The number of collisions in XORC32 should be close to that of CRC32,
;but may possibly be better since XORC32 does not have issues with $0
;that CRC32 especially struggles with at the beginning of files.

;Prints four checksums followed by an empty line:
;XORC32 of first$, XORC32 of second$, CRC32 of first$, CRC32 of second$.
Procedure ShowChecksumPair(first$,second$)
  Protected sum.l
  sum=XORC32(@first$,Len(first$))
  Debug Hex(sum,#PB_Long)
  sum=XORC32(@second$,Len(second$))
  Debug Hex(sum,#PB_Long)
  sum=CRC32Fingerprint(@first$,Len(first$))
  Debug Hex(sum,#PB_Long)
  sum=CRC32Fingerprint(@second$,Len(second$))
  Debug Hex(sum,#PB_Long)
  Debug ""
EndProcedure

Define.l c
Define test1$,test2$

test1$="fc0591"
test2$="123rainerbommert"
ShowChecksumPair(test1$,test2$)

test1$="a1sellers"
test2$="advertees"
ShowChecksumPair(test1$,test2$)

test1$="Maria has nine red beds."
test2$="Steven has fifteen white tables."
ShowChecksumPair(test1$,test2$)

test1$="Joe has fourteen magenta things."
test2$="Lars has thirteen black balls."
ShowChecksumPair(test1$,test2$)

;A one-letter change shows the fundamental difference in how
;XORC32 and CRC32 react to a tiny modification of the input.
test1$="The quick brown fox jumps over the lazy cog"
test2$="The quick brown fox jumps over the lazy dog"
ShowChecksumPair(test1$,test2$)

;Finally an empty string: XORC32 hands back $43524F58,
;or whatever was passed as the init/chain value.
test1$=""
c=XORC32(@test1$,Len(test1$))
Debug Hex(c,#PB_Long)
;c=CRC32Fingerprint(@test1$,Len(test2$)) ;commented out since it crashes... oops? ;) (PB 4.40B7)
;Debug Hex(c,#PB_Long)
Debug ""