
Hopefully everything works. Use at your own risk.

If you need more speed, consider using ChaCha12 (set the Rounds parameter to 12).
It is not as secure as ChaCha20, but still pretty good.
Code: Select all
; ChaCha module by Wilbert (SSE2 required)
; last update August 6, 2015
; algorithm by D. J. Bernstein (Public domain)
; Public interface of the ChaCha stream cipher module.
DeclareModule ChaCha
; Cipher context. The procedures below address it by byte offset:
;   input[0..3]   (offset  0) constant ("expand 32-byte k" / "expand 16-byte k")
;   input[4..11]  (offset 16) key (a 128 bit key is stored twice)
;   input[12..13] (offset 48) 64 bit block counter
;   input[14..15] (offset 56) 64 bit IV
;   drounds       (offset 64) number of double rounds (Rounds shifted right by 1)
Structure ctx_ChaCha
input.l[16]
drounds.l
EndStructure
; Store a 128 or 256 bit binary key and the round count; zeroes counter and IV.
Declare SetKey(*ctx.ctx_ChaCha, *Key.Ascii, KeySize = 256, Rounds = 20)
; Build a 256 bit key by repeating the Ascii form of Key, then call SetKey.
Declare SetAsciiKey(*ctx.ctx_ChaCha, Key.s, Rounds = 20)
; Store the 64 bit IV read from *IV together with the starting block counter.
Declare SetIV(*ctx.ctx_ChaCha, *IV.Ascii, Counter.q = 0)
; XOR Size bytes from *Input with the keystream into *Output; returns 0 on error, 1 on success.
Declare Crypt(*ctx.ctx_ChaCha, *Input, *Output, Size)
; SetIV (counter 0) followed by Crypt; returns the result of Crypt.
Declare CryptPacket(*ctx.ctx_ChaCha, *IV.Ascii, *Input, *Output, Size)
; SetIV (counter 0) followed by Crypt over a whole file in 32768 byte chunks.
Declare CryptFile(*ctx.ctx_ChaCha, *IV.Ascii, InputFile.s, OutputFile.s)
EndDeclareModule
Module ChaCha
EnableASM
EnableExplicit
DisableDebugger
; *** Macros and DataSection ***
; When compiling for 32 bit x86, map the 64 bit register names used throughout
; this module onto their 32 bit counterparts, so the same inline assembly
; source is used for both processor targets.
CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
Macro rax : eax : EndMacro
Macro rbx : ebx : EndMacro
Macro rcx : ecx : EndMacro
Macro rdx : edx : EndMacro
Macro rsi : esi : EndMacro
Macro rdi : edi : EndMacro
Macro rsp : esp : EndMacro
CompilerEndIf
; Wrappers that emit a single SSE2 instruction as a raw assembler line ('!').
; NOTE(review): presumably these exist so that operands such as [rax] or
; [p.v_Counter] are run through the macro preprocessor (and therefore through
; the rax -> eax style aliases in 32 bit builds) before reaching the assembler
; - confirm against the PureBasic macro documentation.

; movdqa: 16 byte move; a memory operand must be 16 byte aligned.
Macro M_movdqa(arg1, arg2)
!movdqa arg1, arg2
EndMacro
; movdqu: 16 byte move without alignment requirement.
Macro M_movdqu(arg1, arg2)
!movdqu arg1, arg2
EndMacro
; movq: 8 byte move to/from the low half of an xmm register.
Macro M_movq(arg1, arg2)
!movq arg1, arg2
EndMacro
; paddd: add four packed 32 bit integers.
Macro M_paddd(arg1, arg2)
!paddd arg1, arg2
EndMacro
; One add / xor / rotate step of the ChaCha quarter round, applied to all four
; 32 bit lanes of the registers at once:
;   reg0 = reg0 + reg1
;   reg2 = reg2 XOR reg0
;   reg2 = reg2 rotated left by lr bits
; lr is a compile time constant. A rotation by 16 is done by swapping the two
; 16 bit halves of every lane (pshuflw / pshufhw); any other amount is built
; from a left shift, a right shift by 32-lr and an OR.
; Clobbers xmm4 when lr <> 16.
Macro M_CryptQR(reg0, reg1, reg2, lr)
!paddd reg0, reg1
!pxor reg2, reg0
CompilerIf lr = 16
!pshuflw reg2, reg2, 10110001b
!pshufhw reg2, reg2, 10110001b
CompilerElse
!movdqa xmm4, reg2
!pslld reg2, lr
!psrld xmm4, 32-lr
!por reg2, xmm4
CompilerEndIf
EndMacro
; Applies the 16 keystream bytes held in reg to the data stream.
; Register roles (set up by Crypt): rbx = bytes remaining, rsi = input pointer,
; rdi = output pointer.
;   - rbx is decreased by 16 and reg is copied to xmm4 (movdqa leaves the
;     flags from the subtraction untouched).
;   - If the subtraction borrowed (fewer than 16 bytes were left) control jumps
;     to chacha.l_crypt2 with the keystream still in xmm4 and rbx = remaining - 16.
;   - Otherwise 16 bytes at [rsi] are XORed with reg, stored at [rdi], and both
;     pointers advance by 16.
; Clobbers xmm4 and the flags.
Macro M_CryptXor(reg)
sub rbx, 16
!movdqa xmm4, reg
!jc chacha.l_crypt2
CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
!movdqu xmm4, [esi]
!pxor xmm4, reg
!movdqu [edi], xmm4
CompilerElse
!movdqu xmm4, [rsi]
!pxor xmm4, reg
!movdqu [rdi], xmm4
CompilerEndIf
add rsi, 16
add rdi, 16
EndMacro
; 16 byte constants that SetKey copies into the first row of the state:
; sigma is used with a 256 bit key, tau with a 128 bit key.
DataSection
!chacha.l_sigma: db 'expand 32-byte k'
!chacha.l_tau: db 'expand 16-byte k'
EndDataSection
; *** SetKey procedure ***
; Initialises the context from a binary key.
;   *ctx    : context to fill
;   *Key    : pointer to the key bytes (16 bytes are always read, 32 when KeySize = 256)
;   KeySize : 256 selects the 256 bit layout; any other value takes the 128 bit path
;   Rounds  : number of rounds; stored as Rounds shifted right by one (double rounds)
; Key has to be 128 or 256 bits
; The block counter and IV (state row 3) are reset to zero.
Procedure SetKey(*ctx.ctx_ChaCha, *Key.Ascii, KeySize = 256, Rounds = 20)
mov rdx, [p.p_ctx]
mov rax, [p.p_Key]
mov rcx, [p.v_KeySize]
; The flags set here are consumed by the jne below; the movdqu and pxor in
; between do not modify the flags.
cmp rcx, 256
; xmm1 = first 16 key bytes
M_movdqu(xmm1, [rax])
; xmm3 = 0 -> counter and IV start out as zero
!pxor xmm3, xmm3
!jne chacha.l_key0
; 256 bit key
; xmm2 = second 16 key bytes, constant = sigma
M_movdqu(xmm2, [rax + 16])
lea rax, [chacha.l_sigma]
!jmp chacha.l_key1
; 128 bit key
; the 16 key bytes are used for both key rows, constant = tau
!chacha.l_key0:
!movdqa xmm2, xmm1
lea rax, [chacha.l_tau]
!chacha.l_key1:
; xmm0 = selected constant
M_movdqu(xmm0, [rax])
; rcx = Rounds / 2 (logical shift) = number of double rounds
mov rcx, [p.v_Rounds]
shr rcx, 1
; write the four state rows and the double round count into the context
M_movdqu([rdx], xmm0)
M_movdqu([rdx + 16], xmm1)
M_movdqu([rdx + 32], xmm2)
M_movdqu([rdx + 48], xmm3)
mov [rdx + 64], ecx
EndProcedure
; *** SetAsciiKey procedure ***
; Builds a 256 bit key from a text key and stores it with SetKey.
; The Ascii form of Key is written repeatedly into a 32 byte buffer until the
; buffer is full (a key longer than 32 characters is cut off after 32).
;   *ctx   : context to initialise
;   Key    : text key, must not be empty
;   Rounds : number of rounds, passed on to SetKey
; returns 0 on error (empty Key), 1 on success
; On error the round count of the context is cleared, so a following Crypt
; call reports an error instead of running with a missing or outdated key.
Procedure SetAsciiKey(*ctx.ctx_ChaCha, Key.s, Rounds = 20)
  Protected.i pos
  ; 33 bytes (index 0..32): one spare byte for the terminating null PokeS appends
  Protected Dim k.a(32)
  ; An empty key makes PokeS write 0 bytes, so the fill loop below would
  ; never reach 32 and spin forever. Reject it up front.
  If Key = ""
    *ctx\drounds = 0
    ProcedureReturn #False
  EndIf
  ; PokeS returns the number of bytes written (without the null); keep
  ; appending copies of Key until all 32 key bytes are filled.
  Repeat
    pos + PokeS(@k(pos), Key, 32 - pos, #PB_Ascii)
  Until pos = 32
  SetKey(*ctx, @k(), 256, Rounds)
  ProcedureReturn #True
EndProcedure
; *** SetIV procedure ***
; Stores the starting block counter and the IV in row 3 of the state
; (context offset 48): counter in the low 8 bytes, IV in the high 8 bytes.
;   *ctx    : context to update (key and round count are left untouched)
;   *IV     : pointer to the IV; exactly 8 bytes are read
;   Counter : 64 bit block counter to start from
; IV (initialization vector) has to be 64 bits
Procedure SetIV(*ctx.ctx_ChaCha, *IV.Ascii, Counter.q = 0)
mov rdx, [p.p_ctx]
mov rax, [p.p_IV]
; xmm3 low qword = Counter, xmm4 low qword = IV
M_movq(xmm3, [p.v_Counter])
M_movq(xmm4, [rax])
; combine: xmm3 = IV (high qword) : Counter (low qword)
!punpcklqdq xmm3, xmm4
M_movdqu([rdx + 48], xmm3)
EndProcedure
; *** Crypt procedure ***
; XORs Size bytes from *Input with the ChaCha keystream and writes them to
; *Output (encryption and decryption are the same operation).
;   *ctx    : context prepared with SetKey / SetIV; its block counter is updated
;   *Input  : source bytes
;   *Output : destination bytes
;   Size    : number of bytes to process
; returns 0 on error, 1 on success
; Error cases visible here: Size = 0, or a double round count <= 0 in the context.
; Register roles: rbx = bytes remaining, rsi = input, rdi = output,
;   rdx = context, rax = 16 byte aligned scratch copy of the state,
;   ecx = double round counter, xmm0-xmm3 = working state rows,
;   xmm4 = scratch, xmm5 = constant 1 for the counter increment.
; One keystream block is 64 bytes. A trailing partial block still advances the
; block counter by one, so a later call continues with the next full block.
; NOTE(review): the register backups and the scratch state are stored below
; rsp without adjusting rsp. Confirm this is safe on every target platform
; (memory below the stack pointer is not guaranteed to be preserved everywhere).
Procedure Crypt(*ctx.ctx_ChaCha, *Input, *Output, Size)
; backup registers
mov [rsp - 8], rbx
mov [rsp - 16], rsi
mov [rsp - 24], rdi
; load procedure parameters
; rax = 0: return value for the error exits below
sub rax, rax
mov rbx, [p.v_Size]
test rbx, rbx
!jz chacha.l_crypt5
mov rdx, [p.p_ctx]
; reject a context whose double round count is zero or negative
mov ecx, [rdx + 64]
!cmp ecx, 0
!jng chacha.l_crypt5
mov rsi, [p.p_Input]
mov rdi, [p.p_Output]
; load state and make aligned copy
; rax = (rsp - 88) rounded down to a multiple of 16; the 64 byte copy that
; starts there ends below the three register backup slots
lea rax, [rsp - 88]
shr rax, 4
shl rax, 4
M_movdqu(xmm0, [rdx])
M_movdqu(xmm1, [rdx + 16])
M_movdqu(xmm2, [rdx + 32])
M_movdqu(xmm3, [rdx + 48])
M_movdqa([rax], xmm0)
M_movdqa([rax + 16], xmm1)
M_movdqa([rax + 32], xmm2)
M_movdqa([rax + 48], xmm3)
; perform double rounds
; xmm5 = 1 in the lowest lane (rest zero), added to the counter after each block
!mov ecx, 1
!movd xmm5, ecx
; start of one 64 byte block: reload the double round count
!chacha.l_crypt0:
mov ecx, [rdx + 64]
; start of one double round
!chacha.l_crypt1:
; xmm0 = 3 2 1 0
; xmm1 = 7 6 5 4
; xmm2 = 11 10 9 8
; xmm3 = 15 14 13 12
; first half: quarter round on the four columns in parallel
M_CryptQR(xmm0, xmm1, xmm3, 16)
M_CryptQR(xmm2, xmm3, xmm1, 12)
M_CryptQR(xmm0, xmm1, xmm3, 8)
M_CryptQR(xmm2, xmm3, xmm1, 7)
; rotate rows 1-3 so that the diagonals line up as columns
!pshufd xmm1, xmm1, 00111001b
!pshufd xmm2, xmm2, 01001110b
!pshufd xmm3, xmm3, 10010011b
; xmm1 = 4 7 6 5
; xmm2 = 9 8 11 10
; xmm3 = 14 13 12 15
; second half: the same quarter round, now acting on the diagonals
M_CryptQR(xmm0, xmm1, xmm3, 16)
M_CryptQR(xmm2, xmm3, xmm1, 12)
M_CryptQR(xmm0, xmm1, xmm3, 8)
M_CryptQR(xmm2, xmm3, xmm1, 7)
; undo the row rotation
!pshufd xmm1, xmm1, 10010011b
!pshufd xmm2, xmm2, 01001110b
!pshufd xmm3, xmm3, 00111001b
!sub ecx, 1
!jnz chacha.l_crypt1
; add
; keystream = working state + saved input state; the saved row 3 gets its
; 64 bit counter incremented for the next block right after it has been used
M_movdqa(xmm4, [rax + 48])
!paddd xmm3, xmm4
!paddq xmm4, xmm5; block += 1
M_movdqa([rax + 48], xmm4)
M_paddd(xmm2, [rax + 32])
M_paddd(xmm1, [rax + 16])
M_paddd(xmm0, [rax])
; xor
; each macro handles 16 bytes or leaves to chacha.l_crypt2 when fewer remain
M_CryptXor(xmm0)
M_CryptXor(xmm1)
M_CryptXor(xmm2)
M_CryptXor(xmm3)
; a full block was processed: finished if nothing is left, otherwise reload
; the saved state (with the incremented counter) and produce the next block
test rbx, rbx
!jz chacha.l_crypt4
M_movdqa(xmm0, [rax])
M_movdqa(xmm1, [rax + 16])
M_movdqa(xmm2, [rax + 32])
M_movdqa(xmm3, [rax + 48])
!jmp chacha.l_crypt0
; fewer than 16 bytes remained: xmm4 holds the pending keystream row and
; rbx holds (remaining - 16)
!chacha.l_crypt2:
; rbx = number of tail bytes (0..15)
add rbx, 16
!jz chacha.l_crypt4
; spill the keystream row over row 0 of the scratch copy (no longer needed)
; so that it can be read byte by byte
M_movdqa([rax], xmm4)
; process the tail from its last byte down to its first
!chacha.l_crypt3:
movzx ecx, byte [rax + rbx - 1]
XOr cl, [rsi + rbx - 1]
mov [rdi + rbx - 1], cl
sub rbx, 1
!jnz chacha.l_crypt3
; update block counter in state
; only the low 8 bytes of row 3 (the counter) are written back to the context
!chacha.l_crypt4:
M_movq(xmm3, [rax + 48])
M_movq([rdx + 48], xmm3)
mov rax, 1
; restore registers
!chacha.l_crypt5:
mov rbx, [rsp - 8]
mov rsi, [rsp - 16]
mov rdi, [rsp - 24]
; no expression: the procedure result is the value left in rax (0 or 1)
ProcedureReturn
EndProcedure
; *** CryptPacket procedure ***
; Processes one self-contained packet: the context is re-seeded with the
; given IV and a block counter of 0, then Size bytes are XORed from *Input
; to *Output.
; returns 0 on error, 1 on success (the result of Crypt)
Procedure CryptPacket(*ctx.ctx_ChaCha, *IV.Ascii, *Input, *Output, Size)
  Protected.i status
  ; every packet starts at block 0 of the keystream selected by its IV
  SetIV(*ctx, *IV, 0)
  status = Crypt(*ctx, *Input, *Output, Size)
  ProcedureReturn status
EndProcedure
; *** CryptFile procedure ***
; Encrypts or decrypts a whole file with the key stored in *ctx and the
; 64 bit IV at *IV (the block counter restarts at 0).
;   *ctx       : context prepared with SetKey / SetAsciiKey
;   *IV        : pointer to the 8 byte IV
;   InputFile  : file to read
;   OutputFile : file to write; may be the same name as InputFile, in which
;                case the file is processed in place
; returns 0 on error, 1 on success
; Errors: a file cannot be opened or created, Crypt fails, or fewer bytes
; than requested are written. An empty input file counts as success.
; OutputFile is overwritten !!!
; (it is only created once the input file has been opened successfully)
Procedure CryptFile(*ctx.ctx_ChaCha, *IV.Ascii, InputFile.s, OutputFile.s)
  Protected.i result, inFile, outFile, nBytes
  ; file offsets are quads so that files above 2 GB work in 32 bit builds
  Protected filePos.q
  ; 8192 longs = 32768 bytes, a multiple of the 64 byte cipher block so that
  ; only the final chunk can end in a partial block
  Protected Dim Buffer.l(8191)
  SetIV(*ctx, *IV)
  If InputFile = OutputFile
    ; in place: OpenFile would create a missing file, so check that it exists
    If FileSize(InputFile) >= 0
      inFile = OpenFile(#PB_Any, InputFile)
      outFile = inFile
    EndIf
  Else
    inFile = ReadFile(#PB_Any, InputFile)
    ; do not truncate the output file when there is nothing to read from
    If inFile
      outFile = CreateFile(#PB_Any, OutputFile)
    EndIf
  EndIf
  If inFile And outFile
    result = 1
    Repeat
      filePos = Loc(inFile)
      nBytes = ReadData(inFile, @Buffer(), 32768)
      If nBytes > 0
        ; needed for in place operation: write back where the chunk was read
        FileSeek(outFile, filePos)
        If Crypt(*ctx, @Buffer(), @Buffer(), nBytes) = 0
          result = 0
        ElseIf WriteData(outFile, @Buffer(), nBytes) <> nBytes
          result = 0
        EndIf
      EndIf
    Until nBytes <= 0 Or result = 0
  EndIf
  ; close by handle; a failed open leaves 0, which must not be passed on
  ; (file number 0 could belong to the caller)
  If inFile
    CloseFile(inFile)
  EndIf
  If outFile And outFile <> inFile
    CloseFile(outFile)
  EndIf
  ProcedureReturn result
EndProcedure
EndModule