ChaCha cipher module

Share your advanced PureBasic knowledge/code with the community.
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

ChaCha cipher module

Post by wilbert »

Should be pretty fast and secure. :)
Hopefully everything works. Use at your own risk :wink:

If you need more speed, you can consider using ChaCha12 (set rounds to 12).
It's not as secure as ChaCha20 but still pretty good.

Code: Select all

; ChaCha module by Wilbert (SSE2 required)

; last update August 6, 2015

; algorithm by D. J. Bernstein (Public domain)


; Public interface of the ChaCha stream cipher module.
DeclareModule ChaCha
  
  ; Cipher context. The assembly code addresses the fields by fixed byte
  ; offsets (0, 16, 32, 48 and 64), so the layout must not be changed.
  Structure ctx_ChaCha
    ; 64 byte state: bytes 0..15 constant, 16..47 key material,
    ; 48..55 block counter, 56..63 IV (as written by SetKey and SetIV)
    input.l[16]
    ; number of double rounds (Rounds shifted right by one), stored by SetKey
    drounds.l
  EndStructure
  
  ; Loads a key of KeySize bits from *Key; any KeySize other than 256 is
  ; handled as a 128 bit key. Also clears the block counter and IV.
  Declare SetKey(*ctx.ctx_ChaCha, *Key.Ascii, KeySize = 256, Rounds = 20)
  ; Builds a 32 byte key from the ASCII form of Key and passes it to SetKey.
  Declare SetAsciiKey(*ctx.ctx_ChaCha, Key.s, Rounds = 20)
  ; Stores the 64 bit Counter and the 8 bytes at *IV in the context.
  Declare SetIV(*ctx.ctx_ChaCha, *IV.Ascii, Counter.q = 0)
  ; XORs Size bytes from *Input with the key stream into *Output.
  ; Returns 1 on success, 0 on error.
  Declare Crypt(*ctx.ctx_ChaCha, *Input, *Output, Size)
  ; SetIV with counter 0 followed by Crypt; returns the result of Crypt.
  Declare CryptPacket(*ctx.ctx_ChaCha, *IV.Ascii, *Input, *Output, Size)
  ; Encrypts / decrypts InputFile into OutputFile (which is overwritten).
  ; Returns 1 on success, 0 on error.
  Declare CryptFile(*ctx.ctx_ChaCha, *IV.Ascii, InputFile.s, OutputFile.s)
  
EndDeclareModule

Module ChaCha
  
  EnableASM
  EnableExplicit
  DisableDebugger
  
  ; *** Macros and DataSection ***
  
  ; On 32 bit x86 builds the 64 bit register names used throughout this
  ; module are aliased to their 32 bit counterparts, so the same source
  ; line can serve both processor types. On other builds the names are
  ; left untouched.
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
    Macro rax : eax : EndMacro
    Macro rbx : ebx : EndMacro   
    Macro rcx : ecx : EndMacro
    Macro rdx : edx : EndMacro
    Macro rsi : esi : EndMacro
    Macro rdi : edi : EndMacro
    Macro rsp : esp : EndMacro
  CompilerEndIf
  
  ; Thin wrappers that emit a single SSE2 instruction with the two given
  ; operands. NOTE(review): presumably they exist so that the register alias
  ; macros above are applied to memory operands such as [rax + 16] before the
  ; line reaches the assembler - confirm against the PureBasic macro rules.
  
  ; aligned 128 bit move
  Macro M_movdqa(arg1, arg2)
    !movdqa arg1, arg2
  EndMacro
  
  ; unaligned 128 bit move
  Macro M_movdqu(arg1, arg2)
    !movdqu arg1, arg2
  EndMacro
  
  ; 64 bit move to / from the low half of an xmm register
  Macro M_movq(arg1, arg2)
    !movq arg1, arg2
  EndMacro
  
  ; packed 32 bit addition
  Macro M_paddd(arg1, arg2)
    !paddd arg1, arg2
  EndMacro
  
  ; One add / xor / rotate step, applied to all four 32 bit lanes at once:
  ;   reg0 = reg0 + reg1
  ;   reg2 = (reg2 xor reg0) rotated left by lr bits
  ; For lr = 16 the rotation is done by swapping the two 16 bit halves of
  ; every 32 bit lane (pshuflw / pshufhw with pattern 10110001b).
  ; For every other lr it is built from a left shift, a right shift by
  ; 32-lr and an OR; this path uses xmm4 as scratch register.
  Macro M_CryptQR(reg0, reg1, reg2, lr)
    !paddd reg0, reg1
    !pxor reg2, reg0
    CompilerIf lr = 16
      !pshuflw reg2, reg2, 10110001b
      !pshufhw reg2, reg2, 10110001b
    CompilerElse
      !movdqa xmm4, reg2
      !pslld reg2, lr
      !psrld xmm4, 32-lr
      !por reg2, xmm4
    CompilerEndIf
  EndMacro
  
  ; XORs the next 16 bytes at [rsi] with the key stream in reg and stores
  ; them at [rdi]. Register roles: rbx = bytes remaining, rsi = source,
  ; rdi = destination.
  ; 16 is subtracted from rbx first. When that borrows (fewer than 16 bytes
  ; were left) the macro jumps to chacha.l_crypt2 inside Crypt with the key
  ; stream copy in xmm4 and rbx still reduced by 16; the movdqa in between
  ; does not alter the carry flag. Otherwise rsi and rdi advance by 16.
  ; xmm4 is overwritten in both cases.
  ; The lines passed straight to the assembler name esi/edi or rsi/rdi
  ; explicitly, hence the processor specific variants.
  Macro M_CryptXor(reg)
    sub rbx, 16
    !movdqa xmm4, reg
    !jc chacha.l_crypt2
    CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
      !movdqu xmm4, [esi]
      !pxor xmm4, reg
      !movdqu [edi], xmm4
    CompilerElse
      !movdqu xmm4, [rsi]
      !pxor xmm4, reg
      !movdqu [rdi], xmm4
    CompilerEndIf
    add rsi, 16
    add rdi, 16
  EndMacro
  
  ; 16 byte constants that SetKey copies into the first row of the state:
  ; sigma is used for 256 bit keys, tau for 128 bit keys.
  DataSection
    !chacha.l_sigma: db 'expand 32-byte k'
    !chacha.l_tau: db 'expand 16-byte k'
  EndDataSection
  
  
  ; *** SetKey procedure ***
  ; Key has to be 128 or 256 bits
  
  ; Initializes the context from a raw key.
  ;   *ctx    : context to fill
  ;   *Key    : 32 key bytes when KeySize = 256, otherwise 16 key bytes
  ;   KeySize : 256 selects the 256 bit layout; every other value is
  ;             handled as a 128 bit key
  ;   Rounds  : number of rounds; half of it (shifted right by one bit)
  ;             is stored as the double round count
  ; The block counter and IV (state bytes 48..63) are cleared; use SetIV
  ; afterwards to set them.
  Procedure SetKey(*ctx.ctx_ChaCha, *Key.Ascii, KeySize = 256, Rounds = 20)
    
    ; rdx = context, rax = key bytes, rcx = key size in bits
    mov rdx, [p.p_ctx]
    mov rax, [p.p_Key]
    mov rcx, [p.v_KeySize]
    ; the result of this compare is consumed by the jne three lines below;
    ; the SSE instructions in between leave the flags untouched
    cmp rcx, 256
    ; xmm1 = first 16 key bytes, xmm3 = 0 (counter and IV row)
    M_movdqu(xmm1, [rax])
    !pxor xmm3, xmm3
    !jne chacha.l_key0
    
    ; 256 bit key
    ; xmm2 = second 16 key bytes, constant = sigma
    M_movdqu(xmm2, [rax + 16])
    lea rax, [chacha.l_sigma]
    !jmp chacha.l_key1
    
    ; 128 bit key
    ; the same 16 key bytes fill both key rows, constant = tau
    !chacha.l_key0:
    !movdqa xmm2, xmm1
    lea rax, [chacha.l_tau]
    
    !chacha.l_key1:
    ; xmm0 = constant row, rcx = Rounds / 2 (double rounds)
    M_movdqu(xmm0, [rax])
    mov rcx, [p.v_Rounds]
    shr rcx, 1
    ; write the four state rows and the double round count (offset 64)
    M_movdqu([rdx], xmm0)
    M_movdqu([rdx + 16], xmm1)
    M_movdqu([rdx + 32], xmm2)
    M_movdqu([rdx + 48], xmm3)
    mov [rdx + 64], ecx
    
  EndProcedure
  
  
  ; *** SetAsciiKey procedure ***
  
  ; Derives a 256 bit key from a text key and installs it with SetKey.
  ;   *ctx   : context to fill
  ;   Key    : key text; its ASCII form is written repeatedly into a
  ;            32 byte buffer until the buffer is full (longer keys are
  ;            cut off after 32 bytes)
  ;   Rounds : passed on to SetKey
  ; An empty Key cannot fill the buffer. In that case the context is
  ; invalidated (double round count = 0) so that Crypt reports an error
  ; instead of this procedure looping forever.
  Procedure SetAsciiKey(*ctx.ctx_ChaCha, Key.s, Rounds = 20)
    
    Protected.i pos
    ; 33 elements: 32 key bytes plus room for the terminating zero PokeS
    ; appends after the last character it writes
    Protected Dim k.a(32)
    
    ; PokeS would return 0 on every pass for an empty string, so the
    ; fill loop below would never terminate. Fail closed instead.
    If Key = ""
      *ctx\drounds = 0
      ProcedureReturn
    EndIf
    
    ; append the key again and again until all 32 bytes are filled;
    ; PokeS returns the number of bytes written (without the zero)
    Repeat
      pos + PokeS(@k(pos), Key, 32 - pos, #PB_Ascii)
    Until pos = 32
    
    SetKey(*ctx, @k(), 256, Rounds)
    
  EndProcedure
  
  
  ; *** SetIV procedure ***
  ; IV (initialization vector) has to be 64 bits  
  
  ; Stores the block counter and the initialization vector in the context.
  ;   *ctx    : context to update
  ;   *IV     : pointer to 8 IV bytes
  ;   Counter : 64 bit block counter to start from
  ; State bytes 48..55 receive Counter, bytes 56..63 receive the IV.
  ; Uses xmm3 and xmm4.
  Procedure SetIV(*ctx.ctx_ChaCha, *IV.Ascii, Counter.q = 0)
    
    ; rdx = context, rax = IV bytes
    mov rdx, [p.p_ctx]
    mov rax, [p.p_IV]
    ; xmm3 = Counter (low 64 bits), xmm4 = IV (low 64 bits)
    M_movq(xmm3, [p.v_Counter])
    M_movq(xmm4, [rax])
    ; combine: low half of xmm3 = Counter, high half = IV
    !punpcklqdq xmm3, xmm4
    M_movdqu([rdx + 48], xmm3)
    
  EndProcedure
  
  
  
  ; *** Crypt procedure ***
  ; returns 0 on error, 1 on success
  
  ; XORs Size bytes from *Input with the ChaCha key stream and writes the
  ; result to *Output (encryption and decryption are the same operation).
  ;   *ctx    : context prepared with SetKey / SetIV
  ;   *Input  : source bytes
  ;   *Output : destination bytes (the byte wise tail loop reads each
  ;             input byte before writing the matching output byte)
  ;   Size    : number of bytes to process
  ; Returns 1 on success. Returns 0 without touching *Output when Size is
  ; zero or negative, or when the context holds no positive double round
  ; count.
  ; The block counter in the context advances by one for every 64 byte
  ; block that was started, including a final partial block; the unused
  ; key stream of a partial block is discarded.
  ; Register roles: rbx = bytes remaining, rsi = source, rdi = destination,
  ; rdx = context, rax = aligned scratch copy of the state, ecx = round
  ; counter, xmm0..xmm3 = state rows, xmm4 = scratch, xmm5 = constant 1.
  ; NOTE(review): the register backups and the scratch state live below
  ; the stack pointer without reserving that space - confirm this is safe
  ; on every target platform.
  Procedure Crypt(*ctx.ctx_ChaCha, *Input, *Output, Size)
    
    ; backup registers
    mov [rsp - 8], rbx
    mov [rsp - 16], rsi
    mov [rsp - 24], rdi
    
    ; load procedure parameters
    ; rax = 0 is the return value of both early exits below
    sub rax, rax
    mov rbx, [p.v_Size]
    ; reject zero and negative sizes; a negative size would otherwise be
    ; treated as a huge unsigned byte count (test clears the overflow
    ; flag, so jle fires on zero or on a set sign bit)
    test rbx, rbx
    !jle chacha.l_crypt5
    mov rdx, [p.p_ctx]
    ; a double round count <= 0 means no usable key setup
    mov ecx, [rdx + 64]
    !cmp ecx, 0
    !jng chacha.l_crypt5
    mov rsi, [p.p_Input]
    mov rdi, [p.p_Output]
    
    ; load state and make aligned copy
    ; rax = address below the register backups, rounded down to a
    ; multiple of 16 so movdqa can be used on the 64 byte copy
    lea rax, [rsp - 88]
    shr rax, 4
    shl rax, 4
    M_movdqu(xmm0, [rdx])
    M_movdqu(xmm1, [rdx + 16])
    M_movdqu(xmm2, [rdx + 32])
    M_movdqu(xmm3, [rdx + 48])
    M_movdqa([rax], xmm0)
    M_movdqa([rax + 16], xmm1)
    M_movdqa([rax + 32], xmm2)
    M_movdqa([rax + 48], xmm3)
    
    ; perform double rounds
    ; xmm5 = 1 in the lowest lane, used to advance the block counter
    !mov ecx, 1
    !movd xmm5, ecx
    ; start of one 64 byte block: reload the double round count
    !chacha.l_crypt0:
    mov ecx, [rdx + 64]
    !chacha.l_crypt1:
    ; xmm0 = 3 2 1 0
    ; xmm1 = 7 6 5 4
    ; xmm2 = 11 10 9 8
    ; xmm3 = 15 14 13 12
    M_CryptQR(xmm0, xmm1, xmm3, 16)
    M_CryptQR(xmm2, xmm3, xmm1, 12)
    M_CryptQR(xmm0, xmm1, xmm3, 8)
    M_CryptQR(xmm2, xmm3, xmm1, 7)
    ; rotate the lanes of rows 1..3 so the second half of the double
    ; round works on the lane order listed below
    !pshufd xmm1, xmm1, 00111001b
    !pshufd xmm2, xmm2, 01001110b
    !pshufd xmm3, xmm3, 10010011b
    ; xmm1 = 4 7 6 5
    ; xmm2 = 9 8 11 10
    ; xmm3 = 14 13 12 15
    M_CryptQR(xmm0, xmm1, xmm3, 16)
    M_CryptQR(xmm2, xmm3, xmm1, 12)
    M_CryptQR(xmm0, xmm1, xmm3, 8)
    M_CryptQR(xmm2, xmm3, xmm1, 7)
    ; undo the lane rotation
    !pshufd xmm1, xmm1, 10010011b
    !pshufd xmm2, xmm2, 01001110b
    !pshufd xmm3, xmm3, 00111001b
    !sub ecx, 1
    !jnz chacha.l_crypt1
    
    ; add
    ; add the saved input state to the round output; the saved row 3 is
    ; written back with its low 64 bit counter increased by one
    M_movdqa(xmm4, [rax + 48])
    !paddd xmm3, xmm4
    !paddq xmm4, xmm5; block += 1
    M_movdqa([rax + 48], xmm4)
    M_paddd(xmm2, [rax + 32])
    M_paddd(xmm1, [rax + 16])
    M_paddd(xmm0, [rax])
    
    ; xor
    ; each step handles 16 bytes or jumps to chacha.l_crypt2 when fewer
    ; than 16 bytes are left
    M_CryptXor(xmm0)
    M_CryptXor(xmm1)
    M_CryptXor(xmm2)
    M_CryptXor(xmm3)
    ; a whole block is done: finish when nothing is left, otherwise
    ; reload the (counter advanced) state and produce the next block
    test rbx, rbx
    !jz chacha.l_crypt4
    M_movdqa(xmm0, [rax])
    M_movdqa(xmm1, [rax + 16])
    M_movdqa(xmm2, [rax + 32])
    M_movdqa(xmm3, [rax + 48])
    !jmp chacha.l_crypt0
    ; tail: undo the subtraction done by M_CryptXor, rbx = 0..15 bytes left
    !chacha.l_crypt2:
    add rbx, 16
    !jz chacha.l_crypt4
    ; park the pending key stream bytes (xmm4) in the scratch area and
    ; XOR the remaining bytes one at a time, last byte first
    M_movdqa([rax], xmm4)
    !chacha.l_crypt3:
    movzx ecx, byte [rax + rbx - 1]
    XOr cl, [rsi + rbx - 1]
    mov [rdi + rbx - 1], cl
    sub rbx, 1
    !jnz chacha.l_crypt3
    
    ; update block counter in state
    ; only the low 8 bytes of row 3 (the counter) are copied back
    !chacha.l_crypt4:
    M_movq(xmm3, [rax + 48])
    M_movq([rdx + 48], xmm3)
    mov rax, 1
    
    ; restore registers
    !chacha.l_crypt5:
    mov rbx, [rsp - 8]
    mov rsi, [rsp - 16]
    mov rdi, [rsp - 24]
    ; the value left in rax (0 or 1) is the procedure result
    ProcedureReturn
    
  EndProcedure
  
  
  ; *** CryptPacket procedure ***
  ; returns 0 on error, 1 on success
  
  ; Encrypts / decrypts one self contained packet.
  ; The IV at *IV is installed with block counter 0 first, then Size bytes
  ; are processed from *Input to *Output. The result of Crypt is returned
  ; unchanged (0 on error, 1 on success).
  Procedure CryptPacket(*ctx.ctx_ChaCha, *IV.Ascii, *Input, *Output, Size)
    
    Protected.i status
    
    SetIV(*ctx, *IV, 0)
    status = Crypt(*ctx, *Input, *Output, Size)
    
    ProcedureReturn status
    
  EndProcedure
  
  
  ; *** CryptFile procedure ***
  ; returns 0 on error, 1 on success
  ; OutputFile is overwritten !!!
  
  ; Encrypts / decrypts a whole file in 32768 byte chunks.
  ;   *ctx       : context prepared with SetKey
  ;   *IV        : pointer to 8 IV bytes; the block counter starts at 0
  ;   InputFile  : file to read
  ;   OutputFile : file to write; when it is the same string as InputFile
  ;                the file is processed in place, otherwise OutputFile is
  ;                created / overwritten
  ; Returns 1 when every chunk was processed and written completely.
  ; Returns 0 when a file could not be opened, when Crypt reports an error,
  ; when a write was incomplete, or when the input file is empty.
  ; NOTE(review): the in place check is a plain string comparison; two
  ; different spellings of the same path are not recognised as equal.
  Procedure CryptFile(*ctx.ctx_ChaCha, *IV.Ascii, InputFile.s, OutputFile.s)
    
    Protected.i result, inFile, outFile, nBytes
    ; file positions are quads; an integer would be too small for large
    ; files on 32 bit builds
    Protected.q pos
    ; 8192 longs = 32768 bytes, a multiple of the 64 byte block size, so
    ; only the last chunk of a file can end inside a block
    Protected Dim Buffer.l(8191)
    
    SetIV(*ctx, *IV)
    If InputFile = OutputFile
      inFile = OpenFile(#PB_Any, InputFile)
      outFile = inFile
    Else
      inFile = ReadFile(#PB_Any, InputFile)
      ; do not create / truncate the output when there is nothing to read
      If inFile
        outFile = CreateFile(#PB_Any, OutputFile)
      EndIf
    EndIf
    
    If inFile And outFile
      Repeat
        ; remember where this chunk starts so it can be written back to
        ; the same position (needed when both handles are the same file)
        pos = Loc(inFile)
        nBytes = ReadData(inFile, @Buffer(), 32768)
        If nBytes
          result = Crypt(*ctx, @Buffer(), @Buffer(), nBytes)
          ; on failure the buffer still holds the unprocessed input;
          ; never write that to the output file
          If result = 0
            Break
          EndIf
          FileSeek(outFile, pos)
          ; WriteData returns the number of bytes actually written
          If WriteData(outFile, @Buffer(), nBytes) <> nBytes
            result = 0
            Break
          EndIf
        EndIf
      Until nBytes = 0
    EndIf
    
    ; when both handles are the same file the second IsFile check fails
    ; after the first CloseFile, so the file is closed only once
    If IsFile(inFile) : CloseFile(inFile) : EndIf
    If IsFile(outFile) : CloseFile(outFile) : EndIf
    
    ProcedureReturn result
    
  EndProcedure
  
EndModule
Last edited by wilbert on Thu Aug 06, 2015 10:33 am, edited 5 times in total.
Windows (x64)
Raspberry Pi OS (Arm64)
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: ChaCha module

Post by wilbert »

Examples

Code: Select all

; Example: process a file with a 256 bit key and show the time it took.

DataSection
  ; 32 key bytes
  key:
  Data.b $c4,$6e,$c1,$b1,$8c,$e8,$a8,$78,$72,$5a,$37,$e7,$80,$df,$b7,$35
  Data.b $1f,$68,$ed,$2e,$19,$4c,$79,$fb,$c6,$ae,$be,$e1,$a6,$67,$97,$5d
  ; 8 IV bytes
  iv:
  Data.b $1a,$da,$31,$d5,$cf,$68,$82,$21
EndDataSection

; context with the key from the data section (256 bit, 20 rounds by default)
ctx.ChaCha::ctx_ChaCha
ChaCha::SetKey(@ctx, ?key)

; process "test.jpg" into "test_.jpg" and measure the elapsed milliseconds
startTime = ElapsedMilliseconds()
ChaCha::CryptFile(@ctx, ?iv, "test.jpg", "test_.jpg")
elapsed = ElapsedMilliseconds() - startTime
MessageRequester("",Str(elapsed))

Code: Select all

; create context and set key
ctx.ChaCha::ctx_ChaCha
ChaCha::SetAsciiKey(@ctx, "PureBasic ChaCha")

; set 64 bit initialization vector
RandomSeed(123)
RandomData(@iv.q, 8)

MyString.s = "This is a small test string"
MyStringLen = StringByteLength(MyString)

; encrypt
*Encrypted = AllocateMemory(MyStringLen)
ChaCha::CryptPacket(@ctx, @iv, @MyString, *Encrypted, MyStringLen)
ShowMemoryViewer(*Encrypted, MyStringLen)

; decrypt
ChaCha::CryptPacket(@ctx, @iv, *Encrypted, @MyString, MyStringLen)

Debug MyString
Windows (x64)
Raspberry Pi OS (Arm64)
User avatar
netmaestro
PureBasic Bullfrog
PureBasic Bullfrog
Posts: 8451
Joined: Wed Jul 06, 2005 5:42 am
Location: Fort Nelson, BC, Canada

Re: ChaCha module

Post by netmaestro »

Looks excellent and thanks for sharing. Good clean code as always. One question, are zeros going to appear in the encrypted output? With AES we have to encrypt->base64->encrypted string if we want to show the result as a string, I'm just wondering is that necessary with this too?
BERESHEIT
User avatar
idle
Always Here
Always Here
Posts: 5903
Joined: Fri Sep 21, 2007 5:52 am
Location: New Zealand

Re: ChaCha module

Post by idle »

nice, will be interesting to see how it compares to AES for speed
Windows 11, Manjaro, Raspberry Pi OS
Image
IdeasVacuum
Always Here
Always Here
Posts: 6426
Joined: Fri Oct 23, 2009 2:33 am
Location: Wales, UK
Contact:

Re: ChaCha module

Post by IdeasVacuum »

Very very interesting!
I nearly didn't bother to read this post though, thought it was to do with ballroom dancing... :mrgreen:
IdeasVacuum
If it sounds simple, you have not grasped the complexity.
User avatar
Keya
Addict
Addict
Posts: 1890
Joined: Thu Jun 04, 2015 7:10 am

Re: ChaCha module

Post by Keya »

wilbert she is really a beautiful elegant cipher isnt she! we learn a little about her at uni this year. Thankyou for your share :)
I saw this page https://eden.dei.uc.pt/~sneves/chacha/chacha.html
he change the following code to gain a speed improvement from 3.9 to 3.18 seconds/6146 to 7555 Mbps:

Code: Select all

movdqa %xmm15,%xmm6
psrld $16,%xmm15
pslld $16,%xmm6
pxor %xmm6,%xmm15
   ... to ...
pshufb %xmm6, %xmm15
i look at your code but its hard for me to tell but it looks like maybe you are doing it in this bit (and you do por where he does pxor?):

Code: Select all

  Macro M_CryptQR(reg0, reg1, reg2, lr)
    !paddd reg0, reg1
    !pxor reg2, reg0
    CompilerIf lr = 16
      !pshuflw reg2, reg2, 10110001b
      !pshufhw reg2, reg2, 10110001b
    CompilerElse
      !movdqa xmm4, reg2
      !pslld reg2, lr
      !psrld xmm4, 32-lr
      !por reg2, xmm4
    CompilerEndIf
  EndMacro
But the article suggests you can do it for both 8 and 16, your code if it is doing it is only doing it for 16?

Now all it needs is Poly1305 for authentication! lol :D
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: ChaCha module

Post by wilbert »

netmaestro wrote:One question, are zeros going to appear in the encrypted output? With AES we have to encrypt->base64->encrypted string if we want to show the result as a string, I'm just wondering is that necessary with this too?
Yes, zeros are going to appear in the encrypted output with this one also.
Keya wrote:I saw this page https://eden.dei.uc.pt/~sneves/chacha/chacha.html
he change the following code to gain a speed improvement from 3.9 to 3.18 seconds/6146 to 7555 Mbps:
I noticed that page also. It's true I only optimized for rotating 16 bits.
The reason for this, is that I wanted to stick with SSE2. Pshufb is a SSSE3 instruction.
I also wanted code that worked both on 32 and 64 bit systems so I also didn't use the extra registers 64 bit has.
When using newer instruction sets like for example AVX or AVX2, you can improve the speed of course.
My code isn't as fast as the one referenced there by the way (the referenced code processes 4 blocks in parallel while I'm handling only 1 block at a time).
idle wrote:nice, will be interesting to see how it compares to AES for speed
If you know the answer, I'd like to hear it :)
Last edited by wilbert on Thu Aug 06, 2015 12:54 pm, edited 1 time in total.
Windows (x64)
Raspberry Pi OS (Arm64)
Inf0Byt3
PureBasic Fanatic
PureBasic Fanatic
Posts: 2236
Joined: Fri Dec 09, 2005 12:15 pm
Location: Elbonia

Re: ChaCha cipher module

Post by Inf0Byt3 »

Nice code Wilbert, thanks! It's lightning fast too.
None are more hopelessly enslaved than those who falsely believe they are free. (Goethe)
User avatar
Erich
User
User
Posts: 49
Joined: Thu Sep 30, 2010 9:21 pm

Re: ChaCha cipher module

Post by Erich »

Very cool. 8)

Did you check the implementation with test vectors?
"I have never let my schooling interfere with my education." - Mark Twain
User avatar
netmaestro
PureBasic Bullfrog
PureBasic Bullfrog
Posts: 8451
Joined: Wed Jul 06, 2005 5:42 am
Location: Fort Nelson, BC, Canada

Re: ChaCha cipher module

Post by netmaestro »

Code: Select all

CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
    Macro rax : eax : EndMacro
    Macro rbx : ebx : EndMacro   
    Macro rcx : ecx : EndMacro
    Macro rdx : edx : EndMacro
    Macro rsi : esi : EndMacro
    Macro rdi : edi : EndMacro
    Macro rsp : esp : EndMacro
  CompilerEndIf
Ok, now you're just showing off!!

(kidding aside, very clever)
BERESHEIT
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: ChaCha cipher module

Post by wilbert »

Erich wrote:Very cool. 8)

Did you check the implementation with test vectors?
Yes, here are some test vectors if you want to verify
https://tools.ietf.org/html/draft-strom ... vectors-00
I didn't implement a special keystream procedure but you can just use a memory area filled with zeros for both input and output.
The output should match the test vectors if you used the same key and iv.
netmaestro wrote:Ok, now you're just showing off!!

(kidding aside, very clever)
If I remember correctly I got this approach from somewhere on this forum.
Anyway, it works great to create code compatible with both x86 and x64.
Windows (x64)
Raspberry Pi OS (Arm64)
User avatar
Tenaja
Addict
Addict
Posts: 1959
Joined: Tue Nov 09, 2010 10:15 pm

Re: ChaCha cipher module

Post by Tenaja »

wilbert wrote:
netmaestro wrote:Ok, now you're just showing off!!

(kidding aside, very clever)
If I remember correctly I got this approach from somewhere on this forum.
Anyway, it works great to create code compatible with both x86 and x64.
http://purebasic.fr/english/viewtopic.php?f=35&t=60280

I tried to get it to work, and came up with a dirtier solution, but Stargate made it clean.
User avatar
RichAlgeni
Addict
Addict
Posts: 935
Joined: Wed Sep 22, 2010 1:50 am
Location: Bradenton, FL

Re: ChaCha module

Post by RichAlgeni »

IdeasVacuum wrote:Very very interesting!
I nearly didn't bother to read this post though, thought it was to do with ballroom dancing... :mrgreen:
But you're a beautiful dancer! :lol:
User avatar
netmaestro
PureBasic Bullfrog
PureBasic Bullfrog
Posts: 8451
Joined: Wed Jul 06, 2005 5:42 am
Location: Fort Nelson, BC, Canada

Re: ChaCha cipher module

Post by netmaestro »

I got an email telling me there was a new reply in this topic but there isn't. I'm glad I got it though because I'd forgotten this little gem. Maybe some spammer got squashed before I got here?
BERESHEIT
User avatar
pdwyer
Addict
Addict
Posts: 2813
Joined: Tue May 08, 2007 1:27 pm
Location: Chiba, Japan

Re: ChaCha cipher module

Post by pdwyer »

Thanks for the bump, I hadn't seen this at all.
Computerphile has a good video on this algorithm for those interested: https://www.youtube.com/watch?v=UeIpq-C-GSA
Paul Dwyer

“In nature, it’s not the strongest nor the most intelligent who survives. It’s the most adaptable to change” - Charles Darwin
“If you can't explain it to a six-year old you really don't understand it yourself.” - Albert Einstein
Post Reply