a smaller faster AlphaBlend()
Posted: Thu Nov 17, 2016 3:58 pm
I noticed the size of AlphaBlend() is approx. 20 KB with support routines, and it has a lot of divisions (yeah, I know, #1stworldproblems!). Alpha blending isn't something I've worked with a lot personally, so I had a look around to see what else was out there and came across this little C routine, which uses only 6 multiplies and 3 bit shifts and no divisions. (There are SSE etc. versions as well, but this is basically all I was after.) I just ripped it out after compiling with gcc -O3. It's only a couple hundred bytes and nearly twice as fast as the PB one, at least on Win32, and just slightly faster on my Linux64. It isn't quite a drop-in replacement though, as it assumes $FF for the bgcolor's Alpha, but that's probably the majority use case anyway (using the fgcolor's Alpha to control the level), so with that in mind most people can probably use it as a drop-in replacement, and this demo uses them in that interchangeable manner. Anyway, it's just another option; you can never have too many! All OSes are supported.
Code: Select all
EnableExplicit
;Original C source - http://stackoverflow.com/questions/12011081/alpha-blending-2-rgba-colors-in-c
; void blend(unsigned char result[4], unsigned char fg[4], unsigned char bg[4]) {
; unsigned int alpha = fg[3] + 1;
; unsigned int inv_alpha = 256 - fg[3];
; result[0] = (unsigned char)((alpha * fg[0] + inv_alpha * bg[0]) >> 8);
; result[1] = (unsigned char)((alpha * fg[1] + inv_alpha * bg[1]) >> 8);
; result[2] = (unsigned char)((alpha * fg[2] + inv_alpha * bg[2]) >> 8);
; result[3] = 0xff;
; }
; AlphaBlendAsm(*fgcolor, *bgcolor)
;   Blends the 4-byte colour at *fgcolor over the 4-byte colour at *bgcolor,
;   using byte 3 of the foreground as the alpha level:
;       alpha     = fg[3] + 1
;       inv_alpha = 256 - fg[3]
;       out[n]    = (alpha * fg[n] + inv_alpha * bg[n]) >> 8    for n = 0..2
;       out[3]    = $FF
;   Byte 3 of the background is never read, i.e. the background is treated as
;   fully opaque.
;   *fgcolor / *bgcolor : addresses of the two colours (4 readable bytes each).
;   Returns             : the blended colour as a 32-bit value.
;
;   Inline-ASM notes:
;   - Procedure locals are addressed relative to the stack pointer, so every
;     PUSH shifts them; the "+16" on the p.p_xxx references compensates for the
;     bytes pushed at the top of each branch (4 x 4 bytes on x86, 2 x 8 on x64).
;   - PureBasic's inline-ASM rules only allow eax/ecx/edx (x86) and
;     rax/rcx/rdx/r8/r9 (x64) to be clobbered freely; every other register used
;     below is saved and restored.
Procedure.l AlphaBlendAsm(*fgcolor, *bgcolor) ;returns 32bit RGBA
  Protected result, *result=@result
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
    ; Register roles: edi = fg ptr, edx = bg ptr, ebx = result ptr,
    ;                 ebp = alpha, esi = inv_alpha, eax/ecx = scratch
    !push ebp
    !push edi
    !push esi
    !push ebx
    !mov esi, 256
    !mov edi, dword [p.p_fgcolor+16]
    !mov edx, dword [p.p_bgcolor+16]
    !mov ebx, dword [p.p_result+16]
    !movzx ecx, byte [edi+3]          ; ecx = fg[3]
    !movzx eax, byte [edx]            ; eax = bg[0]
    !sub esi, ecx                     ; esi = inv_alpha = 256 - fg[3]
    !lea ebp, [ecx+1]                 ; ebp = alpha = fg[3] + 1
    ; channel 0
    !imul eax, esi                    ; bg[0] * inv_alpha
    !mov ecx, eax
    !movzx eax, byte [edi]
    !imul eax, ebp                    ; fg[0] * alpha
    !add eax, ecx
    !shr eax, 8
    !mov byte [ebx], al
    ; channel 1
    !movzx ecx, byte [edx+1]
    !movzx eax, byte [edi+1]
    !imul ecx, esi                    ; bg[1] * inv_alpha
    !imul eax, ebp                    ; fg[1] * alpha
    !add ecx, eax
    !shr ecx, 8
    !mov byte [ebx+1], cl
    ; channel 2 (edx is reused as scratch: the bg pointer is no longer needed)
    !movzx eax, byte [edx+2]
    !movzx edx, byte [edi+2]
    !mov byte [ebx+3], 0xFF           ; out[3] is always opaque
    !imul eax, esi                    ; bg[2] * inv_alpha
    !imul edx, ebp                    ; fg[2] * alpha
    !add eax, edx
    !shr eax, 8
    !mov byte [ebx+2], al
    !pop ebx
    !pop esi
    !pop edi
    !pop ebp
  CompilerElse
    ; Register roles: rdx = fg ptr, r8 = bg ptr, rcx = result ptr,
    ;                 r10d = alpha, eax = inv_alpha, r9d/r11d = scratch
    ; r10 and r11 are not in PureBasic's list of freely usable x64 registers,
    ; so they are saved here and restored at the end of the branch.
    !push r10
    !push r11
    !mov rdx, [p.p_fgcolor+16]
    !mov r8, [p.p_bgcolor+16]
    !mov rcx, [p.p_result+16]
    !movzx r9d, byte [rdx+3]          ; r9d = fg[3]
    !mov eax, 256
    !lea r10d, dword [r9+1]           ; r10d = alpha = fg[3] + 1
    !sub eax, r9d                     ; eax = inv_alpha = 256 - fg[3]
    ; channel 0
    !movzx r9d, byte [rdx]
    !mov r11d, r9d
    !movzx r9d, byte [r8]
    !imul r11d, r10d                  ; fg[0] * alpha
    !imul r9d, eax                    ; bg[0] * inv_alpha
    !add r9d, r11d
    !shr r9d, 8
    !mov byte [rcx], r9b
    ; channel 1
    !movzx r9d, byte [rdx+1]
    !mov r11d, r9d
    !movzx r9d, byte [r8+1]
    !imul r11d, r10d                  ; fg[1] * alpha
    !imul r9d, eax                    ; bg[1] * inv_alpha
    !add r9d, r11d
    !shr r9d, 8
    !mov byte [rcx+1], r9b
    ; channel 2 (alpha, inv_alpha and the fg pointer are consumed here)
    !movzx edx, byte [rdx+2]
    !imul r10d, edx                   ; fg[2] * alpha
    !movzx edx, byte [r8+2]
    !mov byte [rcx+3], 0xFF           ; out[3] is always opaque
    !imul eax, edx                    ; bg[2] * inv_alpha
    !add eax, r10d
    !shr eax, 8
    !mov byte [rcx+2], al
    !pop r11
    !pop r10
  CompilerEndIf
  ProcedureReturn result
EndProcedure
; Benchmark driver: blends the same two colours repeatedly, then shows the
; last result (as 32-bit hex) together with the elapsed time in milliseconds.
#BlendIterations = 25000000

Define fgcol = $7F112233
Define bgcol = $FF445566
Define result = 0
Define i, time1, time2

time1 = ElapsedMilliseconds()
For i = 1 To #BlendIterations
  result = AlphaBlendAsm(@fgcol, @bgcol) ;187ms Win32, 201 Linux64
  ;result = AlphaBlend(fgcol, bgcol) ;325ms Win32, 268 Linux64
Next i
time2 = ElapsedMilliseconds()

MessageRequester("Done", Hex(result, #PB_Long) + " Time=" + Str(time2 - time1))