The blur is accomplished by simply mixing part of the previous pixel with the new pixel. As a result of this, the blur is in one direction.
To make it behave more like a normal blur, the blur can be applied from all four directions in sequence. The result is not exactly the same, but I hope it is good enough for a lot of purposes.
This also means that #Blur_Full is slower than #Blur_LeftRight and produces a stronger blur at the same strength (because it actually applies the blur four times).
The advantage is that a stronger blur is not slower than a softer blur.
Support for 24 bit images / drawing buffers is implemented very simply by temporarily creating a 32 bit buffer.
As a result, working with 24 bit images is quite a bit slower.
LineStride is similar to DrawingBufferPitch(): it is the number of bytes a line occupies in memory.
Code: Select all
DeclareModule DirectionalBlur
; DirectionalBlur module by Wilbert (SSE2 required)
; Last updated : October 17, 2019
;
; Public interface: one-directional recursive blur that mixes each pixel with
; the already blurred previous pixel along the chosen direction(s).
;
; Strength: 0 - 31 (only the low five bits are used; a higher value keeps more
; of the previous pixel and therefore blurs more)
; Direction: bit flags, may be combined with |
#Blur_LeftRight = 1 ; bit 0
#Blur_RightLeft = 2 ; bit 1
#Blur_Horizontal = 3 ; #Blur_LeftRight | #Blur_RightLeft
#Blur_TopBottom = 4 ; bit 2
#Blur_BottomTop = 8 ; bit 3
#Blur_Vertical = 12 ; #Blur_TopBottom | #Blur_BottomTop
#Blur_Full = 15 ; all four directions
; Blur a buffer of 32 bit pixels in place. LineStride is the number of bytes
; per line; 0 means Width * 4.
Declare BlurPixelBuf32(*PixelBuf32, Width, Height, Strength, Direction = #Blur_Full, LineStride = 0)
; Blur a buffer of 24 bit pixels in place (goes through a temporary 32 bit
; copy). LineStride is the number of bytes per line; 0 means Width * 3.
Declare BlurPixelBuf24(*PixelBuf24, Width, Height, Strength, Direction = #Blur_Full, LineStride = 0)
; Blur a rectangle of the current drawing output; must be called between
; StartDrawing() and StopDrawing().
Declare BlurRect(x, y, Width, Height, Strength, Direction = #Blur_Full)
; Blur a whole PB image; opens and closes its own drawing session.
Declare BlurImage(Image, Strength, Direction = #Blur_Full)
EndDeclareModule
Module DirectionalBlur
; Compiler switches for the whole module. The procedures below mix PureBasic
; statements with hand-written assembly (lines starting with ! are passed to
; the assembler as they are).
DisableDebugger ; leave disabled !!!
EnableExplicit
EnableASM
; Private direction flag (bit 4). BlurRect toggles it when the drawing buffer
; is stored bottom-up; BlurPixelBuf32 then swaps the meaning of the two
; vertical directions.
#Blur_ReversedY = 16 ; required for flipped drawing buffers
;->> Private macros and procedures <<
; On 32 bit x86 builds the 64 bit register names used in the assembly lines
; without a leading ! are mapped to their 32 bit counterparts, so the same
; source assembles for both processors.
CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
Macro rbx : ebx : EndMacro
Macro rcx : ecx : EndMacro
Macro rdx : edx : EndMacro
Macro rsi : esi : EndMacro
Macro rdi : edi : EndMacro
Macro rsp : esp : EndMacro
CompilerEndIf
; Store a full 128 bit SSE register at the given address expression.
; The unaligned move is used, so the target needs no 16 byte alignment.
Macro M_save_xmm(mem, xreg)
!movdqu [mem], xreg
EndMacro
; Reload a 128 bit SSE register from the given address expression
; (counterpart of M_save_xmm, unaligned as well).
Macro M_load_xmm(xreg, mem)
!movdqu xreg, [mem]
EndMacro
Procedure CopyPixels(*Src, SrcLS, *Dst, DstLS, Width, Height)
; Copy a rectangle of 32 bit pixels from *Src to *Dst.
;   *Src, SrcLS : first source pixel and source line stride in bytes
;   *Dst, DstLS : first destination pixel and destination line stride in bytes
;   Width       : pixels per line to copy
;   Height      : number of lines to copy
; Nothing is copied when Width or Height is not positive.
;
; rsi, rdi and rbx are preserved. They are saved in ordinary local variables
; instead of below the stack pointer: the area below rsp/esp is only
; guaranteed to survive on System V x86-64 (red zone), not on Windows x64 or
; on 32 bit x86.
Protected.i SaveSI, SaveDI, SaveBX
; The countdown loops below would wrap around for a count of zero or less
If Width <= 0 Or Height <= 0
ProcedureReturn
EndIf
; Backup registers
mov [p.v_SaveSI], rsi
mov [p.v_SaveDI], rdi
mov [p.v_SaveBX], rbx
; Copy pixels
!mov eax, [p.v_Width]
!shl eax, 2 ; eax = bytes copied per line
!mov ebx, [p.v_SrcLS]
!mov edx, [p.v_DstLS]
!sub ebx, eax ; ebx = source bytes to skip after each line
!sub edx, eax ; edx = destination bytes to skip after each line
mov rsi, [p.p_Src]
mov rdi, [p.p_Dst]
!mov eax, [p.v_Height] ; eax = lines left
!.l0:
!mov ecx, [p.v_Width] ; ecx = pixels left in this line
!.l1:
movd xmm0, [rsi]
movd [rdi], xmm0
add rsi, 4
add rdi, 4
!sub ecx, 1
!jnz .l1
add rsi, rbx
add rdi, rdx
!sub eax, 1
!jnz .l0
; Restore registers
mov rsi, [p.v_SaveSI]
mov rdi, [p.v_SaveDI]
mov rbx, [p.v_SaveBX]
EndProcedure
Procedure SetStrength(Strength)
; Load the blend weights for the given strength into xmm4 and xmm5.
; Only the low five bits of Strength are used as an index into the table.
; Afterwards every 16 bit lane of xmm5 holds (table value << 8), the weight
; of the previous pixel, and every lane of xmm4 holds 0 - xmm5, the weight
; of the new pixel.
!jmp .lookup
; Weight of the previous pixel per strength step (0 - 31)
!.lut:
!db 48, 80, 112, 128, 144, 160, 176, 184
!db 192, 200, 208, 216, 220, 224, 228, 232
!db 236, 238, 240, 242, 244, 245, 246, 247
!db 248, 249, 250, 251, 252, 253, 254, 255
!.lookup:
; Fetch the table entry selected by Strength
!mov ecx, [p.v_Strength]
!and ecx, 31
lea rdx, [.lut]
movzx ecx, byte [rdx + rcx]
; Move the weight into the high byte of a word and broadcast that word
; to all eight lanes of xmm5
!shl ecx, 8
!movd xmm5, ecx
!pshuflw xmm5, xmm5, 0
!punpcklqdq xmm5, xmm5
; xmm4 = 0 - xmm5 (per 16 bit lane)
!pxor xmm4, xmm4
!psubw xmm4, xmm5
EndProcedure
; Blend four new pixels with the four previous results.
; In  : xmm0, xmm1 = two 32 bit pixels each in their low 8 bytes (new pixels)
;       xmm2, xmm3 = previous results as 16 bit channels
;       xmm4, xmm5 = weights of new and previous pixel (see SetStrength)
; Out : xmm0, xmm1 = blended pixels packed back to 8 bit channels
;       xmm2, xmm3 = blended pixels as 16 bit channels for the next step
Macro M_Process4Pixels()
!punpcklbw xmm0, xmm0 ; convert 8 > 16 bit (each byte is duplicated into both halves of its word)
!punpcklbw xmm1, xmm1
!pmulhuw xmm0, xmm4 ; multiply new by xmm4 (keeps the high 16 bits of each product)
!pmulhuw xmm2, xmm5 ; multiply old by xmm5
!pmulhuw xmm1, xmm4
!pmulhuw xmm3, xmm5
!paddw xmm0, xmm2 ; add old to new
!paddw xmm1, xmm3
!movdqa xmm2, xmm0 ; copy new to old (kept at 16 bit precision)
!movdqa xmm3, xmm1
!psrlw xmm0, 8 ; convert 16 > 8 bit (keep the high byte of each word)
!psrlw xmm1, 8
!packuswb xmm0, xmm0
!packuswb xmm1, xmm1
EndMacro
; Horizontal blur pass, expanded inside BlurPixelBuf32.
; Macro argument: 0 = left to right, 1 = right to left (it also makes the
; local labels of the two expansions unique).
; Reads the procedure variables Width, Height, LineStride and *PixelBuf32 and
; expects the weights in xmm4 / xmm5 (SetStrength).
; Four lines are processed at the same time; only the largest multiple of
; four lines is handled here, the remaining lines are left untouched.
; Uses eax, ebx, ecx, edx, rsi, rdi, xmm0 - xmm3, xmm6 and xmm7.
Macro M_Horizontal(n)
!mov eax, [p.v_Height]
!and eax, -4 ; eax = line count rounded down to a multiple of four
!jz .h#n#2 ; fewer than four lines, nothing to do
mov rdi, [p.p_PixelBuf32]
!mov edx, [p.v_LineStride]
CompilerIf n=1 ; adjust for right->left
!mov ecx, [p.v_Width]
lea rdi, [rdi + rcx*4-4] ; start at the last pixel of the line
CompilerEndIf
!lea ebx, [edx*3] ; stride*3
!.h#n#0:
!mov ecx, [p.v_Width] ; ecx = pixels left in this group of lines
mov rsi, rdi ; rsi = current pixel of line 0
lea rdi, [rdi + rdx*4] ; rdi = start of the next group of four lines
movd xmm2, [rsi] ; line 0
movd xmm6, [rsi + rdx] ; line 1
movd xmm3, [rsi + rdx*2] ; line 2
movd xmm7, [rsi + rbx] ; line 3
!punpckldq xmm2, xmm6 ; xmm2 = line 0 and 1 (first pixel serves as initial old value)
!punpckldq xmm3, xmm7 ; xmm3 = line 2 and 3
!punpcklbw xmm2, xmm2 ; expand 8 bit into 16 bit
!punpcklbw xmm3, xmm3
!.h#n#1:
movd xmm0, [rsi] ; line 0
movd xmm6, [rsi + rdx] ; line 1
movd xmm1, [rsi + rdx*2] ; line 2
movd xmm7, [rsi + rbx] ; line 3
!punpckldq xmm0, xmm6 ; xmm0 = line 0 and 1
!punpckldq xmm1, xmm7 ; xmm1 = line 2 and 3
M_Process4Pixels()
!pshufd xmm6, xmm0, 1 ; move the second pixel (line 1) into the low dword
!pshufd xmm7, xmm1, 1 ; move the second pixel (line 3) into the low dword
movd [rsi], xmm0 ; store line 0
movd [rsi + rdx], xmm6 ; store line 1
movd [rsi + rdx*2], xmm1 ; store line 2
movd [rsi + rbx], xmm7 ; store line 3
CompilerIf n=0
add rsi, 4
CompilerElse
sub rsi, 4
CompilerEndIf
!sub ecx, 1
!jnz .h#n#1
!sub eax, 4
!jnz .h#n#0
!.h#n#2:
EndMacro
; Vertical blur pass, expanded inside BlurPixelBuf32.
; Reads the procedure variables Width, Height, LineStride, BottomTop and
; *PixelBuf32 and expects the weights in xmm4 / xmm5 (SetStrength).
; A non-zero BottomTop starts at the last line and walks backwards through
; memory. Four columns are processed at the same time; only the largest
; multiple of four columns is handled here, the rest is left untouched.
; Uses eax, ebx, ecx, edx, rsi, rdi and xmm0 - xmm3.
Macro M_Vertical()
!mov eax, [p.v_Width]
!and eax, -4 ; eax = column count rounded down to a multiple of four
!jz .v2 ; fewer than four columns, nothing to do
mov rdi, [p.p_PixelBuf32]
!mov edx, [p.v_LineStride]
!mov ebx, [p.v_BottomTop]
!test ebx, ebx ; adjust for bottom->top
!jz .v0
!mov ecx, [p.v_Height]
!sub ecx, 1
!imul ecx, edx ; ecx = byte offset of the last line
add rdi, rcx ; start at the last line
neg rdx ; walk through the lines backwards
!.v0:
!mov ecx, [p.v_Height] ; ecx = lines left in this group of columns
mov rsi, rdi ; rsi = current line of this group of columns
add rdi, 16 ; rdi = next group of four columns
movq xmm2, [rsi] ; column 0 and 1 (first line serves as initial old value)
movq xmm3, [rsi + 8] ; column 2 and 3
!punpcklbw xmm2, xmm2 ; expand 8 bit into 16 bit
!punpcklbw xmm3, xmm3
!.v1:
movq xmm0, [rsi] ; column 0 and 1
movq xmm1, [rsi + 8] ; column 2 and 3
M_Process4Pixels()
movq [rsi], xmm0 ; store column 0 and 1
movq [rsi + 8], xmm1 ; store column 2 and 3
add rsi, rdx ; add stride
!sub ecx, 1
!jnz .v1
!sub eax, 4
!jnz .v0
!.v2:
EndMacro
;->> Public procedures <<
;- Blur 32 bit color pixel buffer
Procedure BlurPixelBuf32(*PixelBuf32, Width, Height, Strength, Direction = #Blur_Full, LineStride = 0)
; Blur a buffer of 32 bit pixels in place.
;   *PixelBuf32 : address of the first pixel
;   Width       : pixels per line
;   Height      : number of lines
;   Strength    : 0 - 31 (see SetStrength)
;   Direction   : combination of the #Blur_... flags; the private flag
;                 #Blur_ReversedY swaps the two vertical directions
;   LineStride  : bytes per line, 0 = Width * 4
; The call does nothing when *PixelBuf32 is null or Width / Height is not
; positive. A horizontal blur needs Width > 1, a vertical blur Height > 1.
;
; The assembly passes work on four lines (horizontal) or four columns
; (vertical) at a time. The up to three leftover lines / columns are copied
; into a small temporary buffer, blurred there by a recursive call and copied
; back. If that buffer cannot be allocated the leftover part stays unblurred.
Protected Dim RegSave.l(13); 56 bytes for register backup
Protected *Mem, And3, BottomTop, i
; Reject arguments that would make the copy and blur loops run with
; wrapped around counters (same check as in BlurPixelBuf24)
If *PixelBuf32 = 0 Or Width <= 0 Or Height <= 0
ProcedureReturn
EndIf
; Backup registers (rsi, rdi, rbx, xmm6 and xmm7 are used by the passes)
mov rdx, [p.a_RegSave]
mov [rdx], rsi
mov [rdx + 8], rdi
mov [rdx + 16], rbx
M_save_xmm(rdx + 24, xmm6)
M_save_xmm(rdx + 40, xmm7)
; Set LineStride to width*4 if 0
If LineStride = 0
LineStride = Width << 2
EndIf
If Direction & #Blur_Horizontal And Width > 1
; -= Horizontal blur =-
; Leftover lines (Height not a multiple of four) go through a 4 line buffer
And3 = Height & 3
If And3
*Mem = AllocateMemory(Width << 4, #PB_Memory_NoClear)
If *Mem
CopyPixels(*PixelBuf32+(Height-And3)*LineStride, LineStride, *Mem, Width<<2, Width, And3)
BlurPixelBuf32(*Mem, Width, 4, Strength, Direction & #Blur_Horizontal)
CopyPixels(*Mem, Width<<2, *PixelBuf32+(Height-And3)*LineStride, LineStride, Width, And3)
FreeMemory(*Mem)
EndIf
EndIf
; Load the weights after the recursive call, which uses xmm4 / xmm5 itself
SetStrength(Strength)
If Direction & #Blur_LeftRight
M_Horizontal(0)
EndIf
If Direction & #Blur_RightLeft
M_Horizontal(1)
EndIf
EndIf
If Direction & #Blur_Vertical And Height > 1
; -= Vertical blur =-
; Leftover columns (Width not a multiple of four) go through a 4 column buffer
And3 = Width & 3
If And3
*Mem = AllocateMemory(Height << 4, #PB_Memory_NoClear)
If *Mem
CopyPixels(*PixelBuf32+(Width-And3)<<2, LineStride, *Mem, 16, And3, Height)
BlurPixelBuf32(*Mem, 4, Height, Strength, Direction & (#Blur_Vertical|#Blur_ReversedY))
CopyPixels(*Mem, 16, *PixelBuf32+(Width-And3)<<2, LineStride, And3, Height)
FreeMemory(*Mem)
EndIf
EndIf
; Load the weights after the recursive call, which uses xmm4 / xmm5 itself
SetStrength(Strength)
; i = 4 is #Blur_TopBottom, i = 8 is #Blur_BottomTop
For i = 4 To 8 Step 4
If Direction & i
; BottomTop tells M_Vertical to walk backwards through memory; for a
; flipped buffer the two directions trade places
If Direction & #Blur_ReversedY
BottomTop = ~i & #Blur_BottomTop
Else
BottomTop = i & #Blur_BottomTop
EndIf
M_Vertical()
EndIf
Next
EndIf
; Restore registers
mov rdx, [p.a_RegSave]
mov rsi, [rdx]
mov rdi, [rdx + 8]
mov rbx, [rdx + 16]
M_load_xmm(xmm6, rdx + 24)
M_load_xmm(xmm7, rdx + 40)
EndProcedure
;- Blur 24 bit color pixel buffer
Procedure BlurPixelBuf24(*PixelBuf24, Width, Height, Strength, Direction = #Blur_Full, LineStride = 0)
; Blur a buffer of 24 bit pixels in place.
; The pixels are expanded into a temporary 32 bit buffer, blurred there with
; BlurPixelBuf32 and packed back. Nothing happens when Width or Height is not
; positive or when the temporary buffer cannot be allocated.
; LineStride is the number of bytes per line of the 24 bit buffer
; (0 = Width * 3).
Protected Dim RegSave.l(5) ; 24 bytes for register backup
Protected *Mem
If Width <= 0 Or Height <= 0
ProcedureReturn
EndIf
*Mem = AllocateMemory(Width * Height << 2, #PB_Memory_NoClear)
If *Mem = 0
ProcedureReturn
EndIf
; A stride of 0 selects tightly packed lines
If LineStride = 0
LineStride = Width * 3
EndIf
; Save the registers used by the two conversion loops
mov rdx, [p.a_RegSave]
mov [rdx], rsi
mov [rdx + 8], rdi
mov [rdx + 16], rbx
; --- Expand 24 bit pixels into *Mem (4 bytes per pixel) ---
!mov eax, [p.v_Width]
!lea eax, [eax*3]
!mov ebx, [p.v_LineStride]
!sub ebx, eax ; ebx = unused bytes at the end of each 24 bit line
mov rsi, [p.p_PixelBuf24]
mov rdi, [p.p_Mem]
!mov eax, [p.v_Height] ; eax = lines left
!.expand_line:
!mov ecx, [p.v_Width]
!sub ecx, 1 ; the last pixel of a line is handled separately
!jz .expand_last
!.expand_pixel:
movd xmm0, [rsi] ; reads 4 bytes, the fourth belongs to the next pixel
movd [rdi], xmm0
add rsi, 3
add rdi, 4
!sub ecx, 1
!jnz .expand_pixel
!.expand_last:
; Last pixel of the line: read exactly 3 bytes
movzx edx, word [rsi]
mov [rdi], dx
movzx edx, byte [rsi + 2]
mov [rdi + 2], dl
add rdi, 4
lea rsi, [rsi + rbx + 3] ; skip the pixel and the rest of the line
!sub eax, 1
!jnz .expand_line
BlurPixelBuf32(*Mem, Width, Height, Strength, Direction)
; --- Pack the blurred pixels back into the 24 bit buffer ---
!mov eax, [p.v_Width]
!lea eax, [eax*3]
!mov ebx, [p.v_LineStride]
!sub ebx, eax ; ebx = unused bytes at the end of each 24 bit line
mov rsi, [p.p_Mem]
mov rdi, [p.p_PixelBuf24]
!mov eax, [p.v_Height] ; eax = lines left
!.pack_line:
!mov ecx, [p.v_Width]
!sub ecx, 1 ; the last pixel of a line is handled separately
!jz .pack_last
!.pack_pixel:
movd xmm0, [rsi]
movd [rdi], xmm0 ; writes 4 bytes, the fourth is overwritten by the next pixel
add rsi, 4
add rdi, 3
!sub ecx, 1
!jnz .pack_pixel
!.pack_last:
; Last pixel of the line: write exactly 3 bytes
movzx edx, word [rsi]
mov [rdi], dx
movzx edx, byte [rsi + 2]
mov [rdi + 2], dl
add rsi, 4
lea rdi, [rdi + rbx + 3] ; skip the pixel and the rest of the line
!sub eax, 1
!jnz .pack_line
; Put the saved registers back and release the temporary buffer
mov rdx, [p.a_RegSave]
mov rsi, [rdx]
mov rdi, [rdx + 8]
mov rbx, [rdx + 16]
FreeMemory(*Mem)
EndProcedure
;- Blur rectangle inside StartDrawing() / StopDrawing()
Procedure BlurRect(x, y, Width, Height, Strength, Direction = #Blur_Full)
; Blur a rectangle of the current drawing output.
; Must be called between StartDrawing() and StopDrawing().
;   x, y          : top left corner of the rectangle
;   Width, Height : size; negative values extend to the left / upwards
;   Strength      : 0 - 31
;   Direction     : combination of the #Blur_... flags
; The rectangle is clipped to the output. Nothing happens when the clipped
; rectangle is empty, when no drawing buffer is available, or when the
; buffer is neither a 24 bit nor a 32 bit format.
Protected.i MaxWidth, MaxHeight, PixelFormat, Pitch, *PixelBuf
; Check x, y, Width and Height
If Width < 0 : x + Width : Width = -Width : EndIf
If Height < 0 : y + Height : Height = -Height : EndIf
If x < 0 : Width + x : x = 0 : EndIf
If y < 0 : Height + y : y = 0 : EndIf
MaxWidth = OutputWidth() - x
MaxHeight = OutputHeight() - y
If Width > MaxWidth : Width = MaxWidth : EndIf
If Height > MaxHeight : Height = MaxHeight : EndIf
If Width <= 0 Or Height <= 0 : ProcedureReturn : EndIf
; Calculate *PixelBuf start position
*PixelBuf = DrawingBuffer()
If *PixelBuf
PixelFormat = DrawingBufferPixelFormat()
Pitch = DrawingBufferPitch()
If PixelFormat & #PB_PixelFormat_ReversedY
; Lines are stored bottom-up: the rectangle starts at its visually lowest
; line and the vertical directions have to be swapped
Direction ! #Blur_ReversedY
*PixelBuf + (OutputHeight() - Height - y) * Pitch
Else
*PixelBuf + y * Pitch
EndIf
If PixelFormat & (#PB_PixelFormat_24Bits_RGB|#PB_PixelFormat_24Bits_BGR)
*PixelBuf + x * 3
BlurPixelBuf24(*PixelBuf, Width, Height, Strength, Direction, Pitch)
ElseIf PixelFormat & (#PB_PixelFormat_32Bits_RGB|#PB_PixelFormat_32Bits_BGR)
*PixelBuf + x << 2
BlurPixelBuf32(*PixelBuf, Width, Height, Strength, Direction, Pitch)
EndIf
; Any other format (8, 15 or 16 bit) is left untouched: treating it as
; 32 bit pixels would read and write outside the rectangle
EndIf
EndProcedure
;- Blur a PB image
Procedure BlurImage(Image, Strength, Direction = #Blur_Full)
; Blur a complete PB image.
;   Image     : image number
;   Strength  : 0 - 31
;   Direction : combination of the #Blur_... flags
; Returns #True when the image could be opened for drawing and was passed to
; BlurRect, #False when Image is not a valid image or StartDrawing() failed.
; In the failure case nothing is drawn and StopDrawing() is not called.
If IsImage(Image)
If StartDrawing(ImageOutput(Image))
BlurRect(0, 0, OutputWidth(), OutputHeight(), Strength, Direction)
StopDrawing()
ProcedureReturn #True
EndIf
EndIf
ProcedureReturn #False
EndProcedure
EndModule