Each pixel consists of four 32-bit floats (r, g, b, a), each in the range [0, 1].
Because every pixel takes up 16 bytes of memory, it is recommended to process large images as multiple smaller blocks.
Code: Select all
; f4 module by Wilbert
; Supported platforms : All (Cross platform)
; CPU requirements : SSE2 support
; Last change : May 19, 2016
;- *** Module declaration ***
DeclareModule f4

  ; A single pixel seen as four named float channels.
  Structure rgba_f
    r.f   ; red
    g.f   ; green
    b.f   ; blue
    a.f   ; alpha
  EndStructure

  ; The same four floats, reachable either by index (f[0] .. f[3])
  ; or by channel name (c\r, c\g, c\b, c\a).
  Structure f4
    StructureUnion
      f.f[4]
      c.rgba_f
    EndStructureUnion
  EndStructure

  ; --- Rectangle transfer on the current drawing output ---
  ; Call these between StartDrawing() and StopDrawing().
  Declare pxGet(x, y, width, height, *px_f4)
  Declare pxSet(x, y, width, height, *px_f4)

  ; --- Whole image transfer ---
  ; Call these outside of a StartDrawing() / StopDrawing() block.
  Declare pxGetImage(Image, *px_f4)
  Declare pxSetImage(Image, *px_f4)

  ; --- Low level conversion, packed bytes -> floats ---
  Declare rgb2float(*px, *px_f4, num_px.l)
  Declare bgr2float(*px, *px_f4, num_px.l)
  Declare rgba2float(*px, *px_f4, num_px.l)
  Declare bgra2float(*px, *px_f4, num_px.l)

  ; --- Low level conversion, floats -> packed bytes ---
  Declare float2rgb(*px_f4, *px, num_px.l)
  Declare float2bgr(*px_f4, *px, num_px.l)
  Declare float2rgba(*px_f4, *px, num_px.l)
  Declare float2bgra(*px_f4, *px, num_px.l)

EndDeclareModule
;- *** Module implementation ***
Module f4
DisableDebugger
EnableExplicit
EnableASM

; On 32-bit x86 the names rax / rdx are mapped to eax / edx, so the
; pointer handling code in the macros below can be written once.
CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
  Macro rax : eax : EndMacro
  Macro rdx : edx : EndMacro
CompilerEndIf

; Unaligned 16 byte load: xmm_reg = [mem_addr]
Macro M_mem2xmm(xmm_reg, mem_addr)
  !movups xmm_reg, [mem_addr]
EndMacro

; Unaligned 16 byte store: [mem_addr] = xmm_reg
Macro M_xmm2mem(mem_addr, xmm_reg)
  !movups [mem_addr], xmm_reg
EndMacro
;- ** rgb(a) to f4 conversion **
; M_rgb2float(bgr, st) - shared body of rgb2float, bgr2float, rgba2float and bgra2float.
; It has to be expanded inside a procedure whose parameters are named *px (packed source
; bytes), *px_f4 (destination floats) and num_px, because it reads exactly those names.
;   bgr : 0 keeps the source channel order, non-zero swaps channels 0 and 2 of every pixel
;   st  : source stride in bytes per pixel (the procedures in this file pass 3 or 4)
; Every source byte v is converted to approximately v * 257 / 65535, so 0 maps to 0.0 and
; 255 maps to 1.0. With st = 3 the fourth output float comes from a forced 0xFF byte.
; Exactly num_px * st source bytes are read and num_px * 16 destination bytes are written;
; nothing is done when num_px is zero or negative.
; Registers written: eax/rax, ecx, edx/rdx, xmm0-xmm4 and, when st = 3, xmm5.
; The bgr and st values are pasted into the label names so each expansion gets its own labels.
Macro M_rgb2float(bgr = 0, st = 3)
; build the constants
; xmm2 = 0x4b004b00 in every dword, its words become the upper half of each float below
; xmm3 = 0x4b000000 in every dword, which is the float 2^23 (8388608.0)
; xmm4 = 0x37800080 in every dword, a float very close to 1 / 65535
; NOTE(review) the xmm1 clear looks redundant, every later use writes xmm1 before reading it
!mov eax, 0x4b004b00
!mov ecx, 0x37800080
!pxor xmm1, xmm1
!movd xmm2, eax
!movd xmm4, ecx
!pshufd xmm2, xmm2, 0
!pshufd xmm3, xmm2, 0
!pshufd xmm4, xmm4, 0
!pslld xmm3, 16
; load procedure arguments, ecx = num_px, rdx = destination floats, rax = source bytes
!mov ecx, [p.v_num_px]
mov rdx, *px_f4
mov rax, *px
CompilerIf st = 3
; xmm5 = 0xff000000 in every dword, ORed in later to force the missing alpha byte to 255
!pcmpeqd xmm5, xmm5
!pslld xmm5, 24
CompilerEndIf
; check num_px, from here on ecx holds (pixels still to convert) - 1
; zero means exactly one pixel, negative means nothing to do
!sub ecx, 1
!jz f4.rgb2float#bgr#st#_l1
!jl f4.rgb2float#bgr#st#_l2
; handle two pixels per iteration
!f4.rgb2float#bgr#st#_l0:
CompilerIf st = 3
; two overlapping 4 byte loads read exactly the 6 source bytes of the pair
; shifting the second load left by 24 bits places pixel 1 in bytes 4 to 6
; bytes 3 and 7 are then forced to 0xff by the xmm5 mask
movd xmm0, [rax]
movd xmm1, [rax + 2]
!psllq xmm1, 24
!por xmm0, xmm1
!por xmm0, xmm5
CompilerElse
; 8 bytes hold both 4 byte pixels
movq xmm0, [rax]
CompilerEndIf
; duplicate every byte so that the byte v becomes the 16 bit word v * 257
!punpcklbw xmm0, xmm0
CompilerIf bgr
; swap words 0 and 2 of each pixel, xmm1 receives the copy used for pixel 1
!pshufhw xmm1, xmm0, 11000110b
!pshuflw xmm0, xmm0, 11000110b
CompilerElse
; identity shuffle, it only copies xmm0 into xmm1
!pshufhw xmm1, xmm0, 11100100b
CompilerEndIf
; interleave with xmm2 so that each dword is the bit pattern of the float 2^23 + word
; subtracting 2^23 leaves the word value as a float, the multiply scales it to 0..1
!punpcklwd xmm0, xmm2
!punpckhwd xmm1, xmm2
!subps xmm0, xmm3
!subps xmm1, xmm3
!mulps xmm0, xmm4
!mulps xmm1, xmm4
M_xmm2mem(rdx, xmm0)
M_xmm2mem(rdx + 16, xmm1)
add rax, st * 2
add rdx, 32
; loop while at least two pixels remain, leave on borrow when none remain
; falling through with ecx = 0 means one pixel is left
!sub ecx, 2
!ja f4.rgb2float#bgr#st#_l0
!jc f4.rgb2float#bgr#st#_l2
; handle single pixel
!f4.rgb2float#bgr#st#_l1:
CompilerIf st = 3
; assemble the 3 source bytes in ecx without reading a fourth byte
; the word at offset 1 supplies bytes 1 and 2, cl supplies byte 0
movzx ecx, word [rax + 1]
!shl ecx, 8
mov cl, [rax]
!movd xmm0, ecx
!por xmm0, xmm5
CompilerElse
movd xmm0, [rax]
CompilerEndIf
; same byte to float conversion as above, for one pixel
!punpcklbw xmm0, xmm0
CompilerIf bgr
!pshuflw xmm0, xmm0, 11000110b
CompilerEndIf
!punpcklwd xmm0, xmm2
!subps xmm0, xmm3
!mulps xmm0, xmm4
M_xmm2mem(rdx, xmm0)
; exit label
!f4.rgb2float#bgr#st#_l2:
EndMacro
; rgb2float - converts num_px packed 3-byte pixels at *px to f4 pixels at *px_f4.
; Source byte 0 goes to f[0], byte 1 to f[1], byte 2 to f[2]; f[3] is set to 1.0.
; Reads num_px * 3 bytes and writes num_px * 16 bytes; does nothing when num_px <= 0.
Procedure rgb2float(*px, *px_f4, num_px.l)
M_rgb2float(0, 3)
EndProcedure
; bgr2float - converts num_px packed 3-byte pixels at *px to f4 pixels at *px_f4,
; swapping the first and third channel: source byte 0 goes to f[2], byte 1 to f[1],
; byte 2 to f[0]; f[3] is set to 1.0.
; Reads num_px * 3 bytes and writes num_px * 16 bytes; does nothing when num_px <= 0.
Procedure bgr2float(*px, *px_f4, num_px.l)
M_rgb2float(1, 3)
EndProcedure
; rgba2float - converts num_px packed 4-byte pixels at *px to f4 pixels at *px_f4.
; Source bytes 0..3 go to f[0]..f[3] in the same order.
; Reads num_px * 4 bytes and writes num_px * 16 bytes; does nothing when num_px <= 0.
Procedure rgba2float(*px, *px_f4, num_px.l)
M_rgb2float(0, 4)
EndProcedure
; bgra2float - converts num_px packed 4-byte pixels at *px to f4 pixels at *px_f4,
; swapping the first and third channel: source byte 0 goes to f[2], byte 1 to f[1],
; byte 2 to f[0] and byte 3 to f[3].
; Reads num_px * 4 bytes and writes num_px * 16 bytes; does nothing when num_px <= 0.
Procedure bgra2float(*px, *px_f4, num_px.l)
M_rgb2float(1, 4)
EndProcedure
;- ** f4 to rgb(a) conversion **
; M_float2rgb(bgr, st) - shared body of float2rgb, float2bgr, float2rgba and float2bgra.
; It has to be expanded inside a procedure whose parameters are named *px_f4 (source
; floats), *px (packed destination bytes) and num_px, because it reads exactly those names.
;   bgr : 0 keeps the channel order, non-zero swaps channels 0 and 2 of every pixel
;   st  : destination stride in bytes per pixel (the procedures in this file pass 3 or 4)
; Every float is multiplied by 255, converted to an integer by cvtps2dq (which uses the
; current MXCSR rounding mode, presumably round to nearest) and saturated to 0..255 by the
; two pack instructions, so values outside [0, 1] are clamped instead of wrapping.
; Exactly num_px * 16 source bytes are read and num_px * st destination bytes are written;
; nothing is done when num_px is zero or negative. With st = 3 the fourth float is dropped.
; Registers written: eax/rax, ecx, edx/rdx, xmm0-xmm2.
; The bgr and st values are pasted into the label names so each expansion gets its own labels.
Macro M_float2rgb(bgr = 0, st = 3)
; init xmm2 with the float 255.0 (bit pattern 0x437f0000) in every dword
!mov eax, 0x437f0000
!movd xmm2, eax
!pshufd xmm2, xmm2, 0
; load procedure arguments, ecx = num_px, rax = source floats, rdx = destination bytes
!mov ecx, [p.v_num_px]
mov rax, *px_f4
mov rdx, *px
; check num_px, from here on ecx holds (pixels still to convert) - 1
; zero means exactly one pixel, negative means nothing to do
!sub ecx, 1
!jz f4.float2rgb#bgr#st#_l1
!jl f4.float2rgb#bgr#st#_l2
; handle two pixels per iteration
!f4.float2rgb#bgr#st#_l0:
M_mem2xmm(xmm0, rax)
M_mem2xmm(xmm1, rax + 16)
; scale to 0..255 and convert to 32 bit integers
!mulps xmm0, xmm2
!mulps xmm1, xmm2
!cvtps2dq xmm0, xmm0
!cvtps2dq xmm1, xmm1
; channel reordering
; for st = 3 dword 3 of pixel 0 is made a copy of its third output channel, because
; the second 4 byte store below starts at byte 2 and writes that value there again
CompilerIf bgr
CompilerIf st = 3
; pixel 0 becomes channel 2, channel 1, channel 0, channel 0
!pshufd xmm0, xmm0, 00000110b
CompilerElse
; pixel 0 with channels 0 and 2 swapped, channel 3 kept
!pshufd xmm0, xmm0, 11000110b
CompilerEndIf
; pixel 1 with channels 0 and 2 swapped
!pshufd xmm1, xmm1, 11000110b
CompilerElseIf st = 3
; pixel 0 becomes channel 0, channel 1, channel 2, channel 2
!pshufd xmm0, xmm0, 10100100b
CompilerEndIf
; narrow 32 bit to 16 bit to 8 bit with saturation, bytes 0 to 7 then hold both pixels
!packssdw xmm0, xmm1
!packuswb xmm0, xmm0
CompilerIf st = 3
; two overlapping 4 byte stores write exactly 6 destination bytes
; the first covers bytes 0 to 3, the second covers bytes 2 to 5 after a 3 byte shift
movd [rdx], xmm0
!psrlq xmm0, 24
movd [rdx + 2], xmm0
CompilerElse
movq [rdx], xmm0
CompilerEndIf
add rax, 32
add rdx, st * 2
; loop while at least two pixels remain, leave on borrow when none remain
; falling through with ecx = 0 means one pixel is left
!sub ecx, 2
!ja f4.float2rgb#bgr#st#_l0
!jc f4.float2rgb#bgr#st#_l2
; handle single pixel
!f4.float2rgb#bgr#st#_l1:
M_mem2xmm(xmm0, rax)
!mulps xmm0, xmm2
!cvtps2dq xmm0, xmm0
CompilerIf bgr
!pshufd xmm0, xmm0, 11000110b
CompilerEndIf
!packssdw xmm0, xmm0
!packuswb xmm0, xmm0
CompilerIf st = 3
; store exactly 3 bytes, a word followed by a byte, so nothing past the pixel is written
!movd ecx, xmm0
mov [rdx], cx
!shr ecx, 16
mov [rdx + 2], cl
CompilerElse
movd [rdx], xmm0
CompilerEndIf
; exit label
!f4.float2rgb#bgr#st#_l2:
EndMacro
; float2rgb - converts num_px f4 pixels at *px_f4 to packed 3-byte pixels at *px.
; f[0], f[1], f[2] are scaled by 255, saturated to 0..255 and stored as bytes 0, 1, 2;
; f[3] is not stored.
; Reads num_px * 16 bytes and writes num_px * 3 bytes; does nothing when num_px <= 0.
Procedure float2rgb(*px_f4, *px, num_px.l)
M_float2rgb(0, 3)
EndProcedure
; float2bgr - converts num_px f4 pixels at *px_f4 to packed 3-byte pixels at *px,
; swapping the first and third channel: f[2], f[1], f[0] are scaled by 255, saturated
; to 0..255 and stored as bytes 0, 1, 2; f[3] is not stored.
; Reads num_px * 16 bytes and writes num_px * 3 bytes; does nothing when num_px <= 0.
Procedure float2bgr(*px_f4, *px, num_px.l)
M_float2rgb(1, 3)
EndProcedure
; float2rgba - converts num_px f4 pixels at *px_f4 to packed 4-byte pixels at *px.
; f[0]..f[3] are scaled by 255, saturated to 0..255 and stored as bytes 0..3.
; Reads num_px * 16 bytes and writes num_px * 4 bytes; does nothing when num_px <= 0.
Procedure float2rgba(*px_f4, *px, num_px.l)
M_float2rgb(0, 4)
EndProcedure
; float2bgra - converts num_px f4 pixels at *px_f4 to packed 4-byte pixels at *px,
; swapping the first and third channel: f[2], f[1], f[0], f[3] are scaled by 255,
; saturated to 0..255 and stored as bytes 0, 1, 2, 3.
; Reads num_px * 16 bytes and writes num_px * 4 bytes; does nothing when num_px <= 0.
Procedure float2bgra(*px_f4, *px, num_px.l)
M_float2rgb(1, 4)
EndProcedure
; Inline assembly is only used by the conversion procedures above.
DisableASM
; Common signature of the eight conversion procedures (source pointer, destination
; pointer, pixel count). pxGet and pxSet store the address of the procedure matching
; the drawing buffer format in a variable of this type and call it once per row.
Prototype _conv_proc(*src, *dst, num_px.l)
;- ** pxGet & pxSet **
Procedure pxGet(x, y, width, height, *px_f4)
  ; Reads the rectangle (x, y, width, height) of the current drawing output into
  ; *px_f4 as f4 pixels (16 bytes each). Must be called between StartDrawing() and
  ; StopDrawing().
  ;
  ;   x, y          : top-left corner in output coordinates, may be negative
  ;   width, height : size of the rectangle; *px_f4 must hold width * height pixels
  ;   *px_f4        : destination, rows are always width * 16 bytes apart
  ;
  ; The rectangle is clipped against the output. Pixels of *px_f4 that fall outside
  ; the output are left untouched.
  ;
  ; Returns #True when the drawing buffer was accessible and its format is supported
  ; (also when clipping left nothing to copy), #False otherwise. Earlier versions
  ; returned nothing, so existing callers that ignore the result are unaffected.
  Protected conv_proc._conv_proc
  Protected.i db, dp, pb, pf, fmt, w, h, px_f4_step

  If *px_f4 = 0
    ProcedureReturn #False
  EndIf

  ; DrawingBuffer() returns 0 when direct buffer access is not possible
  db = DrawingBuffer()
  If db = 0
    ProcedureReturn #False
  EndIf
  dp = DrawingBufferPitch()
  w = OutputWidth() : h = OutputHeight()
  pf = DrawingBufferPixelFormat()

  ; strip the modifier flags so only the base format remains
  fmt = pf & ~#PB_PixelFormat_ReversedY
  CompilerIf Defined(PB_PixelFormat_NoAlpha, #PB_Constant)
    ; only newer PureBasic versions define this flag
    fmt = fmt & ~#PB_PixelFormat_NoAlpha
  CompilerEndIf

  ; pick the converter and the matching bytes per pixel from the buffer format;
  ; other formats (8, 15, 16 bit) are rejected instead of being read as 32 bit
  Select fmt
    Case #PB_PixelFormat_24Bits_BGR
      conv_proc = @bgr2float() : pb = 3
    Case #PB_PixelFormat_24Bits_RGB
      conv_proc = @rgb2float() : pb = 3
    Case #PB_PixelFormat_32Bits_BGR
      conv_proc = @bgra2float() : pb = 4
    Case #PB_PixelFormat_32Bits_RGB
      conv_proc = @rgba2float() : pb = 4
    Default
      ProcedureReturn #False
  EndSelect

  ; bottom-up buffer: start at the last stored row and walk backwards
  If pf & #PB_PixelFormat_ReversedY
    db = db + dp * (h - 1)
    dp = -dp
  EndIf

  ; row stride of the float buffer, based on the unclipped width
  px_f4_step = width << 4

  ; clip left / top: skip the part of the float buffer that lies outside
  If x < 0 : *px_f4 = *px_f4 - x * 16 : width + x : EndIf
  If y < 0 : *px_f4 = *px_f4 - y * px_f4_step : height + y : EndIf
  ; move to the first pixel inside the output; w, h become the remaining extent
  If x > 0 : db + x * pb : w - x : EndIf
  If y > 0 : db + y * dp : h - y : EndIf
  ; clip right / bottom
  If width > w : width = w : EndIf
  If height > h : height = h : EndIf

  If width > 0
    While height > 0
      conv_proc(db, *px_f4, width)
      db + dp
      *px_f4 + px_f4_step
      height - 1
    Wend
  EndIf

  ProcedureReturn #True
EndProcedure
Procedure pxSet(x, y, width, height, *px_f4)
  ; Writes the f4 pixels at *px_f4 (16 bytes each) to the rectangle
  ; (x, y, width, height) of the current drawing output. Must be called between
  ; StartDrawing() and StopDrawing().
  ;
  ;   x, y          : top-left corner in output coordinates, may be negative
  ;   width, height : size of the rectangle; *px_f4 must hold width * height pixels
  ;   *px_f4        : source, rows are always width * 16 bytes apart
  ;
  ; The rectangle is clipped against the output, so only pixels inside the output
  ; are written. Channel values are scaled by 255 and saturated to 0..255.
  ;
  ; Returns #True when the drawing buffer was accessible and its format is supported
  ; (also when clipping left nothing to copy), #False otherwise. Earlier versions
  ; returned nothing, so existing callers that ignore the result are unaffected.
  Protected conv_proc._conv_proc
  Protected.i db, dp, pb, pf, fmt, w, h, px_f4_step

  If *px_f4 = 0
    ProcedureReturn #False
  EndIf

  ; DrawingBuffer() returns 0 when direct buffer access is not possible
  db = DrawingBuffer()
  If db = 0
    ProcedureReturn #False
  EndIf
  dp = DrawingBufferPitch()
  w = OutputWidth() : h = OutputHeight()
  pf = DrawingBufferPixelFormat()

  ; strip the modifier flags so only the base format remains
  fmt = pf & ~#PB_PixelFormat_ReversedY
  CompilerIf Defined(PB_PixelFormat_NoAlpha, #PB_Constant)
    ; only newer PureBasic versions define this flag
    fmt = fmt & ~#PB_PixelFormat_NoAlpha
  CompilerEndIf

  ; pick the converter and the matching bytes per pixel from the buffer format;
  ; other formats (8, 15, 16 bit) are rejected, writing 4 bytes per pixel into
  ; them would run past the end of each row
  Select fmt
    Case #PB_PixelFormat_24Bits_BGR
      conv_proc = @float2bgr() : pb = 3
    Case #PB_PixelFormat_24Bits_RGB
      conv_proc = @float2rgb() : pb = 3
    Case #PB_PixelFormat_32Bits_BGR
      conv_proc = @float2bgra() : pb = 4
    Case #PB_PixelFormat_32Bits_RGB
      conv_proc = @float2rgba() : pb = 4
    Default
      ProcedureReturn #False
  EndSelect

  ; bottom-up buffer: start at the last stored row and walk backwards
  If pf & #PB_PixelFormat_ReversedY
    db = db + dp * (h - 1)
    dp = -dp
  EndIf

  ; row stride of the float buffer, based on the unclipped width
  px_f4_step = width << 4

  ; clip left / top: skip the part of the float buffer that lies outside
  If x < 0 : *px_f4 = *px_f4 - x * 16 : width + x : EndIf
  If y < 0 : *px_f4 = *px_f4 - y * px_f4_step : height + y : EndIf
  ; move to the first pixel inside the output; w, h become the remaining extent
  If x > 0 : db + x * pb : w - x : EndIf
  If y > 0 : db + y * dp : h - y : EndIf
  ; clip right / bottom
  If width > w : width = w : EndIf
  If height > h : height = h : EndIf

  If width > 0
    While height > 0
      conv_proc(*px_f4, db, width)
      db + dp
      *px_f4 + px_f4_step
      height - 1
    Wend
  EndIf

  ProcedureReturn #True
EndProcedure
;- ** pxGetImage & pxSetImage **
Procedure pxGetImage(Image, *px_f4)
  ; Reads every pixel of Image into *px_f4 as f4 pixels (16 bytes each, row after
  ; row). Must be called outside of a StartDrawing() / StopDrawing() block because
  ; it opens its own drawing block on the image.
  ;
  ;   Image  : PureBasic image number
  ;   *px_f4 : destination, must hold image width * image height * 16 bytes
  ;
  ; Returns #True when the image was valid and drawing on it could be started,
  ; #False otherwise (in which case *px_f4 is left untouched). Earlier versions
  ; returned nothing, so existing callers that ignore the result are unaffected.
  Protected ok = #False
  If *px_f4 And IsImage(Image)
    If StartDrawing(ImageOutput(Image))
      pxGet(0, 0, OutputWidth(), OutputHeight(), *px_f4)
      StopDrawing()
      ok = #True
    EndIf
  EndIf
  ProcedureReturn ok
EndProcedure
Procedure pxSetImage(Image, *px_f4)
  ; Writes the f4 pixels at *px_f4 (16 bytes each, row after row) to every pixel
  ; of Image. Must be called outside of a StartDrawing() / StopDrawing() block
  ; because it opens its own drawing block on the image.
  ;
  ;   Image  : PureBasic image number
  ;   *px_f4 : source, must hold image width * image height * 16 bytes
  ;
  ; Returns #True when the image was valid and drawing on it could be started,
  ; #False otherwise (in which case the image is left untouched). Earlier versions
  ; returned nothing, so existing callers that ignore the result are unaffected.
  Protected ok = #False
  If *px_f4 And IsImage(Image)
    If StartDrawing(ImageOutput(Image))
      pxSet(0, 0, OutputWidth(), OutputHeight(), *px_f4)
      StopDrawing()
      ok = #True
    EndIf
  EndIf
  ProcedureReturn ok
EndProcedure
EndModule