I needed a fast algorithm for image blurring for a task. Artificial intelligence wrote this code for me and of course it worked properly with some debugging. experiment with testing the speed of blurring your images.
I haven't tried it on x64 yet.
Code: Select all
;==============================================================
; Multi-threaded Separable Box Blur
; - Linear pointers (AllocateMemory)
; - No RGBA() (bit-shift write)
; - Separable (horizontal then vertical)
; - Multithreaded: horizontal pass parallel by rows, vertical pass parallel by columns
; - Written by AI in collaboration With Punak
;==============================================================
EnableExplicit
; === Editable ===
#THREADS = 4
Structure _SIZEF
Width.i
Height.i
EndStructure
; --- Globals for threads (populated before creating threads) ---
Global g_width.i, g_height.i, g_radius.i, g_threads.i
Global *g_buffer, g_temp, g_pitch.i
; ----------------------------
; Thread: Horizontal pass for a chunk of rows
; parameter = threadIndex (0..g_threads-1)
; ----------------------------
Procedure.l HorizontalPassThread(threadIndex.l)
Protected startRow = (threadIndex * g_height) / g_threads
Protected endRow = (((threadIndex + 1) * g_height) / g_threads) - 1
If endRow < startRow : endRow = startRow : EndIf
Protected x, y, i, left, right, count
Protected c.l
Protected sumR.i, sumG.i, sumB.i, sumA.i
Protected srcOffset, tmpOffset
For y = startRow To endRow
srcOffset = y * g_pitch
; initial window sum for x = 0
sumR = 0 : sumG = 0 : sumB = 0 : sumA = 0
For i = -g_radius To g_radius
x = i
If x < 0 : x = 0 : EndIf
If x >= g_width : x = g_width - 1 : EndIf
c = PeekL(*g_buffer + srcOffset + x * 4)
sumR + (c & $FF)
sumG + ((c >> 8) & $FF)
sumB + ((c >> 16) & $FF)
sumA + ((c >> 24) & $FF)
Next
For x = 0 To g_width - 1
count = g_radius * 2 + 1
tmpOffset = (y * g_width + x) * 4
PokeL(g_temp + tmpOffset, ((sumA / count) << 24) | ((sumB / count) << 16) | ((sumG / count) << 8) | (sumR / count))
; slide window: remove left, add right
left = x - g_radius
right = x + g_radius + 1
If left < 0 : left = 0 : EndIf
If right >= g_width : right = g_width - 1 : EndIf
c = PeekL(*g_buffer + srcOffset + left * 4)
sumR - (c & $FF)
sumG - ((c >> 8) & $FF)
sumB - ((c >> 16) & $FF)
sumA - ((c >> 24) & $FF)
c = PeekL(*g_buffer + srcOffset + right * 4)
sumR + (c & $FF)
sumG + ((c >> 8) & $FF)
sumB + ((c >> 16) & $FF)
sumA + ((c >> 24) & $FF)
Next
Next
ProcedureReturn 0
EndProcedure
; ----------------------------
; Thread: Vertical pass for a chunk of columns
; parameter = threadIndex (0..g_threads-1)
; ----------------------------
Procedure.l VerticalPassThread(threadIndex.l)
Protected startCol = (threadIndex * g_width) / g_threads
Protected endCol = (((threadIndex + 1) * g_width) / g_threads) - 1
If endCol < startCol : endCol = startCol : EndIf
Protected x, y, i, top, bottom, count
Protected c.l
Protected sumR.i, sumG.i, sumB.i, sumA.i
Protected dstOffset, tmpOffset
For x = startCol To endCol
; initial window for y = 0
sumR = 0 : sumG = 0 : sumB = 0 : sumA = 0
For i = -g_radius To g_radius
y = i
If y < 0 : y = 0 : EndIf
If y >= g_height : y = g_height - 1 : EndIf
tmpOffset = (y * g_width + x) * 4
c = PeekL(g_temp + tmpOffset)
sumR + (c & $FF)
sumG + ((c >> 8) & $FF)
sumB + ((c >> 16) & $FF)
sumA + ((c >> 24) & $FF)
Next
For y = 0 To g_height - 1
count = g_radius * 2 + 1
dstOffset = y * g_pitch + x * 4
PokeL(*g_buffer + dstOffset, ((sumA / count) << 24) | ((sumB / count) << 16) | ((sumG / count) << 8) | (sumR / count))
top = y - g_radius
bottom = y + g_radius + 1
If top < 0 : top = 0 : EndIf
If bottom >= g_height : bottom = g_height - 1 : EndIf
tmpOffset = (top * g_width + x) * 4
c = PeekL(g_temp + tmpOffset)
sumR - (c & $FF)
sumG - ((c >> 8) & $FF)
sumB - ((c >> 16) & $FF)
sumA - ((c >> 24) & $FF)
tmpOffset = (bottom * g_width + x) * 4
c = PeekL(g_temp + tmpOffset)
sumR + (c & $FF)
sumG + ((c >> 8) & $FF)
sumB + ((c >> 16) & $FF)
sumA + ((c >> 24) & $FF)
Next
Next
ProcedureReturn 0
EndProcedure
; ----------------------------
; Main multi-threaded separable blur
; hImg : image handle (ImageOutput)
; *ImgSize : pointer to _SIZEF (width,height)
; radius : blur radius (integer)
; ----------------------------
Procedure IntegralSeparableBlur_MT(hImg, *ImgSize._SIZEF, radius)
Protected width = *ImgSize\Width
Protected height = *ImgSize\Height
Protected threads = #THREADS
; set globals
g_width = width
g_height = height
g_radius = radius
g_threads = threads
; prepare drawing buffer once (main thread)
StartDrawing(ImageOutput(hImg))
*g_buffer = DrawingBuffer()
g_pitch = DrawingBufferPitch()
; allocate temp buffer (linear width*height*4)
g_temp = AllocateMemory(width * height * 4)
If g_temp = 0
StopDrawing()
MessageRequester("Error", "AllocateMemory failed")
ProcedureReturn
EndIf
; --- Horizontal pass: create threads ---
Dim threadIDs.l(threads - 1)
Protected i
For i = 0 To threads - 1
threadIDs(i) = CreateThread(@HorizontalPassThread(), i)
Next
; wait for all horizontal threads
For i = 0 To threads - 1
If threadIDs(i) <> 0
WaitThread(threadIDs(i))
EndIf
Next
; --- Vertical pass: create threads ---
For i = 0 To threads - 1
threadIDs(i) = CreateThread(@VerticalPassThread(), i)
Next
; wait for all vertical threads
For i = 0 To threads - 1
If threadIDs(i) <> 0
WaitThread(threadIDs(i))
EndIf
Next
FreeMemory(g_temp)
StopDrawing()
EndProcedure
UsePNGImageDecoder()
Define hImg = LoadImage(#PB_Any, "LargImage.PNG")
If hImg = 0
MessageRequester("Error", "Load file fail.")
End
EndIf
Define ImgSize._SIZEF
ImgSize\Width = ImageWidth(hImg)
ImgSize\Height = ImageHeight(hImg)
OpenWindow(0, 0, 0, 800, 600, "Separable Blur MT", #PB_Window_SystemMenu | #PB_Window_ScreenCentered)
IntegralSeparableBlur_MT(hImg, @ImgSize, 200)
ImageGadget(1, 0, 0, ImgSize\Width, ImgSize\Height, ImageID(hImg))
Repeat
If WaitWindowEvent() = #PB_Event_CloseWindow
End
EndIf
ForEver
