OpenCL Image Processing

Just starting out? Need help? Post your questions and find answers here.
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

OpenCL Image Processing

Post by wilbert »

I've been experimenting with OpenCL for image processing.
Feedback and testing are appreciated since I know very little about OpenCL. :oops: :)

This thread started by Fred was very helpful
http://www.purebasic.fr/english/viewtop ... 12&t=53249

What also has been very helpful is the OpenCL API Quick Reference Card
https://www.khronos.org/files/opencl-qu ... e-card.pdf

For Windows I used the "Intel® SDK for OpenCL™ Applications".
Depending on the CPU / GPU you have, you need to change the import path.

Here's what I have done so far; a simple grayscale conversion ...

Code: Select all

; *** Some OpenCL constants ***

; Device type bits accepted by clGetDeviceIDs (each value is a single bit).
#CL_DEVICE_TYPE_DEFAULT = 1 << 0
#CL_DEVICE_TYPE_CPU     = 1 << 1
#CL_DEVICE_TYPE_GPU     = 1 << 2

; Memory flag bits accepted by clCreateBuffer (each value is a single bit).
#CL_MEM_READ_WRITE      = 1 << 0
#CL_MEM_WRITE_ONLY      = 1 << 1
#CL_MEM_READ_ONLY       = 1 << 2
#CL_MEM_USE_HOST_PTR    = 1 << 3
#CL_MEM_ALLOC_HOST_PTR  = 1 << 4
#CL_MEM_COPY_HOST_PTR   = 1 << 5


; *** Linker settings ***
    
; #OpenCL_Import is the library argument handed to Import / ImportC.
; On Windows it is the path of the Intel SDK import library matching the
; compiler's processor target; on Linux and MacOS it is a linker switch.
CompilerIf #PB_Compiler_OS = #PB_OS_Windows
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    #OpenCL_Import = "C:\Program Files (x86)\Intel\OpenCL SDK\6.1\lib\x64\OpenCL.lib"
  CompilerElse
    #OpenCL_Import = "C:\Program Files (x86)\Intel\OpenCL SDK\6.1\lib\x86\OpenCL.lib"
  CompilerEndIf
CompilerElseIf #PB_Compiler_OS = #PB_OS_Linux
  #OpenCL_Import = "-l OpenCL"
CompilerElseIf #PB_Compiler_OS = #PB_OS_MacOS
  #OpenCL_Import = "-framework OpenCL"
CompilerEndIf


; *** Macros to use Import on Windows and ImportC on OSX and Linux ***

; EndImportCL closes an ImportCL block. Import and ImportC are both
; terminated by EndImport, so this macro does not depend on the OS.
Macro EndImportCL
  EndImport
EndMacro

; ImportCL expands to Import on Windows and to ImportC everywhere else.
CompilerIf #PB_Compiler_OS = #PB_OS_Windows
  Macro ImportCL
    Import
  EndMacro
CompilerElse
  Macro ImportCL
    ImportC
  EndMacro
CompilerEndIf


; *** Import required OpenCL functions ***

; Declarations of the OpenCL entry points used by this code, resolved from
; the library named by #OpenCL_Import.
;  - Parameters written without a type suffix use PureBasic's default integer type.
;  - Parameters marked .q are passed as 64-bit quads.
;  - Parameters marked .p-ascii take a PureBasic string that is converted to
;    ASCII for the duration of the call.
;  - Parameters starting with * are pointers.
ImportCL #OpenCL_Import
  clBuildProgram(program, num_devices, *device_list, options.p-ascii, *pfn_notify, *user_data)
  clCreateBuffer(context, flags.q, size, *host_ptr, *errcode_ret)
  clCreateCommandQueue(context, device, properties.q, *errcode_ret)
  clCreateContext(*properties, num_devices, *devices, *pfn_notify, *user_data, *errcode_ret)
  clCreateKernel(program, kernel_name.p-ascii, *errcode_ret)
  clCreateProgramWithSource(context, count, *strings, *lengths, *errcode_ret)
  clEnqueueNDRangeKernel(command_queue, kernel, work_dim, *global_work_offset, *global_work_size, *local_work_size, num_events_in_wait_list, *event_wait_list, *event)
  clEnqueueReadBuffer(command_queue, buffer, blocking_read, offset, size, *ptr, num_events_in_wait_list, *event_wait_list, *event)
  clEnqueueWriteBuffer(command_queue, buffer, blocking_write, offset, size, *ptr, num_events_in_wait_list, *event_wait_list, *event)
  clFinish(command_queue)
  clGetContextInfo(context, param_name, param_value_size, *param_value, *param_value_size_ret)
  clGetDeviceIDs(platform, device_type.q, num_entries, *devices, *num_devices)
  clGetDeviceInfo(device, param_name, param_value_size, *param_value, *param_value_size_ret)
  clGetKernelWorkGroupInfo(kernel, device, param_name, param_value_size, *param_value, *param_value_size_ret)
  clGetPlatformIDs(num_entries, *platforms, *num_platforms)
  clGetProgramBuildInfo(program, device, param_name, param_value_size, *param_value, *param_value_size_ret)
  clReleaseCommandQueue(command_queue)
  clReleaseContext(context)
  clReleaseKernel(kernel)
  clReleaseMemObject(memobj)
  clReleaseProgram(program)
  clSetKernelArg(kernel, arg_index, arg_size, *arg_value)
EndImportCL


; *** Some procedures to make things easier ***

Procedure GetContextDeviceAndQueue(*Context.Integer, *Device.Integer, *Queue.Integer, PreferredType.q = #CL_DEVICE_TYPE_DEFAULT)
  ; Picks one OpenCL device and creates a context and a command queue for it.
  ;
  ; *Context, *Device, *Queue : receive the created / selected handles.
  ;                             All three are set to 0 first, so the caller
  ;                             does not have to pre-initialise them.
  ; PreferredType             : device type to look for first; when no such
  ;                             device is reported, #CL_DEVICE_TYPE_DEFAULT
  ;                             is tried instead.
  ;
  ; Returns the command queue handle, or 0 on failure. On failure no context
  ; is left allocated (*Context\i and *Queue\i are 0).
  ; Only the first platform reported by clGetPlatformIDs is queried.
  Protected.i Platform

  *Context\i = 0
  *Device\i = 0
  *Queue\i = 0

  clGetPlatformIDs(1, @Platform, #Null)
  If Platform = 0
    ProcedureReturn 0                   ; no OpenCL platform available
  EndIf

  clGetDeviceIDs(Platform, PreferredType, 1, *Device, #Null)
  If *Device\i = 0
    ; Preferred device type not found: fall back to the platform default
    clGetDeviceIDs(Platform, #CL_DEVICE_TYPE_DEFAULT, 1, *Device, #Null)
  EndIf
  If *Device\i = 0
    ProcedureReturn 0                   ; no usable device at all
  EndIf

  *Context\i = clCreateContext(#Null, 1, *Device, #Null, #Null, #Null)
  If *Context\i = 0
    ProcedureReturn 0
  EndIf

  *Queue\i = clCreateCommandQueue(*Context\i, *Device\i, 0, #Null)
  If *Queue\i = 0
    ; Do not leak the context when the queue could not be created
    clReleaseContext(*Context\i)
    *Context\i = 0
  EndIf

  ProcedureReturn *Queue\i
EndProcedure

Procedure CompileProgram(Context, Code.s, Options.s = "")
  ; Creates an OpenCL program from source text and builds it.
  ;
  ; Context : OpenCL context the program is created in.
  ; Code    : kernel source; it is converted to ASCII before being handed
  ;           to OpenCL.
  ; Options : build options passed to clBuildProgram.
  ;
  ; Returns the program handle, or 0 when the program could not be created
  ; or built. When the build fails, the build log of the context's device is
  ; written to the debug output and the program is released.
  Protected.i *Code, CodeLen = Len(Code), Device, *Log, LogSize, Program

  *Code = AllocateMemory(CodeLen + 1, #PB_Memory_NoClear)
  If *Code = 0
    ProcedureReturn 0                   ; out of memory
  EndIf
  PokeS(*Code, Code, -1, #PB_Ascii)

  Program = clCreateProgramWithSource(Context, 1, @*Code, @CodeLen, #Null)
  ; The source text is copied into the program object by OpenCL, so the
  ; temporary ASCII buffer is no longer needed (it was leaked before).
  FreeMemory(*Code)

  ; clBuildProgram returns a non-zero error code on failure
  If Program And clBuildProgram(Program, 0, #Null, Options, #Null, #Null)
    ; $1081 = CL_CONTEXT_DEVICES, $1183 = CL_PROGRAM_BUILD_LOG
    clGetContextInfo(Context, $1081, SizeOf(Integer), @Device, #Null)
    clGetProgramBuildInfo(Program, Device, $1183, 0, #Null, @LogSize)
    If LogSize
      *Log = AllocateMemory(LogSize, #PB_Memory_NoClear)
      If *Log
        clGetProgramBuildInfo(Program, Device, $1183, LogSize, *Log, #Null)
        Debug PeekS(*Log, LogSize, #PB_Ascii)
        FreeMemory(*Log)
      EndIf
    EndIf
    clReleaseProgram(Program) : Program = 0
  EndIf

  ProcedureReturn Program
EndProcedure

Procedure GetLocalWorkSize(Kernel, Device)
  ; Asks OpenCL for work-group info item $11B0 (CL_KERNEL_WORK_GROUP_SIZE)
  ; of the given kernel on the given device and returns the value written
  ; back. The result stays 0 when the query writes nothing.
  Protected GroupSize.i
  clGetKernelWorkGroupInfo(Kernel, Device, $11B0, SizeOf(Integer), @GroupSize, #Null)
  ProcedureReturn GroupSize
EndProcedure

Procedure.i LoadImage32Bit(Image, FileName.s)
  ; Loads an image file and makes sure the resulting image has a depth of
  ; 32 bits, converting it by redrawing onto a new 32-bit image if needed.
  ;
  ; Image    : image number to use, or #PB_Any.
  ; FileName : file to load.
  ;
  ; Returns what LoadImage / CreateImage returned for the final image
  ; (the new image number when Image is #PB_Any), or 0 on failure.
  ; When the conversion fails, no half-converted image is left behind.
  Protected.i Result, Target, Source

  Result = LoadImage(Image, FileName)
  If Result = 0
    ProcedureReturn 0
  EndIf

  ; Target = image number of the image that was just loaded
  If Image = #PB_Any
    Target = Result
  Else
    Target = Image
  EndIf

  If ImageDepth(Target) = 32
    ProcedureReturn Result              ; already in the wanted format
  EndIf

  ; Keep the loaded pixels in a temporary image while the 32-bit one is created
  If Image = #PB_Any
    Source = Result
  Else
    Source = CopyImage(Image, #PB_Any)
    If Source = 0
      ; Without a copy the pixels cannot be redrawn; give up cleanly
      FreeImage(Image)
      ProcedureReturn 0
    EndIf
  EndIf

  Result = CreateImage(Image, ImageWidth(Source), ImageHeight(Source), 32)
  If Result
    If Image = #PB_Any
      Target = Result
    EndIf
    If StartDrawing(ImageOutput(Target))
      DrawImage(ImageID(Source), 0, 0)
      StopDrawing()
    Else
      ; Drawing not possible: do not return an empty image as success
      FreeImage(Target)
      Result = 0
    EndIf
  ElseIf Image <> #PB_Any And IsImage(Image)
    ; Creation failed; do not leave the non-32-bit image under this number
    FreeImage(Image)
  EndIf

  FreeImage(Source)
  ProcedureReturn Result
EndProcedure

Code: Select all

; *** Main code ***

; Grayscale demo: loads test.jpg, converts it to gray on an OpenCL device
; (GPU preferred) and shows the result in a window.

If GetContextDeviceAndQueue(@Context, @Device, @Queue, #CL_DEVICE_TYPE_GPU) = 0
  If Context : clReleaseContext(Context) : EndIf
  MessageRequester("Error", "No OpenCL device found !") : End
EndIf

; Vector components xyzw => rgba (OSX, Linux), bgra (Windows)

CompilerIf #PB_Compiler_OS = #PB_OS_Windows
  Code.s = "__kernel void filter(__global uchar4* px, const uint count)" +
           "{" +
           "  uint i = get_global_id(0);" +
           "  if (i < count){" +
           "    px[i].xyz = sqrt(0.23f * px[i].z * px[i].z + 0.70f * px[i].y * px[i].y + 0.07f * px[i].x * px[i].x);" +
           "  }" +
           "}"
CompilerElse
  Code.s = "__kernel void filter(__global uchar4* px, const uint count)" +
           "{" +
           "  uint i = get_global_id(0);" +
           "  if (i < count){" +
           "    px[i].xyz = sqrt(0.23f * px[i].x * px[i].x + 0.70f * px[i].y * px[i].y + 0.07f * px[i].z * px[i].z);" +
           "  }" +
           "}"
CompilerEndIf

Program = CompileProgram(Context, Code)
If Program
  Kernel = clCreateKernel(Program, "filter", #Null)
EndIf
If Kernel = 0
  ; Release what has been created so far before leaving
  If Program : clReleaseProgram(Program) : EndIf
  clReleaseCommandQueue(Queue)
  clReleaseContext(Context)
  MessageRequester("Error", "Unable to create kernel !") : End
EndIf

LocalWorkSize = GetLocalWorkSize(Kernel, Device)
If LocalWorkSize < 1
  LocalWorkSize = 1                     ; query failed; avoids a division by zero below
EndIf

UseJPEGImageDecoder()
LoadImage32Bit(0, "test.jpg")

If IsImage(0)

  If StartDrawing(ImageOutput(0))

    *Pixels = DrawingBuffer()
    BufferSize = DrawingBufferPitch() * OutputHeight()
    PixelCount.l = BufferSize >> 2      ; 4 bytes per pixel
    ; Global size must be a multiple of the local size; the kernel ignores
    ; the surplus work items through its 'i < count' test
    GlobalWorkSize = LocalWorkSize * ((PixelCount + LocalWorkSize - 1) / LocalWorkSize)

    err.l = 0                           ; OpenCL status codes are 32-bit, 0 = success
    Buffer = 0
    If *Pixels And BufferSize > 0
      Buffer = clCreateBuffer(Context, #CL_MEM_USE_HOST_PTR, BufferSize, *Pixels, #Null)
    EndIf

    If Buffer
      err = clSetKernelArg(Kernel, 0, SizeOf(Integer), @Buffer)
      If err = 0
        err = clSetKernelArg(Kernel, 1, SizeOf(Long), @PixelCount)
      EndIf
      If err = 0
        err = clEnqueueNDRangeKernel(Queue, Kernel, 1, #Null, @GlobalWorkSize, @LocalWorkSize, 0, #Null, #Null)
      EndIf
      If err = 0
        ; The kernel must have finished before the buffer is read back
        ; into the host memory it was created from
        clFinish(Queue)
        err = clEnqueueReadBuffer(Queue, Buffer, #True, 0, BufferSize, *Pixels, 0, #Null, #Null)
      EndIf
      clReleaseMemObject(Buffer)
      If err
        Debug "OpenCL call failed, error code " + Str(err)
      EndIf
    Else
      Debug "Unable to create OpenCL buffer !"
    EndIf

    StopDrawing()
  EndIf

  If OpenWindow(0, 0, 0, 700, 500, "OpenCL Image Processing", #PB_Window_SystemMenu | #PB_Window_ScreenCentered)
    ImageGadget(0, 0, 0, 700, 500, ImageID(0))
    Repeat
      Event = WaitWindowEvent()
    Until Event = #PB_Event_CloseWindow
  EndIf

Else
  ; ImageID(0) must not be used when the image does not exist
  MessageRequester("Error", "Unable to load test.jpg !")
EndIf

clReleaseKernel(Kernel)
clReleaseProgram(Program)
clReleaseCommandQueue(Queue)
clReleaseContext(Context)
Last edited by wilbert on Thu Mar 25, 2021 3:02 pm, edited 6 times in total.
Windows (x64)
Raspberry Pi OS (Arm64)
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: OpenCL Image Processing

Post by wilbert »

Another test, this time with a kernel using x and y coordinates.
The first part of the code (imports and some general procedures) can be copied from the first post.

Code: Select all

; *** Main code ***

; Two-dimensional kernel demo: fills a 512 x 512 image with a pattern that
; is computed from the x and y coordinates of every pixel.

If GetContextDeviceAndQueue(@Context, @Device, @Queue, #CL_DEVICE_TYPE_GPU) = 0
  If Context : clReleaseContext(Context) : EndIf
  MessageRequester("Error", "No OpenCL device found !") : End
EndIf

; Vector components xyzw => rgba (OSX, Linux), bgra (Windows)

CompilerIf #PB_Compiler_OS = #PB_OS_Windows
  Code.s = "__kernel void filter(__global uchar4* px, const uint width, const uint height)" +
           "{" +
           "  uint x = get_global_id(0);" +
           "  if (x < width){" +
           "    uint y = get_global_id(1);" +
           "    uint i = x + (height - y - 1) * width;" +     ; flipped y coordinate
           "    px[i].z = (x ^ y);" +
           "    px[i].y = y;" +
           "    px[i].x = x;" +
           "    px[i].w = 255 * x / width;" +
           "  }" +
           "}"
CompilerElse
  Code.s = "__kernel void filter(__global uchar4* px, const uint width, const uint height)" +
           "{" +
           "  uint x = get_global_id(0);" +
           "  if (x < width){" +
           "    uint y = get_global_id(1);" +
           "    uint i = x + y * width;" +
           "    px[i].x = (x ^ y);" +
           "    px[i].y = y;" +
           "    px[i].z = x;" +
           "    px[i].w = 255 * x / width;" +
           "  }" +
           "}"
CompilerEndIf

Program = CompileProgram(Context, Code)
If Program
  Kernel = clCreateKernel(Program, "filter", #Null)
EndIf
If Kernel = 0
  ; Release what has been created so far before leaving
  If Program : clReleaseProgram(Program) : EndIf
  clReleaseCommandQueue(Queue)
  clReleaseContext(Context)
  MessageRequester("Error", "Unable to create kernel !") : End
EndIf

; GS = global work size, LS = local work size; index 0 = x, index 1 = y
Dim GS.i(1)
Dim LS.i(1)
LS(0) = GetLocalWorkSize(Kernel, Device)
If LS(0) < 1
  LS(0) = 1                             ; query failed; avoids a division by zero below
EndIf
LS(1) = 1

CreateImage(0, 512, 512, 32)

If IsImage(0)

  If StartDrawing(ImageOutput(0))

    *Pixels = DrawingBuffer()
    Width.l = DrawingBufferPitch() >> 2 ; pixels per buffer row (4 bytes per pixel)
    Height.l = OutputHeight()
    ; x size is rounded up to a multiple of the local size; the kernel
    ; ignores the surplus work items through its 'x < width' test
    GS(0) = LS(0) * ((Width + LS(0) - 1) / LS(0))
    GS(1) = Height

    BufferSize = DrawingBufferPitch() * Height

    err.l = 0                           ; OpenCL status codes are 32-bit, 0 = success
    Buffer = 0
    If *Pixels And BufferSize > 0
      Buffer = clCreateBuffer(Context, #CL_MEM_USE_HOST_PTR, BufferSize, *Pixels, #Null)
    EndIf

    If Buffer
      err = clSetKernelArg(Kernel, 0, SizeOf(Integer), @Buffer)
      If err = 0
        err = clSetKernelArg(Kernel, 1, SizeOf(Long), @Width)
      EndIf
      If err = 0
        err = clSetKernelArg(Kernel, 2, SizeOf(Long), @Height)
      EndIf
      If err = 0
        err = clEnqueueNDRangeKernel(Queue, Kernel, 2, #Null, @GS(), @LS(), 0, #Null, #Null)
      EndIf
      If err = 0
        ; The kernel must have finished before the buffer is read back
        ; into the host memory it was created from
        clFinish(Queue)
        err = clEnqueueReadBuffer(Queue, Buffer, #True, 0, BufferSize, *Pixels, 0, #Null, #Null)
      EndIf
      clReleaseMemObject(Buffer)
      If err
        Debug "OpenCL call failed, error code " + Str(err)
      EndIf
    Else
      Debug "Unable to create OpenCL buffer !"
    EndIf

    StopDrawing()
  EndIf

  If OpenWindow(0, 0, 0, 512, 512, "OpenCL Image Processing", #PB_Window_SystemMenu | #PB_Window_ScreenCentered)
    ImageGadget(0, 0, 0, 512, 512, ImageID(0))
    Repeat
      Event = WaitWindowEvent()
    Until Event = #PB_Event_CloseWindow
  EndIf

Else
  ; ImageID(0) must not be used when the image does not exist
  MessageRequester("Error", "Unable to create image !")
EndIf

clReleaseKernel(Kernel)
clReleaseProgram(Program)
clReleaseCommandQueue(Queue)
clReleaseContext(Context)
I don't know which would be best:
a two-dimensional kernel like the one I used here, or a one-dimensional kernel that computes x and y inside the kernel. :?
Windows (x64)
Raspberry Pi OS (Arm64)
Post Reply