Feedback and testing are appreciated, since I know very little about OpenCL.


This thread, started by Fred, was very helpful:
http://www.purebasic.fr/english/viewtop ... 12&t=53249
The OpenCL API Quick Reference Card has also been very helpful:
https://www.khronos.org/files/opencl-qu ... e-card.pdf
For Windows I used the "Intel® SDK for OpenCL™ Applications".
Depending on the CPU / GPU you have (and therefore which vendor's OpenCL SDK is installed), you need to change the import path in the linker settings below.
Here's what I have done so far; a simple grayscale conversion ...
Code: Select all
; *** Some OpenCL constants ***
; Device type selectors. Each one is a single bit, so they are written as
; shifts to make the bit position obvious. They are passed as the
; device_type argument of clGetDeviceIDs().
#CL_DEVICE_TYPE_DEFAULT = 1 << 0 ; = 1
#CL_DEVICE_TYPE_CPU     = 1 << 1 ; = 2
#CL_DEVICE_TYPE_GPU     = 1 << 2 ; = 4
; Memory object flags. Each one is a single bit, written as a shift to make
; the bit position obvious. They are passed as the flags argument of
; clCreateBuffer().
#CL_MEM_READ_WRITE     = 1 << 0 ; = 1
#CL_MEM_WRITE_ONLY     = 1 << 1 ; = 2
#CL_MEM_READ_ONLY      = 1 << 2 ; = 4
#CL_MEM_USE_HOST_PTR   = 1 << 3 ; = 8
#CL_MEM_ALLOC_HOST_PTR = 1 << 4 ; = 16
#CL_MEM_COPY_HOST_PTR  = 1 << 5 ; = 32
; *** Linker settings ***
; #OpenCL_Import is what gets handed to Import/ImportC further down:
;   Windows     : full path of the SDK's OpenCL.lib (x64 or x86 flavour)
;   Linux / OSX : a linker switch
; The Windows paths below point at the Intel OpenCL SDK; adjust them if a
; different SDK (or SDK version) is installed.
CompilerIf #PB_Compiler_OS = #PB_OS_Windows
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    #OpenCL_Import = "C:\Program Files (x86)\Intel\OpenCL SDK\6.1\lib\x64\OpenCL.lib"
  CompilerElse
    #OpenCL_Import = "C:\Program Files (x86)\Intel\OpenCL SDK\6.1\lib\x86\OpenCL.lib"
  CompilerEndIf
CompilerElseIf #PB_Compiler_OS = #PB_OS_Linux
  #OpenCL_Import = "-l OpenCL"
CompilerElseIf #PB_Compiler_OS = #PB_OS_MacOS
  #OpenCL_Import = "-framework OpenCL"
CompilerEndIf
; *** ImportCL / EndImportCL: Import on Windows, ImportC on OSX and Linux ***
; Only the opening keyword differs between platforms, so only ImportCL is
; defined conditionally. EndImportCL expands to EndImport everywhere.
CompilerIf #PB_Compiler_OS = #PB_OS_Windows
  Macro ImportCL : Import : EndMacro
CompilerElse
  Macro ImportCL : ImportC : EndMacro
CompilerEndIf
Macro EndImportCL : EndImport : EndMacro
; *** Import required OpenCL functions ***
; Declarations are grouped by purpose. Parameters typed .q are 64-bit values
; on every platform; the .p-ascii pseudotype makes PureBasic pass the string
; as ASCII.
ImportCL #OpenCL_Import
  ; --- Platform / device discovery ---
  clGetPlatformIDs(num_entries, *platforms, *num_platforms)
  clGetDeviceIDs(platform, device_type.q, num_entries, *devices, *num_devices)
  clGetDeviceInfo(device, param_name, param_value_size, *param_value, *param_value_size_ret)

  ; --- Context and command queue ---
  clCreateContext(*properties, num_devices, *devices, *pfn_notify, *user_data, *errcode_ret)
  clGetContextInfo(context, param_name, param_value_size, *param_value, *param_value_size_ret)
  clCreateCommandQueue(context, device, properties.q, *errcode_ret)
  clFinish(command_queue)

  ; --- Program and kernel ---
  clCreateProgramWithSource(context, count, *strings, *lengths, *errcode_ret)
  clBuildProgram(program, num_devices, *device_list, options.p-ascii, *pfn_notify, *user_data)
  clGetProgramBuildInfo(program, device, param_name, param_value_size, *param_value, *param_value_size_ret)
  clCreateKernel(program, kernel_name.p-ascii, *errcode_ret)
  clGetKernelWorkGroupInfo(kernel, device, param_name, param_value_size, *param_value, *param_value_size_ret)
  clSetKernelArg(kernel, arg_index, arg_size, *arg_value)

  ; --- Buffers and enqueued commands ---
  clCreateBuffer(context, flags.q, size, *host_ptr, *errcode_ret)
  clEnqueueWriteBuffer(command_queue, buffer, blocking_write, offset, size, *ptr, num_events_in_wait_list, *event_wait_list, *event)
  clEnqueueReadBuffer(command_queue, buffer, blocking_read, offset, size, *ptr, num_events_in_wait_list, *event_wait_list, *event)
  clEnqueueNDRangeKernel(command_queue, kernel, work_dim, *global_work_offset, *global_work_size, *local_work_size, num_events_in_wait_list, *event_wait_list, *event)

  ; --- Release ---
  clReleaseMemObject(memobj)
  clReleaseKernel(kernel)
  clReleaseProgram(program)
  clReleaseCommandQueue(command_queue)
  clReleaseContext(context)
EndImportCL
; *** Some procedures to make things easier ***

; Picks the first OpenCL platform, selects one device on it and creates a
; context plus a command queue for that device.
;
;   *Context, *Device, *Queue : receive the created handles (output only)
;   PreferredType             : device type tried first; if no such device is
;                               found, #CL_DEVICE_TYPE_DEFAULT is used instead
;
; Returns the command queue handle, or 0 on failure. On failure *Context and
; *Queue are 0 and nothing needs to be released by the caller.
Procedure GetContextDeviceAndQueue(*Context.Integer, *Device.Integer, *Queue.Integer, PreferredType.q = #CL_DEVICE_TYPE_DEFAULT)
  Protected.i Platform

  ; Start from a known state. The checks below test the returned handles, so
  ; stale values passed in by the caller must not be mistaken for results.
  *Context\i = 0
  *Device\i  = 0
  *Queue\i   = 0

  clGetPlatformIDs(1, @Platform, #Null)
  If Platform = 0
    ProcedureReturn 0 ; no OpenCL platform installed
  EndIf

  clGetDeviceIDs(Platform, PreferredType, 1, *Device, #Null)
  If *Device\i = 0
    ; Preferred device type not available: fall back to the default device
    clGetDeviceIDs(Platform, #CL_DEVICE_TYPE_DEFAULT, 1, *Device, #Null)
  EndIf
  If *Device\i = 0
    ProcedureReturn 0 ; no usable device on this platform
  EndIf

  *Context\i = clCreateContext(#Null, 1, *Device, #Null, #Null, #Null)
  If *Context\i = 0
    ProcedureReturn 0
  EndIf

  *Queue\i = clCreateCommandQueue(*Context\i, *Device\i, 0, #Null)
  If *Queue\i = 0
    ; Do not leak the context when the queue could not be created
    clReleaseContext(*Context\i)
    *Context\i = 0
  EndIf

  ProcedureReturn *Queue\i
EndProcedure
; Creates an OpenCL program from source text and builds it.
;
;   Context : context the program is created in
;   Code    : OpenCL C source (converted to ASCII before it is handed over)
;   Options : build options passed to clBuildProgram()
;
; Returns the program handle, or 0 on failure. When the build fails, the build
; log of the context's first device is written to the debug output and the
; program object is released.
Procedure CompileProgram(Context, Code.s, Options.s = "")
  Protected.i *Code, CodeLen = Len(Code), Device, *Log, LogSize, Program

  ; OpenCL expects 8-bit source text: make an ASCII copy (+1 for the null)
  *Code = AllocateMemory(CodeLen + 1, #PB_Memory_NoClear)
  If *Code = 0
    ProcedureReturn 0
  EndIf
  PokeS(*Code, Code, -1, #PB_Ascii)

  Program = clCreateProgramWithSource(Context, 1, @*Code, @CodeLen, #Null)

  ; The source text is copied into the program object, so the temporary ASCII
  ; buffer can be freed right away (it used to leak on every call).
  FreeMemory(*Code)

  ; clBuildProgram() returns a non-zero error code on failure
  If Program And clBuildProgram(Program, 0, #Null, Options, #Null, #Null)
    clGetContextInfo(Context, $1081, SizeOf(Integer), @Device, #Null)    ; $1081 = CL_CONTEXT_DEVICES
    clGetProgramBuildInfo(Program, Device, $1183, 0, #Null, @LogSize)    ; $1183 = CL_PROGRAM_BUILD_LOG (size query)
    If LogSize
      *Log = AllocateMemory(LogSize, #PB_Memory_NoClear)
      If *Log
        clGetProgramBuildInfo(Program, Device, $1183, LogSize, *Log, #Null)
        Debug PeekS(*Log, LogSize, #PB_Ascii)
        FreeMemory(*Log)
      EndIf
    EndIf
    clReleaseProgram(Program) : Program = 0
  EndIf

  ProcedureReturn Program
EndProcedure
; Asks OpenCL for the work-group size of Kernel on Device and returns it.
; The result stays 0 if the query does not write a value.
Procedure GetLocalWorkSize(Kernel, Device)
  Protected.i GroupSize
  Protected.i QueryId = $11B0 ; CL_KERNEL_WORK_GROUP_SIZE
  clGetKernelWorkGroupInfo(Kernel, Device, QueryId, SizeOf(Integer), @GroupSize, #Null)
  ProcedureReturn GroupSize
EndProcedure
; Loads an image file and makes sure the resulting image is 32 bits deep.
;
;   Image    : image number to use, or #PB_Any to let PureBasic pick one
;   FileName : file to load (the matching image decoder must be enabled)
;
; Returns what LoadImage() / CreateImage() return for the final image
; (non-zero on success; the new image number when Image = #PB_Any), or 0 if
; loading or the conversion to 32 bit failed.
Procedure.i LoadImage32Bit(Image, FileName.s)
  Protected.i Result, Target, Source

  Result = LoadImage(Image, FileName)
  If Result = 0
    ProcedureReturn 0
  EndIf

  ; Number under which the loaded image can be addressed
  If Image = #PB_Any
    Target = Result
  Else
    Target = Image
  EndIf

  If ImageDepth(Target) = 32
    ProcedureReturn Result ; already in the wanted format
  EndIf

  ; Keep the decoded pixels in 'Source' while the 32-bit image is created
  If Image = #PB_Any
    Source = Target ; the loaded image itself; it is replaced by a new one
  Else
    ; CreateImage(Image, ...) below replaces the loaded image, so copy it first
    Source = CopyImage(Image, #PB_Any)
    If Source = 0
      ; Without this check the code below would read and free image #0
      FreeImage(Image)
      ProcedureReturn 0
    EndIf
  EndIf

  Result = CreateImage(Image, ImageWidth(Source), ImageHeight(Source), 32)
  If Result
    If Image = #PB_Any
      Target = Result
    EndIf
    If StartDrawing(ImageOutput(Target))
      DrawImage(ImageID(Source), 0, 0)
      StopDrawing()
    Else
      ; Could not copy the pixels: do not hand back an empty image
      FreeImage(Target)
      Result = 0
    EndIf
  EndIf

  FreeImage(Source)
  ProcedureReturn Result
EndProcedure
Code: Select all
; *** Main code ***
; 1. Set up an OpenCL context and queue (GPU preferred).
; 2. Build the grayscale kernel.
; 3. Load test.jpg as a 32-bit image and run the kernel on its pixel buffer.
; 4. Show the result in a window, then release all OpenCL objects.

If GetContextDeviceAndQueue(@Context, @Device, @Queue, #CL_DEVICE_TYPE_GPU) = 0
  MessageRequester("Error", "No OpenCL device found !") : End
EndIf

; Vector components xyzw => rgba (OSX, Linux), bgra (Windows)
CompilerIf #PB_Compiler_OS = #PB_OS_Windows
  Code.s = "__kernel void filter(__global uchar4* px, const uint count)" +
           "{" +
           " uint i = get_global_id(0);" +
           " if (i < count){" +
           " px[i].xyz = sqrt(0.23f * px[i].z * px[i].z + 0.70f * px[i].y * px[i].y + 0.07f * px[i].x * px[i].x);" +
           " }" +
           "}"
CompilerElse
  Code.s = "__kernel void filter(__global uchar4* px, const uint count)" +
           "{" +
           " uint i = get_global_id(0);" +
           " if (i < count){" +
           " px[i].xyz = sqrt(0.23f * px[i].x * px[i].x + 0.70f * px[i].y * px[i].y + 0.07f * px[i].z * px[i].z);" +
           " }" +
           "}"
CompilerEndIf

Program = CompileProgram(Context, Code)
If Program
  Kernel = clCreateKernel(Program, "filter", #Null)
EndIf
If Kernel = 0
  MessageRequester("Error", "Unable to create kernel !")
  ; Release what has been created so far before quitting
  If Program : clReleaseProgram(Program) : EndIf
  clReleaseCommandQueue(Queue)
  clReleaseContext(Context)
  End
EndIf

LocalWorkSize = GetLocalWorkSize(Kernel, Device)
If LocalWorkSize < 1
  ; Query failed: a work-group size of 1 is always valid and avoids the
  ; division by zero in the GlobalWorkSize calculation below
  LocalWorkSize = 1
EndIf

UseJPEGImageDecoder()
LoadImage32Bit(0, "test.jpg")

If IsImage(0)
  If StartDrawing(ImageOutput(0))
    ; DrawingBuffer() has to be called before DrawingBufferPitch()
    *Pixels = DrawingBuffer()
    If *Pixels
      BufferSize = DrawingBufferPitch() * OutputHeight()
      PixelCount.l = BufferSize >> 2 ; 4 bytes per pixel
      ; Round the global size up to a multiple of the local size; the kernel
      ; ignores the surplus work items (i >= count)
      GlobalWorkSize = LocalWorkSize * ((PixelCount + LocalWorkSize - 1) / LocalWorkSize)
      Buffer = clCreateBuffer(Context, #CL_MEM_USE_HOST_PTR, BufferSize, *Pixels, #Null)
      If Buffer
        clSetKernelArg(Kernel, 0, SizeOf(Integer), @Buffer)
        clSetKernelArg(Kernel, 1, SizeOf(Long), @PixelCount)
        ; err is a Long because the OpenCL functions return a 32-bit cl_int
        err.l = clEnqueueNDRangeKernel(Queue, Kernel, 1, #Null, @GlobalWorkSize, @LocalWorkSize, 0, #Null, #Null)
        If err = 0
          clFinish(Queue)
          err = clEnqueueReadBuffer(Queue, Buffer, #True, 0, BufferSize, *Pixels, 0, #Null, #Null)
          If err
            Debug "clEnqueueReadBuffer failed: " + Str(err)
          EndIf
        Else
          Debug "clEnqueueNDRangeKernel failed: " + Str(err)
        EndIf
        clReleaseMemObject(Buffer)
      Else
        Debug "clCreateBuffer failed"
      EndIf
    EndIf
    StopDrawing()
  EndIf

  If OpenWindow(0, 0, 0, 700, 500, "OpenCL Image Processing", #PB_Window_SystemMenu | #PB_Window_ScreenCentered)
    ImageGadget(0, 0, 0, 700, 500, ImageID(0))
    Repeat
      Event = WaitWindowEvent()
    Until Event = #PB_Event_CloseWindow
  EndIf
Else
  ; Previously ImageGadget() was called with ImageID(0) even though image 0
  ; did not exist
  MessageRequester("Error", "Unable to load test.jpg !")
EndIf

clReleaseKernel(Kernel)
clReleaseProgram(Program)
clReleaseCommandQueue(Queue)
clReleaseContext(Context)