Example code.
Running with PureBasic v6.10 (C backend), with optimizations, thread-safe mode and the debugger all enabled:
~43ms per frame when Render_Thread() is on line 50.
Code: Select all
;/ Phil James 08/2023 - Pixel shader on CPU - Optimized.
;/ - Taken from 'An introduction to Shader Art Coding': https://www.youtube.com/watch?v=f4s1h2YETNY
;/ Require every variable to be declared before use.
EnableExplicit
;/ Object numbers for the main window and the OpenGL gadget.
#MyWin_Main = 0 : #MyGad_GL = 0
;/ Colour with four float channels; the visible code only reads and writes R, G and B.
Structure RGBf : R.f : G.f : B.f : A.f : EndStructure
;/ 2D float coordinate used for the per-pixel UV position.
Structure Pointf : X.f : Y.f : EndStructure
Global Dim PaletteLUP.rgbf(255) ;/ lookup table for palette
;/ Resolution presets - exactly one line should be active. #SizeX/#SizeY are the logical image size
;/ and #DrawScale multiplies them to give the window/gadget size.
;#SizeX = 160 : #SizeY = 120 : #DrawScale = 8 ; ~1ms on 5800h
;#SizeX = 320 : #SizeY = 240 : #DrawScale = 4 ; ~2ms
#SizeX = 640 : #SizeY = 480 : #DrawScale = 1 ; ~3ms
;#SizeX = 1280 : #SizeY = 960 : #DrawScale = 1 ; ~14ms
;#SizeX = 1600 : #SizeY = 960 : #DrawScale = 1 ; ~17ms
;/ Derived constants. #Iterations is the inclusive upper bound of the layering loop in
;/ Render_Thread() (0 To #Iterations), so a value of 3 gives four passes.
#SizeXM1 = #SizeX - 1 : #SizeYM1 = #SizeY - 1 : #HalfX = #SizeX / 2 : #HalfY = #SizeY / 2 : #Iterations = 3
;/ Per-thread work item: thread handle, shader time in seconds and the inclusive row range to render.
Structure Thread_Structure
ThreadID.i : Timef.f : Y_Start.l : Y_End.l
EndStructure
;/ Event holds the last window event; ThreadCount is the number of worker threads (one per CPU reported).
Global Event, ThreadCount = CountCPUs(#PB_System_ProcessCPUs)
;/ Exit = quit flag, OnFrame = frames rendered so far, TextureID0 = GL texture name,
;/ AccuTime = accumulated frame time in milliseconds.
Global Exit, OnFrame.f, TextureID0, AccuTime
Global Appname.s = "CPU Pixel shader - Optimized - Phil James 10/2023 ["+Str(#SizeX)+" x "+Str(#SizeY)+"]"
;/ ClampM: clamps variable v in place to the range min..max.
Macro ClampM(v, min, max) : If v < min : v = min : ElseIf v > max : v = max : EndIf : EndMacro
;/ FractM: fractional part of value (value minus its floor). Note that value is expanded twice.
Macro FractM(value) : ((value) - Int(Round(value,#PB_Round_Down))) : EndMacro
;/ MinM / MaxM: store the smaller / larger of the last two arguments in a.
;/ Neither macro is referenced in the visible code.
Macro MinM(a,ia,mn) : If ia < mn : a = ia : Else : a = mn : EndIf : EndMacro
Macro MaxM(a,ia,mx) : If ia > mx : a = ia : Else : a = mx : EndIf : EndMacro
Procedure Palette_CreateLUP()
  ;/ Fills the global PaletteLUP() table (entries 0..255) with a cosine colour gradient.
  ;/ Each channel is 0.5 + 0.5 * Cos(6.28318 * (t + phase)) with t = index / 255
  ;/ and a separate phase offset per channel. The A channel is not written.
  Protected Index.i, T.d
  For Index = 0 To 255
    T = Index / 255.0 ; normalised table position, 0.0 .. 1.0
    With PaletteLUP(Index)
      \R = 0.5 + 0.5 * Cos(6.28318 * (T + 0.263))
      \G = 0.5 + 0.5 * Cos(6.28318 * (T + 0.416))
      \B = 0.5 + 0.5 * Cos(6.28318 * (T + 0.557))
    EndWith
  Next
EndProcedure
;/ One work item per worker thread; the visible code uses indexes 1..ThreadCount (index 0 is left unused).
Global Dim Render_ThreadControl.Thread_Structure(ThreadCount)
;/ Pixel buffer for one quarter of the image (half height x half width), one long per pixel,
;/ indexed as (row, column). It is written by Render_Thread() and uploaded as the GL texture.
Global Dim ImageArray_Main.l((#SizeY / 2)-1, (#SizeX / 2) - 1 )
Procedure Render_Thread(*Thread.Thread_Structure)
  ;/ Thread worker: shades rows *Thread\Y_Start .. *Thread\Y_End (inclusive) of the quarter-size
  ;/ image and stores the packed colours in ImageArray_Main(row, column).
  ;/ *Thread\Timef supplies the animation time in seconds for this frame.
  ;/ Each thread writes only its own rows; PaletteLUP() is only read.
  Protected X.i, Y.i, MyLoop.i, DistL.l
  Protected UV.Pointf, RowV.f, Dist.f, Dist0.f, Falloff.f, Timef.f, Scale.f = 1.5
  Protected FinalColour.RGBf
  Timef = *Thread\Timef ; constant for the whole frame, so read it once
  For Y = *Thread\Y_Start To *Thread\Y_End
    RowV = (Y * 2.0 - #SizeY) / #SizeY ; vertical coordinate is the same for every pixel of the row
    For X = 0 To (#SizeX / 2) - 1
      ;/ Both axes are divided by #SizeY so the pattern keeps its aspect ratio.
      UV\X = (X * 2.0 - #SizeX) / #SizeY : UV\Y = RowV
      FinalColour\R = 0 : FinalColour\G = 0 : FinalColour\B = 0
      Dist0 = Sqr((UV\X * UV\X) + (UV\Y * UV\Y)) ; distance of the untiled coordinate from the centre
      Falloff = Exp(-Dist0)
      For MyLoop = 0 To #Iterations
        ;/ Tile the coordinate space: scale, keep the fractional part, re-centre on 0.
        UV\X = FractM(UV\X * Scale) - 0.5 : UV\Y = FractM(UV\Y * Scale) - 0.5
        ;/ Palette index (0..255) from centre distance, layer number and time.
        DistL = FractM(Dist0 + MyLoop * 0.4 + Timef * 0.4) * 255.0
        ;/ Ring intensity; the +0.00001 keeps the division away from zero.
        Dist = Pow(0.01 / (Abs(Sin((Sqr((UV\X * UV\X) + (UV\Y * UV\Y)) * Falloff) * 8.0 + Timef) * 0.125) + 0.00001), 1.2)
        ClampM(Dist, 0, 1)
        ClampM(DistL, 0, 255)
        FinalColour\R + PaletteLUP(DistL)\R * Dist : FinalColour\G + PaletteLUP(DistL)\G * Dist : FinalColour\B + PaletteLUP(DistL)\B * Dist
      Next
      ClampM(FinalColour\R, 0, 1) : ClampM(FinalColour\G, 0, 1) : ClampM(FinalColour\B, 0, 1)
      ImageArray_Main(Y, X) = RGB(FinalColour\R * 255.0, FinalColour\G * 255.0, FinalColour\B * 255.0)
    Next
  Next
EndProcedure
Procedure Render_Main()
  ;/ Renders one frame: runs one Render_Thread() per entry 1..ThreadCount of Render_ThreadControl(),
  ;/ waits for all of them, uploads ImageArray_Main() to the bound texture and draws it as four
  ;/ mirrored quads. Adds the frame duration (ms) to AccuTime and increments OnFrame.
  Static StartTime.q, Started.i
  Protected MyLoop.i, FrameStart.q, ShaderTime.f
  ;/ Keep the millisecond counter in a quad: a 32-bit float cannot hold large counter values
  ;/ exactly, which would quantise both the frame timing and the animation clock.
  FrameStart = ElapsedMilliseconds()
  If Started = #False
    StartTime = FrameStart
    Started = #True
  EndIf
  ;/ Animation time in seconds, measured from the first rendered frame so the value stays small.
  ShaderTime = (FrameStart - StartTime) / 1000.0
  For MyLoop = 1 To ThreadCount ; create the threads
    Render_ThreadControl(MyLoop)\Timef = ShaderTime
    Render_ThreadControl(MyLoop)\ThreadID = CreateThread(@Render_Thread(), @Render_ThreadControl(MyLoop))
    If Render_ThreadControl(MyLoop)\ThreadID = 0
      ;/ Thread creation failed: render this band on the calling thread instead of dropping it.
      Render_Thread(@Render_ThreadControl(MyLoop))
    EndIf
  Next
  For MyLoop = 1 To ThreadCount ; wait for thread finish.
    If Render_ThreadControl(MyLoop)\ThreadID
      WaitThread(Render_ThreadControl(MyLoop)\ThreadID)
    EndIf
  Next
  glClearColor_(1.0,0.0,0.0,1.0) : glClear_(#GL_COLOR_BUFFER_BIT)
  glTexSubImage2D_(#GL_TEXTURE_2D, 0, 0, 0, #SizeX/2, #SizeY/2, #GL_RGBA, #GL_UNSIGNED_BYTE, @ImageArray_Main()) ;/ update texture from array
  ;/ render as four quads: the quarter image is drawn top-left and mirrored into the other three quadrants
  glBegin_(#GL_QUADS)
  glTexCoord2f_(0,0) : glVertex2f_(0,0) : glTexCoord2f_(1,0) : glVertex2f_(0.5,0) : glTexCoord2f_(1,1) : glVertex2f_(0.5,0.5) : glTexCoord2f_(0,1) : glVertex2f_(0,0.5)
  glTexCoord2f_(0,1) : glVertex2f_(0,0.5) : glTexCoord2f_(1,1) : glVertex2f_(0.5,0.5) : glTexCoord2f_(1,0) : glVertex2f_(0.5,1.0) : glTexCoord2f_(0,0) : glVertex2f_(0,1.0)
  glTexCoord2f_(1,1) : glVertex2f_(0.5,0.5) : glTexCoord2f_(0,1) : glVertex2f_(1.0,0.5) : glTexCoord2f_(0,0) : glVertex2f_(1.0,1.0) : glTexCoord2f_(1,0) : glVertex2f_(0.5,1.0)
  glTexCoord2f_(1,0) : glVertex2f_(0.5,0) : glTexCoord2f_(0,0) : glVertex2f_(1.0,0) : glTexCoord2f_(0,1) : glVertex2f_(1.0,0.5) : glTexCoord2f_(1,1) : glVertex2f_(0.5,0.5)
  glEnd_()
  SetGadgetAttribute(#MyGad_GL,#PB_OpenGL_FlipBuffers,#True)
  AccuTime + (ElapsedMilliseconds() - FrameStart) : OnFrame + 1
EndProcedure
Procedure Init_Main()
  ;/ Opens the window and OpenGL gadget, builds the palette table, assigns each worker thread
  ;/ (entries 1..ThreadCount of Render_ThreadControl()) its inclusive row range of the quarter-size
  ;/ image, sets up the GL projection and texture, and starts a 1000 ms window timer.
  Protected MyLoop.i, Rows.i, StrideY.i, Y.i = 0
  Rows = #SizeY / 2 ; number of rows in ImageArray_Main()
  ;/ Rows per thread, rounded up. This must be based on the row COUNT (not the last row index),
  ;/ otherwise the final row is never assigned when ThreadCount divides Rows - 1 exactly (e.g. one CPU).
  StrideY = (Rows + ThreadCount - 1) / ThreadCount
  OpenWindow(#MyWin_Main,0,0,#SizeX * #DrawScale,#SizeY * #DrawScale,"",#PB_Window_ScreenCentered|#PB_Window_SystemMenu)
  OpenGLGadget(#MyGad_GL,0,0,#SizeX * #DrawScale,#SizeY * #DrawScale,#PB_OpenGL_NoFlipSynchronization)
  Palette_CreateLUP()
  For MyLoop = 1 To ThreadCount
    Render_ThreadControl(MyLoop)\Y_Start = Y : Y + StrideY
    Render_ThreadControl(MyLoop)\Y_End = Y - 1
    ;/ Clamp the last band(s) to the final row; a band that starts past the end becomes empty.
    If Render_ThreadControl(MyLoop)\Y_End > Rows - 1 : Render_ThreadControl(MyLoop)\Y_End = Rows - 1 : EndIf
  Next
  ;/ Orthographic 0..1 projection with the Y axis pointing down.
  glMatrixMode_(#GL_PROJECTION) : glLoadIdentity_() : glOrtho_(0,1,1,0,0,1)
  glGenTextures_(1, @TextureID0) : glBindTexture_(#GL_TEXTURE_2D, TextureID0) : glEnable_(#GL_TEXTURE_2D)
  glTexParameteri_(#GL_TEXTURE_2D, #GL_TEXTURE_MAG_FILTER, #GL_NEAREST)
  glTexParameteri_(#GL_TEXTURE_2D, #GL_TEXTURE_MIN_FILTER, #GL_NEAREST)
  ;/ Allocate the texture at quarter-image size; Render_Main() refreshes its contents every frame.
  glTexImage2D_(#GL_TEXTURE_2D, 0, #GL_RGB, #SizeX/2, #SizeY/2, 0, #GL_RGBA, #GL_UNSIGNED_BYTE, @ImageArray_Main());
  AddWindowTimer(#MyWin_Main,0,1000) ; timer 0 fires once per second
EndProcedure
;/ Program entry: initialise, then alternate between draining window events and rendering a frame
;/ until the window is closed.
Init_Main()
Repeat
  ;/ Process every pending event before drawing the next frame.
  Repeat
    Event = WindowEvent()
    Select Event
      Case #PB_Event_CloseWindow
        Exit = #True
      Case #PB_Event_Timer
        ;/ Show the average frame time (accumulated ms / frames rendered) in the title bar.
        SetWindowTitle(#MyWin_Main, Appname + " - " + StrF(AccuTime / OnFrame, 2) + "ms")
    EndSelect
  Until Event = 0
  Render_Main()
Until Exit = #True
;/ Release the GL texture created in Init_Main().
glDeleteTextures_(1, @TextureID0)