- Récursion retirée.
- 8 scènes ajoutées.
- meilleurs infos à l'image et en console.
Hello,
Explorant la méthode de rendu en path tracing j'ai porté smallpt en PureBasic, à titre d'expérience.
C'est vraiment l'un des plus basiques path tracer que l'on peut trouver mais ça marche et - bien que plus lent qu'en C/C++ - les performances restent respectables et encore le code est loin d'être optimisé.
La version originale utilise OpenMP pour le multithreading, celle-ci est 'manuellement' multithreadée et utilise tous les cores disponibles.
Il y a un problème par contre, vu que smallpt utilise la récursion pour sa fonction 'radiance', avec la génération d'une erreur 'Stack Overflow' au bout d'un moment, alors j'ai limité la récursion à 5 dans la section roulette russe pour la terminaison du rayon. A part cela la version PureBasic a toutes les features de la version originale.
Je n'ai pas essayé de porter la fonction erand48(Xi) et j'utilise un générateur de nombres aléatoires (très) basique.
Certaines améliorations sont disponibles sur le site de Kevin Beason que je compte ajouter ultérieurement (sans recursion, sampling de lumière explicite, autres scenes, ..). Plus tard j'essaierai également de créer une version OpenCL.
A propos des performances, Kevin a publié quelques statistiques avec sa config de l'époque:
~5m pour 200 spp (render(50) dans le code plus bas) avec un 2.4 GHz Intel Core 2 Quad CPU utilisant 4 threads.
Si quelqu'un a encore une config similaire, je serai curieux de connaitre les temps de rendu pour comparaison.
Sur mon i7 x980 3.2 GHz j'obtiens ~1m43s avec 12 threads.
N'oubliez pas de compiler le code avec le switch "Create threadsafe executable" activé!
Code : Tout sélectionner
EnableExplicit
OpenConsole("purept")
; ---[ 3d Vector ]------------------------------------------------------------
Structure v3d
x.d
y.d
z.d
EndStructure
Macro v3Null( v_ )
v_\x = 0.0
v_\y = 0.0
v_\z = 0.0
EndMacro
Macro v3Clamp( v_ )
v_\x = clamp(v_\x)
v_\y = clamp(v_\y)
v_\z = clamp(v_\z)
EndMacro
Macro v3Norm( v_, n_ )
n_ = Sqr( v_\x*v_\x + v_\y*v_\y + v_\z*v_\z )
If n_ <> 0.0
n_ = 1.0/n_
v_\x*n_
v_\y*n_
v_\z*n_
EndIf
EndMacro
Macro v3Dot( v1_, v2_ )
( v1_\x * v2_\x + v1_\y * v2_\y + v1_\z * v2_\z )
EndMacro
Macro v3Copy( v_, v1_ )
v_\x = v1_\x
v_\y = v1_\y
v_\z = v1_\z
EndMacro
Macro v3Add( v_, v1_, v2_ )
v_\x = v1_\x + v2_\x
v_\y = v1_\y + v2_\y
v_\z = v1_\z + v2_\z
EndMacro
Macro v3AddScaledInPlace( v_, v1_, s1_ )
v_\x + v1_\x * s1_
v_\y + v1_\y * s1_
v_\z + v1_\z * s1_
EndMacro
Macro v3Sub( v_, v1_, v2_ )
v_\x = v1_\x - v2_\x
v_\y = v1_\y - v2_\y
v_\z = v1_\z - v2_\z
EndMacro
Macro v3Mul( v_, v1_, v2_ )
v_\x = v1_\x * v2_\x
v_\y = v1_\y * v2_\y
v_\z = v1_\z * v2_\z
EndMacro
Macro v3MulInPlace( v_, v1_ )
v_\x * v1_\x
v_\y * v1_\y
v_\z * v1_\z
EndMacro
Macro v3Scale( v_, v1_, s1_ )
v_\x = v1_\x * s1_
v_\y = v1_\y * s1_
v_\z = v1_\z * s1_
EndMacro
Macro v3ScaleInPlace( v_, s_ )
v_\x * s_
v_\y * s_
v_\z * s_
EndMacro
Macro v3Cross( v_, v1_, v2_ )
v_\x = v1_\y * v2_\z - v1_\z * v2_\y
v_\y = v1_\z * v2_\x - v1_\x * v2_\z
v_\z = v1_\x * v2_\y - v1_\y * v2_\x
EndMacro
Macro v3Neg( v_, v1_ )
v_\x = -v1_\x
v_\y = -v1_\y
v_\z = -v1_\z
EndMacro
; ---[ 3d Ray ]---------------------------------------------------------------
Structure r3d
o.v3d
d.v3d
EndStructure
; ---[ Material ]-------------------------------------------------------------
Enumeration
#DIFF
#SPEC
#REFR
EndEnumeration
; ---[ Sphere ]---------------------------------------------------------------
Structure s3d
rad.d ; radius
p.v3d ; position
e.v3d ; emission
c.v3d ; color
m.i ; material: #DIFF/#SPEC/#REFR
EndStructure
Procedure.d s3dIntersect( *sphere.s3d, *ray.r3d )
; returns distance, 0 if no hit
; Solve t^2*d.d + 2*t*(o-p).d + (o-p).(o-p)-R^2 = 0
; Vec op = p - r.o
Protected op.v3d
v3Sub( op, *sphere\p, *ray\o )
; double t, eps=1e-4, b=op.dot(r.d), det=b*b-op.dot(op)+rad*rad;
Protected t.d
Protected eps.d = 1.0e-4
Protected b.d = v3Dot( op, *ray\d )
Protected det.d = b*b - v3Dot( op, op ) + ( *sphere\rad * *sphere\rad )
; if (det<0) return 0; else det=sqrt(det);
If det < 0.0
ProcedureReturn 0.0
Else
det = Sqr(det)
EndIf
; return (t=b-det)>eps ? t : ((t=b+det)>eps ? t : 0);
t = b - det
If t <= eps
t = b + det
If t <= eps
ProcedureReturn 0.0
EndIf
EndIf
ProcedureReturn t
EndProcedure
; ---[ Scene ]----------------------------------------------------------------
Macro Sphere( idx_, rad_, posx_, posy_, posz_, emir_, emig_, emib_, colr_, colg_, colb_, mat_ )
scene(idx_)\rad = rad_
scene(idx_)\p\x = posx_
scene(idx_)\p\y = posy_
scene(idx_)\p\z = posz_
scene(idx_)\e\x = emir_
scene(idx_)\e\y = emig_
scene(idx_)\e\z = emib_
scene(idx_)\c\x = colr_
scene(idx_)\c\y = colg_
scene(idx_)\c\z = colb_
scene(idx_)\m = mat_
EndMacro
Global Dim scene.s3d(8)
Sphere( 0, 1.0e5, 1.0e5+1.0, 40.8, 81.6, 0.0, 0.0, 0.0, 0.75, 0.25, 0.25, #DIFF ) ; Left
Sphere( 1, 1.0e5, -1.0e5+99.0, 40.8, 81.6, 0.0, 0.0, 0.0, 0.25, 0.25, 0.75, #DIFF ) ; Rght
Sphere( 2, 1.0e5, 50.0, 40.8, 1.0e5, 0.0, 0.0, 0.0, 0.75, 0.75, 0.75, #DIFF ) ; Back
Sphere( 3, 1.0e5, 50.0, 40.8, -1.0e5+170.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, #DIFF ) ; Frnt
Sphere( 4, 1.0e5, 50.0, 1.0e5, 81.6, 0.0, 0.0, 0.0, 0.75, 0.75, 0.75, #DIFF ) ; Botm
Sphere( 5, 1.0e5, 50.0, -1.0e5+81.6, 81.6, 0.0, 0.0, 0.0, 0.75, 0.75, 0.75, #DIFF ) ; Top
Sphere( 6, 16.5, 27.0, 16.5, 47.0, 0.0, 0.0, 0.0, 0.999, 0.999, 0.999, #SPEC ) ; Mirr
Sphere( 7, 16.5, 73.0, 16.5, 78.0, 0.0, 0.0, 0.0, 0.999, 0.999, 0.999, #REFR ) ; Glas
Sphere( 8, 600.0, 50.0, 681.6-0.27, 81.6, 12.0,12.0,12.0, 0.0, 0.0, 0.0, #DIFF ) ; Lite
; ---[ Helpers ]--------------------------------------------------------------
Procedure.d clamp( x.d )
If x < 0.0
ProcedureReturn 0.0
ElseIf x > 1.0
ProcedureReturn 1.0
EndIf
ProcedureReturn x
EndProcedure
Procedure.l toInt( x.d )
; linear to ~sRGB (not accurate), yet the spheres colors are not
; linearized prior rendering.
ProcedureReturn Int( Pow(clamp(x),1.0/2.2)*255+0.5 )
EndProcedure
Procedure.b intersect( *ray.r3d, *t.Double, *id.Long )
Protected d.d, inf.d = 1e20
Protected i.i = ArraySize(scene())
*t\d = inf
While i >= 0
d = s3dIntersect( scene(i), *ray )
If d And d<*t\d
*t\d = d
*id\l = i
EndIf
i - 1
Wend
ProcedureReturn Bool( *t\d < inf )
EndProcedure
Procedure.d rand()
ProcedureReturn Random(16384,0)/16384.0
EndProcedure
; ---[ Radiance ]-------------------------------------------------------------
Procedure radiance( *ret.v3d, *r.r3d, depth.l, *Xi.Unicode )
Protected ntemp.d
Protected tt_.d
Protected r2_.r3d
Protected t .d = 0.0 ; distance to intersection
Protected id.l = 0 ; id of intersected object
; if miss, return black
If Not intersect( *r, @t, @id )
v3Null(*ret)
ProcedureReturn
EndIf
Protected *obj.s3d = scene(id) ; the hit object
; x = r.o + r.d*t
Protected x.v3d
x\x = *r\o\x + *r\d\x*t
x\y = *r\o\y + *r\d\y*t
x\z = *r\o\z + *r\d\z*t
; n=(x-obj.p).norm()
Protected n.v3d
v3Sub( n, x, *obj\p )
v3Norm( n, ntemp )
; nl=n.dot(r.d)<0?n:n*-1
Protected nl.v3d
If v3Dot( n, *r\d ) < 0.0
v3Copy( nl, n )
Else
v3Neg( nl, n )
EndIf
; f=obj.c
Protected f.v3d
v3Copy( f, *obj\c )
; max refl
Protected p.d
If ( f\x > f\y ) And ( f\x > f\z )
p = f\x
ElseIf f\y > f\z
p = f\y
Else
p = f\z
EndIf
; Russian Roulette
depth + 1
If depth > 5
;If rand() < p ; erand48(Xi) < p
;f\x/p
;f\y/p
;f\z/p
;Else
v3Copy( *ret, *obj\e )
ProcedureReturn
;EndIf
EndIf
Protected rr.v3d
Protected rt.r3d
Protected rd.v3d
; Ideal DIFFUSE reflection
If *obj\m = #DIFF
Protected r1 .d = 2.0*#PI*rand()
Protected r2 .d = rand()
Protected r2s .d = Sqr(r2)
Protected w.v3d
v3Copy( w, nl )
Protected ut.v3d,u.v3d
If Abs( w\x ) > 0.1
ut\x = 0.0
ut\y = 1.0
ut\z = 0.0
Else
ut\x = 1.0
ut\y = 0.0
ut\z = 0.0
EndIf
v3Cross(u,ut,w)
v3Norm(u,ntemp)
Protected v.v3d
v3Cross(v,w,u)
Protected d.v3d
d\x = u\x*Cos(r1)*r2s + v\x*Sin(r1)*r2s + w\x*Sqr(1.0-r2)
d\y = u\y*Cos(r1)*r2s + v\y*Sin(r1)*r2s + w\y*Sqr(1.0-r2)
d\z = u\z*Cos(r1)*r2s + v\z*Sin(r1)*r2s + w\z*Sqr(1.0-r2)
v3Norm(d,ntemp)
; return obj.e + f.mult( radiance( Ray(x,d),depth,Xi) );
v3Copy( rt\o, x )
v3Copy( rt\d, d )
radiance( @rr, @rt, depth, 0 )
v3MulInPlace( f, rr )
v3Add( *ret, *obj\e, f )
ProcedureReturn
; Ideal SPECULAR reflection
ElseIf *obj\m = #SPEC
; return obj.e + f.mult(radiance( Ray( x, r.d-n*2*n.dot(r.d) ), depth, Xi ) );
v3Copy( rt\o, x )
v3Copy(rd,n)
tt_ = 2.0*v3Dot(n,*r\d)
v3ScaleInPlace( rd, tt_ )
v3Sub( rt\d, *r\d, rd )
radiance( @rr, @rt, depth, 0 )
v3MulInPlace( f, rr )
v3Add( *ret, *obj\e, f )
ProcedureReturn
EndIf
; Ideal dielectric REFRACTION
; Ray reflRay( x, r.d - n*2*n.dot(r.d) );
Protected reflRay.r3d
v3Copy( reflRay\o, x )
v3Copy(rd,n)
tt_ = 2.0*v3Dot(n,*r\d)
v3ScaleInPlace( rd, tt_ )
v3Sub( reflRay\d, *r\d, rd )
; Ray from outside going in?
; bool into = n.dot(nl)>0;
Protected into.b = Bool( v3Dot(n,nl) > 0.0 )
; double nc=1, nt=1.5, nnt=into?nc/nt:nt/nc, ddn=r.d.dot(nl), cos2t;
Protected nc.d=1.0,nt.d=1.5,nnt.d,ddn.d,cos2t.d
If into
nnt = nc/nt
Else
nnt = nt/nc
EndIf
ddn = v3Dot(*r\d,nl)
cos2t = 1.0 - nnt*nnt*( 1.0 - ddn*ddn )
If cos2t < 0 ; Total internal reflection
; return obj.e + f.mult(radiance(reflRay,depth,Xi));
radiance( @rr, @reflRay, depth, 0 )
v3MulInPlace( f, rr )
v3Add( *ret, *obj\e, f )
ProcedureReturn
EndIf
; Vec tdir = ( r.d*nnt - n*( (into?1:-1)*(ddn*nnt+sqrt(cos2t)) ) ).norm();
Protected intod.d
If into
intod = 1.0
Else
intod = -1.0
EndIf
Protected t_.d,n_.v3d,r_.v3d
t_ = intod*(ddn*nnt+Sqr(cos2t))
v3Copy(n_,n)
v3ScaleInPlace( n_, t_ )
v3Copy(r_,*r\d)
v3ScaleInPlace( r_, nnt )
Protected tdir.v3d
v3Sub(tdir,r_,n_)
v3Norm(tdir,ntemp)
; double a=nt-nc, b=nt+nc, R0=a*a/(b*b), c = 1-(into?-ddn:tdir.dot(n));
Protected a.d=nt-nc,b.d=nt+nc,R0.d=a*a/(b*b),c.d
If into
c = 1.0 + ddn
Else
c = 1.0 - v3Dot(tdir,n)
EndIf
; double Re=R0+(1-R0)*c*c*c*c*c,Tr=1-Re,P=.25+.5*Re,RP=Re/P,TP=Tr/(1-P);
Protected Re.d=R0+(1.0-R0)*c*c*c*c*c
Protected Tr.d=1.0-Re
Protected P_.d=0.25+0.5*Re
Protected RP.d=Re/P_
Protected TP.d=Tr/(1.0-P_)
; return obj.e + f.mult(depth>2 ? ( erand48(Xi)<P ? // Russian roulette
; radiance(reflRay,depth,Xi)*RP : radiance(Ray(x,tdir),depth,Xi)*TP ) :
; radiance(reflRay,depth,Xi)*Re+radiance(Ray(x,tdir),depth,Xi)*Tr);
If depth > 2
If rand() < P_
; radiance(reflRay,depth,Xi)*RP
radiance( @rr, @reflRay, depth, 0 )
v3ScaleInPlace( rr, RP )
Else
; radiance(Ray(x,tdir),depth,Xi)*TP )
v3Copy(r2_\o,x)
v3Copy(r2_\d,tdir)
radiance( @rr, @r2_, depth, 0 )
v3ScaleInPlace( rr, TP )
EndIf
Else
Protected rr1.v3d
Protected rr2.v3d
; radiance(reflRay,depth,Xi)*Re + radiance(Ray(x,tdir),depth,Xi)*Tr)
radiance( @rr1, @reflRay, depth, 0 )
v3ScaleInPlace( rr1, Re )
v3Copy(r2_\o,x)
v3Copy(r2_\d,tdir)
radiance( @rr2, @r2_, depth, 0 )
v3ScaleInPlace( rr1, Tr )
v3Add(rr,rr1,rr2)
EndIf
v3Mul( f, f, rr )
v3Add( *ret, *obj\e, f )
ProcedureReturn
EndProcedure
; ---[ Render ]---------------------------------------------------------------
; ...[ Rendering Context ]....................................................
Structure rContext
h .l
w .l
row_s .l
row_e .l
col_s .l
col_e .l
samps .l
tid .l ; thread id
isamps .d
cx .v3d
cy .v3d
cam .r3d
Array c.v3d(0) ; pixels
EndStructure
; ...[ Rendering Synchronization ]............................................
Global rSem = CreateSemaphore()
; ...[ Rendering Thread ].....................................................
Procedure rThread( *ctx.rContext )
; Local Variables
Protected ntemp.d,r.v3d
Protected row_s.l = *ctx\row_s
Protected row_e.l = *ctx\row_e
Protected tid .l = *ctx\tid
SignalSemaphore(rSem)
PrintN( "Starting Thread (tid:"+Str(tid)+") ["+Str(row_s)+":"+Str(row_e)+"]" )
; Loop over image rows
Protected y.l
For y = row_s To row_e
PrintN( "Rendering ("+Str(*ctx\samps*4)+" spp) "+Str( 100.0*(y-row_s)/(row_e-row_s) )+"% (tid:"+Str(tid)+") ["+Str(row_s)+":"+Str(row_e)+"] " )
; Loop over image columns
Protected x.l
For x = *ctx\col_s To *ctx\col_e
; Pixel Index
Protected i.l = ( *ctx\h - y - 1 )* *ctx\w + x
; 2x2 subpixel rows
Protected sy.l
For sy = 0 To 1
; 2x2 subpixel cols
Protected sx.l
For sx = 0 To 1
; samples
v3Null(r)
Protected s.l
For s=0 To *ctx\samps-1
; double r1 = 2*erand48(Xi), dx=r1<1 ? sqrt(r1)-1: 1-sqrt(2-r1);
Protected r1.d,dx.d
r1 = 2.0*rand()
If r1 < 1.0
dx = Sqr(r1)-1.0
Else
dx = 1.0-Sqr(2.0-r1)
EndIf
; double r2 = 2*erand48(Xi), dy=r2<1 ? sqrt(r2)-1: 1-sqrt(2-r2);
Protected r2.d,dy.d
r2 = 2.0*rand()
If r2 < 1.0
dy = Sqr(r2)-1.0
Else
dy = 1.0-Sqr(2.0-r2)
EndIf
; Vec d = cx*( ( (sx+.5 + dx)/2 + x)/w - .5) + cy*( ( (sy+.5 + dy)/2 + y)/h - .5) + cam.d;
Protected d.v3d
Protected ccx.d = ( ( sx + 0.5 + dx )/2.0 + x )/*ctx\w - 0.5
Protected ccy.d = ( ( sy + 0.5 + dy )/2.0 + y )/*ctx\h - 0.5
d\x = *ctx\cx\x*ccx + *ctx\cy\x*ccy + *ctx\cam\d\x
d\y = *ctx\cx\y*ccx + *ctx\cy\y*ccy + *ctx\cam\d\y
d\z = *ctx\cx\z*ccx + *ctx\cy\z*ccy + *ctx\cam\d\z
; r = r + radiance(Ray(cam.o+d*140,d.norm()),0,Xi)*(1./samps);
; Camera rays are pushed ^^^^^ forward to start in interior
Protected rr.v3d,ray.r3d
v3Null(rr)
v3Copy(ray\d,d)
v3Norm(ray\d,ntemp)
v3ScaleInPlace(d,138.0)
v3Add(ray\o,*ctx\cam\o,d)
radiance(@rr,@ray,0,0)
v3AddScaledInPlace(r,rr,*ctx\isamps)
Next
v3Clamp(r)
v3AddScaledInPlace(*ctx\c(i),r,0.25)
Next ; For sx = 0 To 1
Next ; For sy = 0 To 1
Next ; For x = *ctx\col_s To *ctx\col_e
Next ; For y = row_s To row_e
EndProcedure
; ...[ Rendering Main ].......................................................
Procedure render( samps.l )
Protected ntemp.d
; Context
Protected ctx.rContext
; Topology
ctx\w = 1024
ctx\h = 768
; Samples
ctx\samps = samps
ctx\isamps = 1.0/samps
; Cam pos
ctx\cam\o\x = 50.0
ctx\cam\o\y = 52.0
ctx\cam\o\z = 295.6
; Cam dir
ctx\cam\d\x = 0.0
ctx\cam\d\y = -0.042612
ctx\cam\d\z = -1.0
v3Norm(ctx\cam\d,ntemp)
; Pixels
ReDim ctx\c(ctx\w*ctx\h-1)
; Deltas
Protected cx.v3d,cy.v3d,r.v3d
ctx\cx\x = ctx\w*0.5135/ctx\h
ctx\cx\y = 0.0
ctx\cx\z = 0.0
v3Cross(ctx\cy,ctx\cx,ctx\cam\d)
v3Norm(ctx\cy,ntemp)
v3Scale(ctx\cy,ctx\cy,0.5135)
; Columns
ctx\col_s = 0
ctx\col_e = ctx\w-1
; Prepare For n Thread(s)
Dim aThread.l(0)
Protected ri.l,rThreadCount = CountCPUs(#PB_System_ProcessCPUs)
ReDim aThread(rThreadCount-1)
; Generate Threads
Protected row_step .l = ctx\h / rThreadCount
Protected row_last_step .l = ctx\h - row_step*(rThreadCount-1)
Protected row_s .l = 0
Protected row_e .l = row_step
Protected riStop .l = ArraySize(aThread())
For ri=0 To riStop
ctx\row_s = row_s
ctx\row_e = row_e - 1
ctx\tid = ri
aThread(ri) = CreateThread(@rThread(),@ctx)
WaitSemaphore(rSem)
row_s + row_step
If ri = riStop - 1
row_e + row_last_step
Else
row_e + row_step
EndIf
Next
; Wait For Rendering End
Protected Ta.d = ElapsedMilliseconds()
For ri=0 To riStop
WaitThread(aThread(ri))
Next
Protected Tb.d = ElapsedMilliseconds()
; Save Image
CreateImage(0,ctx\w,ctx\h)
StartDrawing(ImageOutput(0))
Box(0,0,ctx\w,ctx\h,RGB($32,$32,$32))
Protected x.l,y.l,i.l = 0
For y=0 To ctx\h-1
For x=0 To ctx\w-1
Plot(x,y,RGB(toInt(ctx\c(i)\x),toInt(ctx\c(i)\y),toInt(ctx\c(i)\z)))
i + 1
Next
Next
DrawText( 0, 0, "Render Time : "+ FormatDate( "%hhh%iim%sss", AddDate(0,#PB_Date_Second,(Tb-Ta)/1000.0) ) )
StopDrawing()
PrintN( "Saving purept.bmp.." )
SaveImage(0,"purept.bmp")
PrintN( "Done." )
Input()
EndProcedure
;render( 10 ) ; 40 spp (~21s)
render( 25 ) ; 100 spp (~59s)
;render( 50 ) ; 200 spp (~1m43s)
;render( 250 ) ; 1000 spp (~9m43s)
;render( 1250 ) ; 5k spp (~50m04s)
;render( 6250 ) ; 25k spp (not tested yet but several hours)
CloseConsole()
Guy.