accurate gamma function
Posted: Mon Aug 31, 2009 5:19 pm
accurate version using 80-bit floats
PB only version - less accurate
Code: Select all
Procedure.d gamma(x.d)
;Lanczos approximation for the complex plane
;calculated using vpa digits(256)
;the best set of coeffs was selected from
;a solution space of g=0 to 32 with 1 to 32 terms
;these coeffs really give superb performance
;of 15 sig. digits for 0<=real(z)<=171
;coeffs should sum to about g*g/2+23/24
oldcw.l
oldcw2.l
newcw2.l
newcw.l = 4927
is_neg_frac.l=0
;***************************************
; gamma(x + 1) = (x + y + 1/2)^(x + 1/2)*exp(-(x + y + 1/2))
; *sqrt(2*pi)*(c0 + c1/(x + 1) + c2/(x + 2) +...+ cn/(x + n))
;
; for more information visit http://home.att.net/~numericana/answer/info/godfrey.htm
; If ( x < 0 )
; x=-x
; If Frac(x)
; result = #PI*x/(gamma(x)*Sin(#PI*x))
; Else
; ;result=Infinity
; EndIf
; Else
;***************************************
!fld qword [p.v_x]
!fstcw word [p.v_oldcw]
!fldcw [p.v_newcw] ;this guarantees extended precision
!fld st0
!fstp tword [x_tmp]
!ftst
!fnstsw ax
!sahf
!jae gamma_xispositive
!fchs
!fstcw word [p.v_oldcw2]
!mov ax,[p.v_oldcw2]
!Or ax,110000000000b
!mov [p.v_newcw2],ax
!fldcw word [p.v_newcw2]
!fld st0 ;qword [p.v_x]
!fld st0
!frndint
!fsubp st1,st0
!fldcw word [p.v_oldcw2]
;at this point st0 = frac(x), st1 = x
!ftst ;test fac against zero
!fstp st0 ;discard frac
!fnstsw ax
!sahf
!je gamma_xisnegint
!mov [p.v_is_neg_frac],dword 1
!fld st0
!fstp tword [x_tmp]
!fldpi
!fmul st0,st1
!fld st0
!fsin
!fdivp st1,st0
!fxch
!jmp gamma_xispositive
!gamma_xisnegint:
!fldz
!fyl2x ;!fdivp st1,st0
!jmp gamma_exit
!gamma_xispositive:
;***************************************
!fld tword [x_half]
!faddp st1,st0 ;st0 = x+.5
!fld st0 ;st0 = x+.5, st1 = x+.5
!fld tword [g] ;st0=g, st1 = x+.5, st2 = x+.5
!fadd st0,st1 ;st0=x+.5+g = w, st1=x+.5
!fld st0 ;st0=w, st1=w, st2=x+.5, st3 = x+.5
!fxch st3 ;st0=x+.5, st1=w, st2=w, st3=w
!fxch st1
!fyl2x ;st0 = st0 ^ st1
!fld st0 ;"
!frndint ;"
!fsub st1, st0 ;"
!fld1 ;"
!fscale ;"
!fxch ;"
!fxch st2 ;"
!f2xm1 ;"
!fld1 ;"
!faddp st1, st0 ;"
!fmulp st1, st0 ;"
!fstp st1 ;clean up fpu stack, result in st0
!fstp st1
!fxch
!fchs ;st0 = - st0 = -(.5 + x + g)
!fld tword [const_e] ;st0 = exp(st0)
!fyl2x ;"
!fld st0 ;"
!frndint ;"
!fsub st1, st0 ;"
!fld1 ;"
!fscale ;"
!fxch ;"
!fxch st2 ;"
!f2xm1 ;"
!fld1 ;"
!faddp st1, st0 ;"
!fmulp st1, st0 ;"
!fstp st1 ;clean up fpu stack, result in st0
!fmulp st1,st0 ;st0 = (.5 + x + g) ^ (x + .5) * exp(-(.5 + x + g))
!fld tword [sq2pi] ;sqrt(2*pi)
!fmulp st1,st0 ;st0 = (.5 + x + g) ^ (x + .5) * exp(-(.5 + x + g)) * sqrt(2*pi)
!fldz
!fld tword [x_tmp] ;load x again
!fiadd dword [cg] ;st0 = x + 14
!mov eax,140
!mov ecx,14
!gammaloop:
!fld tword [eax+gammacoeff] ; 0.36899182659531622704e-5
!fdiv st0,st1 ;st0 = 0.36899182659531622704e-5 / (x + 14)
!faddp st2,st0
!fld1
!fsubp st1,st0 ;st0 "x" -= 1
!sub eax,10
!loop gammaloop
!fstp st0
!fld tword [gammacoeff] ; 0.99999999999999709182
!faddp st1,st0
!fmulp st1,st0
!cmp [p.v_is_neg_frac],dword 0
!je gamma_exit
!fdivp st1,st0
!gamma_exit:
!fldcw [p.v_oldcw] ; restore fpu control word
ProcedureReturn
DataSection
!section '.data' code readable writeable
! gammacoeff:
! dt 0.99999999999999709182
! dt 57.156235665862923517
! dt -59.597960355475491248
! dt 14.136097974741747174
! dt -0.49191381609762019978
! dt 0.33994649984811888699e-4
! dt 0.46523628927048575665e-4
! dt -0.98374475304879564677e-4
! dt 0.15808870322491248884e-3
! dt -0.21026444172410488319e-3
! dt 0.21743961811521264320e-3
! dt -0.16431810653676389022e-3
! dt 0.84418223983852743293e-4
! dt -0.26190838401581408670e-4
! dt 0.36899182659531622704e-5
;*-----------------------------------------------------*
! sq2pi: dt 2.50662827463100050241577 ; Sqrt(2*Pi)
! g: dt 4.7421875 ; 607/128
;*-----------------------------------------------------*
! const_e: dt 2.71828182845904523536029
! x_half: dt 0.5
! x_tmp: dt 0.0
! cg: dd 14
EndDataSection
EndProcedure
Procedure.d double_gamma(x.d)
oldcw.l
newcw.l = 4927
oldcw2.l
newcw2.l
is_neg_frac.l=0
!fld qword [p.v_x]
!fstcw word [p.v_oldcw]
!fldcw [p.v_newcw] ;this guarantees extended precision
!fld st0
!fld tword [double_gamma_half]
!fmulp st1,st0
!fstp tword [double_gamma_x_temp]
;******************************
!fldpi
!fmul st0,st1
!fcos
!fld tword [double_gamma_quarter]
!fmulp st1,st0
!fld tword [double_gamma_quarter]
!fsubrp st1,st0
!fild dword [double_gamma_two]
!fldpi
!fdivp st1,st0
;******************************
;Pow
!fyl2x
!fld st0
!frndint
!fsub st1, st0
!fld1
!fscale
!fxch
!fxch st2
!f2xm1
!fld1
!faddp st1, st0
!fmulp st1, st0
!fstp st1
;EndPow
;******************************
!fld st1
!fld tword [double_gamma_half]
!fmulp st1,st0
!fild dword [double_gamma_two]
;Pow
!fyl2x
!fld st0
!frndint
!fsub st1, st0
!fld1
!fscale
!fxch
!fxch st2
!f2xm1
!fld1
!faddp st1, st0
!fmulp st1, st0
!fstp st1
;EndPow
!fmulp st1,st0
!fstp st1
!fld tword [double_gamma_x_temp]
;******************************
!ftst
!fnstsw ax
!sahf
!jae double_gamma_xispositive
!fchs
!fstcw word [p.v_oldcw2]
!mov ax,[p.v_oldcw2]
!Or ax,110000000000b
!mov [p.v_newcw2],ax
!fldcw word [p.v_newcw2]
!fld st0 ;qword [p.v_x]
!fld st0
!frndint
!fsubp st1,st0
!fldcw word [p.v_oldcw2]
;at this point st0 = frac(x), st1 = x
!ftst ;test fac against zero
!fstp st0 ;discard frac
!fnstsw ax
!sahf
!je double_gamma_xisnegint
!mov [p.v_is_neg_frac],dword 1
!fld st0
!fstp tword [double_gamma_x_temp]
!fldpi
!fmul st0,st1
!fld st0
!fsin
!fdivp st1,st0
!fxch
!jmp double_gamma_xispositive
!double_gamma_xisnegint:
!fldz
!fyl2x ;!fdivp st1,st0
!jmp double_gamma_exit
!double_gamma_xispositive:
;***************************************
!fld tword [double_gamma_half]
!faddp st1,st0 ;st0 = x+.5
!fld st0 ;st0 = x+.5, st1 = x+.5
!fld tword [double_gamma_g] ;st0=g, st1 = x+.5, st2 = x+.5
!fadd st0,st1 ;st0=x+.5+g = w, st1=x+.5
!fld st0 ;st0=w, st1=w, st2=x+.5, st3 = x+.5
!fxch st3 ;st0=x+.5, st1=w, st2=w, st3=w
!fxch st1
!fyl2x ;st0 = st0 ^ st1
!fld st0 ;"
!frndint ;"
!fsub st1, st0 ;"
!fld1 ;"
!fscale ;"
!fxch ;"
!fxch st2 ;"
!f2xm1 ;"
!fld1 ;"
!faddp st1, st0 ;"
!fmulp st1, st0 ;"
!fstp st1 ;clean up fpu stack, result in st0
!fstp st1
!fxch
!fchs ;st0 = - st0 = -(.5 + x + g)
!fld tword [double_gamma_const_e] ;st0 = exp(st0)
!fyl2x ;"
!fld st0 ;"
!frndint ;"
!fsub st1, st0 ;"
!fld1 ;"
!fscale ;"
!fxch ;"
!fxch st2 ;"
!f2xm1 ;"
!fld1 ;"
!faddp st1, st0 ;"
!fmulp st1, st0 ;"
!fstp st1 ;clean up fpu stack, result in st0
!fmulp st1,st0 ;st0 = (.5 + x + g) ^ (x + .5) * exp(-(.5 + x + g))
!fld tword [double_gamma_sq2pi] ;sqrt(2*pi)
!fmulp st1,st0 ;st0 = (.5 + x + g) ^ (x + .5) * exp(-(.5 + x + g)) * sqrt(2*pi)
!fldz
!fld tword [double_gamma_x_temp] ;load x again
!fiadd dword [double_gamma_cg] ;st0 = x + 14
!mov eax,140
!mov ecx,14
!double_gammaloop:
!fld tword [eax+double_gamma_coeff] ; 0.36899182659531622704e-5
!fdiv st0,st1 ;st0 = 0.36899182659531622704e-5 / (x + 14)
!faddp st2,st0
!fld1
!fsubp st1,st0 ;st0 "x" -= 1
!sub eax,10
!loop double_gammaloop
!fstp st0
!fld tword [double_gamma_coeff] ; 0.99999999999999709182
!faddp st1,st0
!fmulp st1,st0
!cmp [p.v_is_neg_frac],dword 0
!je double_gamma_exit
!fdivp st1,st0
!double_gamma_exit:
!fmulp st1,st0
!fldcw [p.v_oldcw] ; restore fpu control word
ProcedureReturn
!fldcw [p.v_oldcw] ; restore fpu control word
DataSection
!section '.data' code readable writeable
! double_gamma_coeff:
! dt 0.99999999999999709182
! dt 57.156235665862923517
! dt -59.597960355475491248
! dt 14.136097974741747174
! dt -0.49191381609762019978
! dt 0.33994649984811888699e-4
! dt 0.46523628927048575665e-4
! dt -0.98374475304879564677e-4
! dt 0.15808870322491248884e-3
! dt -0.21026444172410488319e-3
! dt 0.21743961811521264320e-3
! dt -0.16431810653676389022e-3
! dt 0.84418223983852743293e-4
! dt -0.26190838401581408670e-4
! dt 0.36899182659531622704e-5
;*-----------------------------------------------------*
! double_gamma_sq2pi: dt 2.50662827463100050241577 ; Sqrt(2*Pi)
! double_gamma_g: dt 4.7421875 ; 607/128
;*-----------------------------------------------------*
! double_gamma_const_e: dt 2.71828182845904523536029
! double_gamma_half: dt 0.5
! double_gamma_quarter: dt 0.25
! double_gamma_x_temp: dt 0.0
! double_gamma_cg: dd 14
! double_gamma_two: dd 2
EndDataSection
EndProcedure
Define.i i
Define.d x
OpenConsole()
For i=1 To 20
x=i;-5.5
PrintN(StrD(x)+" , "+StrD(gamma(x),16))
Next
Input()
CloseConsole()
End
Code: Select all
Procedure.d Exp(x.d)
e.d = 2.718281828459045
EnableASM
lea eax,[p.v_x]
fld qword [eax]
fld qword [p.v_e]
fyl2x
fld st0
frndint
fsub st1, st0
fld1
fscale
fxch
fxch st2
f2xm1
fld1
faddp st1, st0
fmulp st1, st0
fstp st1
DisableASM
ProcedureReturn
EndProcedure
Procedure.d gamma(x.d)
Define.d pi = 3.14159265358979324
Define.d sq2pi=2.50662827463100050 ;sqrt(2Pi)
Define.d g=607/128 ; best results when 4<=g<=5
Define.d t, w, gam
Define.i i, cg = 14
;Lanczos approximation for the complex plane
;calculated using vpa digits(256)
;the best set of coeffs was selected from
;a solution space of g=0 to 32 with 1 to 32 terms
;these coeffs really give superb performance
;of 15 sig. digits for 0<=real(z)<=171
;coeffs should sum to about g*g/2+23/24
Dim c.d(14)
c( 0)= 0.9999999999999970918
c( 1)= 57.156235665862923517
c( 2)=-59.597960355475491248
c( 3)= 14.136097974741747174
c( 4)= -4.9191381609762019978/10
c( 5)= 3.3994649984811888699/100000
c( 6)= 4.6523628927048575665/100000
c( 7)= -9.8374475304879564677/100000
c( 8)= 1.5808870322491248884/10000
c( 9)= -2.1026444172410488319/10000
c(10)= 2.1743961811521264320/10000
c(11)= -1.6431810653676389022/10000
c(12)= 8.4418223983852743293/100000
c(13)= -2.6190838401581408670/100000
c(14)= 3.6899182659531622704/1000000
;CopyMemory(?carray, @c(), 120)
If ( x < 0 )
x=-x
gam = pi*x/(gamma(x)*Sin(pi*x))
Else
t = c(0)
For i=1 To cg
t = t + c(i)/(x+i)
Next
w = x + g + 0.5
gam=Pow(w,(x+0.5))*exp(-w)*sq2pi*t
EndIf
ProcedureReturn gam
EndProcedure
Define.i i
Define.d x
OpenConsole()
For i=1 To 20
x=i
PrintN(StrD(x)+" , "+StrD(gamma(x),16))
Next
Input()
CloseConsole()
End