Page 1 of 1

accurate gamma function

Posted: Mon Aug 31, 2009 5:19 pm
by jack
accurate version using 80-bit floats

Code: Select all

Procedure.d gamma(x.d)

    ;Lanczos approximation for the complex plane
    ;calculated using vpa digits(256)
    ;the best set of coeffs was selected from
    ;a solution space of g=0 to 32 with 1 to 32 terms
    ;these coeffs really give superb performance
    ;of 15 sig. digits for 0<=real(z)<=171
    ;coeffs should sum to about g*g/2+23/24
    oldcw.l
    oldcw2.l
    newcw2.l
    newcw.l = 4927
    is_neg_frac.l=0
  ;***************************************
  
  ; gamma(x + 1) = (x + y + 1/2)^(x + 1/2)*exp(-(x + y + 1/2))
  ; *sqrt(2*pi)*(c0 + c1/(x + 1) + c2/(x + 2) +...+ cn/(x + n))
  ; 
  ; for more information visit http://home.att.net/~numericana/answer/info/godfrey.htm
;     If ( x < 0 )
;         x=-x
;         If Frac(x)
;             result = #PI*x/(gamma(x)*Sin(#PI*x))
;         Else
;             ;result=Infinity
;         EndIf
;     Else
  ;***************************************
      !fld qword [p.v_x]
      !fstcw word [p.v_oldcw] 
      !fldcw [p.v_newcw]          ;this guarantees extended precision
      !fld st0
      !fstp tword [x_tmp]
      !ftst
      !fnstsw ax
      !sahf
      !jae gamma_xispositive
      !fchs
      !fstcw word [p.v_oldcw2] 
      !mov ax,[p.v_oldcw2] 
      !Or ax,110000000000b 
      !mov [p.v_newcw2],ax
      !fldcw word [p.v_newcw2] 
      !fld st0 ;qword [p.v_x]
      !fld st0
      !frndint  
      !fsubp st1,st0
      !fldcw word [p.v_oldcw2]
                                    ;at this point st0 = frac(x), st1 = x
      !ftst                         ;test fac against zero
      !fstp st0                     ;discard frac
      !fnstsw ax
      !sahf
      !je gamma_xisnegint
      !mov [p.v_is_neg_frac],dword 1
      !fld st0
      !fstp tword [x_tmp]
      !fldpi
      !fmul st0,st1
      !fld st0
      !fsin
      !fdivp st1,st0
      !fxch
      !jmp gamma_xispositive
      !gamma_xisnegint:
      !fldz     
      !fyl2x                        ;!fdivp st1,st0
      !jmp gamma_exit
      !gamma_xispositive:
  ;***************************************  
      !fld tword [x_half]
      !faddp st1,st0              ;st0 = x+.5
      !fld st0                    ;st0 = x+.5, st1 = x+.5
      !fld tword [g]              ;st0=g, st1 = x+.5, st2 = x+.5
      !fadd st0,st1               ;st0=x+.5+g = w, st1=x+.5
      !fld st0                    ;st0=w, st1=w, st2=x+.5, st3 = x+.5
      !fxch st3                   ;st0=x+.5, st1=w, st2=w, st3=w
      !fxch st1
      !fyl2x                      ;st0 = st0 ^ st1
      !fld st0                    ;"
      !frndint                    ;"
      !fsub st1, st0              ;"
      !fld1                       ;"
      !fscale                     ;"
      !fxch                       ;"
      !fxch st2                   ;"
      !f2xm1                      ;"
      !fld1                       ;"
      !faddp st1, st0             ;"
      !fmulp st1, st0             ;"
      !fstp st1                   ;clean up fpu stack, result in st0
      !fstp st1
      !fxch
      !fchs                       ;st0 = - st0 = -(.5 + x + g)
      !fld tword [const_e]        ;st0 = exp(st0)
      !fyl2x                      ;"
      !fld st0                    ;"
      !frndint                    ;"
      !fsub st1, st0              ;"
      !fld1                       ;"
      !fscale                     ;"
      !fxch                       ;"
      !fxch st2                   ;"
      !f2xm1                      ;"
      !fld1                       ;"
      !faddp st1, st0             ;"
      !fmulp st1, st0             ;"
      !fstp st1                   ;clean up fpu stack, result in st0
      !fmulp st1,st0              ;st0 = (.5 + x + g) ^ (x + .5) * exp(-(.5 + x + g))
      !fld tword [sq2pi]          ;sqrt(2*pi)
      !fmulp st1,st0              ;st0 = (.5 + x + g) ^ (x + .5) * exp(-(.5 + x + g)) * sqrt(2*pi)
      !fldz
      !fld tword [x_tmp]            ;load x again
      !fiadd dword [cg]           ;st0 = x + 14

      !mov eax,140
      !mov ecx,14
      !gammaloop:
      !fld tword [eax+gammacoeff] ; 0.36899182659531622704e-5
      !fdiv st0,st1               ;st0 = 0.36899182659531622704e-5 / (x + 14)
      !faddp st2,st0
      !fld1
      !fsubp st1,st0              ;st0 "x" -= 1
      !sub eax,10
      !loop gammaloop

      !fstp st0
      !fld tword [gammacoeff]     ; 0.99999999999999709182
      !faddp  st1,st0
      !fmulp st1,st0
      !cmp [p.v_is_neg_frac],dword 0
      !je gamma_exit
      !fdivp st1,st0
      !gamma_exit:
      !fldcw [p.v_oldcw]          ; restore fpu control word

  ProcedureReturn
  
DataSection
!section '.data' code readable writeable
! gammacoeff:     
! dt           0.99999999999999709182
! dt          57.156235665862923517
! dt         -59.597960355475491248
! dt          14.136097974741747174
! dt          -0.49191381609762019978
! dt            0.33994649984811888699e-4
! dt            0.46523628927048575665e-4
! dt           -0.98374475304879564677e-4
! dt            0.15808870322491248884e-3
! dt           -0.21026444172410488319e-3
! dt            0.21743961811521264320e-3
! dt           -0.16431810653676389022e-3
! dt            0.84418223983852743293e-4
! dt           -0.26190838401581408670e-4
! dt            0.36899182659531622704e-5
;*-----------------------------------------------------*
! sq2pi: dt     2.50662827463100050241577  ; Sqrt(2*Pi) 
! g: dt         4.7421875                  ; 607/128
;*-----------------------------------------------------*
! const_e: dt   2.71828182845904523536029
! x_half:  dt   0.5
! x_tmp:   dt  0.0
! cg:      dd   14
EndDataSection
EndProcedure

Procedure.d double_gamma(x.d)
      oldcw.l
      newcw.l = 4927
      oldcw2.l
      newcw2.l
      is_neg_frac.l=0
      
      !fld qword [p.v_x]
      !fstcw word [p.v_oldcw] 
      !fldcw [p.v_newcw]          ;this guarantees extended precision
      !fld st0
      !fld tword [double_gamma_half]
      !fmulp st1,st0
      !fstp tword [double_gamma_x_temp]
      ;******************************
      !fldpi
      !fmul st0,st1
      !fcos
      !fld tword [double_gamma_quarter]
      !fmulp st1,st0
      !fld tword [double_gamma_quarter]
      !fsubrp st1,st0
      !fild dword [double_gamma_two]
      !fldpi
      !fdivp st1,st0
      ;******************************
;Pow      
      !fyl2x
      !fld st0 
      !frndint 
      !fsub st1, st0 
      !fld1 
      !fscale 
      !fxch 
      !fxch st2 
      !f2xm1 
      !fld1 
      !faddp st1, st0 
      !fmulp st1, st0
      !fstp st1 
;EndPow
      ;******************************
      !fld st1
      !fld tword [double_gamma_half]
      !fmulp st1,st0
      !fild dword [double_gamma_two]
;Pow      
      !fyl2x
      !fld st0 
      !frndint 
      !fsub st1, st0 
      !fld1 
      !fscale 
      !fxch 
      !fxch st2 
      !f2xm1 
      !fld1 
      !faddp st1, st0 
      !fmulp st1, st0
      !fstp st1 
;EndPow
      !fmulp st1,st0
      !fstp st1
      !fld tword [double_gamma_x_temp]
;******************************
      !ftst
      !fnstsw ax
      !sahf
      !jae double_gamma_xispositive
      !fchs
      !fstcw word [p.v_oldcw2] 
      !mov ax,[p.v_oldcw2] 
      !Or ax,110000000000b 
      !mov [p.v_newcw2],ax
      !fldcw word [p.v_newcw2] 
      !fld st0 ;qword [p.v_x]
      !fld st0
      !frndint  
      !fsubp st1,st0
      !fldcw word [p.v_oldcw2]
                                    ;at this point st0 = frac(x), st1 = x
      !ftst                         ;test fac against zero
      !fstp st0                     ;discard frac
      !fnstsw ax
      !sahf
      !je double_gamma_xisnegint
      !mov [p.v_is_neg_frac],dword 1
      !fld st0
      !fstp tword [double_gamma_x_temp]
      !fldpi
      !fmul st0,st1
      !fld st0
      !fsin
      !fdivp st1,st0
      !fxch
      !jmp double_gamma_xispositive
      !double_gamma_xisnegint:
      !fldz     
      !fyl2x                        ;!fdivp st1,st0
      !jmp double_gamma_exit
      !double_gamma_xispositive:
  ;***************************************  
      !fld tword [double_gamma_half]
      !faddp st1,st0              ;st0 = x+.5
      !fld st0                    ;st0 = x+.5, st1 = x+.5
      !fld tword [double_gamma_g] ;st0=g, st1 = x+.5, st2 = x+.5
      !fadd st0,st1               ;st0=x+.5+g = w, st1=x+.5
      !fld st0                    ;st0=w, st1=w, st2=x+.5, st3 = x+.5
      !fxch st3                   ;st0=x+.5, st1=w, st2=w, st3=w
      !fxch st1
      !fyl2x                      ;st0 = st0 ^ st1
      !fld st0                    ;"
      !frndint                    ;"
      !fsub st1, st0              ;"
      !fld1                       ;"
      !fscale                     ;"
      !fxch                       ;"
      !fxch st2                   ;"
      !f2xm1                      ;"
      !fld1                       ;"
      !faddp st1, st0             ;"
      !fmulp st1, st0             ;"
      !fstp st1                   ;clean up fpu stack, result in st0
      !fstp st1
      !fxch
      !fchs                       ;st0 = - st0 = -(.5 + x + g)
      !fld tword [double_gamma_const_e]        ;st0 = exp(st0)
      !fyl2x                      ;"
      !fld st0                    ;"
      !frndint                    ;"
      !fsub st1, st0              ;"
      !fld1                       ;"
      !fscale                     ;"
      !fxch                       ;"
      !fxch st2                   ;"
      !f2xm1                      ;"
      !fld1                       ;"
      !faddp st1, st0             ;"
      !fmulp st1, st0             ;"
      !fstp st1                   ;clean up fpu stack, result in st0
      !fmulp st1,st0              ;st0 = (.5 + x + g) ^ (x + .5) * exp(-(.5 + x + g))
      !fld tword [double_gamma_sq2pi]          ;sqrt(2*pi)
      !fmulp st1,st0              ;st0 = (.5 + x + g) ^ (x + .5) * exp(-(.5 + x + g)) * sqrt(2*pi)
      !fldz
      !fld tword [double_gamma_x_temp]            ;load x again
      !fiadd dword [double_gamma_cg]           ;st0 = x + 14

      !mov eax,140
      !mov ecx,14
      !double_gammaloop:
      !fld tword [eax+double_gamma_coeff] ; 0.36899182659531622704e-5
      !fdiv st0,st1               ;st0 = 0.36899182659531622704e-5 / (x + 14)
      !faddp st2,st0
      !fld1
      !fsubp st1,st0              ;st0 "x" -= 1
      !sub eax,10
      !loop double_gammaloop

      !fstp st0
      !fld tword [double_gamma_coeff]     ; 0.99999999999999709182
      !faddp  st1,st0
      !fmulp st1,st0
      !cmp [p.v_is_neg_frac],dword 0
      !je double_gamma_exit
      !fdivp st1,st0
      !double_gamma_exit:
      !fmulp st1,st0
      !fldcw [p.v_oldcw]          ; restore fpu control word      

  ProcedureReturn  
      !fldcw [p.v_oldcw]          ; restore fpu control word
      DataSection
      !section '.data' code readable writeable
      ! double_gamma_coeff:     
      ! dt           0.99999999999999709182
      ! dt          57.156235665862923517
      ! dt         -59.597960355475491248
      ! dt          14.136097974741747174
      ! dt          -0.49191381609762019978
      ! dt            0.33994649984811888699e-4
      ! dt            0.46523628927048575665e-4
      ! dt           -0.98374475304879564677e-4
      ! dt            0.15808870322491248884e-3
      ! dt           -0.21026444172410488319e-3
      ! dt            0.21743961811521264320e-3
      ! dt           -0.16431810653676389022e-3
      ! dt            0.84418223983852743293e-4
      ! dt           -0.26190838401581408670e-4
      ! dt            0.36899182659531622704e-5
      ;*-----------------------------------------------------*
      ! double_gamma_sq2pi:     dt     2.50662827463100050241577  ; Sqrt(2*Pi) 
      ! double_gamma_g:         dt         4.7421875              ; 607/128
      ;*-----------------------------------------------------*
      ! double_gamma_const_e:   dt  2.71828182845904523536029
      ! double_gamma_half:      dt  0.5
      ! double_gamma_quarter:   dt  0.25
      ! double_gamma_x_temp:    dt  0.0
      ! double_gamma_cg:        dd  14
      ! double_gamma_two:       dd  2
      EndDataSection
      
EndProcedure

Define.i i
Define.d x

OpenConsole()

For i=1 To 20
    x=i;-5.5
    PrintN(StrD(x)+"  ,  "+StrD(gamma(x),16))
Next

Input()
CloseConsole()
End
PB only version - less accurate

Code: Select all

Procedure.d Exp(x.d)
    e.d = 2.718281828459045
    EnableASM
      lea eax,[p.v_x] 
      fld qword [eax] 
      fld qword [p.v_e]
      fyl2x
      fld st0 
      frndint 
      fsub st1, st0 
      fld1 
      fscale 
      fxch 
      fxch st2 
      f2xm1 
      fld1 
      faddp st1, st0 
      fmulp st1, st0
      fstp st1
    DisableASM
  ProcedureReturn 
EndProcedure 

Procedure.d gamma(x.d)
    Define.d pi = 3.14159265358979324
    Define.d sq2pi=2.50662827463100050 ;sqrt(2Pi)
    Define.d g=607/128 ; best results when 4<=g<=5
    Define.d t, w, gam
    Define.i i, cg = 14

    ;Lanczos approximation for the complex plane
    ;calculated using vpa digits(256)
    ;the best set of coeffs was selected from
    ;a solution space of g=0 to 32 with 1 to 32 terms
    ;these coeffs really give superb performance
    ;of 15 sig. digits for 0<=real(z)<=171
    ;coeffs should sum to about g*g/2+23/24
    
    Dim c.d(14)
    c( 0)=  0.9999999999999970918
    c( 1)= 57.156235665862923517
    c( 2)=-59.597960355475491248
    c( 3)= 14.136097974741747174
    c( 4)= -4.9191381609762019978/10
    c( 5)=   3.3994649984811888699/100000
    c( 6)=   4.6523628927048575665/100000
    c( 7)=  -9.8374475304879564677/100000
    c( 8)=   1.5808870322491248884/10000
    c( 9)=  -2.1026444172410488319/10000
    c(10)=   2.1743961811521264320/10000
    c(11)=  -1.6431810653676389022/10000
    c(12)=   8.4418223983852743293/100000
    c(13)=  -2.6190838401581408670/100000
    c(14)=   3.6899182659531622704/1000000
    ;CopyMemory(?carray, @c(), 120)
    If ( x < 0 )
        x=-x
        gam = pi*x/(gamma(x)*Sin(pi*x))
    Else
        t = c(0)
        For i=1 To cg
            t = t + c(i)/(x+i)
        Next
        w = x + g + 0.5
        gam=Pow(w,(x+0.5))*exp(-w)*sq2pi*t
    EndIf
    ProcedureReturn gam
EndProcedure

Define.i i
Define.d x

OpenConsole()
 
For i=1 To 20
    x=i
    PrintN(StrD(x)+"  ,  "+StrD(gamma(x),16))
Next

Input()
CloseConsole()
End