fsum implementation, converted from Pythons fsum

Share your advanced PureBasic knowledge/code with the community.
User avatar
NicTheQuick
Addict
Addict
Posts: 1519
Joined: Sun Jun 22, 2003 7:43 pm
Location: Germany, Saarbrücken
Contact:

fsum implementation, converted from Pythons fsum

Post by NicTheQuick »

Hi there,

inspired by this thread and my comment about the implementation of a sum function from the math library of Python, I converted this function to Purebasic.
Here are a few more links: The code also contains the original source code as comments so it should be easier to follow. I had to change a few little things because Purebasic does not have iterators. And because we can use native types all the time and do not have to convert between C types and Python objects we can also skip some asserts and checks.
In the end there is a small example summing up 40000 specific values which sum up to the correct value 20000.0 when using fsum but sum up to 0.0 with the naive implementation.

Feel free to use it where you need it:

Code: Select all

Macro assert(expression)
	CompilerIf #PB_Compiler_Debugger
		If Not (expression)
			DebuggerError("assert failed at " + Str(#PB_Compiler_Line - 2))
		EndIf
	CompilerEndIf
EndMacro

Macro IsFinite(float)
	(Not (IsNAN(float) Or IsInfinity(float)))
EndMacro

;#define NUM_PARTIALS  32  /* initial partials array size, on stack */
#NUM_PARTIALS = 32

; Source: https://github.com/python/cpython/blob/main/Modules/mathmodule.c#L1356
;Static PyObject *
;math_fsum(PyObject *Module, PyObject *seq)
;/*[clinic End generated code: output=ba5c672b87fe34fc input=c51b7d8caf6f6e82]*/
;{
Procedure.d fsum(Array seq.d(1))
	;PyObject *item, *iter, *sum = NULL;
	Protected item.i = 0, seq_size.i, sum.d
	;Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
	Protected i.i, j.i, n.i = 0, m.i = #NUM_PARTIALS
	;double x, y, t, ps[NUM_PARTIALS], *p = ps;
	Protected Dim ps.d(#NUM_PARTIALS - 1)
	Protected x.d, y.d
	; t is not needed because of Swap command
	; *p = ps is not needed because we can access ps() directly
	;double xsave, special_sum = 0.0, inf_sum = 0.0;
	Protected.d xsave, special_sum = 0.0, inf_sum = 0.0
	;double hi, yr, lo = 0.0;
	Protected.d hi, yr, lo = 0.0
	;
	;iter = PyObject_GetIter(seq);
	seq_size = ArraySize(seq()) + 1
	;if (iter == NULL)
	If seq_size = 0
		;return NULL;
		ProcedureReturn 0
	EndIf
	;
	;for(;;) {           /* for x in iterable */
	While item < seq_size
		;assert(0 <= n && n <= m);
		assert(0 <= n And n <= m)
		;assert((m == NUM_PARTIALS && p == ps) ||
			;(m >  NUM_PARTIALS && p != NULL));
		;
		;item = PyIter_Next(iter);
		;if (item == NULL) {
		;	If (PyErr_Occurred())
		;		goto _fsum_error;
		;	break;
		;}
		;ASSIGN_DOUBLE(x, item, error_with_item);
		x = seq(item)
		;Py_DECREF(item);
		;
		;xsave = x;
		xsave = x
		;For (i = j = 0; j < n; j++) {       /* for y in partials */
		i = 0
		j = 0
		While j < n 
			;y = p[j];
			y = ps(j)
			;if (fabs(x) < fabs(y)) {
			If Abs(x) < Abs(y)
				;t = x; x = y; y = t;
				Swap x, y
			;}
			EndIf
			;hi = x + y;
			hi = x + y
			;yr = hi - x;
			yr = hi - x
			;lo = y - yr;
			lo = y - yr
			;If (lo != 0.0)
			If lo <> 0.0
				;p[i++] = lo;
				ps(i) = lo
				i + 1
			EndIf
			;x = hi;
			x = hi
		;}
			j + 1
		Wend
		;
		;n = i;                              /* ps[i:] = [x] */
		n = i
		;if (x != 0.0) {
		If x <> 0.0
			;if (! Py_IS_FINITE(x)) {
			If Not IsFinite(x)
				;/* a nonfinite x could arise either as a result of intermediate overflow, or as a result of a nan or inf in the summands */
				;if (Py_IS_FINITE(xsave)) {
				If IsFinite(xsave)
					;PyErr_SetString(PyExc_OverflowError, "intermediate overflow in fsum");
					DebuggerError("intermediate overflow in fsum")
					;goto _fsum_error;
					Goto _fsum_error
				;}
				EndIf
				;if (Py_IS_INFINITY(xsave))
				If IsInfinity(xsave)
				;     inf_sum += xsave;
					inf_sum + xsave
				EndIf
				;special_sum += xsave;
				special_sum + xsave
				;/* reset partials */
				;n = 0;
				n = 0
			;}
			;else if (n >= m && _fsum_realloc(&p, n, ps, &m))
			ElseIf (n >= m) ; here we can just use ReDim and don't care about too less memory
				;goto _fsum_error;
				m + m
				ReDim ps(m - 1)
			;else
			Else
				;p[n++] = x;
				ps(n) = x
				n + 1
			EndIf
		;}
		EndIf
	;}
		item + 1
	Wend
	;
	;If (special_sum != 0.0) {
	If special_sum <> 0.0
		;if (Py_IS_NAN(inf_sum))
		If IsNAN(inf_sum)
			;PyErr_SetString(PyExc_ValueError, "-inf + inf in fsum");
			DebuggerError("-inf + inf in fsum")
		;else
		Else
			;sum = PyFloat_FromDouble(special_sum);
			sum = special_sum
		EndIf
		;goto _fsum_error;
		Goto _fsum_error
	;}
	EndIf
	;
	;hi = 0.0;
	hi = 0.0
	;If (n > 0) {
	If n > 0
		;hi = p[--n];
		n - 1
		hi = ps(n)
		;/* sum_exact(ps, hi) from the top, stop when the sum becomes inexact. */
		;While (n > 0) {
		While n > 0
			;x = hi;
			x = hi
			;y = p[--n];
			n - 1
			y = ps(n)
			;assert(fabs(y) < fabs(x));
			assert(Abs(y) < Abs(x))
			;hi = x + y;
			hi = x + y
			;yr = hi - x;
			yr = hi - x
			;lo = y - yr;
			lo = y - yr
			;If (lo != 0.0)
			If lo <> 0.0
				;Break;
				Break
			EndIf
		;}
		Wend
		;/* Make half-even rounding work across multiple partials.
		;   Needed so that sum([1e-16, 1, 1e16]) will round-up the last
		;   digit to two instead of down To zero (the 1e-16 makes the 1
		;   slightly closer to two).  With a potential 1 ULP rounding
		;   error fixed-up, math.fsum() can guarantee commutativity. */
		;If (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) || (lo > 0.0 && p[n-1] > 0.0))) {
		If n > 0 And ((lo < 0.0 And ps(n - 1) < 0.0) Or (lo > 0.0 And ps(n - 1) > 0.0))
			;y = lo * 2.0;
			y = lo * 2.0
			;x = hi + y;
			x = hi + y
			;yr = x - hi;
			yr = x - hi
			;If (y == yr)
			If y = yr
				;hi = x;
				hi = x
			EndIf
		;}
		EndIf
	;}
	EndIf
	;sum = PyFloat_FromDouble(hi);
	sum = hi
; 
	;_fsum_error:
	_fsum_error:
	;Py_DECREF(iter);
	;If (p != ps)
		;PyMem_Free(p);
	;Return sum;
	ProcedureReturn sum
; 
;   error_with_item:
;     Py_DECREF(item);
;     Goto _fsum_error;
; }
EndProcedure

; Create test case, see https://code.activestate.com/recipes/393090/
Debug "Create array of 40000 summands in the form [1, 1e100, 1, -1e100] * 10000"
Define i.i
Dim summands.d(39999)
For i = 0 To 39999 Step 4
	summands(i + 0) = 1
	summands(i + 1) = 1e100
	summands(i + 2) = 1
	summands(i + 3) = -1e100
Next
For i = 0 To 3
	Debug "summands(" + i + ") = " + summands(i)
Next
Debug "..."
Debug ""

; fsum
Debug "Sum using fsum:"
Debug fsum(summands())
Debug ""

; simple sum
Define sum.d = 0
For i = 0 To 39999
	sum + summands(i)
Next
Debug "Naive sum:"
Debug sum
The english grammar is freeware, you can use it freely - But it's not Open Source, i.e. you can not change it or publish it in altered way.
User avatar
luis
Addict
Addict
Posts: 3895
Joined: Wed Aug 31, 2005 11:09 pm
Location: Italy

Re: fsum implementation, converted from Pythons fsum

Post by luis »

That's black magic !
Pretty amazing.
"Have you tried turning it off and on again ?"
Little John
Addict
Addict
Posts: 4789
Joined: Thu Jun 07, 2007 3:25 pm
Location: Berlin, Germany

Re: fsum implementation, converted from Pythons fsum

Post by Little John »

luis wrote: Sat Sep 16, 2023 2:11 pm That's black magic !
Pretty amazing.
+1 :D

Thank you, NicTheQuick!
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: fsum implementation, converted from Pythons fsum

Post by wilbert »

I know it doesn't exactly do what your code does but just for fun a simple PB approach.

Code: Select all

Structure S_DoubleQuadUnion
  StructureUnion
    d.d : q.q
  EndStructureUnion
EndStructure

Procedure.d SumDoubles(Array d.d(1))
  Protected Dim Sums.d(2047)
  Protected Sum.d, *DQ.S_DoubleQuadUnion = @d()
  Protected i, j = ArraySize(d())
  For i = 0 To j
    Sums(*DQ\q >> 52 & 2047) + *DQ\d
    *DQ + SizeOf(Double)
  Next
  For i = 2047 To 0 Step -1
    Sum + Sums(i)
  Next
  ProcedureReturn Sum
EndProcedure



; Test the code

Dim summands.d(39999)
For i = 0 To 39999 Step 4
	summands(i + 0) = 1
	summands(i + 1) = 1e100
	summands(i + 2) = 1
	summands(i + 3) = -1e100
Next

Debug SumDoubles(summands())

Edit:
In the code above I created an array of 2048 items so there's an array item for each individual exponent which is most accurate.
If you need more performance you can group exponents which increases performance at the cost of a bit of accuracy.
Example:

Code: Select all

Structure S_DoubleQuadUnion
  StructureUnion
    d.d : q.q
  EndStructureUnion
EndStructure

Procedure.d SumDoubles(Array d.d(1))
  Protected Dim Sums.d(127)
  Protected Sum.d, *DQ.S_DoubleQuadUnion = @d()
  Protected i, j = ArraySize(d())
  For i = 0 To j
    Sums(*DQ\q >> 56 & 127) + *DQ\d
    *DQ + SizeOf(Double)
  Next
  For i = 127 To 0 Step -1
    Sum + Sums(i)
  Next
  ProcedureReturn Sum
EndProcedure



; Test the code

Dim summands.d(39999)
For i = 0 To 39999 Step 4
	summands(i + 0) = 1
	summands(i + 1) = 1e100
	summands(i + 2) = 1
	summands(i + 3) = -1e100
Next

Debug SumDoubles(summands())

If you need something more accurate you might also want to look at the Kahan-Babushka-... codes I posted below.
viewtopic.php?p=607866#p607866
They are no as accurate as the Shewchuk algorithm Python uses but they are faster.
Last edited by wilbert on Thu Sep 28, 2023 8:51 am, edited 5 times in total.
Windows (x64)
Raspberry Pi OS (Arm64)
User avatar
Psychophanta
Always Here
Always Here
Posts: 5153
Joined: Wed Jun 11, 2003 9:33 pm
Location: Anare
Contact:

Re: fsum implementation, converted from Pythons fsum

Post by Psychophanta »

Nice one. :idea:
Don't know why, but i remembered another idea i had in the past, wich is how to obtain the mode statistic from an amount of real values, but with no use of store the repeated ones, but using another creative algorithm.
http://www.zeitgeistmovie.com

while (world==business) world+=mafia;
Little John
Addict
Addict
Posts: 4789
Joined: Thu Jun 07, 2007 3:25 pm
Location: Berlin, Germany

Re: fsum implementation, converted from Pythons fsum

Post by Little John »

Wilbert, what you are doing is incredible! :D
I'm glad you're back on the forum and I hope you're doing well.
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: fsum implementation, converted from Pythons fsum

Post by wilbert »

Little John wrote: Sun Sep 17, 2023 6:54 am Wilbert, what you are doing is incredible! :D
I'm glad you're back on the forum and I hope you're doing well.
Thanks :)
I'm okay.
Windows (x64)
Raspberry Pi OS (Arm64)
User avatar
STARGÅTE
Addict
Addict
Posts: 2232
Joined: Thu Jan 10, 2008 1:30 pm
Location: Germany, Glienicke
Contact:

Re: fsum implementation, converted from Pythons fsum

Post by STARGÅTE »

wilbert wrote: Sat Sep 16, 2023 6:51 pm I know it doesn't exactly do what your code does but just for fun a simple PB approach which does return a decent result
Edit:
In the code above I created an array of 2048 items so there's an array item for each individual exponent which is most accurate.
If you need more performance you can group exponents which increases performance at the cost of a bit of accuracy.
Example:
Interesting approach. Thanks for sharing.
However, I would sum up at the end from higher exponents to lower exponents.
Then it is possible to catch also zeros in the final summation.

Code: Select all

For i = 2047 To 0 Step -1
    Sum + Sums(i)
  Next

Code: Select all

Dim summands.d(4)
summands(0) = 2e100
summands(1) = -1e100
summands(2) = -1e100
summands(3) = 1
summands(4) = 2

Debug SumDoubles(summands())
PB 6.01 ― Win 10, 21H2 ― Ryzen 9 3900X, 32 GB ― NVIDIA GeForce RTX 3080 ― Vivaldi 6.0 ― www.unionbytes.de
Lizard - Script language for symbolic calculations and moreTypeface - Sprite-based font include/module
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: fsum implementation, converted from Pythons fsum

Post by wilbert »

STARGÅTE wrote: Sun Sep 17, 2023 8:24 am However, I would sum up at the end from higher exponents to lower exponents.
Then it is possible to catch also zeros in the final summation.
You are right. I was thinking the wrong way.
I changed my code above according to your suggestion.
Windows (x64)
Raspberry Pi OS (Arm64)
User avatar
NicTheQuick
Addict
Addict
Posts: 1519
Joined: Sun Jun 22, 2003 7:43 pm
Location: Germany, Saarbrücken
Contact:

Re: fsum implementation, converted from Pythons fsum

Post by NicTheQuick »

Hi wilbert. I also thought about doing it like this but on the other side there are also people way smarter than me and because of that I just converted that existing code.
To be honest I don't understand the code I converted completely but it seems to also catch some errors and I don't know how important these steps are.
The english grammar is freeware, you can use it freely - But it's not Open Source, i.e. you can not change it or publish it in altered way.
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: fsum implementation, converted from Pythons fsum

Post by wilbert »

NicTheQuick wrote: Sun Sep 17, 2023 2:42 pm Hi wilbert. I also thought about doing it like this but on the other side there are also people way smarter than me and because of that I just converted that existing code.
The code you posted does do a better job. :)
I just wanted to try something relatively simple.
Windows (x64)
Raspberry Pi OS (Arm64)
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: fsum implementation, converted from Pythons fsum

Post by wilbert »

When I was looking for some more information, I found a C implementation as well of the Shewchuk algorithm Python uses for fsum.
https://github.com/achan001/fsum/blob/master/shewchuk.c
Something I found interesting about this code that it offers an alternative without the Abs() function.

The Kahan summation algorithm was also mentioned often for summation
https://en.wikipedia.org/wiki/Kahan_summation_algorithm
Windows (x64)
Raspberry Pi OS (Arm64)
Little John
Addict
Addict
Posts: 4789
Joined: Thu Jun 07, 2007 3:25 pm
Location: Berlin, Germany

Re: fsum implementation, converted from Pythons fsum

Post by Little John »

Interesting. Thank you, wilbert!
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: fsum implementation, converted from Pythons fsum

Post by wilbert »

Shewchuk summation based on code from Albert Chan.

Code: Select all

; Shewchuk summation (assumed double-precision, round-to-nearest mode)

; original source code Copyright (c) 2018 Albert Chan (MIT License)
; https://github.com/achan001/fsum/

; PureBasic adaptation by Wilbert

#SC_STACK = 48

Structure sc_partials
  last.l
  p.d[#SC_STACK]
EndStructure

Procedure sc_init(*p.sc_partials)
  ; >> init the sc_partials structure <<
  *p\last = 0 : *p\p[0] = 0.0
EndProcedure

Procedure sc_add(*p.sc_partials, x.d)
  ; >> add a double <<
  Protected.l i, j
  Protected.d y, hi, yy
  Protected.Quad *chk = @x
  Protected.q msk = $7fffffffffffffff
  ; using a quad (\q) to compare instead of float
  ; because of a bug in the PB asm backend.
  ; using a msk variable instead of a direct mask
  ; because of a bug in the PB x86 c backend.
  For j = 0 To *p\last
    y = *p\p[j]
    hi = x + y : yy = hi - x
    x = (x - (hi - yy)) + (y - yy)
    If *chk\q
      ; x is NaN, Inf or non zero
      *p\p[i] = hi : i + 1
    Else
      x = hi
    EndIf
  Next
  If *chk\q & msk > $7ff0000000000000
    ; x is NaN
    *p\last = 0
  Else
    *p\last = i : *p\p[i] = x
    If i = #SC_STACK-1 : sc_add(*p, 0.0) : EndIf
  EndIf
EndProcedure
  
Procedure.d sc_total(*p.sc_partials)
  ; >> return the total of all added doubles <<
  Protected.l n
  Protected.d x, y, r
  Repeat
    n = *p\last
    If n = 0 : ProcedureReturn *p\p[0] : EndIf
    If n = 1 : ProcedureReturn *p\p[0] + *p\p[1] : EndIf
    x = *p\p[n]
    Repeat
      y = 0.5 * x : n - 1 : x = *p\p[n]
    Until n = 0 Or x <> x + y
    If x = x + y
      Break
    Else
      sc_add(*p, 0.0)
    EndIf
  ForEver
  y + y : r = x + y
  y = 2*(y - (r - x))
  If y <> (y + r) - r : ProcedureReturn r : EndIf
  If Bool(y < 0) = Bool(*p\p[2] < 0) And *p\p[2]
    ProcedureReturn r + y
  Else
    ProcedureReturn r
  EndIf
EndProcedure

Procedure.d sc_sum(Array d.d(1))
  ; >> return the sum of the array of doubles <<
  Protected p.sc_partials
  Protected.i i, j = ArraySize(d())
  sc_init(@p)
  For i = 0 To j
    sc_add(@p, d(i))
  Next
  ProcedureReturn sc_total(@p)
EndProcedure



; Test

Dim summands.d(39999)
For i = 0 To 39999 Step 4
	summands(i + 0) = 1
	summands(i + 1) = 1e100
	summands(i + 2) = 1
	summands(i + 3) = -1e100
Next
Debug sc_sum(summands())

; alternative

sc_init(p.sc_partials)
For i = 0 To 39999 Step 4
	sc_add(p, 1e100)
	sc_add(p, 1)
	sc_add(p, -1e100)
	sc_add(p, 1)
Next
Debug sc_total(p)


And here's also my implementation of the Kahan-Babushka-Klein and Kahan-Babushka-Neumaier summation algorithms.
They are not as accurate as the Shewchuk algorithm but in a lot of cases they will probably be accurate enough.

Code: Select all

; Kahan-Babushka-Klein summation

Procedure.d Sum_kbk(Array d.d(1))
  Protected.i i, j = ArraySize(d())
  Protected.d s, cs, ccs, t, u, v
  For i = 0 To j
    v = d(i)
    t = s : s + v : u = v - s
    v = (t + u) + (v - (u + s))
    t = cs : cs + v : u = v - cs
    ccs + (t + u) + (v - (u + cs))
  Next
  ProcedureReturn (s + cs) + ccs
EndProcedure

; Kahan-Babushka-Neumaier summation
; (faster but less accurate)

Procedure.d Sum_kbn(Array d.d(1))
  Protected.i i, j = ArraySize(d())
  Protected.d c, s, t, u, v
  For i = 0 To j
    v = d(i) : t = s : s + v : u = v - s
    c + (t + u) + (v - (u + s))
  Next
  ProcedureReturn s + c
EndProcedure

; it's also possible to further increase
; the accuracy of the Kahan-Babushka-Klein
; summation at the cost of performance

Procedure.d Sum_kbx(Array d.d(1))
  Protected.i i, j = ArraySize(d())
  Protected.d s, cs, ccs, cccs, t, u, v
  For i = 0 To j
    v = d(i)
    t = s : s + v : u = v - s
    v = (t + u) + (v - (u + s))
    t = cs : cs + v : u = v - cs
    v = (t + u) + (v - (u + cs))
    t = ccs : ccs + v : u = v - ccs
    cccs + (t + u) + (v - (u + ccs))
  Next
  ProcedureReturn ((s + cs) + ccs) + cccs
EndProcedure



; Test the code

Dim summands.d(39999)
For i = 0 To 39999 Step 4
	summands(i + 0) = 1
	summands(i + 1) = 1e100
	summands(i + 2) = 1
	summands(i + 3) = -1e100
Next

Debug Sum_kbk(summands())
Windows (x64)
Raspberry Pi OS (Arm64)
Post Reply