fsum implementation, converted from Pythons fsum

NicTheQuick · Post by **NicTheQuick** » Sat Sep 16, 2023 1:04 pm

Hi there,

inspired by this thread and my comment about the implementation of a sum function from the math library of Python, I converted this function to Purebasic.
Here are a few more links:

Source of original function: https://github.com/python/cpython/blob/ ... le.c#L1356
More detailed explanation and alternatives: https://code.activestate.com/recipes/393090/

The code also contains the original source code as comments so it should be easier to follow. I had to change a few little things because Purebasic does not have iterators. And because we can use native types all the time and do not have to convert between C types and Python objects we can also skip some asserts and checks.
In the end there is a small example summing up 40000 specific values which sum up to the correct value 20000.0 when using fsum but sum up to 0.0 with the naive implementation.

Feel free to use it where you need it:

Code: Select all

Macro assert(expression)
	CompilerIf #PB_Compiler_Debugger
		If Not (expression)
			DebuggerError("assert failed at " + Str(#PB_Compiler_Line - 2))
		EndIf
	CompilerEndIf
EndMacro

Macro IsFinite(float)
	(Not (IsNAN(float) Or IsInfinity(float)))
EndMacro

;#define NUM_PARTIALS  32  /* initial partials array size, on stack */
#NUM_PARTIALS = 32

; Source: https://github.com/python/cpython/blob/main/Modules/mathmodule.c#L1356
;Static PyObject *
;math_fsum(PyObject *Module, PyObject *seq)
;/*[clinic End generated code: output=ba5c672b87fe34fc input=c51b7d8caf6f6e82]*/
;{
Procedure.d fsum(Array seq.d(1))
	;PyObject *item, *iter, *sum = NULL;
	Protected item.i = 0, seq_size.i, sum.d
	;Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
	Protected i.i, j.i, n.i = 0, m.i = #NUM_PARTIALS
	;double x, y, t, ps[NUM_PARTIALS], *p = ps;
	Protected Dim ps.d(#NUM_PARTIALS - 1)
	Protected x.d, y.d
	; t is not needed because of Swap command
	; *p = ps is not needed because we can access ps() directly
	;double xsave, special_sum = 0.0, inf_sum = 0.0;
	Protected.d xsave, special_sum = 0.0, inf_sum = 0.0
	;double hi, yr, lo = 0.0;
	Protected.d hi, yr, lo = 0.0
	;
	;iter = PyObject_GetIter(seq);
	seq_size = ArraySize(seq()) + 1
	;if (iter == NULL)
	If seq_size = 0
		;return NULL;
		ProcedureReturn 0
	EndIf
	;
	;for(;;) {           /* for x in iterable */
	While item < seq_size
		;assert(0 <= n && n <= m);
		assert(0 <= n And n <= m)
		;assert((m == NUM_PARTIALS && p == ps) ||
			;(m >  NUM_PARTIALS && p != NULL));
		;
		;item = PyIter_Next(iter);
		;if (item == NULL) {
		;	If (PyErr_Occurred())
		;		goto _fsum_error;
		;	break;
		;}
		;ASSIGN_DOUBLE(x, item, error_with_item);
		x = seq(item)
		;Py_DECREF(item);
		;
		;xsave = x;
		xsave = x
		;For (i = j = 0; j < n; j++) {       /* for y in partials */
		i = 0
		j = 0
		While j < n 
			;y = p[j];
			y = ps(j)
			;if (fabs(x) < fabs(y)) {
			If Abs(x) < Abs(y)
				;t = x; x = y; y = t;
				Swap x, y
			;}
			EndIf
			;hi = x + y;
			hi = x + y
			;yr = hi - x;
			yr = hi - x
			;lo = y - yr;
			lo = y - yr
			;If (lo != 0.0)
			If lo <> 0.0
				;p[i++] = lo;
				ps(i) = lo
				i + 1
			EndIf
			;x = hi;
			x = hi
		;}
			j + 1
		Wend
		;
		;n = i;                              /* ps[i:] = [x] */
		n = i
		;if (x != 0.0) {
		If x <> 0.0
			;if (! Py_IS_FINITE(x)) {
			If Not IsFinite(x)
				;/* a nonfinite x could arise either as a result of intermediate overflow, or as a result of a nan or inf in the summands */
				;if (Py_IS_FINITE(xsave)) {
				If IsFinite(xsave)
					;PyErr_SetString(PyExc_OverflowError, "intermediate overflow in fsum");
					DebuggerError("intermediate overflow in fsum")
					;goto _fsum_error;
					Goto _fsum_error
				;}
				EndIf
				;if (Py_IS_INFINITY(xsave))
				If IsInfinity(xsave)
				;     inf_sum += xsave;
					inf_sum + xsave
				EndIf
				;special_sum += xsave;
				special_sum + xsave
				;/* reset partials */
				;n = 0;
				n = 0
			;}
			;else if (n >= m && _fsum_realloc(&p, n, ps, &m))
			ElseIf (n >= m) ; here we can just use ReDim and don't care about too less memory
				;goto _fsum_error;
				m + m
				ReDim ps(m - 1)
			;else
			Else
				;p[n++] = x;
				ps(n) = x
				n + 1
			EndIf
		;}
		EndIf
	;}
		item + 1
	Wend
	;
	;If (special_sum != 0.0) {
	If special_sum <> 0.0
		;if (Py_IS_NAN(inf_sum))
		If IsNAN(inf_sum)
			;PyErr_SetString(PyExc_ValueError, "-inf + inf in fsum");
			DebuggerError("-inf + inf in fsum")
		;else
		Else
			;sum = PyFloat_FromDouble(special_sum);
			sum = special_sum
		EndIf
		;goto _fsum_error;
		Goto _fsum_error
	;}
	EndIf
	;
	;hi = 0.0;
	hi = 0.0
	;If (n > 0) {
	If n > 0
		;hi = p[--n];
		n - 1
		hi = ps(n)
		;/* sum_exact(ps, hi) from the top, stop when the sum becomes inexact. */
		;While (n > 0) {
		While n > 0
			;x = hi;
			x = hi
			;y = p[--n];
			n - 1
			y = ps(n)
			;assert(fabs(y) < fabs(x));
			assert(Abs(y) < Abs(x))
			;hi = x + y;
			hi = x + y
			;yr = hi - x;
			yr = hi - x
			;lo = y - yr;
			lo = y - yr
			;If (lo != 0.0)
			If lo <> 0.0
				;Break;
				Break
			EndIf
		;}
		Wend
		;/* Make half-even rounding work across multiple partials.
		;   Needed so that sum([1e-16, 1, 1e16]) will round-up the last
		;   digit to two instead of down To zero (the 1e-16 makes the 1
		;   slightly closer to two).  With a potential 1 ULP rounding
		;   error fixed-up, math.fsum() can guarantee commutativity. */
		;If (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) || (lo > 0.0 && p[n-1] > 0.0))) {
		If n > 0 And ((lo < 0.0 And ps(n - 1) < 0.0) Or (lo > 0.0 And ps(n - 1) > 0.0))
			;y = lo * 2.0;
			y = lo * 2.0
			;x = hi + y;
			x = hi + y
			;yr = x - hi;
			yr = x - hi
			;If (y == yr)
			If y = yr
				;hi = x;
				hi = x
			EndIf
		;}
		EndIf
	;}
	EndIf
	;sum = PyFloat_FromDouble(hi);
	sum = hi
; 
	;_fsum_error:
	_fsum_error:
	;Py_DECREF(iter);
	;If (p != ps)
		;PyMem_Free(p);
	;Return sum;
	ProcedureReturn sum
; 
;   error_with_item:
;     Py_DECREF(item);
;     Goto _fsum_error;
; }
EndProcedure

; Create test case, see https://code.activestate.com/recipes/393090/
Debug "Create array of 40000 summands in the form [1, 1e100, 1, -1e100] * 10000"
Define i.i
Dim summands.d(39999)
For i = 0 To 39999 Step 4
	summands(i + 0) = 1
	summands(i + 1) = 1e100
	summands(i + 2) = 1
	summands(i + 3) = -1e100
Next
For i = 0 To 3
	Debug "summands(" + i + ") = " + summands(i)
Next
Debug "..."
Debug ""

; fsum
Debug "Sum using fsum:"
Debug fsum(summands())
Debug ""

; simple sum
Define sum.d = 0
For i = 0 To 39999
	sum + summands(i)
Next
Debug "Naive sum:"
Debug sum

luis · Post by **luis** » Sat Sep 16, 2023 2:11 pm

That's black magic !
Pretty amazing.

Little John · Post by **Little John** » Sat Sep 16, 2023 3:10 pm

luis wrote: Sat Sep 16, 2023 2:11 pm That's black magic !
Pretty amazing.

+1

Thank you, NicTheQuick!

wilbert · Post by **wilbert** » Sat Sep 16, 2023 6:51 pm

I know it doesn't exactly do what your code does but just for fun a simple PB approach.

Code: Select all

Structure S_DoubleQuadUnion
  StructureUnion
    d.d : q.q
  EndStructureUnion
EndStructure

Procedure.d SumDoubles(Array d.d(1))
  Protected Dim Sums.d(2047)
  Protected Sum.d, *DQ.S_DoubleQuadUnion = @d()
  Protected i, j = ArraySize(d())
  For i = 0 To j
    Sums(*DQ\q >> 52 & 2047) + *DQ\d
    *DQ + SizeOf(Double)
  Next
  For i = 2047 To 0 Step -1
    Sum + Sums(i)
  Next
  ProcedureReturn Sum
EndProcedure



; Test the code

Dim summands.d(39999)
For i = 0 To 39999 Step 4
	summands(i + 0) = 1
	summands(i + 1) = 1e100
	summands(i + 2) = 1
	summands(i + 3) = -1e100
Next

Debug SumDoubles(summands())

Edit:
In the code above I created an array of 2048 items so there's an array item for each individual exponent which is most accurate.
If you need more performance you can group exponents which increases performance at the cost of a bit of accuracy.
Example:

Code: Select all

Structure S_DoubleQuadUnion
  StructureUnion
    d.d : q.q
  EndStructureUnion
EndStructure

Procedure.d SumDoubles(Array d.d(1))
  Protected Dim Sums.d(127)
  Protected Sum.d, *DQ.S_DoubleQuadUnion = @d()
  Protected i, j = ArraySize(d())
  For i = 0 To j
    Sums(*DQ\q >> 56 & 127) + *DQ\d
    *DQ + SizeOf(Double)
  Next
  For i = 127 To 0 Step -1
    Sum + Sums(i)
  Next
  ProcedureReturn Sum
EndProcedure



; Test the code

Dim summands.d(39999)
For i = 0 To 39999 Step 4
	summands(i + 0) = 1
	summands(i + 1) = 1e100
	summands(i + 2) = 1
	summands(i + 3) = -1e100
Next

Debug SumDoubles(summands())

If you need something more accurate you might also want to look at the Kahan-Babushka-... codes I posted below.
viewtopic.php?p=607866#p607866
They are no as accurate as the Shewchuk algorithm Python uses but they are faster.

Psychophanta · Post by **Psychophanta** » Sat Sep 16, 2023 6:57 pm

Nice one.

Don't know why, but i remembered another idea i had in the past, wich is how to obtain the mode statistic from an amount of real values, but with no use of store the repeated ones, but using another creative algorithm.

Little John · Post by **Little John** » Sun Sep 17, 2023 6:54 am

Wilbert, what you are doing is incredible!

I'm glad you're back on the forum and I hope you're doing well.

wilbert · Post by **wilbert** » Sun Sep 17, 2023 7:19 am

Little John wrote: Sun Sep 17, 2023 6:54 am Wilbert, what you are doing is incredible!
I'm glad you're back on the forum and I hope you're doing well.

Thanks

I'm okay.

STARGÅTE · Post by **STARGÅTE** » Sun Sep 17, 2023 8:24 am

wilbert wrote: Sat Sep 16, 2023 6:51 pm I know it doesn't exactly do what your code does but just for fun a simple PB approach which does return a decent result
Edit:
In the code above I created an array of 2048 items so there's an array item for each individual exponent which is most accurate.
If you need more performance you can group exponents which increases performance at the cost of a bit of accuracy.
Example:

Interesting approach. Thanks for sharing.
However, I would sum up at the end from higher exponents to lower exponents.
Then it is possible to catch also zeros in the final summation.

Code: Select all

For i = 2047 To 0 Step -1
    Sum + Sums(i)
  Next

Code: Select all

Dim summands.d(4)
summands(0) = 2e100
summands(1) = -1e100
summands(2) = -1e100
summands(3) = 1
summands(4) = 2

Debug SumDoubles(summands())

wilbert · Post by **wilbert** » Sun Sep 17, 2023 9:01 am

STARGÅTE wrote: Sun Sep 17, 2023 8:24 am However, I would sum up at the end from higher exponents to lower exponents.
Then it is possible to catch also zeros in the final summation.

You are right. I was thinking the wrong way.
I changed my code above according to your suggestion.

NicTheQuick · Post by **NicTheQuick** » Sun Sep 17, 2023 2:42 pm

Hi wilbert. I also thought about doing it like this but on the other side there are also people way smarter than me and because of that I just converted that existing code.
To be honest I don't understand the code I converted completely but it seems to also catch some errors and I don't know how important these steps are.

wilbert · Post by **wilbert** » Sun Sep 17, 2023 4:56 pm

NicTheQuick wrote: Sun Sep 17, 2023 2:42 pm Hi wilbert. I also thought about doing it like this but on the other side there are also people way smarter than me and because of that I just converted that existing code.

The code you posted does do a better job.

I just wanted to try something relatively simple.

wilbert · Post by **wilbert** » Mon Sep 18, 2023 3:47 pm

When I was looking for some more information, I found a C implementation as well of the Shewchuk algorithm Python uses for fsum.
https://github.com/achan001/fsum/blob/master/shewchuk.c
Something I found interesting about this code that it offers an alternative without the Abs() function.

The Kahan summation algorithm was also mentioned often for summation
https://en.wikipedia.org/wiki/Kahan_summation_algorithm

Little John · Post by **Little John** » Tue Sep 19, 2023 6:23 am

Interesting. Thank you, wilbert!

wilbert · Post by **wilbert** » Fri Sep 22, 2023 6:03 am

Shewchuk summation based on code from Albert Chan.

Code: Select all

; Shewchuk summation (assumed double-precision, round-to-nearest mode)

; original source code Copyright (c) 2018 Albert Chan (MIT License)
; https://github.com/achan001/fsum/

; PureBasic adaptation by Wilbert

#SC_STACK = 48

Structure sc_partials
  last.l
  p.d[#SC_STACK]
EndStructure

Procedure sc_init(*p.sc_partials)
  ; >> init the sc_partials structure <<
  *p\last = 0 : *p\p[0] = 0.0
EndProcedure

Procedure sc_add(*p.sc_partials, x.d)
  ; >> add a double <<
  Protected.l i, j
  Protected.d y, hi, yy
  Protected.Quad *chk = @x
  Protected.q msk = $7fffffffffffffff
  ; using a quad (\q) to compare instead of float
  ; because of a bug in the PB asm backend.
  ; using a msk variable instead of a direct mask
  ; because of a bug in the PB x86 c backend.
  For j = 0 To *p\last
    y = *p\p[j]
    hi = x + y : yy = hi - x
    x = (x - (hi - yy)) + (y - yy)
    If *chk\q
      ; x is NaN, Inf or non zero
      *p\p[i] = hi : i + 1
    Else
      x = hi
    EndIf
  Next
  If *chk\q & msk > $7ff0000000000000
    ; x is NaN
    *p\last = 0
  Else
    *p\last = i : *p\p[i] = x
    If i = #SC_STACK-1 : sc_add(*p, 0.0) : EndIf
  EndIf
EndProcedure
  
Procedure.d sc_total(*p.sc_partials)
  ; >> return the total of all added doubles <<
  Protected.l n
  Protected.d x, y, r
  Repeat
    n = *p\last
    If n = 0 : ProcedureReturn *p\p[0] : EndIf
    If n = 1 : ProcedureReturn *p\p[0] + *p\p[1] : EndIf
    x = *p\p[n]
    Repeat
      y = 0.5 * x : n - 1 : x = *p\p[n]
    Until n = 0 Or x <> x + y
    If x = x + y
      Break
    Else
      sc_add(*p, 0.0)
    EndIf
  ForEver
  y + y : r = x + y
  y = 2*(y - (r - x))
  If y <> (y + r) - r : ProcedureReturn r : EndIf
  If Bool(y < 0) = Bool(*p\p[2] < 0) And *p\p[2]
    ProcedureReturn r + y
  Else
    ProcedureReturn r
  EndIf
EndProcedure

Procedure.d sc_sum(Array d.d(1))
  ; >> return the sum of the array of doubles <<
  Protected p.sc_partials
  Protected.i i, j = ArraySize(d())
  sc_init(@p)
  For i = 0 To j
    sc_add(@p, d(i))
  Next
  ProcedureReturn sc_total(@p)
EndProcedure



; Test

Dim summands.d(39999)
For i = 0 To 39999 Step 4
	summands(i + 0) = 1
	summands(i + 1) = 1e100
	summands(i + 2) = 1
	summands(i + 3) = -1e100
Next
Debug sc_sum(summands())

; alternative

sc_init(p.sc_partials)
For i = 0 To 39999 Step 4
	sc_add(p, 1e100)
	sc_add(p, 1)
	sc_add(p, -1e100)
	sc_add(p, 1)
Next
Debug sc_total(p)

And here's also my implementation of the Kahan-Babushka-Klein and Kahan-Babushka-Neumaier summation algorithms.
They are not as accurate as the Shewchuk algorithm but in a lot of cases they will probably be accurate enough.

Code: Select all

; Kahan-Babushka-Klein summation

Procedure.d Sum_kbk(Array d.d(1))
  Protected.i i, j = ArraySize(d())
  Protected.d s, cs, ccs, t, u, v
  For i = 0 To j
    v = d(i)
    t = s : s + v : u = v - s
    v = (t + u) + (v - (u + s))
    t = cs : cs + v : u = v - cs
    ccs + (t + u) + (v - (u + cs))
  Next
  ProcedureReturn (s + cs) + ccs
EndProcedure

; Kahan-Babushka-Neumaier summation
; (faster but less accurate)

Procedure.d Sum_kbn(Array d.d(1))
  Protected.i i, j = ArraySize(d())
  Protected.d c, s, t, u, v
  For i = 0 To j
    v = d(i) : t = s : s + v : u = v - s
    c + (t + u) + (v - (u + s))
  Next
  ProcedureReturn s + c
EndProcedure

; it's also possible to further increase
; the accuracy of the Kahan-Babushka-Klein
; summation at the cost of performance

Procedure.d Sum_kbx(Array d.d(1))
  Protected.i i, j = ArraySize(d())
  Protected.d s, cs, ccs, cccs, t, u, v
  For i = 0 To j
    v = d(i)
    t = s : s + v : u = v - s
    v = (t + u) + (v - (u + s))
    t = cs : cs + v : u = v - cs
    v = (t + u) + (v - (u + cs))
    t = ccs : ccs + v : u = v - ccs
    cccs + (t + u) + (v - (u + ccs))
  Next
  ProcedureReturn ((s + cs) + ccs) + cccs
EndProcedure



; Test the code

Dim summands.d(39999)
For i = 0 To 39999 Step 4
	summands(i + 0) = 1
	summands(i + 1) = 1e100
	summands(i + 2) = 1
	summands(i + 3) = -1e100
Next

Debug Sum_kbk(summands())

PureBasic Forums - English

fsum implementation, converted from Pythons fsum

fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum

Re: fsum implementation, converted from Pythons fsum