Page 1 of 1
SQLite + Percentile ?
Posted: Tue Dec 26, 2017 8:39 am
by Oliver13
I have a huge SQLite database with the following fields: id, date, type, result
Regrettably, the native aggregate functions have no
percentile function, so I bypass this with the following SQL-statement.
For example to get the 90th percentile of all results with type=3:
Code: Select all
SELECT result AS 'type3_percent90'
FROM table
WHERE type=3
ORDER BY result ASC
LIMIT 1
OFFSET (SELECT
COUNT(*)
FROM table
WHERE type=3) * 9 / 10 - 1;
This is ok for single queries, but not for complex requests, so it would be better to have an implemented function.
Could anybody help how to convert the
percentile extension into PB ?
TX a lot,
Oliver
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 1:20 pm
by infratec
Hi,
don't know how to test it, so I don't know if it works 100% correct.
Code: Select all
CompilerIf #PB_Compiler_IsMainFile
EnableExplicit
CompilerEndIf
ImportC ""
qsort(*base, nitems.l, size.l, *Prog)
;qsort(void *base, size_t nitems, size_t size, Int (*compar)(const void *, const void*))
EndImport
CompilerSelect #PB_Compiler_OS
CompilerCase #PB_OS_MacOS
ImportC "/Applications/PureBasic/purelibraries/macos/libraries/libpbsqlite3.a"
CompilerDefault
ImportC "sqlite3.lib"
CompilerEndSelect
sqlite3_create_function(DatabaseID, zFunctionName.p-utf8, nArg.l, eTextRep.l, *pApp, *xFunc, *xStep, *xFinal)
sqlite3_aggregate_context(*sqlite3_context, nBytes.l)
sqlite3_result_double(*sqlite3_context, dbl.d)
sqlite3_value_numeric_type.l(*sqlite3_value)
sqlite3_value_double.d(*sqlite3_value)
sqlite3_value_int.i(*sqlite3_value)
sqlite3_value_type.i(*sqlite3_value)
sqlite3_realloc(*void, int.l)
sqlite3_free(*void)
sqlite3_result_error(*sqlite3_context, char.p-utf8, int.l)
sqlite3_result_error_nomem(*sqlite3_context)
sqlite3_errcode.i(*db)
sqlite3_errmsg16.i(*sqlite3)
EndImport
#SQLITE_OK = 0
#SQLITE_UTF8 = 1
#SQLITE_INTEGER = 1
#SQLITE_FLOAT = 2
#SQLITE3_TEXT = 3
#SQLITE_BLOB = 4
#SQLITE_NULL = 5
Structure PercentileStrucure
nAlloc.i ; Number of slots allocated for a[]
nUsed.i ; Number of slots actually used in a[]
rPct.d ; 1.0 more than the value for P
*a.Double ; Array of Y values
EndStructure
Procedure.i sameValue(a.d, b.d)
a - b
ProcedureReturn Bool(a >= -0.001 And a <= 0.001)
EndProcedure
ProcedureC percentStep(*pCtx, argc.l, *argv)
Protected *ptr, *a
Protected *p.PercentileStrucure
Protected.d rPct
Protected.i eType
Protected.d y
Protected.i n
; Requirement 3: P must be a number between 0 And 100
*ptr = PeekI(*argv + 1 * SizeOf(Long))
eType = sqlite3_value_numeric_type(*ptr)
rPct = sqlite3_value_double(*ptr);
If (eType <> #SQLITE_INTEGER And eType <> #SQLITE_FLOAT) Or (rPct < 0.0 Or rPct > 100.0)
sqlite3_result_error(*pCtx, "2nd argument to percentile() is not a number between 0.0 And 100.0", -1)
ProcedureReturn
EndIf
; Allocate the session context.
*p = sqlite3_aggregate_context(*pCtx, SizeOf(PercentileStrucure))
If *p = 0
ProcedureReturn
EndIf
; Remember the P value. Throw an error If the P value is different
; from any prior row, per Requirement (2).
If *p\rPct = 0.0
*p\rPct = rPct + 1.0
ElseIf Not sameValue(*p\rPct, rPct + 1.0)
sqlite3_result_error(*pCtx, "2nd argument to percentile() is not the same For all input rows", -1)
ProcedureReturn
EndIf
; Ignore rows For which Y is NULL
*ptr = PeekI(*argv + 0 * SizeOf(Long))
eType = sqlite3_value_type(*ptr)
If eType = #SQLITE_NULL
ProcedureReturn
EndIf
; If Not NULL, then Y must be numeric. Otherwise throw an error.
; Requirement 4
If eType <> #SQLITE_INTEGER And eType <> #SQLITE_FLOAT
sqlite3_result_error(*pCtx, "1st argument to percentile() is not numeric", -1)
ProcedureReturn
EndIf
; Throw an error If the Y value is infinity Or NaN
y = sqlite3_value_double(*ptr)
If IsInfinity(y)
sqlite3_result_error(*pCtx, "Inf input to percentile()", -1)
ProcedureReturn
EndIf
; Allocate And store the Y
If *p\nUsed >= *p\nAlloc
n = *p\nAlloc * 2 + 250
*a = sqlite3_realloc(*p\a, 8 * n)
If *a = 0
sqlite3_free(*p\a)
ClearStructure(*p, PercentileStrucure)
sqlite3_result_error_nomem(*pCtx)
ProcedureReturn
EndIf
*p\nAlloc = n
*p\a = *a
EndIf
;*p\a[*p\nUsed] = y
PokeD(*p\a + 8 * *p\nUsed, y)
*p\nUsed + 1
EndProcedure
ProcedureC.l doubleCmp(*pA, *pB)
Protected Result.l
Protected.d a, b
Result = 1
a = PeekD(*pA)
b = PeekD(*pB)
If a=b
Result = 0
ElseIf a < b
Result = -1
EndIf
ProcedureReturn Result
EndProcedure
ProcedureC percentFinal(*pCtx)
Protected *p.PercentileStrucure
Protected.i i1, i2
Protected.d v1, v2
Protected.d ix, vx
Protected.i i
*p = sqlite3_aggregate_context(*pCtx, 0)
If *p
If *p\a <> 0
If *p\nUsed
qsort(*p\a, *p\nUsed, SizeOf(double), @doubleCmp())
; For i = 0 To *p\nUsed - 2
; Debug PeekD(*p\a + 8 * i)
; Next i
; Dim a.d(*p\nUsed - 1)
;
; For i = 0 To *p\nUsed - 1
; ;a(i) = *p\a[i]
; a(i) = PeekD(*p\a + 8 * i)
; Next i
;
; SortArray(a(), #PB_Sort_Ascending, 0, ArraySize(a()))
; Debug "-----"
; For i = 0 To ArraySize(a())
; Debug a(i)
; Next i
; Debug "+++++"
ix = (*p\rPct - 1.0) * (*p\nUsed - 1) * 0.01
i1 = ix
;i2 = ix==(double)i1 || i1==p->nUsed-1 ? i1 : i1+1
If ix = i1
i2 = i1
Else
If i1 = *p\nUsed - 1
i2 = i1
Else
i2 = i1 + 1
EndIf
EndIf
;v1 = *p\a[i1]
v1 = PeekD(*p\a + 8 * i1)
;v1 = a(i1)
;v2 = *p\a[i2]
v2 = PeekD(*p\a + 8 * i2)
;v2 = a(i2)
vx = v1 + (v2 - v1) * (ix - i1)
sqlite3_result_double(*pCtx, vx)
EndIf
sqlite3_free(*p\a)
EndIf
ClearStructure(*p, PercentileStrucure)
EndIf
EndProcedure
Procedure.i sqlite3_percentile_init(db.i)
ProcedureReturn Bool(sqlite3_create_function(DatabaseID(db), "percentile", 2, #SQLITE_UTF8, 0, 0, @percentStep(), @percentFinal()) = #SQLITE_OK)
EndProcedure
CompilerIf #PB_Compiler_IsMainFile
Define.i DB, i, j
UseSQLiteDatabase()
DB = OpenDatabase(#PB_Any, ":memory:", "", "")
If DB
If sqlite3_percentile_init(DB)
DatabaseUpdate(DB, "CREATE TABLE test(a INTEGER)")
For i = 0 To 10
j = Random(1000)
Debug Str(i) + ": " + Str(j)
DatabaseUpdate(DB, "INSERT INTO test VALUES (" + Str(j) + ")")
Next i
If DatabaseQuery(DB, "SELECT percentile(a, 50) FROM test")
While NextDatabaseRow(DB)
Debug GetDatabaseString(DB, 0)
Wend
FinishDatabaseQuery(DB)
If sqlite3_errcode(DatabaseID(DB))
Debug PeekS(sqlite3_errmsg16(DatabaseID(DB)))
EndIf
Else
Debug DatabaseError()
EndIf
EndIf
CloseDatabase(DB)
EndIf
CompilerEndIf
Bernd
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 1:29 pm
by infratec
Ups...
I think it does not work.
It returns the value at the coressponding row.
I modified the listing above to show this.
Bernd
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 1:42 pm
by infratec
Seems to work now.
But I want to eliminate my additional array which was needed because I was not able to define the variable double array
inside of the structure [0] failed at allocation.
Bernd
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 1:52 pm
by infratec
Hope it works now without my additional array.
Extensive testing is needed!
Bernd
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 4:00 pm
by Oliver13
Hi Bernd,
great, thank you very much !
But there is still a bug; I modified the sample:
For example with percentile=10, the function returns 9.9, but should return 9.0
Code: Select all
CompilerIf #PB_Compiler_IsMainFile
Define.i DB, i, j
UseSQLiteDatabase()
DB = OpenDatabase(#PB_Any, ":memory:", "", "")
If DB
If sqlite3_percentile_init(DB)
DatabaseUpdate(DB, "CREATE TABLE test(a DOUBLE)")
For i = 0 To 99
j = i; fill with 0-99
DatabaseUpdate(DB, "INSERT INTO test VALUES (" + Str(j) + ")")
Next i
; show all records
If DatabaseQuery(DB, "SELECT * FROM test ORDER BY a ASC")
While NextDatabaseRow(db)
Debug GetDatabaseString(db,0)
Wend
FinishDatabaseQuery(db)
EndIf
For i=0 To 10
Debug Str(i*10)+" percentile"
If DatabaseQuery(DB, "SELECT a FROM test ORDER BY a ASC LIMIT 1 OFFSET (Select COUNT(*) FROM test) *"+Str(i)+" / 10 - 1")
If NextDatabaseRow(DB)
Debug "SQL:"+GetDatabaseString(DB, 0)
EndIf
FinishDatabaseQuery(DB)
Else
If sqlite3_errcode(DatabaseID(DB))
Debug PeekS(sqlite3_errmsg16(DatabaseID(DB)))
EndIf
EndIf
If DatabaseQuery(DB, "SELECT percentile(a, "+Str(i*10)+") from test")
While NextDatabaseRow(DB)
Debug "External:"+GetDatabaseString(DB, 0)
Wend
FinishDatabaseQuery(DB)
Else
If sqlite3_errcode(DatabaseID(DB))
Debug PeekS(sqlite3_errmsg16(DatabaseID(DB)))
EndIf
EndIf
Next
EndIf
CloseDatabase(DB)
EndIf
CompilerEndIf
Any idea ? Thx a lot
Oliver
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 4:14 pm
by infratec
Hm...
I think you SQL select is a bit wrong.
If i is 0 you use an offset of -1. This can't be valid.
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 4:26 pm
by infratec
If I use the values of
https://onlinecourses.science.psu.edu/stat414/node/315
for the urban city, I get 53.8 as result. (instead of 53.6, which maybe a rounding issue)
Code: Select all
CompilerIf #PB_Compiler_IsMainFile
Define.i DB, i, j
UseSQLiteDatabase()
DB = OpenDatabase(#PB_Any, ":memory:", "", "")
If DB
If sqlite3_percentile_init(DB)
DatabaseUpdate(DB, "CREATE TABLE test(a INTEGER)")
; For i = 0 To 10
; j = Random(1000)
; Debug Str(i) + ": " + Str(j)
; DatabaseUpdate(DB, "INSERT INTO test VALUES (" + Str(j) + ")")
; Next i
DatabaseUpdate(DB, "INSERT INTO test VALUES (40)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (48)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (49)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (50)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (53)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (54)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (55)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (61)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (64)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (65)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (66)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (75)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (98)")
If DatabaseQuery(DB, "SELECT percentile(a, 40) FROM test")
While NextDatabaseRow(DB)
Debug GetDatabaseString(DB, 0)
Debug GetDatabaseString(DB, 1)
Wend
FinishDatabaseQuery(DB)
If sqlite3_errcode(DatabaseID(DB))
Debug PeekS(sqlite3_errmsg16(DatabaseID(DB)))
EndIf
Else
Debug DatabaseError()
EndIf
EndIf
CloseDatabase(DB)
EndIf
CompilerEndIf
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 4:30 pm
by infratec
I can only say: I have translated the C example to PB.
In my opinion it is correct.
But maybe you can find a difference.
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 5:27 pm
by skywalk
MS Excel, (=PERCENTILE(A1:A13, 0.40)), returns 53.8.
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 6:10 pm
by infratec
After reading a bit more ...
There is no standard definition of percentile.
The Wiki shows different methods which results in 3 dfferent results:
1. The nearest rank method
Only values included in the values can be returned.
2. linear interpolation between closest ranks method
Has 3 defined possibilites: C= 1/2 C=0 and C=1
All of them results in different results.
So please tell me what you want ?
As mentioned: if you have enough values the results get closer.
Also MS Excel changed the method between some versions.
Bernd
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 7:00 pm
by Oliver13
infratec wrote:After reading a bit more ...
There is no standard definition of percentile.
The Wiki shows different methods which results in 3 dfferent results:
1. The nearest rank method
Only values included in the values can be returned.
Sorry, my skills in statistics are very poor and I only knew the nearest rank method..
The result need to be a member of the values list
So it is method 1..
TIA
Oliver
Re: SQLite + Percentile ?
Posted: Tue Dec 26, 2017 10:14 pm
by infratec
Hi,
percentile nearest rank methode
see:
https://en.wikipedia.org/wiki/Percentile
Code: Select all
CompilerIf #PB_Compiler_IsMainFile
EnableExplicit
CompilerEndIf
ImportC ""
qsort(*base, nitems.l, size.l, *Prog)
;qsort(void *base, size_t nitems, size_t size, Int (*compar)(const void *, const void*))
EndImport
CompilerSelect #PB_Compiler_OS
CompilerCase #PB_OS_MacOS
ImportC "/Applications/PureBasic/purelibraries/macos/libraries/libpbsqlite3.a"
CompilerDefault
ImportC "sqlite3.lib"
CompilerEndSelect
sqlite3_create_function(DatabaseID, zFunctionName.p-utf8, nArg.l, eTextRep.l, *pApp, *xFunc, *xStep, *xFinal)
sqlite3_aggregate_context(*sqlite3_context, nBytes.l)
sqlite3_result_double(*sqlite3_context, dbl.d)
sqlite3_value_numeric_type.l(*sqlite3_value)
sqlite3_value_double.d(*sqlite3_value)
sqlite3_value_type.i(*sqlite3_value)
sqlite3_realloc(*void, int.l)
sqlite3_free(*void)
sqlite3_result_error(*sqlite3_context, char.p-utf8, int.l)
sqlite3_result_error_nomem(*sqlite3_context)
sqlite3_errcode.i(*db)
sqlite3_errmsg16.i(*sqlite3)
EndImport
#SQLITE_OK = 0
#SQLITE_UTF8 = 1
#SQLITE_INTEGER = 1
#SQLITE_FLOAT = 2
#SQLITE3_TEXT = 3
#SQLITE_BLOB = 4
#SQLITE_NULL = 5
Structure Percentile
nAlloc.i ; Number of slots allocated for a[]
nUsed.i ; Number of slots actually used in a[]
rPct.d ; 1.0 more than the value for P
*a.Double ; Array of Y values
EndStructure
Procedure.i sameValue(a.d, b.d)
a - b
ProcedureReturn Bool(a >= -0.001 And a <= 0.001)
EndProcedure
ProcedureC percentStep(*pCtx, argc.l, *argv)
Protected *ptr, *a
Protected *p.Percentile
Protected.d rPct
Protected.i eType
Protected.d y
Protected.i n
; Requirement 3: P must be a number between 0 And 100
*ptr = PeekI(*argv + 1 * SizeOf(Long))
eType = sqlite3_value_numeric_type(*ptr)
rPct = sqlite3_value_double(*ptr);
If (eType <> #SQLITE_INTEGER And eType <> #SQLITE_FLOAT) Or (rPct < 0.0 Or rPct > 100.0)
sqlite3_result_error(*pCtx, "2nd argument to percentile() is not a number between 0.0 And 100.0", -1)
ProcedureReturn
EndIf
; Allocate the session context.
*p = sqlite3_aggregate_context(*pCtx, SizeOf(Percentile))
If *p = 0
ProcedureReturn
EndIf
; Remember the P value. Throw an error If the P value is different
; from any prior row, per Requirement (2).
If *p\rPct = 0.0
*p\rPct = rPct; + 1.0
ElseIf Not sameValue(*p\rPct, rPct); + 1.0)
sqlite3_result_error(*pCtx, "2nd argument to percentile() is not the same For all input rows", -1)
ProcedureReturn
EndIf
; Ignore rows For which Y is NULL
*ptr = PeekI(*argv + 0 * SizeOf(Long))
eType = sqlite3_value_type(*ptr)
If eType = #SQLITE_NULL
ProcedureReturn
EndIf
; If Not NULL, then Y must be numeric. Otherwise throw an error.
; Requirement 4
If eType <> #SQLITE_INTEGER And eType <> #SQLITE_FLOAT
sqlite3_result_error(*pCtx, "1st argument to percentile() is not numeric", -1)
ProcedureReturn
EndIf
; Throw an error If the Y value is infinity Or NaN
y = sqlite3_value_double(*ptr)
If IsInfinity(y)
sqlite3_result_error(*pCtx, "Inf input to percentile()", -1)
ProcedureReturn
EndIf
; Allocate And store the Y
If *p\nUsed >= *p\nAlloc
n = *p\nAlloc * 2 + 250
*a = sqlite3_realloc(*p\a, 8 * n)
If *a = 0
sqlite3_free(*p\a)
ClearStructure(*p, Percentile)
sqlite3_result_error_nomem(*pCtx)
ProcedureReturn
EndIf
*p\nAlloc = n
*p\a = *a
EndIf
;*p\a[*p\nUsed] = y
PokeD(*p\a + 8 * *p\nUsed, y)
*p\nUsed + 1
EndProcedure
ProcedureC.l doubleCmp(*pA.Double, *pB.Double)
Protected Result.l
Result = 1
If *pA\d = *pB\d
Result = 0
ElseIf *pA\d < *pB\d
Result = -1
EndIf
ProcedureReturn Result
EndProcedure
ProcedureC percentFinal(*pCtx)
Protected *p.Percentile
Protected.i i1, i2
Protected.d v1, v2
Protected.d ix, vx
Protected.i i
Protected.d v11, v22
*p = sqlite3_aggregate_context(*pCtx, 0)
If *p
If *p\a <> 0
If *p\nUsed
If *p\rPct > 0
If *p\rPct = 100
vx = PeekD(*p\a + 8 * (*p\nUsed - 1))
Else
qsort(*p\a, *p\nUsed, SizeOf(double), @doubleCmp())
; Debug "-----"
; For i = 0 To *p\nUsed - 1
; Debug Str(i) + ": " + StrD(PeekD(*p\a + 8 * i))
; Next i
; Debug "+++++"
i1 = Round(*p\rPct / 100 * *p\nUsed, #PB_Round_Up) - 1
vx = PeekD(*p\a + 8 * i1)
EndIf
Else
vx = 0
EndIf
sqlite3_result_double(*pCtx, vx)
EndIf
sqlite3_free(*p\a)
EndIf
ClearStructure(*p, Percentile)
EndIf
EndProcedure
Procedure.i sqlite3_percentile_init(db.i)
ProcedureReturn Bool(sqlite3_create_function(DatabaseID(db), "percentile", 2, #SQLITE_UTF8, 0, 0, @percentStep(), @percentFinal()) = #SQLITE_OK)
EndProcedure
CompilerIf #PB_Compiler_IsMainFile
Define.i DB, i, j
UseSQLiteDatabase()
DB = OpenDatabase(#PB_Any, ":memory:", "", "")
If DB
If sqlite3_percentile_init(DB)
DatabaseUpdate(DB, "CREATE TABLE test(a INTEGER)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (15)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (20)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (35)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (40)")
DatabaseUpdate(DB, "INSERT INTO test VALUES (50)")
If DatabaseQuery(DB, "SELECT percentile(a, 50) FROM test")
While NextDatabaseRow(DB)
Debug "Res: " + GetDatabaseString(DB, 0)
Wend
FinishDatabaseQuery(DB)
If sqlite3_errcode(DatabaseID(DB))
Debug PeekS(sqlite3_errmsg16(DatabaseID(DB)))
EndIf
Else
Debug DatabaseError()
EndIf
EndIf
CloseDatabase(DB)
EndIf
CompilerEndIf
Bernd
Re: SQLite + Percentile ?
Posted: Wed Dec 27, 2017 7:47 am
by Oliver13
That works great, thank you very much !!!