Page 1 of 1

MergeSort Procedure for arrays using pointers.

Posted: Sun Feb 14, 2010 10:30 pm
by skywalk
I'm new to PureBasic and found that arrays are QuickSorted and unstable.
Not always a problem if array is one dimensional.
But, I use multi-dimensional arrays, and need to preserve the sort order for duplicates found in any particular column.
Take a look at the mergesort below and see if it helps.
Not sure if my implementation is the fastest, but it seems to work.
I have to extract a 1 dimensional array from the 2 dimensional array to send to the mergesort routine.
It returns a 1 dimensional Pointer array of the sorted elements' locations.
A simple ASCII csv file is output to show unsorted and mergesorted behavior.

Code: Select all

EnableExplicit

Procedure.i ar2D_1DD(Array x.d(2), Array xm.d(1), nPts.i, Index.i, fixed_elem.i=1)
  ; Convert 2D Array To 1D
  If nPts > 1
    Define.i i
    Dim xm.d(nPts - 1)
    If fixed_elem = 1
      For i = 0 To nPts - 1
        xm(i) = x(index, i)
      Next i
    Else
      For i = 0 To nPts - 1
        xm(i) = x(i, index)
      Next i
    EndIf
    ProcedureReturn 1
  Else
    ProcedureReturn 0
  EndIf
EndProcedure

Procedure.i arSortPD(Array ar.d(1), Array arp.i(1), iStart.i=0)
  ; REV:  051126, skywalk
  ; Return = sorted array of pointers, arp()
  ; ar() is unchanged by sort.
  ; Pointer array arp() must 'fit' within bounds of ar() to be sorted.
  ; iStart = start range of sort index.
  ; iStop  = ArraySize(arp()), stop range of sort.
  ; NOTES:
  ;   Stable(unlike QuickSort) list-merge sort algorithm from Knuth, Volume 3.
  ;   Position of duplicates are retained. 1st duplicate < next.
  ;   Operations = n*log(n) time.
  ;   QuickSort is faster in general, but can be longer with sorted arrays.
  ; 2 DATATYPE CHANGES REQUIRED:
  ; Change the calling parameter 'Array ar.d(1)' Type.
  Protected.d ari, arj, ark
  ; END OF DATATYPE CHANGES:
  Protected.i z, z2, p1, p2
  Protected.i krun, kother
  Protected.i i, j, k, Q
  Protected.i iStop = ArraySize(arp(),1)
  If iStop <= iStart
    If iStop < iStart
      ProcedureReturn 0
    EndIf
    arp(iStart) = iStart
    ProcedureReturn 0
  EndIf
  ; z for zero, < z for end-runs, z2-p negates pointer p
  z = iStart - 1
  z2 = 2 * z
  ; Use 2 ascending lists from 2 smallest values. p2 < p1.
  p2 = iStart
  p1 = p2 + 1
  If ar(p1) < ar(p2)
    p1 = p2
    p2 = p1 + 1
  EndIf
  krun = p1
  kother = p2
  arj = ar(p1)
  ark = ar(p1)
  For i = iStart + 2 To iStop
    ari = ar(i)           ; ari = current element
    If ari < arj          ; arj = ar(p1)
      If ari >= ar(p2)    ; Check for minimum
        arp(i) = p1
        p1 = i
      Else                ; New minimum
        arp(i) = arp(p2)
        arp(p2) = p1
        p1 = p2
        If kother = p2
          kother = i
        EndIf
        p2 = i
      EndIf
      arj = ar(p1)
    Else
      If ari >= ark       ; ark = last value in run
        arp(krun) = i     ; Store last
      Else                ; start new run
        arp(kother) = z2 - i
        kother = krun
      EndIf
      krun = i
      ark = ari
    EndIf
  Next i
  arp(krun) = z
  arp(kother) = z
  arp(p1) = z2 - arp(p1)
  Repeat                  ; Make multiple passes
    i = arp(p1)
    j = arp(p2)
    If j = z 
      Break
    EndIf
    krun = p1
    kother = p2
    Repeat                ; Loop through runs
      i = z2 - i
      j = z2 - j
      ari = ar(i)
      arj = ar(j)
      k = krun
      Repeat              ; Merge one run
        If ari <= arj 
          arp(k) = i
          k = i
          i = arp(i)
          If i <= z 
            arp(k) = j
            Repeat
              k = j
              j = arp(j)
            Until j <= z
            Break
          EndIf
          ari = ar(i)
        Else
          arp(k) = j
          k = j
          j = arp(j)
          If j <= z 
            arp(k) = i
            Repeat
              k = i
              i = arp(i)
            Until i <= z
            Break
          EndIf
          arj = ar(j)
        EndIf
      ForEver
      arp(krun) = z2 - arp(krun)  ; Start of run
      krun = kother
      kother = k                  ; Track end of current run
    Until j = z
    arp(krun) = i
    arp(kother) = z
  ForEver
  arp(p2) = p1
  arp(p1) = z2 - arp(p1)          ; Complete the sorted list
  ; Sort arp() in place.
  i = p2
  For j = z2 - iStart To z2 - iStop Step -1
    k = arp(p2)
    arp(p2) = j
    p2 = k
  Next j
  For i = iStart To iStop
    j = arp(i)
    If j < z 
      j = z2 - j
      Repeat
        Q = z2 - arp(j)
        arp(j) = k
        k = j
        j = Q
      Until j <= z
    EndIf
  Next i
  ProcedureReturn 1
EndProcedure

;-{ Example use of the stable Merge Sort
; To sort a 2D Array, 1st extract the column to be sorted, Xsorted(1).
; Sort that 1D array Xsorted
; Reassign values in the 2D array based on sorted 1D pointer values
#nr = 8
#nc = 4
#ColToSort = 2
#rndDuplicate = #nr - 4
Global Dim ar.d(#nc-1,#nr-1)
Global Dim ar2.d(#nc-1,#nr-1)
Global Dim Xsrch.d(#nr-1)
Global Dim aSorted.d(#nc-1,#nr-1)
Global Dim ap.i(#nr-1)
Define.i i,j, nPts, nDec = 8
Define.s hdr$, dOUT$, s$
nPts = #nr * #nc
; Create random data 2D array
RandomSeed(456)   ; Use same data in comparisons
For i=0 To #nc-1
  For j=0 To #nr-1
    ar(i,j) = 10+(Random(100) + Random(100)) / 10
    If i=#ColToSort And j>0 And j = #rndDuplicate
      ar(i,j) = ar(i,#rndDuplicate-(Random(#rndDuplicate)))
    EndIf
  Next j
Next i
CopyArray(ar(),ar2())
; Sort the 2D array by 1 column
ar2D_1DD(ar(), Xsrch(), #ColToSort) ; Grab a column of data
arSortPD(Xsrch(), aP())                 ; aP() contains the Final sorted Pointer values
; Build the sorted 2D array using aP()
For i = 0 To #nc-1
  For j = 0 To #nr-1
    asorted(i, j) = ar(i, aP(j))
  Next j
Next i
Debug "; -- Original --"
For j=0 To #nc-2
  s$ = RSet("C" + Str(j) + ",",12)
  hdr$ + s$
Next j
s$ = RSet("C" + Str(j),11)
hdr$ + s$ + ", SortBy"
Debug "; " + hdr$
For j = 0 To #nr-1
  dOUT$ = #NULL$
  For i = 0 To #nc-2
    dOUT$ + StrD(ar(i,j),nDec) + ","
  Next i
  dOUT$ + StrD(ar(i,j),nDec) + ", -"
  Debug "; " + dOUT$
Next j
Debug "; -- After Merge Sort --"
Debug "; " + hdr$
For j = 0 To #nr-1
  dOUT$ = #NULL$
  For i = 0 To #nc-2
    dOUT$ + StrD(asorted(i,j),nDec) + ","
  Next i
  dOUT$ + StrD(asorted(i,j),nDec) + ", C" + Str(#ColToSort)
  Debug "; " + dOUT$
Next j
Debug "; -- FAIL using PB SortArray() --"
SortArray(ar2(),#PB_Sort_Ascending) ; No way to sort on 1 column of 2D array :(
; Output results
Debug "; " + hdr$
For j = 0 To #nr-1
  dOUT$ = #NULL$
  For i = 0 To #nc-2
    dOUT$ + StrD(ar2(i,j),nDec) + ","
  Next i
  dOUT$ + StrD(ar2(i,j),nDec) + ", C" + Str(#ColToSort)
  Debug "; " + dOUT$
Next j
EDIT: changed some comments, output to Debug window instead of Ascii file.

Re: MergeSort Procedure for arrays using pointers.

Posted: Wed Mar 17, 2010 7:24 pm
by jamba
hey Steve, you must not have had inline ASM enabled when you compiled :P

I've made a few modifications:
OUTS -> OutStr
Put some of the other stuff in a procedure,which makes it a little more usable (for me)
changed "define" to "protected" inside the procedures (and removed a duplicate declaration)

That's about it.
Operationally, nothing was changed. This is pretty awesome!

Code: Select all

EnableExplicit
;ImportC "MSVCRT.LIB" ;<--don't need this for static lib
ImportC ""
  ; int sprintf( char *buffer, const char *format, ... )
  ; Output is sent to buffer. Return value is number of characters written. 
  ; Since variable arguments, must declare a specific function for each combination required.
  sPrintf.l(result.s,num_format.s,numsd.d) As "_sprintf"
EndImport

Procedure.i ar2Dto1D(Array x.d(2), Array xm.d(1), nPts.i, Index.i, fixed_elem.i=1)
  ; Convert 2D Array To 1D
  If nPts > 1
    Protected.i i
    Dim xm.d(nPts - 1)
    If fixed_elem = 1
      For i = 0 To nPts - 1
        xm(i) = x(index, i)
      Next i
    Else
      For i = 0 To nPts - 1
        xm(i) = x(i, index)
      Next i
    EndIf
    ProcedureReturn 1
  Else
    ProcedureReturn 0
  EndIf
EndProcedure

Procedure arSortKv3MergePD(Array v.d(1), Array L.i(1), Lbd.i=0)
  ; Numeric Pointer sort in ascending order
  ; Syntax:  arSortKv3MergePD(values(), pointers())
  ; The list-merge algorithm can be found in Knuth, Vol. 3.
  ; Notes:
  ; Use Lbd to adjust lower bounds of operation
  ; Both values and pointers should be one-dimensional arrays.  Both
  ; should have the same lower & upper bounds.  (To be exact, the
  ; values array may have extra elements -- it will be indexed by
  ; the bounds of pointers.)  On return, the values array will be
  ; unchanged, and the pointers array will be filled in with the
  ; indexes of elements of the values array, in order.  That
  ; is, the first element in pointers will be the index of the
  ; smallest element in values, and so forth.
  ; Duplicates are stored with 1st as smallest (the sort is "stable").
 
  Protected.d vi, vj, vk   ; <--- Change these for type used.
       ; .s for String, .i for Integer, etc.
       ; Remember to also change the Values array type v.d(1)
  Protected.i Ubd
  Protected.i z, zz, L1, L2
  Protected.i krun, kother
  Protected.i i, j, k, Q
  ; Array size check (zero or one elements) 
  Ubd = ArraySize(L())
  If Ubd <= Lbd
    If Ubd < Lbd
     ProcedureReturn
    EndIf
    L(Lbd) = Lbd
    ProcedureReturn
  EndIf
  z = Lbd - 1
  zz = 2 * z
  L2 = Lbd
  L1 = L2 + 1
  If v(L1) < v(L2)
    L1 = L2
    L2 = L1 + 1
  EndIf
  krun = L1
  kother = L2
  vj = v(L1)
  vk = v(L1)
  For i = Lbd + 2 To Ubd
    vi = v(i)         
    If vi < vj       
      If vi >= v(L2) 
        L(i) = L1
        L1 = i
      Else               
        L(i) = L(L2)
        L(L2) = L1
        L1 = L2
        If kother = L2
          kother = i
        EndIf
        L2 = i
      EndIf
      vj = v(L1)
    Else
      If vi >= vk       
        L(krun) = i     
      Else               
        L(kother) = zz - i
        kother = krun
      EndIf
      krun = i
      vk = vi
    EndIf
  Next i
  L(krun) = z
  L(kother) = z
  L(L1) = zz - L(L1)
  Repeat                 
    i = L(L1)
    j = L(L2)
    If j = z
      Break
    EndIf
    krun = L1
    kother = L2
    Repeat
      i = zz - i
      j = zz - j
      vi = v(i)
      vj = v(j)
      k = krun
      Repeat
        If vi <= vj
          L(k) = i
          k = i
          i = L(i)
          If i <= z
            L(k) = j
            Repeat
              k = j
              j = L(j)
            Until j <= z ; While j > z
            Break
          EndIf
          vi = v(i)
        Else
          L(k) = j
          k = j
          j = L(j)
          If j <= z
            L(k) = i
            Repeat
              k = i
              i = L(i)
            Until i <= z ; While i > z
            Break
          EndIf
          vj = v(j)
        EndIf
      ForEver
      L(krun) = zz - L(krun)   
      krun = kother   
      kother = k     
    Until j = z
    L(krun) = i
    L(kother) = z
  ForEver
  L(L2) = L1
  L(L1) = zz - L(L1) 
  i = L2
  For j = zz - Lbd To zz - Ubd Step -1
    k = L(L2)
    L(L2) = j
    L2 = k
  Next j
  For i = Lbd To Ubd
    j = L(i)
    If j < z
      j = zz - j
      Repeat
        Q = zz - L(j)
        L(j) = k
        k = j
        j = Q
      Until j <= z ; While j > z
    EndIf
  Next i
EndProcedure

Procedure.i MergeSort2DArray(Array sortedArr2D.d(2),Array origArr2D.d(2),numCols.i,nRow.i,ColToSort.i)
  ;origArr2D() is preserved, sortedArr2D() is returned as a copy of the information from the original array (but sorted)
  Protected Dim arr1D.d(nRow-1)
  Protected Dim arrPtrs.i(nRow-1)
  Protected.i i,j,ri
  
  ri = ar2Dto1D(origArr2D(), arr1D(), nRow, ColToSort)  ; Grab the specified column of data
  arSortKv3MergePD(arr1D(), arrPtrs())          ; aP() contains the Final sorted Pointer values
                                                ; Xsrch() is unchanged
  For i = 0 To numCols-1
    For j = 0 To nRow-1
      sortedArr2D(i, j) = origArr2D(i, arrPtrs(j))
    Next j
  Next i   
  ProcedureReturn ri                                   
EndProcedure

;-{ Example use of the MergeSort
; To sort a 2D Array, 1st extract the column to be sorted, Xsorted(1).
; Sort that 1D array Xsorted
; Reassign values in the 2D array based on sorted 1D pointer values

#nr=10
#nc=10
#scol=3
#rndDuplicate=5
Global Dim ar.d(#nc-1,#nr-1)
Global Dim aSorted.d(#nc-1,#nr-1)
Define.i i,j
Define.s hdrS, OutStr, s, sp
s = Space(8)

;load array
For i=0 To #nc-1
  For j=0 To #nr-1
    ar(i,j) = 10+(Random(100) + Random(100)) / 10           
    If i=#scol And j>0 And j = #rndDuplicate
      ar(i,j) = ar(i,#rndDuplicate-(Random(#rndDuplicate)))
    EndIf
  Next j
Next i

If MergeSort2DArray(aSorted(),ar(),#nc,#nr,#scol)
  If CreateFile(1,"c:\sortmerge.csv")
    For j=0 To #nc-2
      s = RSet("C" + Str(j) + ",",9)
      hdrS = hdrS + s   
    Next j 
    s = RSet("C" + Str(j),8)   
    hdrS = hdrS + s + ", SortBy"
    WriteStringN(1, hdrS,#PB_Ascii)
  
    sp = "%8.2lf"
    For j = 0 To #nr-1
      OutStr=""
      For i = 0 To #nc-2
        sprintf(s,sp,ar(i,j))
        OutStr = OutStr + s + ","     
      Next i
      sprintf(s,sp,ar(i,j))
      OutStr = OutStr + s + ", -"
      WriteStringN(1, OutStr, #PB_Ascii)
    Next j
    For j = 0 To #nr-1
      OutStr=""
      For i = 0 To #nc-2
        sprintf(s,sp,asorted(i,j))
        OutStr = OutStr + s + ","     
      Next i
      sprintf(s,sp,asorted(i,j))
      OutStr = OutStr + s + ", C" + Str(#scol)
      WriteStringN(1, OutStr, #PB_Ascii)
    Next j
    CloseFile(1)
  EndIf
Else
  MessageRequester("array sort","sort failed")
EndIf
End
;-} End