Thanks Jack. This sub is easily to convert and do away with pointers but this sub is part of a larger group of c routines that use pointers and compute the fast Kendall Tau statistic with O(N log N) speed instead of O(N^2) speed. As you can see from the C code below these group of functions are not so easily converted from pointers. Below is the Fast Kendall Tau C Code. I figured if I could convert the C pointer code KendallSmallN to a PB pointer code, I could convert the rest.
Code: Select all
/*-------------------------------------------------------------------------*/
/* This file contains code to calculate Kendall's Tau in O(N log N) time in
* a manner similar to the following reference:
*
* A Computer Method for Calculating Kendall's Tau with Ungrouped Data
* William R. Knight Journal of the American Statistical Association,
* Vol. 61, No. 314, Part 1 (Jun., 1966), pp. 436-439
*
* Copyright 2010 David Simcha
*
* License:
* Boost Software License - Version 1.0 - August 17th, 2003
*
* Permission is hereby granted, free of charge, to any person or organization
* obtaining a copy of the software and accompanying documentation covered by
* this license (the "Software") to use, reproduce, display, distribute,
* execute, and transmit the Software, and to prepare derivative works of the
* Software, and to permit third-parties to whom the Software is furnished to
* do so, all subject to the following:
*
* The copyright notices in the Software and this entire statement, including
* the above license grant, this restriction and the following disclaimer,
* must be included in all copies of the Software, in whole or in part, and
* all derivative works of the Software, unless such copies or derivative
* works are solely in the form of machine-executable object code generated by
* a source language processor.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
* SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
* FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*/
/*-------------------------------------------------------------------------*/
/**---------------- Adapted for AFNI by RWCox - April 2010 ---------------**/
/*-------------------------------------------------------------------------*/
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <stdio.h>
#include <assert.h>
#include <time.h>
#ifdef SOLARIS_OLD /* 'if' might be overkill, but just to be minimal... */
/* 10 May 2010 [rickr] */
#include "machdep.h"
#else
#include <stdint.h>
#endif
/*-------------------------------------------------------------------------*/
/* Sorts in place, returns the bubble sort distance between the input array
* and the sorted array.
*/
static int insertionSort(float *arr, int len)
{
int maxJ, i,j , swapCount = 0;
/* printf("enter insertionSort len=%d\n",len) ; */
if(len < 2) { return 0; }
maxJ = len - 1;
for(i = len - 2; i >= 0; --i) {
float val = arr[i];
for(j=i; j < maxJ && arr[j + 1] < val; ++j) {
arr[j] = arr[j + 1];
}
arr[j] = val;
swapCount += (j - i);
}
return swapCount;
}
/*-------------------------------------------------------------------------*/
static int merge(float *from, float *to, int middle, int len)
{
int bufIndex, leftLen, rightLen , swaps ;
float *left , *right;
/* printf("enter merge\n") ; */
bufIndex = 0;
swaps = 0;
left = from;
right = from + middle;
rightLen = len - middle;
leftLen = middle;
while(leftLen && rightLen) {
if(right[0] < left[0]) {
to[bufIndex] = right[0];
swaps += leftLen;
rightLen--;
right++;
} else {
to[bufIndex] = left[0];
leftLen--;
left++;
}
bufIndex++;
}
if(leftLen) {
#pragma omp critical (MEMCPY)
memcpy(to + bufIndex, left, leftLen * sizeof(float));
} else if(rightLen) {
#pragma omp critical (MEMCPY)
memcpy(to + bufIndex, right, rightLen * sizeof(float));
}
return swaps;
}
/*-------------------------------------------------------------------------*/
/* Sorts in place, returns the bubble sort distance between the input array
* and the sorted array.
*/
static int mergeSort(float *x, float *buf, int len)
{
int swaps , half ;
/* printf("enter mergeSort\n") ; */
if(len < 10) {
return insertionSort(x, len);
}
swaps = 0;
if(len < 2) { return 0; }
half = len / 2;
swaps += mergeSort(x, buf, half);
swaps += mergeSort(x + half, buf + half, len - half);
swaps += merge(x, buf, half, len);
#pragma omp critical (MEMCPY)
memcpy(x, buf, len * sizeof(float));
return swaps;
}
/*-------------------------------------------------------------------------*/
static int getMs(float *data, int len) /* Assumes data is sorted */
{
int Ms = 0, tieCount = 0 , i ;
/* printf("enter getMs\n") ; */
for(i = 1; i < len; i++) {
if(data[i] == data[i-1]) {
tieCount++;
} else if(tieCount) {
Ms += (tieCount * (tieCount + 1)) / 2;
tieCount = 0;
}
}
if(tieCount) {
Ms += (tieCount * (tieCount + 1)) / 2;
}
return Ms;
}
/*-------------------------------------------------------------------------*/
/* This function calculates the Kendall correlation tau_b.
* The arrays arr1 should be sorted before this call, and arr2 should be
* re-ordered in lockstep. This can be done by calling
* qsort_floatfloat(len,arr1,arr2)
* for example.
* Note also that arr1 and arr2 will be modified, so if they need to
* be preserved, do so before calling this function.
*/
float kendallNlogN( float *arr1, float *arr2, int len )
{
int m1 = 0, m2 = 0, tieCount, swapCount, nPair, s,i ;
float cor ;
/* printf("enter kendallNlogN\n") ; */
if( len < 2 ) return (float)0 ;
nPair = len * (len - 1) / 2;
s = nPair;
tieCount = 0;
for(i = 1; i < len; i++) {
if(arr1[i - 1] == arr1[i]) {
tieCount++;
} else if(tieCount > 0) {
insertionSort(arr2 + i - tieCount - 1, tieCount + 1);
m1 += tieCount * (tieCount + 1) / 2;
s += getMs(arr2 + i - tieCount - 1, tieCount + 1);
tieCount = 0;
}
}
if(tieCount > 0) {
insertionSort(arr2 + i - tieCount - 1, tieCount + 1);
m1 += tieCount * (tieCount + 1) / 2;
s += getMs(arr2 + i - tieCount - 1, tieCount + 1);
}
swapCount = mergeSort(arr2, arr1, len);
m2 = getMs(arr2, len);
s -= (m1 + m2) + 2 * swapCount;
if( m1 < nPair && m2 < nPair )
cor = s / ( sqrtf((float)(nPair-m1)) * sqrtf((float)(nPair-m2)) ) ;
else
cor = 0.0f ;
return cor ;
}
/*-------------------------------------------------------------------------*/
/* This function uses a simple O(N^2) implementation. It probably has a
* smaller constant and therefore is useful in the small N case, and is also
* useful for testing the relatively complex O(N log N) implementation.
*/
float kendallSmallN( float *arr1, float *arr2, int len )
{
int m1 = 0, m2 = 0, s = 0, nPair , i,j ;
float cor ;
/* printf("enter kendallSmallN\n") ; */
for(i = 0; i < len; i++) {
for(j = i + 1; j < len; j++) {
if(arr2[i] > arr2[j]) {
if (arr1[i] > arr1[j]) {
s++;
} else if(arr1[i] < arr1[j]) {
s--;
} else {
m1++;
}
} else if(arr2[i] < arr2[j]) {
if (arr1[i] > arr1[j]) {
s--;
} else if(arr1[i] < arr1[j]) {
s++;
} else {
m1++;
}
} else {
m2++;
if(arr1[i] == arr1[j]) {
m1++;
}
}
}
}
nPair = len * (len - 1) / 2;
if( m1 < nPair && m2 < nPair )
cor = s / ( sqrtf((float)(nPair-m1)) * sqrtf((float)(nPair-m2)) ) ;
else
cor = 0.0f ;
return cor ;
}
#if 0
int main( int argc , char *argv[] )
{
float a[100], b[100];
float smallNCor, smallNCov, largeNCor, largeNCov;
int i;
int N ;
#define NBOT 10
#define NTOP 2000
#define NSTEP 10
/* Test the small N version against a few values obtained from the old
* version in R. Only exercising the small N version because the large
* N version requires the first array to be sorted and the second to be
* reordered in lockstep before it's called.*/
{
float a1[] = {1,2,3,5,4};
float a2[] = {1,2,3,3,5};
float b1[] = {8,6,7,5,3,0,9};
float b2[] = {3,1,4,1,5,9,2};
float c1[] = {1,1,1,2,3,3,4,4};
float c2[] = {1,2,1,3,3,5,5,5};
printf("kSmall 5 = %g\n", kendallSmallN(a1, a2, 5) ) ;
printf("kSmall 7 = %g\n", kendallSmallN(b1, b2, 7) ) ;
printf("kSmall 8 = %g\n", kendallSmallN(c1, c2, 8) ) ;
}
/* Now that we're confident that the simple, small N version works,
* extensively test it against the much more complex and bug-prone
* O(N log N) version.
*/
for(i = 0; i < 100; i++) {
int j, len;
for(j = 0; j < 100; j++) {
a[j] = rand() % 30;
b[j] = rand() % 30;
}
len = rand() % 50 + 50;
/* The large N version assumes that the first array is sorted. This
* will usually be made true in R before passing the arrays to the
* C functions.
*/
insertionSort(a, len);
smallNCor = kendallSmallN(a, b, len);
largeNCor = kendallNlogN (a, b, len);
printf("Cor: kSmall = %g kNlogN = %g\n",smallNCor,largeNCor) ;
}
printf("-- short tests done --\n") ;
/* Speed test. Compare the O(N^2) version, which is very similar to
* R's current impl, to my O(N log N) version.
*/
{
float *foo, *bar, *buf;
int i;
float startTime, stopTime;
foo = (float *) malloc(NTOP * sizeof(float));
bar = (float *) malloc(NTOP * sizeof(float));
buf = (float *) malloc(NTOP * sizeof(float));
for( N=NBOT ; N <= NTOP ; N += NSTEP ){
for(i = 0; i < N; i++) {
foo[i] = rand();
bar[i] = rand();
}
startTime = clock();
smallNCor = kendallSmallN(foo, bar, N);
stopTime = clock();
printf("N=%d: slow = %f ms val = %g\n", N,stopTime - startTime,smallNCor);
startTime = clock();
/* Only sorting first array. Normally the second one would be
* reordered in lockstep.
*/
#if 1
qsort_floatfloat( N , foo , bar ) ;
#else
mergeSort(foo, buf, N);
#endif
largeNCor = kendallNlogN(foo, bar, N);
stopTime = clock();
printf("N=%d: fast = %f ms val = %g\n", N,stopTime - startTime,largeNCor);
}
}
return 0;
}
#endif