If you are writing 64k intros, you cannot rely on the runtime library for some basic FPU functions. Most of them are provided as intrinsics by VC++, but a few you have to implement yourself.
Most of the fpu assembly code in here was written or found by ryg.
This is a header that gives you access to most basic fpu functions. Note that you don't need to include math.h or anything else to get this working.
Code: Select all
/****************************************************************************/
/*** ***/
/*** Intrinsics ***/
/*** ***/
/****************************************************************************/
// Local size_t so that no CRT header has to be included.
// NOTE(review): unsigned int matches size_t only on a 32-bit target — confirm
// this header is never built for x64.
typedef unsigned int size_t;
// C-linkage prototypes for the routines named in the #pragma intrinsic lines
// below; they are declared by hand instead of including math.h / string.h.
extern "C"
{
int __cdecl abs(int);
double __cdecl atan(double);
double __cdecl atan2(double,double);
double __cdecl cos(double);
double __cdecl exp(double);
double __cdecl fabs(double);
double __cdecl log(double);
double __cdecl log10(double);
double __cdecl sin(double);
double __cdecl sqrt(double);
double __cdecl tan(double);
double __cdecl acos(double);
double __cdecl asin(double);
double __cdecl cosh(double);
double __cdecl fmod(double,double);
double __cdecl pow(double,double);
double __cdecl sinh(double);
double __cdecl tanh(double);
void * __cdecl memset( void *dest, int c, size_t count );
void * __cdecl memcpy( void *dest, const void *src, size_t count );
int __cdecl memcmp( const void *buf1, const void *buf2, size_t count );
size_t __cdecl strlen( const char *string );
}
// Ask the compiler to use its intrinsic form of each routine declared above.
// The groups follow the original author's classification.
#pragma intrinsic (abs) // int intrinsic
#pragma intrinsic (memset,memcpy,memcmp,strlen) // memory intrinsic
#pragma intrinsic (atan,atan2,cos,exp,log,log10,sin,sqrt,tan,fabs) // true intrinsic
// NOTE(review): "fake" presumably means these can still end up calling into
// the runtime library — verify for any of them used in a CRT-less build.
#pragma intrinsic (acos,asin,cosh,fmod,pow,sinh,tanh) // fake intrinsic
// Absolute value of an integer; forwards to the abs intrinsic.
__forceinline sInt sAbs(sInt i)
{
  return abs(i);
}

// Fill c bytes starting at dd with the byte value s (memset).
__forceinline void sSetMem(sPtr dd,sInt s,sInt c)
{
  memset(dd,s,c);
}

// Copy c bytes from ss to dd (memcpy).
__forceinline void sCopyMem(sPtr dd,const void *ss,sInt c)
{
  memcpy(dd,ss,c);
}

// Compare c bytes of dd and ss; returns the memcmp result converted to sInt.
__forceinline sInt sCmpMem(const sPtr dd,const void *ss,sInt c)
{
  return static_cast<sInt>(memcmp(dd,ss,c));
}

// Length of the zero-terminated string s (strlen), converted to sInt.
__forceinline sInt sGetStringLen(const sChar *s)
{
  return static_cast<sInt>(strlen(s));
}
// Thin sF64 wrappers; each one forwards directly to the routine of the same
// meaning declared in the extern "C" block.

// atan(f)
__forceinline sF64 sFATan(sF64 f)
{
  return atan(f);
}

// atan2(a,b)
__forceinline sF64 sFATan2(sF64 a,sF64 b)
{
  return atan2(a,b);
}

// cos(f)
__forceinline sF64 sFCos(sF64 f)
{
  return cos(f);
}

// fabs(f)
__forceinline sF64 sFAbs(sF64 f)
{
  return fabs(f);
}

// log(f)
__forceinline sF64 sFLog(sF64 f)
{
  return log(f);
}

// log10(f)
__forceinline sF64 sFLog10(sF64 f)
{
  return log10(f);
}

// sin(f)
__forceinline sF64 sFSin(sF64 f)
{
  return sin(f);
}

// sqrt(f)
__forceinline sF64 sFSqrt(sF64 f)
{
  return sqrt(f);
}

// acos(f)
__forceinline sF64 sFACos(sF64 f)
{
  return acos(f);
}

// asin(f)
__forceinline sF64 sFASin(sF64 f)
{
  return asin(f);
}

// cosh(f)
__forceinline sF64 sFCosH(sF64 f)
{
  return cosh(f);
}

// sinh(f)
__forceinline sF64 sFSinH(sF64 f)
{
  return sinh(f);
}

// tanh(f)
__forceinline sF64 sFTanH(sF64 f)
{
  return tanh(f);
}

// Reciprocal square root, computed as 1.0 divided by sqrt(f).
__forceinline sF64 sFInvSqrt(sF64 f)
{
  return 1.0/sqrt(f);
}
// sFPow, sFMod and sFExp:
//  - with sINTRO set they are only declared here, so an out-of-line
//    definition has to be linked in;
//  - otherwise they are inline forwards to pow, fmod and exp.
#if sINTRO
sF64 sFPow(sF64 a,sF64 b);
sF64 sFMod(sF64 a,sF64 b);
sF64 sFExp(sF64 f);
#else
__forceinline sF64 sFMod(sF64 a,sF64 b)
{
  return fmod(a,b);
}

__forceinline sF64 sFExp(sF64 f)
{
  return exp(f);
}

__forceinline sF64 sFPow(sF64 a,sF64 b)
{
  return pow(a,b);
}
#endif
/****************************************************************************/
/*** ***/
/*** asm ***/
/*** ***/
/****************************************************************************/
#pragma warning (disable : 4035)
// Load x87 control word 0x103F (round to nearest, single precision per the
// original comment). Pending FPU exception flags are cleared first.
// The value is passed through a temporary stack slot; eax is overwritten
// when that slot is popped.
__forceinline void sFloatFix()
{
__asm
{
fclex; // clear pending x87 exception flags
push 0103fh; // round to nearest even + single precision
fldcw [esp]; // load the control word from the value just pushed
pop eax; // release the stack slot; eax is only a scratch destination
}
}
// Load x87 control word 0x123F (round to nearest, double precision per the
// original comment). Pending FPU exception flags are cleared first.
// The value is passed through a temporary stack slot; eax is overwritten
// when that slot is popped.
__forceinline void sFloatDouble()
{
__asm
{
fclex; // clear pending x87 exception flags
push 0123fh; // round to nearest even + double precision
fldcw [esp]; // load the control word from the value just pushed
pop eax; // release the stack slot; eax is only a scratch destination
}
}
// Load x87 control word 0x141F after clearing pending FPU exception flags.
// NOTE(review): decoded against the x87 control-word layout, 0x141F selects
// rounding toward -infinity, single precision, and leaves bit 5 (precision /
// inexact exception mask) clear, i.e. that exception is unmasked. The intent
// behind the "Den1" name is not visible here — confirm with callers.
// eax is overwritten when the temporary stack slot is popped.
__forceinline void sFloatDen1()
{
__asm
{
fclex; // clear pending x87 exception flags
push 0141fh; // control word value, see NOTE above
fldcw [esp]; // load the control word from the value just pushed
pop eax; // release the stack slot; eax is only a scratch destination
}
}
// Load x87 control word 0x143F after clearing pending FPU exception flags.
// NOTE(review): decoded against the x87 control-word layout, 0x143F selects
// rounding toward -infinity and single precision with all six exception
// masks set; it differs from the 0x141F used by sFloatDen1 only in bit 5.
// The intent behind the "Den0" name is not visible here — confirm with callers.
// eax is overwritten when the temporary stack slot is popped.
__forceinline void sFloatDen0()
{
__asm
{
fclex; // clear pending x87 exception flags
push 0143fh; // control word value, see NOTE above
fldcw [esp]; // load the control word from the value just pushed
pop eax; // release the stack slot; eax is only a scratch destination
}
}
// Convert f to a 32-bit integer with fistp.
// fistp rounds according to the rounding mode currently set in the FPU
// control word, so despite the "ftol" name the result is only a truncation
// if the control word selects truncation.
// The result is left in eax; there is no return statement, which is why
// warning 4035 is disabled around this section.
__forceinline sInt sFtol (const float f)
{
__asm
{
// push f onto the FPU stack
fld f
// reserve a 4-byte stack slot (the pushed value itself is irrelevant)
push eax
// store the converted integer into the slot and pop the FPU stack
fistp dword ptr [esp]
// fetch the result into eax and release the slot
pop eax
}
}
// Round f to an integral value with frndint, which uses the rounding mode
// currently set in the FPU control word.
// The result is left in st(0) as the return value; there is no return
// statement, which is why warning 4035 is disabled around this section.
__forceinline sF32 sFRound (const float f)
{
__asm
{
// st(0) = f
fld f
// st(0) = f rounded to an integer, still in floating-point format
frndint
}
}
// Compute sin(x) and cos(x) with a single fsincos and store them through the
// two reference parameters. eax is used as scratch for the target addresses.
// NOTE(review): fsincos leaves its operand untouched (C2 set) for very large
// |x|; this code does not check for that, so the second fstp would then pop
// an empty register — confirm callers keep x in range.
__forceinline void sFSinCos(const float x,sF32 &sine,sF32 &cosine)
{
__asm
{
fld x; // st(0) = x
fsincos; // st(0) = cos(x), st(1) = sin(x)
mov eax,[cosine]; // eax = address the cosine reference refers to
fstp dword ptr [eax]; // store cos(x) and pop, leaving sin(x) in st(0)
mov eax,[sine]; // eax = address the sine reference refers to
fstp dword ptr [eax]; // store sin(x) and pop; FPU stack is balanced again
}
}
// Compute var_a * var_b / var_c with a 64-bit intermediate product.
// imul leaves the signed 64-bit product in edx:eax and idiv divides that
// pair by var_c, leaving the quotient in eax (the return value) and the
// remainder in edx.
// idiv raises a divide error if var_c is zero or the quotient does not fit
// in 32 bits; no check is made here.
// There is no return statement; the value is returned implicitly in eax.
__forceinline sInt sMulDiv(sInt var_a,sInt var_b,sInt var_c)
{
__asm
{
// eax = var_a
mov eax,var_a
// edx:eax = var_a * var_b (signed, 64-bit)
imul var_b
// eax = edx:eax / var_c
idiv var_c
}
}
// Compute (var_a * var_b) >> 16 using the full 64-bit product, i.e. the
// result is bits 16..47 of the signed product (as used for e.g. 16.16
// fixed-point multiplication).
// There is no return statement; the value is returned implicitly in eax.
__forceinline sInt sMulShift(sInt var_a,sInt var_b)
{
__asm
{
// eax = var_a
mov eax, var_a
// edx:eax = var_a * var_b (signed, 64-bit)
imul var_b
// shift eax right by 16, filling the vacated top bits from edx
shrd eax, edx, 16
}
}
// Compute (var_a << 16) / var_b with a 64-bit dividend (as used for e.g.
// 16.16 fixed-point division).
// The dividend is assembled in edx:eax: eax holds the low 16 bits of var_a
// shifted up, edx holds the sign-extended upper 16 bits.
// idiv raises a divide error if var_b is zero or the quotient does not fit
// in 32 bits; no check is made here.
// There is no return statement; the value is returned implicitly in eax.
__forceinline sInt sDivShift(sInt var_a,sInt var_b)
{
__asm
{
// eax = var_a
mov eax,var_a
// edx = var_a as well; it becomes the high half of the dividend
mov edx,eax
// low dword of (var_a << 16)
shl eax,16
// high dword of (var_a << 16), sign-extended
sar edx,16
// eax = edx:eax / var_b
idiv var_b
}
}
#pragma warning (default : 4035)
Some floating-point functions are not true intrinsics: even the built-in versions rely on the runtime library. Here are implementations that do not:
// Floating-point remainder of a / b, implemented without the runtime library.
//
// fprem only computes a *partial* remainder: one execution reduces the
// exponent difference between a and b by at most 63 bits and sets status
// flag C2 while the reduction is still incomplete. A single fprem therefore
// returns a wrong value when a is much larger than b, so the instruction is
// repeated until C2 is clear.
//
// a, b    dividend and divisor
// returns the remainder; it is written back into a and returned from there
// eax is used as scratch for the FPU status word.
sF64 sFMod(sF64 a,sF64 b)
{
  __asm
  {
    fld qword ptr [b];      // st(0) = b
    fld qword ptr [a];      // st(0) = a, st(1) = b
  reduce:
    fprem;                  // st(0) = partial remainder of st(0) / st(1)
    fstsw ax;               // ax = FPU status word
    sahf;                   // C2 (status bit 10) lands in the parity flag
    jp reduce;              // C2 set -> reduction incomplete, run fprem again
    fstp st(1);             // drop b, keep the remainder in st(0)
    fstp qword ptr [a];     // store the remainder into a and pop
  }
  return a;
}
// a raised to the power b, computed on the x87 FPU without the runtime library.
//
// Method: y = b*log2(a) is split into an integer part n and a fraction f.
// 2^f is computed with f2xm1, 2^n is built by hand as an 80-bit extended
// value (mantissa 1.0, biased exponent n+0x3fff) in 12 bytes of scratch
// stack, and the two are multiplied.
//
// The low dword of parameter a is reused as integer scratch after a has been
// loaded. The result is left in st(0); there is no return statement.
// If a compares equal to zero (or unordered) the function returns a itself,
// whatever b is.
// NOTE(review): negative a does not appear to be supported (fyl2x has no
// real result for a < 0) — confirm callers only pass a >= 0.
sF64 sFPow(sF64 a,sF64 b)
{
// faster pow based on code by agner fog
__asm
{
fld qword ptr [b]; // st(0) = b
fld qword ptr [a]; // st(0) = a, st(1) = b
ftst; // compare a with 0.0
fstsw ax; // ax = FPU status word
sahf; // C3 -> ZF
jz zero; // a == 0 (or unordered): return a
fyl2x; // st(0) = y = b*log2(a), FPU stack depth is now 1
fist dword ptr [a]; // n = y rounded to integer, stored in the low dword of a
sub esp, 12; // scratch for one 80-bit value (10 bytes are read back)
mov dword ptr [esp],0; // mantissa bits 0..31 = 0
mov dword ptr [esp+4],0x80000000; // mantissa bits 32..63: only the integer bit -> 1.0
fisub dword ptr [a]; // st(0) = f = y - n
mov eax, dword ptr [a]; // eax = n
add eax, 0x3fff; // add the extended-precision exponent bias
mov [esp+8], eax; // sign/exponent field; mov leaves the flags of the add intact
jle underflow; // biased exponent <= 0: result too small
cmp eax, 0x8000;
jge overflow; // biased exponent does not fit in 15 bits: result too large
f2xm1; // st(0) = 2^f - 1
fld1;
fadd; // no-operand form (popping): st(0) = 2^f
fld tbyte ptr [esp]; // st(0) = 2^n, st(1) = 2^f
add esp, 12; // release the scratch space
fmul; // no-operand form (popping): st(0) = 2^n * 2^f
jmp end;
underflow:
fstp st; // discard f
fldz; // return 0.0
add esp, 12; // release the scratch space
jmp end;
overflow:
push 0x7f800000; // bit pattern of single-precision +infinity
fstp st; // discard f
fld dword ptr [esp]; // return +infinity
add esp, 16; // release the pushed dword plus the 12 bytes of scratch
jmp end;
zero:
fstp st(1); // drop b, leaving a in st(0) as the result
end:
}
}
// e raised to the power f, computed on the x87 FPU without the runtime library.
//
// Method: y = f*log2(e) is split into a fractional part (via fprem against
// 1.0) and the remaining integer part; 2^fraction comes from f2xm1 and is
// then scaled by 2^integer with fscale.
// The result is written back into f and returned from there.
sF64 sFExp(sF64 f)
{
__asm
{
fld qword ptr [f]; // st(0) = f
fldl2e; // st(0) = log2(e), st(1) = f
fmulp st(1), st; // st(0) = y = f*log2(e)
fld1; // st(0) = 1, st(1) = y
fld st(1); // st(0) = y, st(1) = 1, st(2) = y
fprem; // st(0) = remainder of y / 1 = fractional part of y
f2xm1; // st(0) = 2^frac - 1
faddp st(1), st; // st(0) = 2^frac, st(1) = y
fscale; // st(0) = 2^frac * 2^trunc(y)
fstp st(1); // drop y, keep the result in st(0)
fstp qword ptr [f]; // store the result into f and pop
}
return f;
}
