AsmLib: http://www.agner.org/optimize/asmlib.zip
Asmlib is a function library that can be called from C or C++, and now from PureBasic (PB), on all x86 and x86-64 platforms. It is not
intended to be a complete function library, but contains mainly:
• Faster versions of several standard C functions
• Useful functions that are difficult to find elsewhere
• Functions that are best written in assembly language
• Efficient random number generators
These functions are written in assembly language for the sake of optimizing speed. Many of
the functions have multiple branches for different instruction sets, such as SSE2, SSE4.2,
AVX, AVX2, etc. These functions will automatically detect which instruction set is supported
by the computer they are running on and select the optimal branch.
This library is also intended as a showcase to illustrate the optimization methods explained
in my optimization manuals and an example of how to make a cross-platform function
library.
The latest version of asmlib is always available at http://www.agner.org/optimize.
Code (include file, asmlib.pbi):
EnableExplicit

; Emit a run-time hint in the debug output when the program is compiled in
; Unicode mode with the debugger enabled. The message tells the user that the
; asmlib string functions cannot be used with PB strings in that mode.
CompilerIf #PB_Compiler_Unicode
  CompilerIf #PB_Compiler_Debugger
    Debug "ASMLIB: In Unicode mode you can't use the asmlib string functions with pb strings"
  CompilerEndIf
CompilerEndIf
CompilerIf Defined(AGNERFOG_ASMLIB_H, #PB_Constant) = 0
  ; Include guard: everything below is compiled only the first time this file is included.
  #AGNERFOG_ASMLIB_H = 1

  ;- NOTES
  ;
  ; Define #ASMLIB_OVERRIDE_STANDARD_LIBRARY (before including this file) to override
  ; the standard library with these optimized functions.
  ;
  IncludePath #PB_Compiler_FilePath

  ; Pick the library file that matches the target processor. The file names ending
  ; in "o" are selected when #ASMLIB_OVERRIDE_STANDARD_LIBRARY is defined.
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
    CompilerIf Defined(ASMLIB_OVERRIDE_STANDARD_LIBRARY, #PB_Constant) = 1
      #ASMLIB_LIB = #PB_Compiler_FilePath + "\asmlib\libacof32o.lib"
    CompilerElse
      #ASMLIB_LIB = #PB_Compiler_FilePath + "\asmlib\libacof32.lib"
    CompilerEndIf
  CompilerElse ; x64
    CompilerIf Defined(ASMLIB_OVERRIDE_STANDARD_LIBRARY, #PB_Constant) = 1
      #ASMLIB_LIB = #PB_Compiler_FilePath + "\asmlib\libacof64o.lib"
    CompilerElse
      #ASMLIB_LIB = #PB_Compiler_FilePath + "\asmlib\libacof64.lib"
    CompilerEndIf
  CompilerEndIf

  ; In override mode the library is handed to the linker through a /DEFAULTLIB:
  ; option (carried by an empty ImportC block) and the function declarations
  ; below are imported with an empty file name. Otherwise the declarations are
  ; imported directly from the selected library file.
  CompilerIf Defined(ASMLIB_OVERRIDE_STANDARD_LIBRARY, #PB_Constant) = 1
    ImportC "/DEFAULTLIB:"+#ASMLIB_LIB
    EndImport
    #ASMLIB_IMP_FILE = ""
  CompilerElse
    #ASMLIB_IMP_FILE = #ASMLIB_LIB
  CompilerEndIf

  ; Open the import block with the calling convention used on each platform.
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
    ImportC #ASMLIB_IMP_FILE ; CDECL
  CompilerElse ; x64 - FASTCALL
    Import #ASMLIB_IMP_FILE
  CompilerEndIf

    ; memory functions
    A_memcpy.i(*dest, *src, count.i)              ; Copy count bytes from src to dest
    A_memmove.i(*dest, *src, count.i)             ; Same as memcpy, allows overlap between src and dest
    A_memset.i(*dest, c.l, count.i)               ; Set count bytes in dest to (char)c
    A_memcmp.l(*buf1, *buf2, num.i)               ; Compares two blocks of memory
    GetMemcpyCacheLimit.i()                       ; Data blocks bigger than this will be copied uncached by memcpy and memmove
    SetMemcpyCacheLimit(Value.i)                  ; Change limit in GetMemcpyCacheLimit
    GetMemsetCacheLimit.i()                       ; Data blocks bigger than this will be stored uncached by memset
    SetMemsetCacheLimit(Value.i)                  ; Change limit in GetMemsetCacheLimit

    ; string functions
    A_strcat.i(*dest, *src)                       ; returns (char*) - Concatenate strings dest and src. Store result in dest
    A_strcpy.i(*dest, *src)                       ; returns (char*) - Copy string src to dest
    A_strlen.i(*str)                              ; Get length of zero-terminated string
    A_strcmp.l(*a, *b)                            ; Compare strings. Case sensitive
    A_stricmp.l(*string1, *string2)               ; Compare strings. Case insensitive for A-Z only
    A_strstr.i(*haystack, *needle)                ; returns (char*) - Search for substring in string
    A_strtolower(*string)                         ; Convert string to lower case for A-Z only
    A_strtoupper(*string)                         ; Convert string to upper case for a-z only
    A_substring.i(*dest, *source, pos.i, len.i)   ; Copy a substring from source into dest
    A_strspn.i(*str, *set)                        ; Find span of characters that belong to set
    A_strcspn.i(*str, *set)                       ; Find span of characters that don't belong to set
    strCountInSet.i(*str, *set)                   ; Count characters that belong to set
    strcount_UTF8.i(*str)                         ; Counts the number of characters in a UTF-8 encoded string

    ; miscellaneous functions
    A_popcount.l(x.l)                             ; Count 1-bits in 32-bit integer
    RoundD.l(x.d)                                 ; Round to nearest or even
    RoundF.l(x.f)                                 ; Round to nearest or even
    InstructionSet.l()                            ; Tell which instruction set is supported
    ProcessorName.i()                             ; ASCIIZ text describing microprocessor
    CpuType(*vendor.LONG, *family.LONG, *model.LONG) ; Get CPU vendor, family and model
    DataCacheSize.i(level.l)                      ; Get size of data cache
    A_DebugBreak()                                ; Makes a debug breakpoint
    ; Declared as quad: the time stamp counter is a 64-bit value, and an integer
    ; (.i) return would truncate it to 32 bits in x86 builds.
    ReadTSC.q()                                   ; Read microprocessor internal clock
    ; Both register arguments are C ints, so both are declared as long.
    cpuid_ex(*abcd, _eax.l, _ecx.l)               ; call CPUID instruction

    ; integer division functions
    ; not done...
  EndImport
CompilerEndIf
Code (test program):
; Benchmark: time repeated CopyMemory() calls on a small buffer, with the
; asmlib include optionally compiled in (set #TEST_OPTIMIZATIONS to 0 to
; measure without it). The elapsed time in milliseconds is written to the
; debug output.
#ASMLIB_OVERRIDE_STANDARD_LIBRARY = 1
#TEST_OPTIMIZATIONS = 1

#BENCH_BUFFER_SIZE = 2048     ; bytes copied per iteration
#BENCH_ITERATIONS  = 5000000  ; number of CopyMemory() calls timed

CompilerIf #TEST_OPTIMIZATIONS = 1
  XIncludeFile "asmlib.pbi"
  ; NOTE(review): presumably this call only exists to reference an asmlib
  ; symbol so that the library is pulled in by the linker - confirm.
  A_strcmp(@"", @"")
CompilerEndIf

Define *buf1, *buf2
Define StartTime.l, Elapsed.l
Define i.l

*buf1 = AllocateMemory(#BENCH_BUFFER_SIZE)
*buf2 = AllocateMemory(#BENCH_BUFFER_SIZE)

; Only run the benchmark when both allocations succeeded; AllocateMemory()
; returns 0 on failure and the copy would otherwise touch a null pointer.
If *buf1 And *buf2
  RandomData(*buf1, #BENCH_BUFFER_SIZE)
  RandomData(*buf2, #BENCH_BUFFER_SIZE)

  StartTime = timeGetTime_()
  For i = 1 To #BENCH_ITERATIONS
    CopyMemory(*buf1, *buf2, #BENCH_BUFFER_SIZE)
  Next i
  Elapsed = timeGetTime_() - StartTime

  Debug Elapsed
Else
  Debug "Memory allocation failed"
EndIf

; Release whatever was allocated.
If *buf1 : FreeMemory(*buf1) : EndIf
If *buf2 : FreeMemory(*buf2) : EndIf