mlom.a is a library of routines in x86-64 machine code, doing useful and fun things using the capabilities of these processors, which can be called from C or from other languages. Some of these replicate existing libc functions, but they implement the function using a builtin instruction or instructions in x86-64 that seem tailor-made for it -- but usually turn out to be slower than some other method. This software is released here to the public domain and you may do whatever you want with it, but don't blame me if it crashes your program or something. The library is downloadable here. The following routines are currently included:
/* string.c */ /*** finds the index of the next zero byte after str, using the REPNE SCASB instruction ***/ long mlom_strlen(const char *str); /*** copies from src to dest until reaching a zero, using REPNE SCASB and REP MOVSB ***/ void mlom_strcpy(char *dest, const char *src); /*** copies from src to the space including and following the first zero after dest until reaching a zero in src, using REPNE SCASB and REP MOVSB ***/ void mlom_strcat(char *dest, const char *src); /*** searches for the first character equal to ch in the memory block of length n following str, using REPNE SCASB. if not found, returns str + n ***/ const char *mlom_memchr_const(const char *str, long n, int ch); char *mlom_memchr(char *str, long n, int ch); /*** copies n bytes from src to dest using REP MOVSB ***/ void mlom_memcpy(char *dest, const char *src, long n); /*** sets n bytes in dest to c using REP STOSB ***/ void mlom_memset(char *dest, int c, long n); /*** sets n bytes in dest to 0 using REP STOSB ***/ void mlom_memclr(char *dest, long n); /*** compares the n bytes following A to those following B using REPE CMPSB and returns -1, 0 or 1 depending on the result of the comparison ***/ int mlom_memcmp(const char *A, const char *B, long n); /* util.c */ /*** computes the CRC32-C hash of the n bytes at mem, using the builtin CRC instructions in x86-64 ***/ int mlom_crc32C(const char *mem, long n); /*** returns the value of the "time stamp counter" register ***/ long mlom_rdtsc(void); /*** returns a random 64-bit number ***/ long mlom_rdrand(void); /*** returns a random-er 64-bit number (?)
***/ long mlom_rdseed(void); /*** tries a nonexistent prefetch mode, which does not result in a #UD exception for some reason ***/ int mlom_testprefetch(const char *mem); /* math.c */ /*** rotates the integer x right by c bits ***/ long mlom_ror64(long x, char c); int mlom_ror32(int x, char c); short mlom_ror16(short x, char c); char mlom_ror8(char x, char c); /*** computes the absolute value of the integer x ***/ unsigned long mlom_abs(long x); /*** given the angle theta, stores its sine and cosine in sin and cos, using x87 instructions ***/ void mlom_sincosd(const double *theta,double *sin,double *cos); /*** gets pi using the x87 FLDPI instruction ***/ void mlom_getpi(double *pi); /*** computes x ^ y using x87 instructions and stores the result in res ***/ void mlom_pow(const double *x, const double *y, double *res); /*** converts an integer to a string with the help of the x87 FBSTP instruction ***/ void mlom_bcditoa(long x, char a[20]);