/* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm
 * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler.
 * Last revised 12 Oct 97
 *
 * Original coded (in crc_i386.asm) and put into the public domain
 * by Paul Kienitz and Christian Spieler.
 *
 * Revised 06-Oct-96, Scott Field (sfield@microsoft.com)
 *   fixed to assemble with masm by not using .model directive which makes
 *   assumptions about segment alignment.  Also,
 *   avoid using loop, and j[e]cxz where possible.  Use mov + inc, rather
 *   than lodsb, and other misc. changes resulting in the following performance
 *   increases:
 *
 *              unrolled loops                NO_UNROLLED_LOOPS
 *              *8      >8      <8            *8      >8      <8
 *
 *              +54%    +42%    +35%          +82%    +52%    +25%
 *
 *   first item in each table is input buffer length, even multiple of 8
 *   second item in each table is input buffer length, > 8
 *   third item in each table is input buffer length, < 8
 *
 * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
 *   Incorporated Rodney Brown's 32-bit-reads optimization as found in the
 *   UNIX AS source crc_i386.S. This new code can be disabled by defining
 *   the macro symbol NO_32_BIT_LOADS.
 *
 * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
 *   Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs
 *   (like the Pentium Pro, Pentium II, and probably some Pentium clones).
 *   This optimization is controlled by the macro symbol __686 and is disabled
 *   by default. (This default is based on the assumption that most users
 *   do not yet work on a Pentium Pro or Pentium II machine ...)
 *
 * FLAT memory model assumed.
 *
 * The loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
 * This results in shorter code at the expense of reduced performance.
 *
 */

#include "zip.h"

#ifndef USE_ZLIB

#ifndef ZCONST
#  define ZCONST const
#endif

#if (defined(_MSC_VER) && _MSC_VER >= 700)
#if (defined(_M_IX86) && _M_IX86 >= 300)
/* This code is intended for Microsoft C/C++ (32-bit compiler). */

/*
 * These two (three) macros make up the loop body of the CRC32 cruncher.
 * registers modified:
 *   eax  : crc value "c"
 *   esi  : pointer to next data byte (or dword) "buf++"
 * registers read:
 *   edi  : pointer to base of crc_table array
 * scratch registers:
 *   ebx  : index into crc_table array
 *          (requires upper three bytes = 0 when __686 is undefined)
 *
 * NOTE: the macro bodies expand into a single logical line, so they must
 * not contain assembler-style (;) comments; all commentary lives out here.
 */

/*
 * Do_CRC: fold the low byte of eax through the table, i.e.
 *     c = crc_table[c & 0xff] ^ (c >> 8);
 * Without __686 only bl is written, so ebx must have been zeroed by the
 * caller.  With __686 a movzx writes all of ebx instead; when
 * NO_MOVZX_SUPPORT is defined, the bytes 0F B6 D8 (the encoding of
 * "movzx ebx, al") are emitted directly.
 */
#ifndef __686
#define Do_CRC \
  __asm mov     bl, al \
  __asm shr     eax, 8 \
  __asm xor     eax, [edi+ebx*4]
#else /* __686 */
#ifdef NO_MOVZX_SUPPORT
#define movzx__ebx__al \
  __asm _emit   0x0F \
  __asm _emit   0xB6 \
  __asm _emit   0xD8
#else
#define movzx__ebx__al \
  __asm movzx   ebx, al
#endif
#define Do_CRC \
  movzx__ebx__al \
  __asm shr     eax, 8 \
  __asm xor     eax, [edi+ebx*4]
#endif /* ?__686 */

/*
 * Do_CRC_byte: consume one input byte, i.e.
 *     c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8);
 */
#define Do_CRC_byte \
  __asm xor     al, byte ptr [esi] \
  __asm inc     esi \
  Do_CRC

#ifndef NO_32_BIT_LOADS
/*
 * Do_CRC_dword: consume four input bytes with a single 32-bit load.
 * The dword at [esi] is xor-ed into c, esi advances by 4, and then four
 * table-folding steps (Do_CRC) are applied.
 */
#define Do_CRC_dword \
  __asm xor     eax, dword ptr [esi] \
  __asm add     esi, 4 \
  Do_CRC \
  Do_CRC \
  Do_CRC \
  Do_CRC
#endif /* !NO_32_BIT_LOADS */

/* ========================================================================= */
ulg crc32(crc, buf, len)
    ulg crc;                    /* crc shift register */
    ZCONST uch *buf;            /* pointer to bytes to pump through */
    extent len;                 /* number of bytes in buf[] */
/* Run a set of bytes through the crc shift register.  If buf is a NULL
   pointer, then initialize the crc shift register contents instead.
   Return the current crc in either case.

   Register usage inside the __asm block:
     eax : running crc value "c" (0 on the NULL-buf path)
     esi : read pointer into buf
     edi : base of the table returned by get_crc_table()
     ecx : loop counter (bytes, or groups of eight bytes)
     edx : len % 8, the tail left over after the unrolled loop
     ebx : table index scratch (see Do_CRC)

   The result is written back into the "crc" parameter at the end of the
   __asm block and returned with an ordinary return statement, so the
   function does not depend on EAX surviving the compiler epilogue.
 */
{
    __asm {
                push    edx
                push    ecx

                mov     esi,buf         ; 2nd arg: uch *buf
                xor     eax,eax         ;> if (!buf)
                test    esi,esi         ;>   return 0;
                jz      fine            ;> else {

                call    get_crc_table   ; table base comes back in eax
                mov     edi,eax
                mov     eax,crc         ; 1st arg: ulg crc
#ifndef __686
                xor     ebx,ebx         ; ebx=0; make bl usable as a dword
#endif
                mov     ecx,len         ; 3rd arg: extent len
                not     eax             ;>   c = ~crc;

#ifndef NO_UNROLLED_LOOPS
#  ifndef NO_32_BIT_LOADS
                test    ecx,ecx         ; nothing to do for len == 0
                je      bail
align_loop:
                test    esi,3           ; align buf pointer on next
                jz      aligned_now     ;  dword boundary
                Do_CRC_byte
                dec     ecx
                jnz     align_loop
aligned_now:
#  endif /* !NO_32_BIT_LOADS */
                mov     edx,ecx         ; save len in edx
                and     edx,000000007H  ; edx = len % 8
                shr     ecx,3           ; ecx = len / 8
                jz      No_Eights
; align loop head at start of 486 internal cache line !!
                align   16
Next_Eight:
#  ifndef NO_32_BIT_LOADS
                Do_CRC_dword
                Do_CRC_dword
#  else /* NO_32_BIT_LOADS */
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
#  endif /* ?NO_32_BIT_LOADS */
                dec     ecx
                jnz     Next_Eight
No_Eights:
                mov     ecx,edx         ; remaining tail bytes (0..7)

#endif /* !NO_UNROLLED_LOOPS */
#ifndef NO_JECXZ_SUPPORT
                jecxz   bail            ;>   if (len)
#else
                test    ecx,ecx         ;>   if (len)
                jz      bail
#endif
; align loop head at start of 486 internal cache line !!
                align   16
loupe:                                  ;>     do {
                Do_CRC_byte             ;        c = CRC32(c, *buf++);
                dec     ecx             ;>     } while (--len);
                jnz     loupe

bail:                                   ;> }
                not     eax             ;> return ~c;
fine:
                pop     ecx
                pop     edx
                mov     crc,eax         ; hand the result back to C code
    }
    return crc;
}

#endif /* _M_IX86 >= 300 */
#endif /* _MSC_VER >= 700 */

#endif /* !USE_ZLIB */