title "Compute Checksum"
;++
;
; Copyright (c) Microsoft Corporation.  All rights reserved.
;
; Module Name:
;
;   xsum.amd
;
; Abstract:
;
;   This module implements the platform specific function to compute the
;   checksum of a buffer.
;
; Author:
;
;   David N. Cutler (davec) 6-Jul-2000
;
; Environment:
;
;   Any mode.
;
;--

include ksamd64.inc

;++
;
; ULONG
; tcpxsum(
;   IN ULONG Checksum,
;   IN PUCHAR Source,
;   IN ULONG Length
;   )
;
; Routine Description:
;
;   This function computes the checksum of the specified buffer and combines
;   the computed checksum with the specified checksum.
;
;   The buffer is summed with end-around carry: a possible leading byte
;   (odd start address) and trailing byte (odd remaining length) are loaded
;   first, single words are added until the address is quadword aligned,
;   the bulk is added 16 quadwords per iteration with a chain of adc
;   instructions, and any remaining words are added one at a time. The
;   64-bit sum is then folded to 32 and to 16 bits.
;
; Arguments:
;
;   Checksum (ecx) - Supplies the initial checksum value, in 16-bit form,
;       with the high word set to 0.
;
;   Source (rdx) - Supplies a pointer to the checksum buffer.
;
;   Length (r8d) - Supplies the length of the buffer in bytes.
;
; Return Value:
;
;   The computed checksum, in 16-bit form, with the high word set to 0.
;
; Register usage inside the routine:
;
;   rax  - running sum (al/ah/ax while adding bytes and words, the full
;          64 bits inside the quadword loop)
;   bx   - copy of the initial checksum (rbx is saved and restored)
;   ecx  - bytes remaining, then words remaining after the shift at
;          word_aligned
;   rdx  - quadword count, then offset of the partial loop iteration
;   r8d  - number of iterations of the 16-quadword loop
;   r9   - computed entry address into the adc sequence
;   r10  - current buffer address
;   r11  - original buffer address (bit 0 is consulted at done)
;
;--

        NESTED_ENTRY tcpxsum, _TEXT$00

        push_reg rbx                    ; save nonvolatile register

        END_PROLOGUE

        mov     r11, rdx                ; save initial buffer address
        mov     bx, cx                  ; save initial checksum
        mov     r10, rdx                ; set checksum buffer address
        mov     ecx, r8d                ; set buffer length
        xor     eax, eax                ; clear computed checksum
        test    ecx, ecx                ; test if any bytes to checksum
        jz      combine                 ; if z, no bytes to checksum

;
; If the checksum buffer is not word aligned, then add the first byte of
; the buffer to the checksum.
;
; The byte is placed in ah, i.e. in the high half of the 16-bit sum, and the
; rest of the buffer is then summed as if it started word aligned. The
; resulting byte-swapped sum is corrected by the ror ax, 8 at done.
;
; N.B. First buffer address check is done using rdx rather than r10 so
;      the register ah can be used.
;

        test    dl, 1                   ; test if buffer word aligned
        jz      short word_aligned      ; if z, buffer word aligned
        mov     ah, [rdx]               ; get first byte of checksum
        inc     r10                     ; increment buffer address
        dec     ecx                     ; decrement number of bytes
        jz      done                    ; if z set, no more bytes

;
; If the buffer is not an even number of bytes, then add the last byte of
; the buffer to the checksum.
;
; N.B. The shift leaves the odd-length bit in carry and sets zero if no
;      whole words remain. The mov that loads the last byte does not alter
;      the flags, so the jz that follows it still tests the shift result.
;

word_aligned:                           ;
        shr     ecx, 1                  ; convert to word count
        jnc     short word_count        ; if nc, even number of bytes
        mov     al, [r10][rcx * 2]      ; initialize the computed checksum
        jz      done                    ; if z set, no more bytes

;
; If the buffer is not quadword aligned, then add words to the checksum until
; the buffer is quadword aligned.
;
; N.B. The address in r10 is word aligned at this point, so only bits 1 and
;      2 (mask 6) need to be tested.
;

word_count:                             ;
        test    r10b, 6                 ; test if buffer quadword aligned
        jz      short qword_aligned     ; if z, buffer quadword aligned
qword_align:                            ;
        add     ax, [r10]               ; add next word of checksum
        adc     eax, 0                  ; propagate carry
        add     r10, 2                  ; increment buffer address
        dec     ecx                     ; decrement number of words
        jz      done                    ; if z, no more words
        test    r10b, 6                 ; test if buffer qword aligned
        jnz     short qword_align       ; if nz, buffer not qword aligned

;
; Compute checksum in large blocks of qwords.
;
; The quadword count is split into a number of full 16-quadword iterations
; (r8d) and a partial iteration of 1 to 15 quadwords (edx). When there is a
; partial iteration, the buffer address is biased backward so that the last
; 'partial' adc instructions of the sequence address the first 'partial'
; quadwords, and the loop is entered in the middle of the sequence.
;

qword_aligned:                          ;
        mov     edx, ecx                ; copy number of words remaining
        shr     edx, 2                  ; compute number of quadwords
        jz      residual_words          ; if z, no quadwords to checksum
        mov     r8d, edx                ; compute number of loop iterations
        shr     r8d, 4                  ;
        and     edx, 16 - 1             ; isolate partial loop iteration
        jz      short checksum_loop     ; if z, no partial loop iteration
        sub     rdx, 16                 ; compute negative loop top offset
        lea     r10, [r10][rdx * 8]     ; bias initial buffer address
        neg     rdx                     ; compute positive loop top offset
        add     r8d, 1                  ; increment loop iteration count
                                        ; (add rather than inc: inc would
                                        ; leave the carry from neg in place
                                        ; and the adc chain needs it clear)

;
; ASSEMBLER WORKAROUND - when fixed, remove the following data
; byte
;
; NOTE(review): 04ch looks like the REX prefix (W and R bits) that the
; lea r9 below requires - presumably the assembler did not emit it. Confirm
; against the assembler in use before removing the byte.
;

        db      04ch
        lea     r9, checksum_start      ; get address of checksum array
        lea     r9, [r9][rdx * 4]       ; compute initial iteration address
                                        ; (rdx instructions of 4 bytes each
                                        ; are skipped)
        jmp     r9                      ; start checksum

;
; Checksum quadwords.
;
; N.B. This loop is entered with carry clear.
;
; N.B. The carry produced by one adc is consumed by the next one, also
;      across loop iterations: the lea, dec and prefetchnta instructions
;      in the loop do not modify the carry flag.
;

        align   16
checksum_loop:                          ;
        prefetchnta 0[r10]              ; prefetch start of 128-byte block
        prefetchnta 120[r10]            ; prefetch end of 128-byte block

;
; N.B. The first 16 of following instructions are exactly 4 bytes long.
;
;      The computed jump above relies on this size; the .errnz below checks
;      it at assembly time.
;

checksum_start:                         ;
;       adc     rax, 0[r10]             ; Compute checksum
;
; NOTE(review): presumably hand-encoded because the assembler would pick a
; shorter encoding for a zero displacement - confirm before replacing the
; bytes with the mnemonic.
;
        db      049h                    ; Manually encode the 4-byte
        db      013h                    ; version of the instruction
        db      042h                    ;
        db      000h                    ;    adc rax, 0[r10]

        adc     rax, 8[r10]             ;
        adc     rax, 16[r10]            ;
        adc     rax, 24[r10]            ;
        adc     rax, 32[r10]            ;
        adc     rax, 40[r10]            ;
        adc     rax, 48[r10]            ;
        adc     rax, 56[r10]            ;
        adc     rax, 64[r10]            ;
        adc     rax, 72[r10]            ;
        adc     rax, 80[r10]            ;
        adc     rax, 88[r10]            ;
        adc     rax, 96[r10]            ;
        adc     rax, 104[r10]           ;
        adc     rax, 112[r10]           ;
        adc     rax, 120[r10]           ;

        .errnz  (($ - checksum_start) - (4 * 16))

        lea     r10, 128[r10]           ; update source address
        dec     r8d                     ; decrement loop count
        jnz     short checksum_loop     ; if nz, more iterations
        adc     rax, 0                  ; propagate last carry

;
; Compute checksum of residual words.
;
; N.B. Only the low 16 bits of rax are updated here and the carry is added
;      back into ax. The upper bits of rax are folded in at done.
;

residual_words:                         ;
        and     ecx, 3                  ; isolate residual words
        jz      short done              ; if z, no residual words
add_word:                               ;
        add     ax, [r10]               ; add word to checksum
        adc     ax, 0                   ; propagate carry
        add     r10, 2                  ; increment buffer address
        dec     ecx                     ; decrement word count
        jnz     short add_word          ; if nz, more words remaining

;
; Fold the computed checksum to 32-bits and then to 16-bits.
;
; Each fold adds the value to a copy of itself rotated by half its width, so
; that the upper half of the result holds the sum of both halves plus the
; carry out of the lower half.
;

done:                                   ;
        mov     rcx, rax                ; fold the checksum to 32-bits
        ror     rcx, 32                 ; swap high and low dwords
        add     rax, rcx                ; produce sum + carry in high 32-bits
        shr     rax, 32                 ; extract 32-bit checksum
        mov     ecx, eax                ; fold the checksum to 16-bits
        ror     ecx, 16                 ; swap high and low words
        add     eax, ecx                ; produce sum + carry in high 16-bits
        shr     eax, 16                 ; extract 16-bit check sum
        test    r11b, 1                 ; test if buffer word aligned
        jz      short combine           ; if z set, buffer word aligned
        ror     ax, 8                   ; swap checksum bytes (the buffer was
                                        ; summed shifted by one byte)

;
; Combine the input checksum with the computed checksum.
;
; N.B. The high word of eax is zero on both paths that reach this point.
;

combine:                                ;
        add     ax, bx                  ; combine checksums
        adc     eax, 0                  ; add carry to low 16-bits
        pop     rbx                     ; restore nonvolatile register
        retq                            ; return

        NESTED_END tcpxsum, _TEXT$00

        end