performance

This commit is contained in:
davidly 2024-07-24 22:01:59 -07:00
parent 1b9b8b0959
commit 209e766086
2 changed files with 20 additions and 24 deletions

View File

@@ -49,40 +49,38 @@ false equ 0
loops equ 10 loops equ 10
arraysize equ 8190 arraysize equ 8190
tttdataseg segment para public 'sievedata' sieve_dataseg segment para public 'sievedata'
assume ds: tttdataseg assume ds: sieve_dataseg
crlfmsg db 13,10,0 crlfmsg db 13,10,0
primesmsg db ' primes.',13,10,0 primesmsg db ' primes.',13,10,0
count dw 0
ITER dw 0 ITER dw 0
PKFLAGS dd 0 PKFLAGS dd 0
align 16 align 16
flags db arraysize + 1 dup( ? ) flags db arraysize + 1 dup( ? )
afterflags db 0 afterflags db 0
tttdataseg ends sieve_dataseg ends
.code .code
start: start:
mov ax, tttdataseg mov ax, sieve_dataseg
mov ds, ax mov ds, ax
iteragain: ; for ( iter = 1; iter <= 10; iter++ ) iteragain: ; for ( iter = 1; iter <= 10; iter++ )
mov word ptr [ count ], 0 ; count = 0 xor dx, dx ; count of primes found
; set all array entries to true: for (i = 0; i <= SIZE; i++) flags[i] = TRUE; ; set all array entries to true: for (i = 0; i <= SIZE; i++) flags[i] = TRUE;
mov ah, true mov ah, true
mov al, true mov al, true
mov di, offset flags mov di, offset flags
mov cx, ( arraysize + 2 ) / 2 ; 4095 mov cx, ( arraysize + 2 ) / 2 ; 4096
push ds push ds
pop es pop es
cld cld
rep stosw rep stosw
stosb
; iterate through array entries and count primes ; iterate through array entries and count primes
@@ -105,13 +103,13 @@ start:
jge inccount ; redundant check to the one in the loop below but it saves a jump instruction in the loop jge inccount ; redundant check to the one in the loop below but it saves a jump instruction in the loop
kloop: kloop:
mov byte ptr [ si ], cl ; flags[ k ] = false. use cl for 0 because it's faster than an immediate mov [ si ], cl ; flags[ k ] = false. use cl for 0 because it's faster than an immediate
add si, ax ; k += prime add si, ax ; k += prime
cmp si, di ; is si >= offset afterFlags? (i.e. k <= size) cmp si, di ; is si >= offset afterFlags? (i.e. k <= size)
jl kloop jl kloop
inccount: inccount:
inc word ptr [ COUNT ] ; count++ inc dx ; count++
flagisoff: ; check if outer loop is done flagisoff: ; check if outer loop is done
inc bx inc bx
@@ -123,7 +121,7 @@ start:
cmp word ptr [ ITER ], loops cmp word ptr [ ITER ], loops
jnz iteragain jnz iteragain
mov ax, word ptr [COUNT] mov ax, dx
call printint call printint
call printcrlf call printcrlf

View File

@@ -49,40 +49,38 @@ false equ 0
loops equ 10 loops equ 10
arraysize equ 8190 arraysize equ 8190
tttdataseg segment para public 'sievedata' sieve_dataseg segment para public 'sievedata'
assume ds: tttdataseg assume ds: sieve_dataseg
crlfmsg db 13,10,0 crlfmsg db 13,10,0
primesmsg db ' primes.',13,10,0 primesmsg db ' primes.',13,10,0
count dw 0
ITER dw 0 ITER dw 0
PKFLAGS dd 0 PKFLAGS dd 0
align 16 align 16
flags db arraysize + 1 dup( ? ) flags db arraysize + 1 dup( ? )
afterflags db 0 afterflags db 0
tttdataseg ends sieve_dataseg ends
.code .code
start: start:
mov ax, tttdataseg mov ax, sieve_dataseg
mov ds, ax mov ds, ax
iteragain: ; for ( iter = 1; iter <= 10; iter++ ) iteragain: ; for ( iter = 1; iter <= 10; iter++ )
mov word ptr [ count ], 0 ; count = 0 xor dx, dx ; count of primes found
; set all array entries to true: for (i = 0; i <= SIZE; i++) flags[i] = TRUE; ; set all array entries to true: for (i = 0; i <= SIZE; i++) flags[i] = TRUE;
mov ah, true mov ah, true
mov al, true mov al, true
mov di, offset flags mov di, offset flags
mov cx, ( arraysize + 2 ) / 2 ; 4095 mov cx, ( arraysize + 2 ) / 2 ; 4096
push ds push ds
pop es pop es
cld cld
rep stosw rep stosw
stosb
; iterate through array entries and count primes ; iterate through array entries and count primes
@@ -105,13 +103,13 @@ start:
jge inccount ; redundant check to the one in the loop below but it saves a jump instruction in the loop jge inccount ; redundant check to the one in the loop below but it saves a jump instruction in the loop
kloop: kloop:
mov byte ptr [ si ], cl ; flags[ k ] = false. use cl for 0 because it's faster than an immediate mov [ si ], cl ; flags[ k ] = false. use cl for 0 because it's faster than an immediate
add si, ax ; k += prime add si, ax ; k += prime
cmp si, di ; is si >= offset afterFlags? (i.e. k <= size) cmp si, di ; is si >= offset afterFlags? (i.e. k <= size)
jl kloop jl kloop
inccount: inccount:
inc word ptr [ COUNT ] ; count++ inc dx ; count++
flagisoff: ; check if outer loop is done flagisoff: ; check if outer loop is done
inc bx inc bx
@@ -123,7 +121,7 @@ start:
cmp word ptr [ ITER ], loops cmp word ptr [ ITER ], loops
jnz iteragain jnz iteragain
mov ax, word ptr [COUNT] mov ax, dx
call printint call printint
call printcrlf call printcrlf