performance fixes

This commit is contained in:
davidly 2024-07-16 16:11:50 -07:00
parent ed672a90b8
commit e16ee31852
2 changed files with 42 additions and 32 deletions

View File

@ -49,8 +49,8 @@ false equ 0
loops equ 10
arraysize equ 8190
dataseg segment para public 'data'
assume ds: dataseg
tttdataseg segment para public 'sievedata'
assume ds: tttdataseg
crlfmsg db 13,10,0
primesmsg db ' primes.',13,10,0
@ -61,30 +61,33 @@ dataseg segment para public 'data'
align 16
flags db arraysize + 1 dup( ? )
afterflags db 0
dataseg ends
tttdataseg ends
mov ax, dataseg
mov ax, tttdataseg
mov ds, ax
mov cx, 0 ; handy place for 0
mov di, offset afterFlags ; handy place for this constant
iteragain: ; for ( iter = 1; iter <= 10; iter++ )
mov word ptr [ count ], 0 ; count = 0
; set all array entries to true: for (i = 0; i <= SIZE; i++) flags[i] = TRUE;
xor bx, bx
mov byte ptr [ offset flags + bx ], true
inc bx
cmp bx, arraysize
jle nexttrue
mov ah, true
mov al, true
mov di, offset flags
mov cx, ( arraysize + 2 ) / 2 ; 4096 words (8192 bytes: flags + afterflags)
push ds
pop es
rep stosw
; iterate through array entries and count primes
mov di, offset afterFlags ; handy place for this constant
xor cx, cx ; handy place for 0
xor bx, bx ; bx is "i" in the outer for loop
nextprime: ; for (i = 0; i <= SIZE; i++) {
@ -97,13 +100,15 @@ start:
mov si, offset flags
add si, bx ; for (k = i + prime; k <= SIZE; k += prime)
add si, ax ; k += prime
cmp si, di ; is si >= offset afterFlags?
jge inccount
cmp si, di ; is si >= offset afterFlags? (i.e. k > size)
jge inccount ; redundant check to the one in the loop below but it saves a jump instruction in the loop
mov byte ptr [ si ], cl ; flags[ k ] = false. use cl for 0 because it's faster than an immediate
jmp kloop
add si, ax ; k += prime
cmp si, di ; is si < offset afterFlags? (i.e. k <= size)
jl kloop
inc word ptr [ COUNT ] ; count++

View File

@ -49,8 +49,8 @@ false equ 0
loops equ 10
arraysize equ 8190
dataseg segment para public 'data'
assume ds: dataseg
tttdataseg segment para public 'sievedata'
assume ds: tttdataseg
crlfmsg db 13,10,0
primesmsg db ' primes.',13,10,0
@ -61,30 +61,33 @@ dataseg segment para public 'data'
align 16
flags db arraysize + 1 dup( ? )
afterflags db 0
dataseg ends
tttdataseg ends
mov ax, dataseg
mov ax, tttdataseg
mov ds, ax
mov cx, 0 ; handy place for 0
mov di, offset afterFlags ; handy place for this constant
iteragain: ; for ( iter = 1; iter <= 10; iter++ )
mov word ptr [ count ], 0 ; count = 0
; set all array entries to true: for (i = 0; i <= SIZE; i++) flags[i] = TRUE;
xor bx, bx
mov byte ptr [ offset flags + bx ], true
inc bx
cmp bx, arraysize
jle nexttrue
mov ah, true
mov al, true
mov di, offset flags
mov cx, ( arraysize + 2 ) / 2 ; 4096 words (8192 bytes: flags + afterflags)
push ds
pop es
rep stosw
; iterate through array entries and count primes
mov di, offset afterFlags ; handy place for this constant
xor cx, cx ; handy place for 0
xor bx, bx ; bx is "i" in the outer for loop
nextprime: ; for (i = 0; i <= SIZE; i++) {
@ -97,13 +100,15 @@ start:
mov si, offset flags
add si, bx ; for (k = i + prime; k <= SIZE; k += prime)
add si, ax ; k += prime
cmp si, di ; is si >= offset afterFlags?
jge inccount
cmp si, di ; is si >= offset afterFlags? (i.e. k > size)
jge inccount ; redundant check to the one in the loop below but it saves a jump instruction in the loop
mov byte ptr [ si ], cl ; flags[ k ] = false. use cl for 0 because it's faster than an immediate
jmp kloop
add si, ax ; k += prime
cmp si, di ; is si < offset afterFlags? (i.e. k <= size)
jl kloop
inc word ptr [ COUNT ] ; count++