performance
This commit is contained in:
parent
1b9b8b0959
commit
209e766086
@ -49,40 +49,38 @@ false equ 0
|
|||||||
loops equ 10
|
loops equ 10
|
||||||
arraysize equ 8190
|
arraysize equ 8190
|
||||||
|
|
||||||
tttdataseg segment para public 'sievedata'
|
sieve_dataseg segment para public 'sievedata'
|
||||||
assume ds: tttdataseg
|
assume ds: sieve_dataseg
|
||||||
|
|
||||||
crlfmsg db 13,10,0
|
crlfmsg db 13,10,0
|
||||||
primesmsg db ' primes.',13,10,0
|
primesmsg db ' primes.',13,10,0
|
||||||
count dw 0
|
|
||||||
ITER dw 0
|
ITER dw 0
|
||||||
PKFLAGS dd 0
|
PKFLAGS dd 0
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
flags db arraysize + 1 dup( ? )
|
flags db arraysize + 1 dup( ? )
|
||||||
afterflags db 0
|
afterflags db 0
|
||||||
tttdataseg ends
|
sieve_dataseg ends
|
||||||
|
|
||||||
.code
|
.code
|
||||||
|
|
||||||
start:
|
start:
|
||||||
mov ax, tttdataseg
|
mov ax, sieve_dataseg
|
||||||
mov ds, ax
|
mov ds, ax
|
||||||
|
|
||||||
iteragain: ; for ( iter = 1; iter <= 10; iter++ )
|
iteragain: ; for ( iter = 1; iter <= 10; iter++ )
|
||||||
mov word ptr [ count ], 0 ; count = 0
|
xor dx, dx ; count of primes found
|
||||||
|
|
||||||
; set all array entries to true: for (i = 0; i <= SIZE; i++) flags[i] = TRUE;
|
; set all array entries to true: for (i = 0; i <= SIZE; i++) flags[i] = TRUE;
|
||||||
|
|
||||||
mov ah, true
|
mov ah, true
|
||||||
mov al, true
|
mov al, true
|
||||||
mov di, offset flags
|
mov di, offset flags
|
||||||
mov cx, ( arraysize + 2 ) / 2 ; 4095
|
mov cx, ( arraysize + 2 ) / 2 ; 4096
|
||||||
push ds
|
push ds
|
||||||
pop es
|
pop es
|
||||||
cld
|
cld
|
||||||
rep stosw
|
rep stosw
|
||||||
stosb
|
|
||||||
|
|
||||||
; iterate through array entries and count primes
|
; iterate through array entries and count primes
|
||||||
|
|
||||||
@ -105,13 +103,13 @@ start:
|
|||||||
jge inccount ; redundant check to the one in the loop below but it saves a jump instruction in the loop
|
jge inccount ; redundant check to the one in the loop below but it saves a jump instruction in the loop
|
||||||
|
|
||||||
kloop:
|
kloop:
|
||||||
mov byte ptr [ si ], cl ; flags[ k ] = false. use cl for 0 because it's faster than an immediate
|
mov [ si ], cl ; flags[ k ] = false. use cl for 0 because it's faster than an immediate
|
||||||
add si, ax ; k += prime
|
add si, ax ; k += prime
|
||||||
cmp si, di ; is si >= offset afterFlags? (i.e. k <= size)
|
cmp si, di ; is si >= offset afterFlags? (i.e. k <= size)
|
||||||
jl kloop
|
jl kloop
|
||||||
|
|
||||||
inccount:
|
inccount:
|
||||||
inc word ptr [ COUNT ] ; count++
|
inc dx ; count++
|
||||||
|
|
||||||
flagisoff: ; check if outer loop is done
|
flagisoff: ; check if outer loop is done
|
||||||
inc bx
|
inc bx
|
||||||
@ -123,7 +121,7 @@ start:
|
|||||||
cmp word ptr [ ITER ], loops
|
cmp word ptr [ ITER ], loops
|
||||||
jnz iteragain
|
jnz iteragain
|
||||||
|
|
||||||
mov ax, word ptr [COUNT]
|
mov ax, dx
|
||||||
call printint
|
call printint
|
||||||
|
|
||||||
call printcrlf
|
call printcrlf
|
||||||
|
@ -49,40 +49,38 @@ false equ 0
|
|||||||
loops equ 10
|
loops equ 10
|
||||||
arraysize equ 8190
|
arraysize equ 8190
|
||||||
|
|
||||||
tttdataseg segment para public 'sievedata'
|
sieve_dataseg segment para public 'sievedata'
|
||||||
assume ds: tttdataseg
|
assume ds: sieve_dataseg
|
||||||
|
|
||||||
crlfmsg db 13,10,0
|
crlfmsg db 13,10,0
|
||||||
primesmsg db ' primes.',13,10,0
|
primesmsg db ' primes.',13,10,0
|
||||||
count dw 0
|
|
||||||
ITER dw 0
|
ITER dw 0
|
||||||
PKFLAGS dd 0
|
PKFLAGS dd 0
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
flags db arraysize + 1 dup( ? )
|
flags db arraysize + 1 dup( ? )
|
||||||
afterflags db 0
|
afterflags db 0
|
||||||
tttdataseg ends
|
sieve_dataseg ends
|
||||||
|
|
||||||
.code
|
.code
|
||||||
|
|
||||||
start:
|
start:
|
||||||
mov ax, tttdataseg
|
mov ax, sieve_dataseg
|
||||||
mov ds, ax
|
mov ds, ax
|
||||||
|
|
||||||
iteragain: ; for ( iter = 1; iter <= 10; iter++ )
|
iteragain: ; for ( iter = 1; iter <= 10; iter++ )
|
||||||
mov word ptr [ count ], 0 ; count = 0
|
xor dx, dx ; count of primes found
|
||||||
|
|
||||||
; set all array entries to true: for (i = 0; i <= SIZE; i++) flags[i] = TRUE;
|
; set all array entries to true: for (i = 0; i <= SIZE; i++) flags[i] = TRUE;
|
||||||
|
|
||||||
mov ah, true
|
mov ah, true
|
||||||
mov al, true
|
mov al, true
|
||||||
mov di, offset flags
|
mov di, offset flags
|
||||||
mov cx, ( arraysize + 2 ) / 2 ; 4095
|
mov cx, ( arraysize + 2 ) / 2 ; 4096
|
||||||
push ds
|
push ds
|
||||||
pop es
|
pop es
|
||||||
cld
|
cld
|
||||||
rep stosw
|
rep stosw
|
||||||
stosb
|
|
||||||
|
|
||||||
; iterate through array entries and count primes
|
; iterate through array entries and count primes
|
||||||
|
|
||||||
@ -105,13 +103,13 @@ start:
|
|||||||
jge inccount ; redundant check to the one in the loop below but it saves a jump instruction in the loop
|
jge inccount ; redundant check to the one in the loop below but it saves a jump instruction in the loop
|
||||||
|
|
||||||
kloop:
|
kloop:
|
||||||
mov byte ptr [ si ], cl ; flags[ k ] = false. use cl for 0 because it's faster than an immediate
|
mov [ si ], cl ; flags[ k ] = false. use cl for 0 because it's faster than an immediate
|
||||||
add si, ax ; k += prime
|
add si, ax ; k += prime
|
||||||
cmp si, di ; is si >= offset afterFlags? (i.e. k <= size)
|
cmp si, di ; is si >= offset afterFlags? (i.e. k <= size)
|
||||||
jl kloop
|
jl kloop
|
||||||
|
|
||||||
inccount:
|
inccount:
|
||||||
inc word ptr [ COUNT ] ; count++
|
inc dx ; count++
|
||||||
|
|
||||||
flagisoff: ; check if outer loop is done
|
flagisoff: ; check if outer loop is done
|
||||||
inc bx
|
inc bx
|
||||||
@ -123,7 +121,7 @@ start:
|
|||||||
cmp word ptr [ ITER ], loops
|
cmp word ptr [ ITER ], loops
|
||||||
jnz iteragain
|
jnz iteragain
|
||||||
|
|
||||||
mov ax, word ptr [COUNT]
|
mov ax, dx
|
||||||
call printint
|
call printint
|
||||||
|
|
||||||
call printcrlf
|
call printcrlf
|
||||||
|
Loading…
Reference in New Issue
Block a user