
.CODE             ;Indicates the start of a code segment.


;-----------------------------------------------------------------------
; void OrR(void *R1, const void *R2, unsigned maxX)
;
; Bitwise OR of two byte buffers: R1[i] |= R2[i] for i in [0, maxX).
;
; ABI:   Microsoft x64
; In:    rcx = R1   destination buffer (read and written)
;        rdx = R2   source buffer (read only)
;        r8d = maxX number of bytes; only the low 32 bits of r8 are
;                   meaningful for an 'unsigned' argument
; Out:   nothing
; Clobb: rax, rcx, flags (rsi and rdi are saved/restored by USES)
; Note:  lodsq/lodsb walk forward only when the direction flag is clear.
;-----------------------------------------------------------------------
        public  OrR
OrR proc uses rsi rdi

        mov     rdi,rcx                 ; rdi = destination cursor
        mov     rsi,rdx                 ; rsi = source cursor
        mov     ecx,r8d                 ; rcx = byte count, zero-extended

        sub     rcx,8                   ; bias the counter by one qword
        jb      LoopSimple              ; fewer than 8 bytes in total

                ; Main loop: 8 bytes per iteration.
                ; Invariant: rcx = (bytes remaining) - 8, and is >= 0 here.
LoopPix8:
        lodsq
        or      [rdi],rax
        add     rdi,8
        sub     rcx,8
        jae     LoopPix8                ; no borrow: another full qword is left

                ; Tail loop: the remaining 0..7 bytes, one at a time.
LoopSimple:
        add     rcx,8                   ; undo the bias: rcx = bytes remaining
        jz      ToEnd                   ; nothing left
LoopPix:
        lodsb
        or      [rdi],al
        inc     rdi
        dec     rcx
        jnz     LoopPix
ToEnd:  ret

OrR endp


;-----------------------------------------------------------------------
; void AndR(void *R1, const void *R2, unsigned maxX)
;
; Bitwise AND of two byte buffers: R1[i] &= R2[i] for i in [0, maxX).
;
; ABI:   Microsoft x64
; In:    rcx = R1   destination buffer (read and written)
;        rdx = R2   source buffer (read only)
;        r8d = maxX number of bytes; only the low 32 bits of r8 are
;                   meaningful for an 'unsigned' argument
; Out:   nothing
; Clobb: rax, rcx, flags (rsi and rdi are saved/restored by USES)
; Note:  lodsq/lodsb walk forward only when the direction flag is clear.
;-----------------------------------------------------------------------
        public  AndR
AndR proc uses rsi rdi

        mov     rdi,rcx                 ; rdi = destination cursor
        mov     rsi,rdx                 ; rsi = source cursor
        mov     ecx,r8d                 ; rcx = byte count, zero-extended

        sub     rcx,8                   ; bias the counter by one qword
        jb      LoopSimple              ; fewer than 8 bytes in total

                ; Main loop: 8 bytes per iteration.
                ; Invariant: rcx = (bytes remaining) - 8, and is >= 0 here.
LoopPix8:
        lodsq
        and     [rdi],rax
        add     rdi,8
        sub     rcx,8
        jae     LoopPix8                ; no borrow: another full qword is left

                ; Tail loop: the remaining 0..7 bytes, one at a time.
LoopSimple:
        add     rcx,8                   ; undo the bias: rcx = bytes remaining
        jz      ToEnd                   ; nothing left
LoopPix:
        lodsb
        and     [rdi],al
        inc     rdi
        dec     rcx
        jnz     LoopPix
ToEnd:  ret

AndR endp


;-----------------------------------------------------------------------
; void ShrR(void *R, unsigned maxX)
;
; Shifts the maxX-byte buffer R right by one bit, in place, treating it
; as a single bit string whose first byte is the most significant one:
;   - byte 0 is shifted arithmetically (its bit 7 is kept),
;   - bit 0 of every byte becomes bit 7 of the following byte,
;   - bit 0 of the last byte is discarded.
;
; ABI:   Microsoft x64
; In:    rcx = R    buffer; NULL is accepted and means "do nothing"
;        edx = maxX number of bytes; only the low 32 bits of rdx are
;                   meaningful for an 'unsigned' argument
; Out:   nothing
; Clobb: rcx, rdx, flags
;-----------------------------------------------------------------------
        public  ShrR
ShrR proc

        test    rcx,rcx
        jz      ToEnd                   ; R == NULL
        test    edx,edx
        jz      ToEnd                   ; maxX == 0

        sar     byte ptr [rcx],1        ; first byte keeps its top bit; CF = bit shifted out
        dec     edx                     ; dec leaves CF untouched
        jz      ToEnd                   ; single-byte buffer

                ; Invariant: CF holds the bit shifted out of the previous byte.
LoopPx1:
        inc     rcx                     ; inc leaves CF untouched
        rcr     byte ptr [rcx],1        ; CF -> bit 7, bit 0 -> CF
        dec     edx                     ; dec leaves CF untouched
        jnz     LoopPx1

ToEnd:  ret

ShrR endp


;-----------------------------------------------------------------------
; void ShlR(void *R, unsigned maxX)
;
; Shifts the maxX-byte buffer R left by one bit, in place, treating it
; as a single bit string whose last byte is the least significant one:
;   - the last byte becomes (b << 1) | (b & 1), i.e. its bit 0 is duplicated,
;   - bit 7 of every byte becomes bit 0 of the preceding byte,
;   - bit 7 of the first byte is discarded.
;
; ABI:   Microsoft x64
; In:    rcx = R    buffer; NULL is accepted and means "do nothing"
;        edx = maxX number of bytes; only the low 32 bits of rdx are
;                   meaningful for an 'unsigned' argument
; Out:   nothing
; Clobb: rax, rcx, rdx, flags
;-----------------------------------------------------------------------
        public  ShlR
ShlR proc

        test    rcx,rcx
        jz      ToEnd                   ; R == NULL
        mov     edx,edx                 ; zero-extend count so it can be used in an address
        test    edx,edx
        jz      ToEnd                   ; maxX == 0

        lea     rcx,[rcx+rdx-1]         ; rcx -> last byte of the buffer

        movzx   eax,byte ptr [rcx]
        bt      eax,0                   ; CF = bit 0 of the last byte
        rcl     al,1                    ; bit 0 duplicated; CF = old bit 7
        mov     [rcx],al
        dec     edx                     ; dec leaves CF untouched
        jz      ToEnd                   ; single-byte buffer

                ; Invariant: CF holds the bit shifted out of the following byte.
LoopPx1:
        dec     rcx                     ; dec leaves CF untouched
        rcl     byte ptr [rcx],1        ; CF -> bit 0, bit 7 -> CF
        dec     edx                     ; dec leaves CF untouched
        jnz     LoopPx1

ToEnd:  ret

ShlR endp


;-----------------------------------------------------------------------
; void AddLu32u8(unsigned Size, uint32_t *Accu, uint8_t *pAdd)
;
; Adds a row of bytes to a row of 32-bit accumulators:
;   Accu[i] += pAdd[i]   for i in [0, Size)
; The addition is a plain 32-bit add (wraps on overflow).
;
; ABI:   Microsoft x64
; In:    ecx = Size number of elements; only the low 32 bits of rcx are
;                   meaningful for an 'unsigned' argument
;        rdx = Accu accumulators; NULL means "do nothing"
;        r8  = pAdd bytes to add; NULL means "do nothing"
; Out:   nothing
; Clobb: rax, rcx, rdx, r8, flags
;-----------------------------------------------------------------------
        public  AddLu32u8
AddLu32u8 proc

        test    r8,r8
        jz      ToEnd                   ; pAdd == NULL
        test    rdx,rdx
        jz      ToEnd                   ; Accu == NULL
        test    ecx,ecx
        jz      ToEnd                   ; Size == 0

ByteLop:
        movzx   eax,byte ptr [r8]       ; widen u8 -> u32
        inc     r8
        add     [rdx],eax
        add     rdx,4
        dec     ecx
        jnz     ByteLop
ToEnd:
        ret

AddLu32u8 endp



;-----------------------------------------------------------------------
; void SubLu32u8(unsigned Size, uint32_t *Accu, uint8_t *pSub)
;
; Subtracts a row of bytes from a row of 32-bit accumulators:
;   Accu[i] -= pSub[i]   for i in [0, Size)
; The subtraction is a plain 32-bit sub (wraps on underflow).
;
; ABI:   Microsoft x64
; In:    ecx = Size number of elements; only the low 32 bits of rcx are
;                   meaningful for an 'unsigned' argument
;        rdx = Accu accumulators; NULL means "do nothing"
;        r8  = pSub bytes to subtract; NULL means "do nothing"
; Out:   nothing
; Clobb: rax, rcx, rdx, r8, flags
;-----------------------------------------------------------------------
        public  SubLu32u8
SubLu32u8 proc

        test    r8,r8
        jz      ToEnd                   ; pSub == NULL
        test    rdx,rdx
        jz      ToEnd                   ; Accu == NULL
        test    ecx,ecx
        jz      ToEnd                   ; Size == 0

ByteLop:
        movzx   eax,byte ptr [r8]       ; widen u8 -> u32
        inc     r8
        sub     [rdx],eax
        add     rdx,4
        dec     ecx
        jnz     ByteLop
ToEnd:
        ret

SubLu32u8 endp


;***********  FOR GAUSSIAN ***********


;-----------------------------------------------------------------------
; void AbsDiff_u32(uint32_t *Out, uint32_t *In, int SizeX)
;
; In-place absolute difference of two rows of unsigned 32-bit values:
;   Out[i] = |In[i] - Out[i]|   for i in [0, SizeX)
; The operands are compared as unsigned numbers (borrow test), so the
; result is correct over the whole uint32_t range.
;
; ABI:   Microsoft x64
; In:    rcx = Out   first operand and destination; NULL means "do nothing"
;        rdx = In    second operand; NULL means "do nothing"
;        r8d = SizeX number of elements; values <= 0 mean "do nothing".
;                    Only the low 32 bits of r8 are meaningful for an 'int'.
; Out:   nothing
; Clobb: rax, rcx, rdx, r8, flags
;-----------------------------------------------------------------------
        public  AbsDiff_u32
AbsDiff_u32 proc

        test    rcx,rcx
        jz      ToEnd                   ; Out == NULL
        test    rdx,rdx
        jz      ToEnd                   ; In == NULL
        test    r8d,r8d
        jle     ToEnd                   ; SizeX <= 0

LoopPx1:
        mov     eax,[rdx]
        sub     eax,[rcx]               ; eax = In[i] - Out[i]
        jae     Positive                ; no borrow: In[i] >= Out[i]
        neg     eax                     ; borrow: result is Out[i] - In[i]
Positive:
        mov     [rcx],eax
        add     rdx,4
        add     rcx,4
        dec     r8d
        jnz     LoopPx1

ToEnd:  ret

AbsDiff_u32 endp


;-----------------------------------------------------------------------
; void AbsDiffCopy_u32(uint32_t *Out, uint32_t *In1, uint32_t *In2, int SizeX)
;
; Absolute difference of two rows of unsigned 32-bit values, written to
; a third row:
;   Out[i] = |In1[i] - In2[i]|   for i in [0, SizeX)
; The operands are compared as unsigned numbers (borrow test), so the
; result is correct over the whole uint32_t range.
;
; ABI:   Microsoft x64
; In:    rcx = Out   destination; NULL means "do nothing"
;        rdx = In1   first operand; NULL means "do nothing"
;        r8  = In2   second operand; NULL means "do nothing"
;        r9d = SizeX number of elements; values <= 0 mean "do nothing".
;                    Only the low 32 bits of r9 are meaningful for an 'int'.
; Out:   nothing
; Clobb: rax, rcx, rdx, r8, r9, flags
;-----------------------------------------------------------------------
        public  AbsDiffCopy_u32
AbsDiffCopy_u32 proc

        test    rcx,rcx
        jz      ToEnd                   ; Out == NULL
        test    rdx,rdx
        jz      ToEnd                   ; In1 == NULL
        test    r8,r8
        jz      ToEnd                   ; In2 == NULL
        test    r9d,r9d
        jle     ToEnd                   ; SizeX <= 0

LoopPx1:
        mov     eax,[rdx]
        sub     eax,[r8]                ; eax = In1[i] - In2[i]
        jae     Positive                ; no borrow: In1[i] >= In2[i]
        neg     eax                     ; borrow: result is In2[i] - In1[i]
Positive:
        mov     [rcx],eax
        add     rdx,4
        add     r8,4
        add     rcx,4
        dec     r9d
        jnz     LoopPx1

ToEnd:  ret

AbsDiffCopy_u32 endp




	end

