xref: /illumos-gate/usr/src/uts/sparc/ml/ip_ocsum.s (revision 9b0bb795)
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
27*7c478bd9Sstevel@tonic-gate#ident	"%Z%%M%	%I%	%E% SMI"
29*7c478bd9Sstevel@tonic-gate#include <sys/asm_linkage.h>
/*
 * ip_ocsum(address, halfword_count, sum)
 *
 * Do a 16 bit one's complement sum of a given number of (16-bit)
 * halfwords, starting from the caller-supplied partial sum.  The
 * halfword pointer must not be odd.
 *
 * Register usage:
 *	%o0		address on entry; folded 16-bit sum on return
 *	%o1		count of halfwords still to be summed
 *	%o2		32-bit sum accumulator (initial value from caller)
 *	%g2, %g3	data loaded by lduh/ldd
 *	%o4, %o5	data loaded by ldd; %o4 is also the temp for the fold
 *
 * Outline:
 *	- fewer than 31 halfwords: sum them one halfword at a time.
 *	- otherwise: sum single halfwords until the address is 32-byte
 *	  aligned (at most 15, since the address is even), then sum
 *	  32-byte chunks, then sum the remaining halfwords singly.
 *	  31 = 15 + 16, so at least one whole chunk is always left
 *	  after alignment; the chunk loop relies on that because it
 *	  does not test the count before its first pass.
 *	- finally fold the 32-bit accumulator into 16 bits.
 *
 * (from @(#)ocsum.s 1.3 89/02/24 SMI)
 */

	ENTRY(ip_ocsum)
	cmp	%o1, 31		! fewer than 31 halfwords (62 bytes)?
	bl,a	.dohw		!   yes: just do halfwords
	tst	%o1		! annulled delay slot (taken only), test count

	btst	31, %o0		! already 32 byte aligned?
	bz	2f		!   yes: skip the alignment loop
	nop			! delay slot

	!
	! Do first halfwords until 32-byte aligned
	!
1:
	lduh	[%o0], %g2	! read data
	add	%o0, 2, %o0	! increment address
	add	%o2, %g2, %o2	! add to accumulator, don't need carry yet
	btst	31, %o0		! 32 byte aligned?
	bnz	1b
	sub	%o1, 1, %o1	! delay slot, decrement count

	!
	! loop to add in 32 byte chunks
	! The loads and adds are staggered to help avoid load/use
	! interlocks on highly pipelined implementations, and double
	! loads are used for 64-bit wide memory systems.
	! The loads leave the condition codes alone, so the carry
	! chains through the whole addcc/addxcc sequence.
	!
2:
	sub	%o1, 16, %o1	! bias count by one chunk to aid testing
4:
	ldd	[%o0], %g2	! read data into %g2/%g3
	ldd	[%o0+8], %o4	! read more data into %o4/%o5
	addcc	%o2, %g2, %o2	! add to accumulator, start carry chain
	addxcc	%o2, %g3, %o2	! add to accumulator with carry
	ldd	[%o0+16], %g2	! read more data
	addxcc	%o2, %o4, %o2	! add to accumulator with carry
	addxcc	%o2, %o5, %o2	! add to accumulator with carry
	ldd	[%o0+24], %o4	! read more data
	addxcc	%o2, %g2, %o2	! add to accumulator with carry
	addxcc	%o2, %g3, %o2	! add to accumulator with carry
	addxcc	%o2, %o4, %o2	! add to accumulator with carry
	addxcc	%o2, %o5, %o2	! add to accumulator with carry
	addxcc	%o2, 0, %o2	! if final carry, add it in
	subcc	%o1, 16, %o1	! decrement count (in halfwords)
	bge	4b		! another whole chunk left?
	add	%o0, 32, %o0	! delay slot, increment address

	add	%o1, 16, %o1	! undo the bias: true remaining count
	!
	! Do any remaining halfwords
	!
	b	.dohw
	tst	%o1		! delay slot, for more to do

3:
	add	%o0, 2, %o0	! increment address
	addcc	%o2, %g2, %o2	! add to accumulator
	addxcc	%o2, 0, %o2	! if carry, add it in
	subcc	%o1, 1, %o1	! decrement count
.dohw:
	bg,a	3b		! more to do?
	lduh	[%o0], %g2	! annulled delay slot (taken only), read data

	!
	! at this point the 32-bit accumulator
	! has the result that needs to be returned in 16-bits
	!
	sll	%o2, 16, %o4	! put low halfword in high halfword %o4
	addcc	%o4, %o2, %o2	! add the 2 halfwords in high %o2, set carry
	srl	%o2, 16, %o2	! shift to low halfword
	retl			! return
	addxcc	%o2, 0, %o0	! delay slot, add in carry if any. result in %o0
	SET_SIZE(ip_ocsum)