1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin *                                                                      *
3da2e3ebdSchin *               This software is part of the ast package               *
4*b30d1939SAndy Fiddaman *          Copyright (c) 1996-2011 AT&T Intellectual Property          *
5da2e3ebdSchin *                      and is licensed under the                       *
6*b30d1939SAndy Fiddaman *                 Eclipse Public License, Version 1.0                  *
77c2fbfb3SApril Chin *                    by AT&T Intellectual Property                     *
8da2e3ebdSchin *                                                                      *
9da2e3ebdSchin *                A copy of the License is available at                 *
10*b30d1939SAndy Fiddaman *          http://www.eclipse.org/org/documents/epl-v10.html           *
11*b30d1939SAndy Fiddaman *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12da2e3ebdSchin *                                                                      *
13da2e3ebdSchin *              Information and Software Systems Research               *
14da2e3ebdSchin *                            AT&T Research                             *
15da2e3ebdSchin *                           Florham Park NJ                            *
16da2e3ebdSchin *                                                                      *
17da2e3ebdSchin *                 Glenn Fowler <gsf@research.att.com>                  *
18da2e3ebdSchin *                                                                      *
19da2e3ebdSchin ***********************************************************************/
207c2fbfb3SApril Chin #pragma prototyped
217c2fbfb3SApril Chin 
/*
 * att
 *
 * Compile-time description of the "att" checksum method.
 *
 *	att_description		help text for the method
 *	att_options		no method specific options (0)
 *	att_match		'|' separated names for this method
 *	att_open/init/print/data
 *				aliases for the generic long_* routines
 *	att_scale		512
 *
 * NOTE(review): these macros are presumably gathered into a method
 * table by the file that includes this one, and att_scale is
 * presumably the block size used when sizes are reported -- confirm
 * against the including source.
 *
 * The description literal is continued with backslash-newline inside
 * the quotes, so the leading tab of each continuation line is part of
 * the string; do not re-indent it. The \b escapes are kept as is
 * (presumably emphasis markers for the help formatter -- confirm).
 */

#define att_description	\
	"The system 5 release 4 checksum. This is the default for \bsum\b \
	when \bgetconf UNIVERSE\b is \batt\b. This is the only true sum; \
	all of the other methods are order dependent."
#define att_options	0
#define att_match	"att|sys5|s5|default"
#define att_open	long_open
#define att_init	long_init
#define att_print	long_print
#define att_data	long_data
#define att_scale	512
377c2fbfb3SApril Chin 
3834f9b3eeSRoland Mainz #if defined(__SUNPRO_C) || defined(__GNUC__)
3934f9b3eeSRoland Mainz 
/*
 * sum_prefetch(addr): ask the CPU to start loading the cache line at
 * addr, using whichever primitive the compiler provides.
 *
 *	Sun compiler	sun_prefetch_read_many() from <sun_prefetch.h>
 *	GNU C		__builtin_prefetch(addr, 0, 3), i.e. prefetch
 *			for read with the highest locality hint
 *
 * The enclosing conditional only admits these two compilers, so the
 * #error branch is a guard against that condition being widened
 * without adding a matching definition here.
 */
#if defined(__SUNPRO_C)
#    include <sun_prefetch.h>
#    define sum_prefetch(addr) sun_prefetch_read_many((void *)(addr))
#elif defined(__GNUC__)
#    define sum_prefetch(addr) __builtin_prefetch((addr), 0, 3)
#else
#    error Unknown compiler
#endif

/* number of bytes att_block() consumes per pass of its chunked loop */
#define CBLOCK_SIZE (64)
5034f9b3eeSRoland Mainz #pragma unroll(16)
5134f9b3eeSRoland Mainz 
5234f9b3eeSRoland Mainz /* Inmos transputer would love this algorithm */
5334f9b3eeSRoland Mainz static int
att_block(register Sum_t * p,const void * s,size_t n)5434f9b3eeSRoland Mainz att_block(register Sum_t* p, const void* s, size_t n)
5534f9b3eeSRoland Mainz {
5634f9b3eeSRoland Mainz 	register uint32_t	c = ((Integral_t*)p)->sum;
5734f9b3eeSRoland Mainz 	register const unsigned char*	b = (const unsigned char*)s;
5834f9b3eeSRoland Mainz 	register const unsigned char*	e = b + n;
5934f9b3eeSRoland Mainz 	register uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
6034f9b3eeSRoland Mainz 	register unsigned int i;
61*b30d1939SAndy Fiddaman 
6234f9b3eeSRoland Mainz 	s0=s1=s2=s3=s4=s5=s6=s7=0U;
63*b30d1939SAndy Fiddaman 
6434f9b3eeSRoland Mainz 	sum_prefetch((void *)b);
65*b30d1939SAndy Fiddaman 
6634f9b3eeSRoland Mainz 	while (n > CBLOCK_SIZE)
6734f9b3eeSRoland Mainz 	{
6834f9b3eeSRoland Mainz 		sum_prefetch((b+CBLOCK_SIZE));
69*b30d1939SAndy Fiddaman 
7034f9b3eeSRoland Mainz 		/* Compiler will unroll for() loops per #pragma unroll */
7134f9b3eeSRoland Mainz 		for (i=0 ; i < (CBLOCK_SIZE/8) ; i++)
7234f9b3eeSRoland Mainz 		{
7334f9b3eeSRoland Mainz 			/*
7434f9b3eeSRoland Mainz 			 * use s0-s7 to decouple calculations (this improves pipelining)
7534f9b3eeSRoland Mainz 			 * because each operation is completely independent from it's
7634f9b3eeSRoland Mainz 			 * siblings
7734f9b3eeSRoland Mainz 			 */
7834f9b3eeSRoland Mainz 			s0+=b[0];
7934f9b3eeSRoland Mainz 			s1+=b[1];
8034f9b3eeSRoland Mainz 			s2+=b[2];
8134f9b3eeSRoland Mainz 			s3+=b[3];
8234f9b3eeSRoland Mainz 			s4+=b[4];
8334f9b3eeSRoland Mainz 			s5+=b[5];
8434f9b3eeSRoland Mainz 			s6+=b[6];
8534f9b3eeSRoland Mainz 			s7+=b[7];
8634f9b3eeSRoland Mainz 
8734f9b3eeSRoland Mainz 			b+=8;
8834f9b3eeSRoland Mainz 			n-=8;
8934f9b3eeSRoland Mainz 		}
9034f9b3eeSRoland Mainz 	}
91*b30d1939SAndy Fiddaman 
9234f9b3eeSRoland Mainz 	c+=s0+s1+s2+s3+s4+s5+s6+s7;
9334f9b3eeSRoland Mainz 
9434f9b3eeSRoland Mainz 	while (b < e)
9534f9b3eeSRoland Mainz 		c += *b++;
9634f9b3eeSRoland Mainz 	((Integral_t*)p)->sum = c;
9734f9b3eeSRoland Mainz 	return 0;
9834f9b3eeSRoland Mainz }
9934f9b3eeSRoland Mainz 
10034f9b3eeSRoland Mainz #else
1017c2fbfb3SApril Chin static int
att_block(register Sum_t * p,const void * s,size_t n)1027c2fbfb3SApril Chin att_block(register Sum_t* p, const void* s, size_t n)
1037c2fbfb3SApril Chin {
1047c2fbfb3SApril Chin 	register uint32_t	c = ((Integral_t*)p)->sum;
1057c2fbfb3SApril Chin 	register unsigned char*	b = (unsigned char*)s;
1067c2fbfb3SApril Chin 	register unsigned char*	e = b + n;
1077c2fbfb3SApril Chin 
1087c2fbfb3SApril Chin 	while (b < e)
1097c2fbfb3SApril Chin 		c += *b++;
1107c2fbfb3SApril Chin 	((Integral_t*)p)->sum = c;
1117c2fbfb3SApril Chin 	return 0;
1127c2fbfb3SApril Chin }
11334f9b3eeSRoland Mainz #endif /* defined(__SUNPRO_C) || defined(__GNUC__) */
1147c2fbfb3SApril Chin 
1157c2fbfb3SApril Chin static int
att_done(Sum_t * p)1167c2fbfb3SApril Chin att_done(Sum_t* p)
1177c2fbfb3SApril Chin {
1187c2fbfb3SApril Chin 	register uint32_t	c = ((Integral_t*)p)->sum;
1197c2fbfb3SApril Chin 
1207c2fbfb3SApril Chin 	c = (c & 0xffff) + ((c >> 16) & 0xffff);
1217c2fbfb3SApril Chin 	c = (c & 0xffff) + (c >> 16);
1227c2fbfb3SApril Chin 	((Integral_t*)p)->sum = c & 0xffff;
1237c2fbfb3SApril Chin 	return short_done(p);
1247c2fbfb3SApril Chin }
125