1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin *                                                                      *
3da2e3ebdSchin *               This software is part of the ast package               *
4*b30d1939SAndy Fiddaman *          Copyright (c) 1992-2011 AT&T Intellectual Property          *
5da2e3ebdSchin *                      and is licensed under the                       *
6*b30d1939SAndy Fiddaman *                 Eclipse Public License, Version 1.0                  *
77c2fbfb3SApril Chin *                    by AT&T Intellectual Property                     *
8da2e3ebdSchin *                                                                      *
9da2e3ebdSchin *                A copy of the License is available at                 *
10*b30d1939SAndy Fiddaman *          http://www.eclipse.org/org/documents/epl-v10.html           *
11*b30d1939SAndy Fiddaman *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12da2e3ebdSchin *                                                                      *
13da2e3ebdSchin *              Information and Software Systems Research               *
14da2e3ebdSchin *                            AT&T Research                             *
15da2e3ebdSchin *                           Florham Park NJ                            *
16da2e3ebdSchin *                                                                      *
17da2e3ebdSchin *                 Glenn Fowler <gsf@research.att.com>                  *
18da2e3ebdSchin *                  David Korn <dgk@research.att.com>                   *
19da2e3ebdSchin *                                                                      *
20da2e3ebdSchin ***********************************************************************/
21*b30d1939SAndy Fiddaman /*
22*b30d1939SAndy Fiddaman  * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
23*b30d1939SAndy Fiddaman  */
24da2e3ebdSchin #pragma prototyped
25da2e3ebdSchin /*
26da2e3ebdSchin  * David Korn
27da2e3ebdSchin  * AT&T Bell Laboratories
28da2e3ebdSchin  *
29da2e3ebdSchin  * library interface for word count
30da2e3ebdSchin  */
31da2e3ebdSchin 
32da2e3ebdSchin #include <cmd.h>
33da2e3ebdSchin #include <wc.h>
34da2e3ebdSchin #include <ctype.h>
35da2e3ebdSchin 
367c2fbfb3SApril Chin #if _hdr_wchar && _hdr_wctype && _lib_iswctype
37da2e3ebdSchin 
38da2e3ebdSchin #include <wchar.h>
39da2e3ebdSchin #include <wctype.h>
4034f9b3eeSRoland Mainz #include <lc.h>
41da2e3ebdSchin 
42da2e3ebdSchin #else
43da2e3ebdSchin 
44da2e3ebdSchin #ifndef iswspace
45da2e3ebdSchin #define iswspace(x)	isspace(x)
46da2e3ebdSchin #endif
47da2e3ebdSchin 
48da2e3ebdSchin #endif
49da2e3ebdSchin 
5034f9b3eeSRoland Mainz #define	WC_SP		0x08
5134f9b3eeSRoland Mainz #define	WC_NL		0x10
5234f9b3eeSRoland Mainz #define	WC_MB		0x20
5334f9b3eeSRoland Mainz #define	WC_ERR		0x40
54da2e3ebdSchin 
5534f9b3eeSRoland Mainz #define eol(c)		((c)&WC_NL)
5634f9b3eeSRoland Mainz #define mbc(c)		((c)&WC_MB)
5734f9b3eeSRoland Mainz #define spc(c)		((c)&WC_SP)
583e14f97fSRoger A. Faulkner #define mb2wc(w,p,n)	(*ast.mb_towc)(&w,(char*)p,n)
5934f9b3eeSRoland Mainz 
wc_init(int mode)6034f9b3eeSRoland Mainz Wc_t* wc_init(int mode)
61da2e3ebdSchin {
62da2e3ebdSchin 	register int	n;
63da2e3ebdSchin 	register int	w;
64da2e3ebdSchin 	Wc_t*		wp;
65da2e3ebdSchin 
6634f9b3eeSRoland Mainz 	if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
6734f9b3eeSRoland Mainz 		return 0;
6834f9b3eeSRoland Mainz 	if (!mbwide())
6934f9b3eeSRoland Mainz 		wp->mb = 0;
7034f9b3eeSRoland Mainz #if _hdr_wchar && _hdr_wctype && _lib_iswctype
7134f9b3eeSRoland Mainz 	else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8))
7234f9b3eeSRoland Mainz 		wp->mb = 1;
7334f9b3eeSRoland Mainz #endif
7434f9b3eeSRoland Mainz 	else
7534f9b3eeSRoland Mainz 		wp->mb = -1;
76da2e3ebdSchin 	w = mode & WC_WORDS;
7734f9b3eeSRoland Mainz 	for (n = (1<<CHAR_BIT); --n >= 0;)
7834f9b3eeSRoland Mainz 		wp->type[n] = (w && isspace(n)) ? WC_SP : 0;
7934f9b3eeSRoland Mainz 	wp->type['\n'] = WC_SP|WC_NL;
8034f9b3eeSRoland Mainz 	if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0)
8134f9b3eeSRoland Mainz 	{
8234f9b3eeSRoland Mainz 		for (n = 0; n < 64; n++)
8334f9b3eeSRoland Mainz 		{
8434f9b3eeSRoland Mainz 			wp->type[0x80+n] |= WC_MB;
8534f9b3eeSRoland Mainz 			if (n<32)
8634f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+1;
8734f9b3eeSRoland Mainz 			else if (n<48)
8834f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+2;
8934f9b3eeSRoland Mainz 			else if (n<56)
9034f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+3;
9134f9b3eeSRoland Mainz 			else if (n<60)
9234f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+4;
9334f9b3eeSRoland Mainz 			else if (n<62)
9434f9b3eeSRoland Mainz 				wp->type[0xc0+n] |= WC_MB+5;
9534f9b3eeSRoland Mainz 		}
9634f9b3eeSRoland Mainz 		wp->type[0xc0] = WC_MB|WC_ERR;
9734f9b3eeSRoland Mainz 		wp->type[0xc1] = WC_MB|WC_ERR;
9834f9b3eeSRoland Mainz 		wp->type[0xfe] = WC_MB|WC_ERR;
9934f9b3eeSRoland Mainz 		wp->type[0xff] = WC_MB|WC_ERR;
10034f9b3eeSRoland Mainz 	}
10134f9b3eeSRoland Mainz 	wp->mode = mode;
10234f9b3eeSRoland Mainz 	return wp;
10334f9b3eeSRoland Mainz }
10434f9b3eeSRoland Mainz 
invalid(const char * file,int nlines)10534f9b3eeSRoland Mainz static int invalid(const char *file, int nlines)
10634f9b3eeSRoland Mainz {
10734f9b3eeSRoland Mainz 	error_info.file = (char*)file;
10834f9b3eeSRoland Mainz 	error_info.line = nlines;
10934f9b3eeSRoland Mainz 	error(ERROR_SYSTEM|1, "invalid multibyte character");
11034f9b3eeSRoland Mainz 	error_info.file = 0;
11134f9b3eeSRoland Mainz 	error_info.line = 0;
11234f9b3eeSRoland Mainz 	return nlines;
11334f9b3eeSRoland Mainz }
11434f9b3eeSRoland Mainz 
11534f9b3eeSRoland Mainz /*
11634f9b3eeSRoland Mainz  * handle utf space characters
11734f9b3eeSRoland Mainz  */
11834f9b3eeSRoland Mainz 
/*
 * advance the UTF-8 whitespace recognizer by one byte
 *
 * <state> is the current recognizer state and <c> the byte to consume
 * states as used by this function and its caller in this file:
 *	1	after lead byte E1		(looking for U+1680)
 *	2	after lead byte E2		(looking for U+20xx)
 *	3	after lead byte E3		(looking for U+3000)
 *	4	after E1 9A
 *	5	after E3 80
 *	6	after E2 80
 *	7	after E2 81
 *	8	after lead byte C2, <c> is then equal to the code point
 *
 * returns the next state, 10 when the bytes seen so far form a space
 * character, or 0 when they cannot; any other <state> is returned as is
 */
static int chkstate(int state, register unsigned int c)
{
	switch(state)
	{
	case 1:
		state = (c==0x9a?4:0);
		break;
	case 2:
		/* 0x80 -> state 6, 0x81 -> state 7 */
		state = ((c==0x80||c==0x81)?6+(c&1):0);
		break;
	case 3:
		state = (c==0x80?5:0);
		break;
	case 4:
		/* E1 9A 80 is U+1680 */
		state = (c==0x80?10:0);
		break;
	case 5:
		/* E3 80 80 is U+3000 */
		state = (c==0x80?10:0);
		break;
	case 6:
		state = 0;
		/*
		 * E2 80 A8 and E2 80 A9 are U+2028 (line separator) and
		 * U+2029 (paragraph separator); A0/A1 would be the dagger
		 * characters U+2020/U+2021, which are not spaces
		 */
		if(c==0xa8 || c==0xa9)
			return(10);
		else if((c&0xf0)== 0x80)
		{
			/* E2 80 80..8B are U+2000..U+200B; U+2007 is locale dependent */
			if((c&=0xf)==7)
				return(iswspace(0x2007)?10:0);
			if(c<=0xb)
				return(10);
		}
		else if(c==0xaf && iswspace(0x202f))
			return(10);
		break;
	case 7:
		/* E2 81 9F is U+205F */
		state = (c==0x9f?10:0);
		break;
	case 8:
		return (iswspace(c)?10:0);
	default:
		break;
	}
	return state;
}
160da2e3ebdSchin 
161da2e3ebdSchin /*
162da2e3ebdSchin  * compute the line, word, and character count for file <fd>
163da2e3ebdSchin  */
16434f9b3eeSRoland Mainz 
/*
 * count lines, words and characters on stream <fd>
 *
 * <wp> is a handle from wc_init(); its type[] table, mb and mode
 * fields select the counting strategy, and the results are stored in
 * wp->chars, wp->words, wp->lines and wp->longest
 * <file> is only used to label "invalid multibyte character" messages
 *
 * the stream is consumed one sfreserve() buffer at a time; three
 * strategies are used:
 *	wp->mb < 0 with WC_MBYTE or WC_WORDS: decode every character
 *		with mb2wc()/mbchar() and classify it with iswspace()
 *	single byte locale without WC_LONGEST, or UTF-8 with none of
 *		WC_MBYTE, WC_WORDS, WC_LONGEST: pure byte table scan
 *	all remaining combinations: byte table scan with inline UTF-8
 *		sequence tracking
 * the table scans overwrite the last byte of each buffer with '\n' as
 * a sentinel (the original byte is kept in lastchar); presumably this
 * is why SF_WRITE is turned on for the stream first
 *
 * always returns 0
 */
int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
{
	register char*		type = wp->type;
	register unsigned char*	cp;
	register Sfoff_t	nbytes;
	register Sfoff_t	nchars;
	register Sfoff_t	nwords;
	register Sfoff_t	nlines;
	register Sfoff_t	eline = -1;
	register Sfoff_t	longest = 0;
	register ssize_t	c;
	register unsigned char*	endbuff;
	register int		lasttype = WC_SP;
	unsigned int		lastchar;
	ssize_t			n;
	ssize_t			o;
	unsigned char*		buff;
	wchar_t			x;
	unsigned char		side[32];

	sfset(fd,SF_WRITE,1);
	nlines = nwords = nchars = nbytes = 0;
	wp->longest = 0;
	if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS)))
	{
		/* generic multibyte locale: decode one character at a time */
		cp = buff = endbuff = 0;
		for (;;)
		{
			if (cp >= endbuff || (n = mb2wc(x, cp, endbuff-cp)) < 0)
			{
				if ((o = endbuff-cp) < sizeof(side))
				{
					/*
					 * fewer than sizeof(side) bytes are left: carry
					 * them into side[], append the start of the next
					 * buffer and decode the straddling character there
					 */
					if (buff)
					{
						if (o)
							memcpy(side, cp, o);
						mbinit();
					}
					else
						o = 0;
					cp = side + o;
					if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0)
					{
						/* no more input: account for the final line */
						if ((nchars - longest) > wp->longest)
							wp->longest = nchars - longest;
						break;
					}
					nbytes += n;
					if ((c = sizeof(side) - o) > n)
						c = n;
					if (c)
						memcpy(cp, buff, c);
					endbuff = buff + n;
					cp = side;
					x = mbchar(cp);
					if ((cp-side) < o)
					{
						/*
						 * NOTE(review): cp is reassigned before it is
						 * used in the adjustment below, so (cp-side)
						 * is taken between buff and side -- confirm
						 * the intended order of these two statements
						 */
						cp = buff;
						nchars += (cp-side) - 1;
					}
					else
						cp = buff + (cp-side) - o;
				}
				else
				{
					/* undecodable byte with plenty of input left: skip it */
					cp++;
					x = -1;
				}
				/* complain at most once per line */
				if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET))
					eline = invalid(file, nlines);
			}
			else
				cp += n ? n : 1;
			if (x == '\n')
			{
				if ((nchars - longest) > wp->longest)
					wp->longest = nchars - longest;
				/* longest now holds the character offset of the next line start */
				longest = nchars + 1;
				nlines++;
				lasttype = 1;
			}
			else if (iswspace(x))
				lasttype = 1;
			else if (lasttype)
			{
				/* first non-space after space starts a word */
				lasttype = 0;
				nwords++;
			}
			nchars++;
		}
		if (!(wp->mode & WC_MBYTE))
			nchars = nbytes;
	}
	else if ((!wp->mb && !(wp->mode & WC_LONGEST)) || (wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST))))
	{
		if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
		{
			/* bytes and lines only */
			while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
			{
				nchars += c;
				endbuff = cp + c;
				/* make the last byte a newline sentinel, counting it if it was real */
				if (*--endbuff == '\n')
					nlines++;
				else
					*endbuff = '\n';
				for (;;)
					if (*cp++ == '\n')
					{
						if (cp > endbuff)
							break;
						nlines++;
					}
			}
		}
		else
		{
			/* bytes, lines and words using the type[] table only */
			while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
			{
				nchars += c;
				/* check to see whether first character terminates word */
				if (c==1)
				{
					if (eol(lasttype))
						nlines++;
					if ((c = type[*cp]) && !lasttype)
						nwords++;
					lasttype = c;
					continue;
				}
				if (!lasttype && type[*cp])
					nwords++;
				/* remember the real last byte, then plant the sentinel */
				lastchar = cp[--c];
				*(endbuff = cp+c) = '\n';
				c = lasttype;
				/* process each buffer */
				for (;;)
				{
					/* process spaces and new-lines */
					do
					{
						if (eol(c))
							for (;;)
							{
								/* check for end of buffer */
								if (cp > endbuff)
									goto beob;
								nlines++;
								if (*cp != '\n')
									break;
								cp++;
							}
					} while (c = type[*cp++]);
					/* skip over word characters */
					while (!(c = type[*cp++]));
					nwords++;
				}
			beob:
				/* c becomes the class of the byte before the sentinel */
				if ((cp -= 2) >= buff)
					c = type[*cp];
				else
					c = lasttype;
				lasttype = type[lastchar];
				/* see if was in word */
				if (!c && !lasttype)
					nwords--;
			}
			if (eol(lasttype))
				nlines++;
			else if (!lasttype)
				nwords++;
		}
	}
	else
	{
		int		lineoff=0;	/* bytes of the current line seen in earlier buffers */
		int		skip=0;		/* bytes still expected in the current sequence */
		int		adjust=0;	/* extra bytes of multibyte characters, removed from nchars */
		int		state=0;	/* chkstate() whitespace recognizer state */
		int		oldc;		/* byte handed to chkstate() on the next step */
		int		xspace;		/* locale treats U+00A0 or U+0085 as space */
		int		wasspace = 1;
		unsigned char*	start;		/* start of the current line for WC_LONGEST */
		int             flagm = 0;	/* previous buffer ended on a multibyte-class byte */


		lastchar = 0;
		start = (endbuff = side) + 1;
		xspace = iswspace(0xa0) || iswspace(0x85);
		while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
		{
			nbytes += c;
			nchars += c;
			start = cp-lineoff;
			/* check to see whether first character terminates word */
			if(c==1)
			{
				if(eol(lasttype))
					nlines++;
				if((c = type[*cp]) && !lasttype)
					nwords++;
				lasttype = c;
				endbuff = start;
				continue;
			}
			/* remember the real last byte, then plant the sentinel */
			lastchar = cp[--c];
			endbuff = cp+c;
			cp[c] = '\n';
			if(mbc(lasttype))
			{
				/* resume the multibyte sequence cut by the buffer boundary */
				c = lasttype;
				flagm = 1;
				goto mbyte;
			}
			if(!lasttype && spc(type[*cp]))
				nwords++;
			c = lasttype;
			/* process each buffer */
			for (;;)
			{
				/* process spaces and new-lines */
			spaces:
				do
				{
					if (eol(c))
					{
						/* check for end of buffer */
						if (cp > endbuff)
							goto eob;
						if(wp->mode&WC_LONGEST)
						{
							if((cp-start)-adjust > longest)
								longest = (cp-start)-adjust-1;
							start = cp;
						}
						nlines++;
						nchars -= adjust;
						adjust = 0;
					}
				} while (spc(c = type[*cp++]));
				wasspace=1;
				if(mbc(c))
				{
				mbyte:
					do
					{
						if(c&WC_ERR)
							goto err;
						/* a new lead byte while bytes are still owed */
						if(skip && (c&7))
							break;
						if(!skip)
						{
							/* continuation byte with no sequence open */
							if(!(c&7))
							{
								skip=1;
								break;
							}
							skip = (c&7);
							adjust += skip;
							state = 0;
							if (flagm == 1) {
								flagm = 0;
								oldc = *cp;
								/*
								 * NOTE(review): iswspace() is only
								 * specified to return non-zero, so
								 * "== 1" may never match on some
								 * platforms -- confirm intent
								 */
								if (xspace && (iswspace(*cp) == 1)) {
									state = 8;
								}
								continue;
							}
							/* lead bytes E1, E2, E3 start recognizer states 1, 2, 3 */
							if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3)))
								oldc = *cp;
							else if(xspace && cp[-1]==0xc2)
							{
								state = 8;
								oldc = *cp;
							}
						}
						else
						{
							skip--;
							if(state && (state=chkstate(state,oldc)))
							{
								if(state==10)
								{
									/* the sequence was a space character */
									if(!wasspace)
										nwords++;
									wasspace = 1;
									state=0;
									goto spaces;
								}
								oldc = *cp;
							}
						}
					} while (mbc(c = type[*cp++]));
					wasspace = 0;
					if(skip)
					{
						if(eol(c) && (cp > endbuff))
							goto eob;
				err:
						skip = 0;
						state = 0;
						/* complain at most once per line */
						if(eline!=nlines && !(wp->mode & WC_QUIET))
							eline = invalid(file, nlines);
						/*
						 * resynchronize: drop error bytes and stray
						 * continuation bytes only, so that a valid lead
						 * byte is handed back to the mbyte loop below
						 * (the test used to read c|WC_ERR, which is
						 * always true and swallowed lead bytes as well)
						 */
						while(mbc(c) && ((c&WC_ERR) || (c&7)==0))
							c=type[*cp++];
						if(eol(c) && (cp > endbuff))
						{
							c = WC_MB|WC_ERR;
							goto eob;
						}
						if(mbc(c))
							goto mbyte;
						else if(c&WC_SP)
							goto spaces;
					}
					if(spc(c))
					{
						nwords++;
						continue;
					}
				}
				/* skip over word characters */
				while(!(c = type[*cp++]));
				if(mbc(c))
					goto mbyte;
				nwords++;
			}
		eob:
			lineoff = cp-start;
			/* c becomes the class of the byte before the sentinel */
			if((cp -= 2) >= buff)
				c = type[*cp];
			else
				c = lasttype;
			lasttype = type[lastchar];
			/* see if was in word */
			if(!c && !lasttype)
				nwords--;
		}
		if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest)
			longest = (endbuff + 1 - start) - adjust - (lastchar == '\n');
		wp->longest = longest;
		if (eol(lasttype))
			nlines++;
		else if (!lasttype)
			nwords++;
		if (wp->mode & WC_MBYTE)
			nchars -= adjust;
		else
			nchars = nbytes;
	}
	wp->chars = nchars;
	wp->words = nwords;
	wp->lines = nlines;
	return 0;
}
52334f9b3eeSRoland Mainz 
524