17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*
237c478bd9Sstevel@tonic-gate  * Copyright (c) 1996, by Sun Microsystems, Inc.
247c478bd9Sstevel@tonic-gate  * All rights reserved.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate /*
287c478bd9Sstevel@tonic-gate  * wio_get.c
29*1da57d55SToomas Soome  *
307c478bd9Sstevel@tonic-gate  * Wide I/O Library
317c478bd9Sstevel@tonic-gate  *
327c478bd9Sstevel@tonic-gate  * Copyright 1990, 1995 by Mortice Kern Systems Inc.  All rights reserved.
337c478bd9Sstevel@tonic-gate  *
347c478bd9Sstevel@tonic-gate  */
357c478bd9Sstevel@tonic-gate 
/*
 * RCS identification string.  It is compiled in only when M_RCSID
 * evaluates to non-zero and the source is not being run through lint.
 */
#if M_RCSID && !defined(lint)
static char rcsID[] = "$Header: /rd/src/libc/wide/rcs/wio_get.c 1.3 1995/07/26 17:50:45 ant Exp $";
#endif /* M_RCSID && !lint */
417c478bd9Sstevel@tonic-gate 
427c478bd9Sstevel@tonic-gate #include <mks.h>
437c478bd9Sstevel@tonic-gate #include <errno.h>
447c478bd9Sstevel@tonic-gate #include <m_wio.h>
457c478bd9Sstevel@tonic-gate 
#ifdef M_I18N_LOCKING_SHIFT
/*
 * Eat one or more shift-out and/or shift-in bytes.
 *
 * Bytes are pulled from the object with wio->get until EOF, or until a
 * byte is read that is either an encoding error or does not toggle the
 * conversion state between initial and non-initial.  That byte is handed
 * back through wio->unget (when the callback is provided) and the
 * conversion state is rewound to what it was before the byte was seen.
 * The stream's input state is updated for every shift byte consumed.
 *
 * Returns non-zero if an error occurred on the stream, as reported by
 * the wio->iserror callback; returns 0 if there was no error or if no
 * iserror callback is installed.
 *
 * NOTE this function assumes that the shift-in and
 * shift-out are bytes.
 */
static int
eat_shift_bytes(t_wide_io *wio)
{
	char mb;
	int ch, prev;
	mbstate_t start_state;

	for (prev = EOF; (ch = (*wio->get)(wio->object)) != EOF; prev = ch) {
		/*
		 * A byte identical to the previous one is an insignificant
		 * shift (SI-SI or SO-SO) and is simply swallowed.
		 */
		if (ch == prev)
			continue;

		/*
		 * First iteration will always get here looking for a state
		 * change.  Subsequent iterations getting here are trying to
		 * identify redundant shifts, which are SO-SI or SI-SO pairs.
		 */
		mb = (char) ch;
		start_state = wio->_state;

		/*
		 * Convert the byte and identify a state change.  mbsinit()
		 * only promises zero or non-zero, so normalise both results
		 * with ! before comparing them.
		 */
		if (mbrtowc((wchar_t *) 0, &mb, 1, &wio->_state) == (size_t) -1
		    || !mbsinit(&start_state) == !mbsinit(&wio->_state)) {
			/*
			 * Encoding error or no state change: this byte is
			 * not a shift byte.  Push it back, guarding on the
			 * callback that is actually about to be invoked.
			 */
			if (wio->unget != (int (*)(int, void *)) 0)
				(void) (*wio->unget)(ch, wio->object);
			wio->_state = start_state;
			break;
		}
	}

	/*
	 * NOTE(review): iserror is presumed to return non-zero when the
	 * object's error flag is set -- confirm against m_wio.h.
	 */
	if (wio->iserror != (int (*)(void *)) 0)
		return (*wio->iserror)(wio->object) != 0;

	return 0;
}
#endif /* M_I18N_LOCKING_SHIFT */
927c478bd9Sstevel@tonic-gate 
937c478bd9Sstevel@tonic-gate /*
947c478bd9Sstevel@tonic-gate  * Return a wide character or WEOF for EOF or error.
957c478bd9Sstevel@tonic-gate  *
967c478bd9Sstevel@tonic-gate  * The function referenced by "get" is passed the pointer "object"
977c478bd9Sstevel@tonic-gate  * and returns an input byte or EOF if no further data available.
987c478bd9Sstevel@tonic-gate  *
99*1da57d55SToomas Soome  * This mechanism is used to do conversions of byte strings or
1007c478bd9Sstevel@tonic-gate  * streams into wide characters without loss of information in the
1017c478bd9Sstevel@tonic-gate  * case of a bad multibyte character conversion.  The bad multibyte
1027c478bd9Sstevel@tonic-gate  * sequence is passed through as individual bytes.
1037c478bd9Sstevel@tonic-gate  */
1047c478bd9Sstevel@tonic-gate wint_t
m_wio_get(wio)1057c478bd9Sstevel@tonic-gate m_wio_get(wio)
1067c478bd9Sstevel@tonic-gate t_wide_io *wio;
1077c478bd9Sstevel@tonic-gate {
1087c478bd9Sstevel@tonic-gate         int ch;
1097c478bd9Sstevel@tonic-gate         wchar_t wc;
1107c478bd9Sstevel@tonic-gate 	mbstate_t start_state;
1117c478bd9Sstevel@tonic-gate 	static mbstate_t initial_state = { 0 };
1127c478bd9Sstevel@tonic-gate 
1137c478bd9Sstevel@tonic-gate 	if (wio == (t_wide_io *) 0 || wio->get == (int (*)(void *)) 0) {
1147c478bd9Sstevel@tonic-gate 		errno = EINVAL;
1157c478bd9Sstevel@tonic-gate 		return -1;
1167c478bd9Sstevel@tonic-gate 	}
1177c478bd9Sstevel@tonic-gate 
118*1da57d55SToomas Soome 	/* Do still have bytes available? */
1197c478bd9Sstevel@tonic-gate 	if (wio->_next < wio->_size)
1207c478bd9Sstevel@tonic-gate 		return (wint_t) wio->_mb[wio->_next++];
1217c478bd9Sstevel@tonic-gate 
1227c478bd9Sstevel@tonic-gate         /* Read in enough bytes to convert a multibyte character. */
1237c478bd9Sstevel@tonic-gate 	wio->_size = 0;
1247c478bd9Sstevel@tonic-gate 	start_state = wio->_state;
1257c478bd9Sstevel@tonic-gate         for (wio->_next = 0; wio->_next < MB_CUR_MAX; ) {
1267c478bd9Sstevel@tonic-gate                 if ((ch = (*wio->get)(wio->object)) == EOF)
1277c478bd9Sstevel@tonic-gate                         break;
1287c478bd9Sstevel@tonic-gate 
1297c478bd9Sstevel@tonic-gate 		wio->_mb[wio->_next] = ch;
1307c478bd9Sstevel@tonic-gate 
131*1da57d55SToomas Soome 		/* Attempt to convert multibyte character sequence. */
1327c478bd9Sstevel@tonic-gate                 wio->_size = mbrtowc(
1337c478bd9Sstevel@tonic-gate 			&wc, (char *) (wio->_mb + wio->_next), 1, &wio->_state
1347c478bd9Sstevel@tonic-gate 		);
1357c478bd9Sstevel@tonic-gate 
1367c478bd9Sstevel@tonic-gate 		++wio->_next;
1377c478bd9Sstevel@tonic-gate 
1387c478bd9Sstevel@tonic-gate 		if (0 <= wio->_size) {
1397c478bd9Sstevel@tonic-gate #ifdef M_I18N_LOCKING_SHIFT
1407c478bd9Sstevel@tonic-gate 			/* Only eat shift bytes within a line, since in line
1417c478bd9Sstevel@tonic-gate 			 * canonical mode, attempting to eat shift bytes
1427c478bd9Sstevel@tonic-gate 			 * following a <newline> causes another read().
1437c478bd9Sstevel@tonic-gate 			 */
1447c478bd9Sstevel@tonic-gate 			if (ch != '\n') {
145*1da57d55SToomas Soome 				/* When a valid character is found, consume
1467c478bd9Sstevel@tonic-gate 				 * any trailing shift-in or shift-out bytes,
1477c478bd9Sstevel@tonic-gate 				 * updating the state accordingly.
1487c478bd9Sstevel@tonic-gate 				 */
1497c478bd9Sstevel@tonic-gate 				(void) eat_shift_bytes(wio);
1507c478bd9Sstevel@tonic-gate 			}
1517c478bd9Sstevel@tonic-gate #endif /* M_I18N_LOCKING_SHIFT */
1527c478bd9Sstevel@tonic-gate 
1537c478bd9Sstevel@tonic-gate 			/* Remember the number of bytes converted. */
1547c478bd9Sstevel@tonic-gate 			wio->_size = wio->_next;
1557c478bd9Sstevel@tonic-gate 
1567c478bd9Sstevel@tonic-gate 			return (wint_t) wc;
1577c478bd9Sstevel@tonic-gate                 }
1587c478bd9Sstevel@tonic-gate         }
1597c478bd9Sstevel@tonic-gate 
1607c478bd9Sstevel@tonic-gate 	/* If we fill the multibyte character buffer or receive an
1617c478bd9Sstevel@tonic-gate 	 * EOF without recognising a multibyte character, then we
1627c478bd9Sstevel@tonic-gate 	 * will return individual bytes from the buffer.  The buffer
163*1da57d55SToomas Soome 	 * is restored to its state before the bogus byte sequence
1647c478bd9Sstevel@tonic-gate 	 * was read.
165*1da57d55SToomas Soome 	 */
1667c478bd9Sstevel@tonic-gate 	wio->_state = start_state;
1677c478bd9Sstevel@tonic-gate 	wio->_size = wio->_next;
1687c478bd9Sstevel@tonic-gate 	wio->_next = 0;
1697c478bd9Sstevel@tonic-gate 
1707c478bd9Sstevel@tonic-gate 	return 0 < wio->_size ? (wint_t) wio->_mb[wio->_next++] : WEOF;
1717c478bd9Sstevel@tonic-gate }
1727c478bd9Sstevel@tonic-gate 
1737c478bd9Sstevel@tonic-gate 
174