1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1996, by Sun Microsystems, Inc.
24  * All rights reserved.
25  */
26 
27 /*
28  * wio_get.c
29  *
30  * Wide I/O Library
31  *
32  * Copyright 1990, 1995 by Mortice Kern Systems Inc.  All rights reserved.
33  *
34  */
35 
/*
 * RCS identification string.  It is compiled in only when M_RCSID
 * evaluates to non-zero and the source is not being processed by lint.
 */
#if M_RCSID && !defined(lint)
static char rcsID[] = "$Header: /rd/src/libc/wide/rcs/wio_get.c 1.3 1995/07/26 17:50:45 ant Exp $";
#endif
41 
42 #include <mks.h>
43 #include <errno.h>
44 #include <m_wio.h>
45 
#ifdef M_I18N_LOCKING_SHIFT
/*
 * Eat one or more shift-out and/or shift-in bytes.
 *
 * Bytes are read through wio->get until EOF, or until a byte is seen
 * that either fails to convert or does not toggle the shift state.
 * Such a byte is pushed back through wio->unget (when an unget handler
 * is installed) and the conversion state in effect before that byte is
 * restored.  A byte equal to its predecessor (SI-SI or SO-SO) is
 * consumed without being converted again.
 *
 * The stream's input state (wio->_state) is updated accordingly.
 *
 * Returns the logical negation of wio->iserror(wio->object) when an
 * iserror handler is installed, otherwise 0.
 *
 * NOTE this function assumes that the shift-in and
 * shift-out are bytes.
 *
 * NOTE(review): this function was originally documented as returning
 * non-zero if an error occurred on the stream, yet the result of the
 * iserror handler is negated.  If iserror reports non-zero on error
 * the negation is inverted -- confirm against the t_wide_io contract.
 * The only caller in this file discards the result.
 */
static int
eat_shift_bytes(t_wide_io *wio)
{
	char mb;
	int ch, prev;
	mbstate_t start_state;

	for (prev = EOF; (ch = (*wio->get)(wio->object)) != EOF; prev = ch) {
		/* Was it an insignificant shift byte, SI-SI or SO-SO? */
		if (ch == prev)
			continue;

		/* First iteration will always get here looking for a
		 * state change.  Subsequent iterations getting here are
		 * trying to identify redundant shifts, which are SO-SI
		 * or SI-SO pairs.
		 */
		mb = (char) ch;
		start_state = wio->_state;

		/* Convert byte and identify a state change.  mbsinit()
		 * only promises a non-zero result for the initial state,
		 * so both results are normalised before being compared.
		 */
		if (mbrtowc((wchar_t *) 0, &mb, 1, &wio->_state) == (size_t) -1
		    || !mbsinit(&start_state) == !mbsinit(&wio->_state)) {
			/* Encoding error or no state change.  Push the
			 * byte back only if an unget handler exists.
			 */
			if (wio->unget != (int (*)(int, void *)) 0)
				(void) (*wio->unget)(ch, wio->object);
			wio->_state = start_state;
			break;
		}
	}

	if (wio->iserror != (int (*)(void *)) 0)
		return !(*wio->iserror)(wio->object);

	return 0;
}
#endif /* M_I18N_LOCKING_SHIFT */
92 
93 /*
94  * Return a wide character or WEOF for EOF or error.
95  *
96  * The function referenced by "get" is passed the pointer "object"
97  * and returns an input byte or EOF if no further data available.
98  *
99  * This mechanism is used to do conversions of byte strings or
100  * streams into wide characters without loss of information in the
101  * case of a bad multibyte character conversion.  The bad multibyte
102  * sequence is passed through as individual bytes.
103  */
104 wint_t
m_wio_get(wio)105 m_wio_get(wio)
106 t_wide_io *wio;
107 {
108         int ch;
109         wchar_t wc;
110 	mbstate_t start_state;
111 	static mbstate_t initial_state = { 0 };
112 
113 	if (wio == (t_wide_io *) 0 || wio->get == (int (*)(void *)) 0) {
114 		errno = EINVAL;
115 		return -1;
116 	}
117 
118 	/* Do still have bytes available? */
119 	if (wio->_next < wio->_size)
120 		return (wint_t) wio->_mb[wio->_next++];
121 
122         /* Read in enough bytes to convert a multibyte character. */
123 	wio->_size = 0;
124 	start_state = wio->_state;
125         for (wio->_next = 0; wio->_next < MB_CUR_MAX; ) {
126                 if ((ch = (*wio->get)(wio->object)) == EOF)
127                         break;
128 
129 		wio->_mb[wio->_next] = ch;
130 
131 		/* Attempt to convert multibyte character sequence. */
132                 wio->_size = mbrtowc(
133 			&wc, (char *) (wio->_mb + wio->_next), 1, &wio->_state
134 		);
135 
136 		++wio->_next;
137 
138 		if (0 <= wio->_size) {
139 #ifdef M_I18N_LOCKING_SHIFT
140 			/* Only eat shift bytes within a line, since in line
141 			 * canonical mode, attempting to eat shift bytes
142 			 * following a <newline> causes another read().
143 			 */
144 			if (ch != '\n') {
145 				/* When a valid character is found, consume
146 				 * any trailing shift-in or shift-out bytes,
147 				 * updating the state accordingly.
148 				 */
149 				(void) eat_shift_bytes(wio);
150 			}
151 #endif /* M_I18N_LOCKING_SHIFT */
152 
153 			/* Remember the number of bytes converted. */
154 			wio->_size = wio->_next;
155 
156 			return (wint_t) wc;
157                 }
158         }
159 
160 	/* If we fill the multibyte character buffer or receive an
161 	 * EOF without recognising a multibyte character, then we
162 	 * will return individual bytes from the buffer.  The buffer
163 	 * is restored to its state before the bogus byte sequence
164 	 * was read.
165 	 */
166 	wio->_state = start_state;
167 	wio->_size = wio->_next;
168 	wio->_next = 0;
169 
170 	return 0 < wio->_size ? (wint_t) wio->_mb[wio->_next++] : WEOF;
171 }
172 
173 
174