1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 1996, by Sun Microsystems, Inc. 24 * All rights reserved. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * wio_get.c 31 * 32 * Wide I/O Library 33 * 34 * Copyright 1990, 1995 by Mortice Kern Systems Inc. All rights reserved. 35 * 36 */ 37 38 #if M_RCSID 39 #ifndef lint 40 static char rcsID[] = "$Header: /rd/src/libc/wide/rcs/wio_get.c 1.3 1995/07/26 17:50:45 ant Exp $"; 41 #endif 42 #endif 43 44 #include <mks.h> 45 #include <errno.h> 46 #include <m_wio.h> 47 48 #ifdef M_I18N_LOCKING_SHIFT 49 /* 50 * Eat one or more shift-out and/or shift-in bytes. 51 * Return non-zero if an error occured on the stream. 52 * The stream's input state is updated accordingly. 53 * 54 * NOTE this function assumes that the shift-in and 55 * shift-out are bytes. 56 */ 57 static int 58 eat_shift_bytes(wio) 59 t_wide_io *wio; 60 { 61 char mb; 62 int ch, prev; 63 mbstate_t start_state; 64 65 for (prev = EOF; (ch = (*wio->get)(wio->object)) != EOF; prev = ch) { 66 /* Was it an insignificant shift byte, SI-SI or SO-SO? */ 67 if (ch != prev) { 68 /* First iteration will always enter here looking 69 * for a state change. Subsequent iterations entering 70 * here are trying to identify redundant shifts, which 71 * are SO-SI or SI-SO pairs. 72 */ 73 mb = (char) ch; 74 start_state = wio->_state; 75 76 /* Convert byte and identify a state change. */ 77 if (mbrtowc((wchar_t *) 0, &mb, 1, &wio->_state) == -1 78 || mbsinit(&start_state) == mbsinit(&wio->_state)) { 79 /* Encoding error or no state change. */ 80 if (wio->get != (int (*)(int, void *)) 0) 81 (void) (*wio->unget)(ch, wio->object); 82 wio->_state = start_state; 83 break; 84 } 85 } 86 } 87 88 if (wio->iserror != (int (*)(void *)) 0) 89 return !(*wio->iserror)(wio->object); 90 91 return 0; 92 } 93 #endif /* M_I18N_LOCKING_SHIFT */ 94 95 /* 96 * Return a wide character or WEOF for EOF or error. 97 * 98 * The function referenced by "get" is passed the pointer "object" 99 * and returns an input byte or EOF if no further data available. 100 * 101 * This mechanism is used to do conversions of byte strings or 102 * streams into wide characters without loss of information in the 103 * case of a bad multibyte character conversion. The bad multibyte 104 * sequence is passed through as individual bytes. 105 */ 106 wint_t 107 m_wio_get(wio) 108 t_wide_io *wio; 109 { 110 int ch; 111 wchar_t wc; 112 mbstate_t start_state; 113 static mbstate_t initial_state = { 0 }; 114 115 if (wio == (t_wide_io *) 0 || wio->get == (int (*)(void *)) 0) { 116 errno = EINVAL; 117 return -1; 118 } 119 120 /* Do still have bytes available? */ 121 if (wio->_next < wio->_size) 122 return (wint_t) wio->_mb[wio->_next++]; 123 124 /* Read in enough bytes to convert a multibyte character. */ 125 wio->_size = 0; 126 start_state = wio->_state; 127 for (wio->_next = 0; wio->_next < MB_CUR_MAX; ) { 128 if ((ch = (*wio->get)(wio->object)) == EOF) 129 break; 130 131 wio->_mb[wio->_next] = ch; 132 133 /* Attempt to convert multibyte character sequence. */ 134 wio->_size = mbrtowc( 135 &wc, (char *) (wio->_mb + wio->_next), 1, &wio->_state 136 ); 137 138 ++wio->_next; 139 140 if (0 <= wio->_size) { 141 #ifdef M_I18N_LOCKING_SHIFT 142 /* Only eat shift bytes within a line, since in line 143 * canonical mode, attempting to eat shift bytes 144 * following a <newline> causes another read(). 145 */ 146 if (ch != '\n') { 147 /* When a valid character is found, consume 148 * any trailing shift-in or shift-out bytes, 149 * updating the state accordingly. 150 */ 151 (void) eat_shift_bytes(wio); 152 } 153 #endif /* M_I18N_LOCKING_SHIFT */ 154 155 /* Remember the number of bytes converted. */ 156 wio->_size = wio->_next; 157 158 return (wint_t) wc; 159 } 160 } 161 162 /* If we fill the multibyte character buffer or receive an 163 * EOF without recognising a multibyte character, then we 164 * will return individual bytes from the buffer. The buffer 165 * is restored to its state before the bogus byte sequence 166 * was read. 167 */ 168 wio->_state = start_state; 169 wio->_size = wio->_next; 170 wio->_next = 0; 171 172 return 0 < wio->_size ? (wint_t) wio->_mb[wio->_next++] : WEOF; 173 } 174 175 176