17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate * with the License.
87c478bd9Sstevel@tonic-gate *
97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate * and limitations under the License.
137c478bd9Sstevel@tonic-gate *
147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate *
207c478bd9Sstevel@tonic-gate * CDDL HEADER END
217c478bd9Sstevel@tonic-gate */
227c478bd9Sstevel@tonic-gate /*
237c478bd9Sstevel@tonic-gate * Copyright (c) 1996, by Sun Microsystems, Inc.
247c478bd9Sstevel@tonic-gate * All rights reserved.
257c478bd9Sstevel@tonic-gate */
267c478bd9Sstevel@tonic-gate
277c478bd9Sstevel@tonic-gate /*
287c478bd9Sstevel@tonic-gate * wio_get.c
29*1da57d55SToomas Soome *
307c478bd9Sstevel@tonic-gate * Wide I/O Library
317c478bd9Sstevel@tonic-gate *
327c478bd9Sstevel@tonic-gate * Copyright 1990, 1995 by Mortice Kern Systems Inc. All rights reserved.
337c478bd9Sstevel@tonic-gate *
347c478bd9Sstevel@tonic-gate */
357c478bd9Sstevel@tonic-gate
/*
 * RCS revision tag for this file.  Defined only when M_RCSID is
 * non-zero and lint is not defined.  The string is never modified,
 * so it is declared const.
 */
#if M_RCSID
#ifndef lint
static const char rcsID[] = "$Header: /rd/src/libc/wide/rcs/wio_get.c 1.3 1995/07/26 17:50:45 ant Exp $";
#endif
#endif
417c478bd9Sstevel@tonic-gate
427c478bd9Sstevel@tonic-gate #include <mks.h>
437c478bd9Sstevel@tonic-gate #include <errno.h>
447c478bd9Sstevel@tonic-gate #include <m_wio.h>
457c478bd9Sstevel@tonic-gate
467c478bd9Sstevel@tonic-gate #ifdef M_I18N_LOCKING_SHIFT
477c478bd9Sstevel@tonic-gate /*
487c478bd9Sstevel@tonic-gate * Eat one or more shift-out and/or shift-in bytes.
49*1da57d55SToomas Soome * Return non-zero if an error occured on the stream.
507c478bd9Sstevel@tonic-gate * The stream's input state is updated accordingly.
517c478bd9Sstevel@tonic-gate *
527c478bd9Sstevel@tonic-gate * NOTE this function assumes that the shift-in and
537c478bd9Sstevel@tonic-gate * shift-out are bytes.
547c478bd9Sstevel@tonic-gate */
557c478bd9Sstevel@tonic-gate static int
eat_shift_bytes(wio)567c478bd9Sstevel@tonic-gate eat_shift_bytes(wio)
577c478bd9Sstevel@tonic-gate t_wide_io *wio;
587c478bd9Sstevel@tonic-gate {
597c478bd9Sstevel@tonic-gate char mb;
607c478bd9Sstevel@tonic-gate int ch, prev;
617c478bd9Sstevel@tonic-gate mbstate_t start_state;
627c478bd9Sstevel@tonic-gate
637c478bd9Sstevel@tonic-gate for (prev = EOF; (ch = (*wio->get)(wio->object)) != EOF; prev = ch) {
647c478bd9Sstevel@tonic-gate /* Was it an insignificant shift byte, SI-SI or SO-SO? */
657c478bd9Sstevel@tonic-gate if (ch != prev) {
66*1da57d55SToomas Soome /* First iteration will always enter here looking
677c478bd9Sstevel@tonic-gate * for a state change. Subsequent iterations entering
687c478bd9Sstevel@tonic-gate * here are trying to identify redundant shifts, which
697c478bd9Sstevel@tonic-gate * are SO-SI or SI-SO pairs.
707c478bd9Sstevel@tonic-gate */
717c478bd9Sstevel@tonic-gate mb = (char) ch;
727c478bd9Sstevel@tonic-gate start_state = wio->_state;
737c478bd9Sstevel@tonic-gate
747c478bd9Sstevel@tonic-gate /* Convert byte and identify a state change. */
757c478bd9Sstevel@tonic-gate if (mbrtowc((wchar_t *) 0, &mb, 1, &wio->_state) == -1
767c478bd9Sstevel@tonic-gate || mbsinit(&start_state) == mbsinit(&wio->_state)) {
777c478bd9Sstevel@tonic-gate /* Encoding error or no state change. */
787c478bd9Sstevel@tonic-gate if (wio->get != (int (*)(int, void *)) 0)
797c478bd9Sstevel@tonic-gate (void) (*wio->unget)(ch, wio->object);
807c478bd9Sstevel@tonic-gate wio->_state = start_state;
817c478bd9Sstevel@tonic-gate break;
827c478bd9Sstevel@tonic-gate }
837c478bd9Sstevel@tonic-gate }
847c478bd9Sstevel@tonic-gate }
857c478bd9Sstevel@tonic-gate
867c478bd9Sstevel@tonic-gate if (wio->iserror != (int (*)(void *)) 0)
87*1da57d55SToomas Soome return !(*wio->iserror)(wio->object);
887c478bd9Sstevel@tonic-gate
897c478bd9Sstevel@tonic-gate return 0;
907c478bd9Sstevel@tonic-gate }
917c478bd9Sstevel@tonic-gate #endif /* M_I18N_LOCKING_SHIFT */
927c478bd9Sstevel@tonic-gate
937c478bd9Sstevel@tonic-gate /*
947c478bd9Sstevel@tonic-gate * Return a wide character or WEOF for EOF or error.
957c478bd9Sstevel@tonic-gate *
967c478bd9Sstevel@tonic-gate * The function referenced by "get" is passed the pointer "object"
977c478bd9Sstevel@tonic-gate * and returns an input byte or EOF if no further data available.
987c478bd9Sstevel@tonic-gate *
99*1da57d55SToomas Soome * This mechanism is used to do conversions of byte strings or
1007c478bd9Sstevel@tonic-gate * streams into wide characters without loss of information in the
1017c478bd9Sstevel@tonic-gate * case of a bad multibyte character conversion. The bad multibyte
1027c478bd9Sstevel@tonic-gate * sequence is passed through as individual bytes.
1037c478bd9Sstevel@tonic-gate */
1047c478bd9Sstevel@tonic-gate wint_t
m_wio_get(wio)1057c478bd9Sstevel@tonic-gate m_wio_get(wio)
1067c478bd9Sstevel@tonic-gate t_wide_io *wio;
1077c478bd9Sstevel@tonic-gate {
1087c478bd9Sstevel@tonic-gate int ch;
1097c478bd9Sstevel@tonic-gate wchar_t wc;
1107c478bd9Sstevel@tonic-gate mbstate_t start_state;
1117c478bd9Sstevel@tonic-gate static mbstate_t initial_state = { 0 };
1127c478bd9Sstevel@tonic-gate
1137c478bd9Sstevel@tonic-gate if (wio == (t_wide_io *) 0 || wio->get == (int (*)(void *)) 0) {
1147c478bd9Sstevel@tonic-gate errno = EINVAL;
1157c478bd9Sstevel@tonic-gate return -1;
1167c478bd9Sstevel@tonic-gate }
1177c478bd9Sstevel@tonic-gate
118*1da57d55SToomas Soome /* Do still have bytes available? */
1197c478bd9Sstevel@tonic-gate if (wio->_next < wio->_size)
1207c478bd9Sstevel@tonic-gate return (wint_t) wio->_mb[wio->_next++];
1217c478bd9Sstevel@tonic-gate
1227c478bd9Sstevel@tonic-gate /* Read in enough bytes to convert a multibyte character. */
1237c478bd9Sstevel@tonic-gate wio->_size = 0;
1247c478bd9Sstevel@tonic-gate start_state = wio->_state;
1257c478bd9Sstevel@tonic-gate for (wio->_next = 0; wio->_next < MB_CUR_MAX; ) {
1267c478bd9Sstevel@tonic-gate if ((ch = (*wio->get)(wio->object)) == EOF)
1277c478bd9Sstevel@tonic-gate break;
1287c478bd9Sstevel@tonic-gate
1297c478bd9Sstevel@tonic-gate wio->_mb[wio->_next] = ch;
1307c478bd9Sstevel@tonic-gate
131*1da57d55SToomas Soome /* Attempt to convert multibyte character sequence. */
1327c478bd9Sstevel@tonic-gate wio->_size = mbrtowc(
1337c478bd9Sstevel@tonic-gate &wc, (char *) (wio->_mb + wio->_next), 1, &wio->_state
1347c478bd9Sstevel@tonic-gate );
1357c478bd9Sstevel@tonic-gate
1367c478bd9Sstevel@tonic-gate ++wio->_next;
1377c478bd9Sstevel@tonic-gate
1387c478bd9Sstevel@tonic-gate if (0 <= wio->_size) {
1397c478bd9Sstevel@tonic-gate #ifdef M_I18N_LOCKING_SHIFT
1407c478bd9Sstevel@tonic-gate /* Only eat shift bytes within a line, since in line
1417c478bd9Sstevel@tonic-gate * canonical mode, attempting to eat shift bytes
1427c478bd9Sstevel@tonic-gate * following a <newline> causes another read().
1437c478bd9Sstevel@tonic-gate */
1447c478bd9Sstevel@tonic-gate if (ch != '\n') {
145*1da57d55SToomas Soome /* When a valid character is found, consume
1467c478bd9Sstevel@tonic-gate * any trailing shift-in or shift-out bytes,
1477c478bd9Sstevel@tonic-gate * updating the state accordingly.
1487c478bd9Sstevel@tonic-gate */
1497c478bd9Sstevel@tonic-gate (void) eat_shift_bytes(wio);
1507c478bd9Sstevel@tonic-gate }
1517c478bd9Sstevel@tonic-gate #endif /* M_I18N_LOCKING_SHIFT */
1527c478bd9Sstevel@tonic-gate
1537c478bd9Sstevel@tonic-gate /* Remember the number of bytes converted. */
1547c478bd9Sstevel@tonic-gate wio->_size = wio->_next;
1557c478bd9Sstevel@tonic-gate
1567c478bd9Sstevel@tonic-gate return (wint_t) wc;
1577c478bd9Sstevel@tonic-gate }
1587c478bd9Sstevel@tonic-gate }
1597c478bd9Sstevel@tonic-gate
1607c478bd9Sstevel@tonic-gate /* If we fill the multibyte character buffer or receive an
1617c478bd9Sstevel@tonic-gate * EOF without recognising a multibyte character, then we
1627c478bd9Sstevel@tonic-gate * will return individual bytes from the buffer. The buffer
163*1da57d55SToomas Soome * is restored to its state before the bogus byte sequence
1647c478bd9Sstevel@tonic-gate * was read.
165*1da57d55SToomas Soome */
1667c478bd9Sstevel@tonic-gate wio->_state = start_state;
1677c478bd9Sstevel@tonic-gate wio->_size = wio->_next;
1687c478bd9Sstevel@tonic-gate wio->_next = 0;
1697c478bd9Sstevel@tonic-gate
1707c478bd9Sstevel@tonic-gate return 0 < wio->_size ? (wint_t) wio->_mb[wio->_next++] : WEOF;
1717c478bd9Sstevel@tonic-gate }
1727c478bd9Sstevel@tonic-gate
1737c478bd9Sstevel@tonic-gate
174