1*16d86563SAlexander Pyhalov /* 2*16d86563SAlexander Pyhalov * CDDL HEADER START 3*16d86563SAlexander Pyhalov * 4*16d86563SAlexander Pyhalov * The contents of this file are subject to the terms of the 5*16d86563SAlexander Pyhalov * Common Development and Distribution License (the "License"). 6*16d86563SAlexander Pyhalov * You may not use this file except in compliance with the License. 7*16d86563SAlexander Pyhalov * 8*16d86563SAlexander Pyhalov * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9*16d86563SAlexander Pyhalov * or http://www.opensolaris.org/os/licensing. 10*16d86563SAlexander Pyhalov * See the License for the specific language governing permissions 11*16d86563SAlexander Pyhalov * and limitations under the License. 12*16d86563SAlexander Pyhalov * 13*16d86563SAlexander Pyhalov * When distributing Covered Code, include this CDDL HEADER in each 14*16d86563SAlexander Pyhalov * file and include the License file at src/OPENSOLARIS.LICENSE. 15*16d86563SAlexander Pyhalov * If applicable, add the following below this CDDL HEADER, with the 16*16d86563SAlexander Pyhalov * fields enclosed by brackets "[]" replaced with your own identifying 17*16d86563SAlexander Pyhalov * information: Portions Copyright [yyyy] [name of copyright owner] 18*16d86563SAlexander Pyhalov * 19*16d86563SAlexander Pyhalov * CDDL HEADER END 20*16d86563SAlexander Pyhalov */ 21*16d86563SAlexander Pyhalov /* 22*16d86563SAlexander Pyhalov * Copyright (c) 1998-1999 by Sun Microsystems, Inc. 23*16d86563SAlexander Pyhalov * All rights reserved. 24*16d86563SAlexander Pyhalov */ 25*16d86563SAlexander Pyhalov 26*16d86563SAlexander Pyhalov #ifndef UTF7_TO_UCS_H 27*16d86563SAlexander Pyhalov #define UTF7_TO_UCS_H 28*16d86563SAlexander Pyhalov 29*16d86563SAlexander Pyhalov 30*16d86563SAlexander Pyhalov #include "common_defs.h" 31*16d86563SAlexander Pyhalov 32*16d86563SAlexander Pyhalov 33*16d86563SAlexander Pyhalov /* Modified Base64 alphabet to Value mapping table -- see RFC 2045. */ 34*16d86563SAlexander Pyhalov static const signed char rmb64[0x100] = { 35*16d86563SAlexander Pyhalov /*00*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 36*16d86563SAlexander Pyhalov /*10*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 37*16d86563SAlexander Pyhalov /*20*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 38*16d86563SAlexander Pyhalov /*30*/ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, 39*16d86563SAlexander Pyhalov /*40*/ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 40*16d86563SAlexander Pyhalov /*50*/ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, 41*16d86563SAlexander Pyhalov /*60*/ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42*16d86563SAlexander Pyhalov /*70*/ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, 43*16d86563SAlexander Pyhalov /*80*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 44*16d86563SAlexander Pyhalov /*90*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 45*16d86563SAlexander Pyhalov /*a0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46*16d86563SAlexander Pyhalov /*b0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 47*16d86563SAlexander Pyhalov /*c0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 48*16d86563SAlexander Pyhalov /*d0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 49*16d86563SAlexander Pyhalov /*e0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 50*16d86563SAlexander Pyhalov /*f0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 51*16d86563SAlexander Pyhalov }; 52*16d86563SAlexander Pyhalov 53*16d86563SAlexander Pyhalov /* 54*16d86563SAlexander Pyhalov * Any UCS-2 character sequences will yield: 55*16d86563SAlexander Pyhalov * 56*16d86563SAlexander Pyhalov * +-16 bits (UCS-2)-+ +-16 bits (UCS-2)-+ +-16 bits (UCS-2)-+ 57*16d86563SAlexander Pyhalov * | | | | | | 58*16d86563SAlexander Pyhalov * xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx 59*16d86563SAlexander Pyhalov * | || | | || | | || | | || | 60*16d86563SAlexander Pyhalov * +--0--++--1--+ +---2--++--3--+ +--4--++---5--+ +--6--++--7--+ MBase64 chars 61*16d86563SAlexander Pyhalov * ^ ^ 62*16d86563SAlexander Pyhalov * initially, | | 63*16d86563SAlexander Pyhalov * four remnant bits, | 64*16d86563SAlexander Pyhalov * two remnant bits, 65*16d86563SAlexander Pyhalov * 66*16d86563SAlexander Pyhalov * and, then no remnant bit for three sequential UCS-2 characters, 67*16d86563SAlexander Pyhalov * respectively, and repeat these three UCS-2 character sequences. For the 68*16d86563SAlexander Pyhalov * first UCS-2 character in this sequence, there will be two MBase64 69*16d86563SAlexander Pyhalov * characters, and for the second and the third UCS-2 characters, there will be 70*16d86563SAlexander Pyhalov * three MBase64 characters. 71*16d86563SAlexander Pyhalov * 72*16d86563SAlexander Pyhalov * Following action numbers, 0, 2, 5, and, 7, are assigned to each of 73*16d86563SAlexander Pyhalov * corresponding MBase64 characters that can either yield a UCS-2 character or 74*16d86563SAlexander Pyhalov * indicate a character that is the starting/initial one. 75*16d86563SAlexander Pyhalov */ 76*16d86563SAlexander Pyhalov #define ICV_U7_ACTION_START 0 77*16d86563SAlexander Pyhalov #define ICV_U7_ACTION_HARVEST1 2 78*16d86563SAlexander Pyhalov #define ICV_U7_ACTION_HARVEST2 5 79*16d86563SAlexander Pyhalov #define ICV_U7_ACTION_HARVEST3 7 80*16d86563SAlexander Pyhalov 81*16d86563SAlexander Pyhalov #define ICV_U7_UCS4_OUTOFUTF16 0xfffefeff 82*16d86563SAlexander Pyhalov 83*16d86563SAlexander Pyhalov #define OUTBUF_SIZE_CHECK(sz) \ 84*16d86563SAlexander Pyhalov if ((obtail - ob) < (sz)) { \ 85*16d86563SAlexander Pyhalov errno = E2BIG; \ 86*16d86563SAlexander Pyhalov ret_val = (size_t)-1; \ 87*16d86563SAlexander Pyhalov break; \ 88*16d86563SAlexander Pyhalov } 89*16d86563SAlexander Pyhalov 90*16d86563SAlexander Pyhalov /* 91*16d86563SAlexander Pyhalov * For better performance and readability, we perfer to write macros like 92*16d86563SAlexander Pyhalov * below instead of putting them in functions and then calling them. 93*16d86563SAlexander Pyhalov */ 94*16d86563SAlexander Pyhalov #define CHECK_OUTBUF_SZ_AND_WRITE_U2 \ 95*16d86563SAlexander Pyhalov obsz = (cd->bom_written) ? ICV_FETCH_UCS_SIZE : ICV_FETCH_UCS_SIZE_TWO;\ 96*16d86563SAlexander Pyhalov if ((obtail - ob) < obsz) { \ 97*16d86563SAlexander Pyhalov errno = E2BIG; \ 98*16d86563SAlexander Pyhalov ret_val = (size_t)-1; \ 99*16d86563SAlexander Pyhalov break; \ 100*16d86563SAlexander Pyhalov } \ 101*16d86563SAlexander Pyhalov if (cd->little_endian) { \ 102*16d86563SAlexander Pyhalov if (! cd->bom_written) { \ 103*16d86563SAlexander Pyhalov *ob++ = (uchar_t)0xff; \ 104*16d86563SAlexander Pyhalov *ob++ = (uchar_t)0xfe; \ 105*16d86563SAlexander Pyhalov cd->bom_written = true; \ 106*16d86563SAlexander Pyhalov } \ 107*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(u4 & 0xff); \ 108*16d86563SAlexander Pyhalov *ob++ = (uchar_t)((u4 >> 8) & 0xff); \ 109*16d86563SAlexander Pyhalov } else { \ 110*16d86563SAlexander Pyhalov if (! cd->bom_written) { \ 111*16d86563SAlexander Pyhalov *ob++ = (uchar_t)0xfe; \ 112*16d86563SAlexander Pyhalov *ob++ = (uchar_t)0xff; \ 113*16d86563SAlexander Pyhalov cd->bom_written = true; \ 114*16d86563SAlexander Pyhalov } \ 115*16d86563SAlexander Pyhalov *ob++ = (uchar_t)((u4 >> 8) & 0xff); \ 116*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(u4 & 0xff); \ 117*16d86563SAlexander Pyhalov } 118*16d86563SAlexander Pyhalov 119*16d86563SAlexander Pyhalov #define CHECK_OUTBUF_SZ_AND_WRITE_U4 \ 120*16d86563SAlexander Pyhalov obsz = (cd->bom_written) ? ICV_FETCH_UCS_SIZE : ICV_FETCH_UCS_SIZE_TWO;\ 121*16d86563SAlexander Pyhalov if ((obtail - ob) < obsz) { \ 122*16d86563SAlexander Pyhalov errno = E2BIG; \ 123*16d86563SAlexander Pyhalov ret_val = (size_t)-1; \ 124*16d86563SAlexander Pyhalov break; \ 125*16d86563SAlexander Pyhalov } \ 126*16d86563SAlexander Pyhalov if (cd->little_endian) { \ 127*16d86563SAlexander Pyhalov if (! cd->bom_written) { \ 128*16d86563SAlexander Pyhalov *ob++ = (uchar_t)0xff; \ 129*16d86563SAlexander Pyhalov *ob++ = (uchar_t)0xfe; \ 130*16d86563SAlexander Pyhalov *(ushort_t *)ob = (ushort_t)0; \ 131*16d86563SAlexander Pyhalov ob += 2; \ 132*16d86563SAlexander Pyhalov cd->bom_written = true; \ 133*16d86563SAlexander Pyhalov } \ 134*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(u4 & 0xff); \ 135*16d86563SAlexander Pyhalov *ob++ = (uchar_t)((u4 >> 8) & 0xff); \ 136*16d86563SAlexander Pyhalov *ob++ = (uchar_t)((u4 >> 16) & 0xff); \ 137*16d86563SAlexander Pyhalov *ob++ = (uchar_t)((u4 >> 24) & 0xff); \ 138*16d86563SAlexander Pyhalov } else { \ 139*16d86563SAlexander Pyhalov if (! cd->bom_written) { \ 140*16d86563SAlexander Pyhalov *(ushort_t *)ob = (ushort_t)0; \ 141*16d86563SAlexander Pyhalov ob += 2; \ 142*16d86563SAlexander Pyhalov *ob++ = (uchar_t)0xfe; \ 143*16d86563SAlexander Pyhalov *ob++ = (uchar_t)0xff; \ 144*16d86563SAlexander Pyhalov cd->bom_written = true; \ 145*16d86563SAlexander Pyhalov } \ 146*16d86563SAlexander Pyhalov *ob++ = (uchar_t)((u4 >> 24) & 0xff); \ 147*16d86563SAlexander Pyhalov *ob++ = (uchar_t)((u4 >> 16) & 0xff); \ 148*16d86563SAlexander Pyhalov *ob++ = (uchar_t)((u4 >> 8) & 0xff); \ 149*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(u4 & 0xff); \ 150*16d86563SAlexander Pyhalov } 151*16d86563SAlexander Pyhalov 152*16d86563SAlexander Pyhalov /* 153*16d86563SAlexander Pyhalov * UTF-7's code range is basically that of UTF-16, i.e., 154*16d86563SAlexander Pyhalov * U+0000 0000 ~ U+0010 FFFF, it cannot go beyond the U+0010 FFFF. 155*16d86563SAlexander Pyhalov */ 156*16d86563SAlexander Pyhalov #define CHECK_OUTBUF_SZ_AND_WRITE_U8_OR_EILSEQ \ 157*16d86563SAlexander Pyhalov if (u4 <= 0x7f) { \ 158*16d86563SAlexander Pyhalov OUTBUF_SIZE_CHECK(1); \ 159*16d86563SAlexander Pyhalov *ob++ = (uchar_t)u4; \ 160*16d86563SAlexander Pyhalov } else if (u4 <= 0x7ff) { \ 161*16d86563SAlexander Pyhalov OUTBUF_SIZE_CHECK(2); \ 162*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(0xc0 | ((u4 & 0x07c0) >> 6)); \ 163*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(0x80 | (u4 & 0x003f)); \ 164*16d86563SAlexander Pyhalov } else if (u4 <= 0x00ffff) { \ 165*16d86563SAlexander Pyhalov OUTBUF_SIZE_CHECK(3); \ 166*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(0xe0 | ((u4 & 0x0f000) >> 12)); \ 167*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(0x80 | ((u4 & 0x00fc0) >> 6)); \ 168*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(0x80 | (u4 & 0x0003f)); \ 169*16d86563SAlexander Pyhalov } else if (u4 <= 0x10ffff) { \ 170*16d86563SAlexander Pyhalov OUTBUF_SIZE_CHECK(4); \ 171*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(0xf0 | ((u4 & 0x01c0000) >> 18)); \ 172*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(0x80 | ((u4 & 0x003f000) >> 12)); \ 173*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(0x80 | ((u4 & 0x0000fc0) >> 6)); \ 174*16d86563SAlexander Pyhalov *ob++ = (uchar_t)(0x80 | (u4 & 0x000003f)); \ 175*16d86563SAlexander Pyhalov } else { \ 176*16d86563SAlexander Pyhalov errno = EILSEQ; \ 177*16d86563SAlexander Pyhalov ret_val = (size_t)-1; \ 178*16d86563SAlexander Pyhalov break; \ 179*16d86563SAlexander Pyhalov } 180*16d86563SAlexander Pyhalov 181*16d86563SAlexander Pyhalov 182*16d86563SAlexander Pyhalov #endif /* UTF7_TO_UCS_H */ 183