#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

#
# This test checks whether the AST "cut" utility's "-d" option
# works with multibyte characters
#
# This was reported as CR #6904575 ("cut -d with multibyte character no longer works"):
# ------------ snip ------------
# cut -d with multibyte char no longer work correctly.
#
# $ echo $LANG
# ja
# $ od -tx1 mb.eucjp
# 0000000 a4 a2 a4 a4 a4 a4 a4 a6 a4 a8 0a
# 0000013
# $ od -tx1 delim
# 0000000 a4 a4 0a
# 0000003
# $ wc -m mb.eucjp
# 6 mb.eucjp
#
# It has 5 characters (2byte each).
#
# $ /usr/bin/cut -d `cat delim` -f1 mb.eucjp | od -tx1
# 0000000 0a
# 0000001
#
# correct output is
#
# 0000000 a4 a2 0a
# 0000003
#
# files are attached.
# ------------ snip ------------
#

# test setup

#
# Report a test failure on stderr as "<script>[<line>]: <message>" and
# bump the global failure counter "Errors" (it stops counting at 127).
# Callers go through the alias below, which supplies ${LINENO} as the
# first argument, so they only pass the message text.
#
function err_exit
{
	print -u2 -n "\t"
	# quoted so that "[<line>]" is never subject to word splitting
	# or interpreted as a glob character class
	print -u2 -r "${Command}[$1]:" "${@:2}"
	(( Errors < 127 && Errors++ ))
}
alias err_exit='err_exit $LINENO'

set -o nounset
Command=${0##*/}
integer Errors=0

typeset ocwd
typeset tmpdir
typeset out

# create temporary test directory
ocwd="$PWD"
# without a scratch directory nothing below can run, so bail out right away
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904575_cut_-d_with_multibyte_character_no_longer_works.XXXXXXXX")" || { err_exit "Cannot create temporary directory" ; exit $((Errors)) ; }

cd "${tmpdir}" || { err_exit "cd ${tmpdir} failed." ; exit $((Errors)) ; }


# run tests


#
# test1 <cut command>
#
# Table-driven check of "cut -d": each test case names a locale, a
# printf(1) format for the input, printf(1) formats for the cut arguments
# and a printf(1) format for the expected output.  The formats are
# expanded after LC_ALL has been switched for the test case, the input is
# piped into the cut command (run via ${SHELL} -c) and the captured
# output is matched against the expected value.  Failures are reported
# through err_exit; the function itself always returns 0.
#
function test1
{
	typeset cut_cmd="$1"
	typeset testid
	typeset out
	typeset testname
	compound saved_locale

	# save locale information
	[[ -v LC_ALL   ]] && saved_locale.LC_ALL="${LC_ALL}"
	[[ -v LC_CTYPE ]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
	[[ -v LANG     ]] && saved_locale.LANG="${LANG}"

	compound -r -a testcases=(
		(
			typeset name="ascii_plain"
			typeset locale="C"
			typeset input_format='abcdefg'
			typeset -a cut_args_format=( "-f1" "-d" "e" )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' '\u[20ac]' )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain2"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' 'f' )
			typeset output_format='abcd\u[20ac]'
		)
	)

	for testid in "${!testcases[@]}" ; do
		nameref tc=testcases[${testid}]
		testname="${cut_cmd}/${tc.name}"

		if [[ "${tc.locale}" == "<unicode>" ]] ; then
			# Keep the caller's locale if it already is a UTF-8 one,
			# otherwise fall back to en_US.UTF-8.  The right-hand side
			# is a shell pattern (not a regex), hence "*.UTF-8".
			if [[ ! -v LC_ALL || "${LC_ALL}" != *.UTF-8 ]] ; then
				export LC_ALL='en_US.UTF-8'
			fi
		else
			export LC_ALL="${tc.locale}"
		fi

		# build "cut_args" array with multibyte characters in the current locale
		typeset -a cut_args
		integer arg_index
		for arg_index in "${!tc.cut_args_format[@]}" ; do
			cut_args+=( "$( printf -- "${tc.cut_args_format[arg_index]}" )" )
		done

		typeset output_format="$( printf -- "${tc.output_format}" )"

		#printf "args=|%q|\n" "${cut_args[@]}"

		out="$(printf "${tc.input_format}" | ${SHELL} -c "${cut_cmd} \"\$@\"" dummy "${cut_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
		# right-hand side is intentionally unquoted: it is used as a pattern
		[[ "${out}" == ${output_format} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${output_format}"), got $(printf "%q\n" "${out}")"

		# cleanup and restore locale settings
		unset cut_args arg_index
		[[ -v saved_locale.LC_ALL   ]] && LC_ALL="${saved_locale.LC_ALL}" || unset LC_ALL
		[[ -v saved_locale.LC_CTYPE ]] && LC_CTYPE="${saved_locale.LC_CTYPE}" || unset LC_CTYPE
		[[ -v saved_locale.LANG     ]] && LANG="${saved_locale.LANG}" || unset LANG
	done

	return 0
}


#
# test2 <cut command>
#
# Replays the scenario from the bug report: writes the "mb.eucjp" and
# "delim" files with the byte sequences quoted in the report, runs
# "<cut command> -d $(cat delim) -f1 mb.eucjp | od -tx1" in a child
# ${SHELL} with LC_ALL=ja_JP.eucJP and compares the dump with the output
# the report lists as correct.  Failures are reported through err_exit;
# the function itself always returns 0.
#
function test2
{
	typeset cutcmd=$1
	typeset testname="${cutcmd}"
	typeset out

	# create files
	printf "\xa4\xa2\xa4\xa4\xa4\xa4\xa4\xa6\xa4\xa8\x0a" >"mb.eucjp"
	printf "\xa4\xa4\x0a" >"delim"

	# run test
	# (pipefail + errexit make a failing "cut" visible despite the pipe into od)
	out=$( LC_ALL=ja_JP.eucJP ${SHELL} -o pipefail -o errexit -c '$1 -d $(cat delim) -f1 "mb.eucjp" | od -tx1' dummy "${cutcmd}" 2>&1 ) || err_exit "${testname}: Test failed with exit code $?"
	[[ "${out}" == $'0000000 a4 a2 0a\n0000003' ]] || err_exit "${testname}: Expected \$'0000000 a4 a2 0a\n0000003', got $(printf "%q\n" "${out}")"

	# cleanup
	rm "mb.eucjp" "delim"

	return 0
}

#for cmd in "/usr/bin/cut" "cut" ; do
for cmd in "cut" ; do
	test1 "${cmd}"
	test2 "${cmd}"
done



cd "${ocwd}"
rmdir "${tmpdir}" || err_exit "Cannot remove temporary directory ${tmpdir}."

# tests done
exit $((Errors))