#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

#
# This test checks whether the AST "cut" utility's "-d" option
# works with multibyte characters
#
# This was reported as CR #6904575 ("cut -d with multibyte character no longer works"):
# ------------ snip ------------
# cut -d with multibyte char no longer work correctly.
#
# $ echo $LANG
# ja
# $ od -tx1 mb.eucjp
# 0000000 a4 a2 a4 a4 a4 a4 a4 a6 a4 a8 0a
# 0000013
# $ od -tx1 delim
# 0000000 a4 a4 0a
# 0000003
# $ wc -m mb.eucjp
#        6 mb.eucjp
#
# It has 5 characters (2byte each).
#
# $ /usr/bin/cut -d `cat delim` -f1 mb.eucjp | od -tx1
# 0000000 0a
# 0000001
#
# correct output is
#
# 0000000 a4 a2 0a
# 0000003
#
# files are attached.
# ------------ snip ------------
#

# test setup

#
# err_exit - report a test failure on stderr and count it.
#
# Arguments: $1    - line number of the failing check; supplied
#                    automatically by the "err_exit" alias below
#            $2... - message text
# Globals:   Command (read)  - script name used as the message prefix
#            Errors (written) - failure counter; it stops growing at 127
#                               and is used as the script's exit status
#
function err_exit
{
	print -u2 -n "\t"
	# the prefix is quoted so that "name[123]:" can neither be field-split
	# nor be taken as a file name generation (glob) pattern
	print -u2 -r "${Command}[$1]:" "${@:2}"
	(( Errors < 127 && Errors++ ))
}
# callers write 'err_exit "message"'; the alias inserts the caller's $LINENO
alias err_exit='err_exit $LINENO'

set -o nounset
Command=${0##*/}
integer Errors=0

typeset ocwd
typeset tmpdir
typeset out

# create temporary test directory
ocwd="$PWD"
# without a scratch directory the tests cannot run at all, so give up right
# away (same handling as a failing "cd" below) instead of continuing with an
# empty ${tmpdir}
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904575_cut_-d_with_multibyte_character_no_longer_works.XXXXXXXX")" || { err_exit "Cannot create temporary directory" ; exit $((Errors)) ; }

cd "${tmpdir}" || { err_exit "cd ${tmpdir} failed." ; exit $((Errors)) ; }


# run tests


#
# test1 - table-driven check of "cut -d" with single-byte and multibyte
# delimiters.
#
# Each test case supplies a locale, a printf(1) format for the input, the
# cut arguments (also printf formats, so "\u[20ac]" becomes the character in
# the active locale) and the expected output.
#
# Arguments: $1 - cut command to run; it is placed unquoted into a
#                 "${SHELL} -c" command string
# Globals:   LC_ALL is exported per test case; LC_ALL, LC_CTYPE and LANG are
#            restored (or unset) after each test case
# Returns:   always 0; failures are reported through err_exit
#
function test1
{
	typeset cut_cmd="$1"
	typeset testid
	typeset out
	typeset testname
	compound saved_locale

	# save locale information
	[[ -v LC_ALL	]] && saved_locale.LC_ALL="${LC_ALL}"
	[[ -v LC_CTYPE	]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
	[[ -v LANG	]] && saved_locale.LANG="${LANG}"

	compound -r -a testcases=(
		(
			typeset name="ascii_plain"
			typeset locale="C"
			typeset input_format='abcdefg'
			typeset -a cut_args_format=( "-f1" "-d" "e" )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' '\u[20ac]' )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain2"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' 'f' )
			typeset output_format='abcd\u[20ac]'
		)
	)

	for testid in "${!testcases[@]}" ; do
		nameref tc=testcases[${testid}]
		testname="${cut_cmd}/${tc.name}"

		if [[ "${tc.locale}" == "<unicode>" ]] ; then
			# Keep a UTF-8 locale that is already in effect and only
			# fall back to en_US.UTF-8 otherwise.  The right-hand side
			# is a shell pattern (not a regular expression), so it
			# has to be "*.UTF-8".
			if [[ ! -v LC_ALL || "${LC_ALL}" != *.UTF-8 ]]; then
				export LC_ALL='en_US.UTF-8'
			fi
		else
			export LC_ALL="${tc.locale}"
		fi

		# build "cut_args" array with multibyte characters in the current locale
		typeset -a cut_args
		integer arg_index
		for arg_index in "${!tc.cut_args_format[@]}" ; do
			cut_args+=( "$( printf -- "${tc.cut_args_format[arg_index]}" )" )
		done

		typeset output_format="$( printf -- "${tc.output_format}" )"

		#printf "args=|%q|\n" "${cut_args[@]}"

		# run cut in a child shell (which inherits the exported LC_ALL);
		# "dummy" fills $0 so that the cut arguments arrive as "$@"
		out="$(printf "${tc.input_format}" | ${SHELL} -c "${cut_cmd} \"\$@\"" dummy "${cut_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
		# the right-hand side is unquoted, i.e. it is matched as a shell pattern
		[[ "${out}" == ${output_format} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${output_format}"), got $(printf "%q\n" "${out}")"

		# cleanup and restore locale settings
		unset cut_args arg_index
		[[ -v saved_locale.LC_ALL	]] && LC_ALL="${saved_locale.LC_ALL}" || unset LC_ALL
		[[ -v saved_locale.LC_CTYPE	]] && LC_CTYPE="${saved_locale.LC_CTYPE}" || unset LC_CTYPE
		[[ -v saved_locale.LANG		]] && LANG="${saved_locale.LANG}" || unset LANG
	done

	return 0
}


#
# test2 - replay the EUC-JP reproduction from the CR #6904575 bug report.
#
# Writes the two sample files from the report into the current directory,
# runs "cut -d <delimiter> -f1" on them in the ja_JP.eucJP locale and
# compares the od(1) dump of the result with the expected dump.
#
# Arguments: $1 - cut command to run
# Returns:   always 0; failures are reported through err_exit
#
function test2
{
	typeset cut_cmd=$1
	typeset label="${cut_cmd}"
	typeset out
	typeset expected=$'0000000 a4 a2 0a\n0000003'
	integer status

	# create files (delimiter and input, byte for byte as in the report)
	printf "\xa4\xa4\x0a" >"delim"
	printf "\xa4\xa2\xa4\xa4\xa4\xa4\xa4\xa6\xa4\xa8\x0a" >"mb.eucjp"

	# run test
	out=$( LC_ALL=ja_JP.eucJP ${SHELL} -o pipefail -o errexit -c '$1 -d $(cat delim) -f1 "mb.eucjp" | od -tx1' dummy "${cut_cmd}" 2>&1 )
	status=$?
	(( status == 0 )) || err_exit "${label}: Test failed with exit code ${status}"

	if [[ "${out}" != "${expected}" ]] ; then
		err_exit "${label}: Expected \$'0000000 a4 a2 0a\n0000003', got $(printf "%q\n" "${out}")"
	fi

	# cleanup
	rm "mb.eucjp" "delim"

	return 0
}

# Commands under test.  The variant that also exercised "/usr/bin/cut" is
# kept here for reference:
#for cmd in "/usr/bin/cut" "cut" ; do
typeset -a cut_commands=( "cut" )
for cmd in "${cut_commands[@]}" ; do
	test1 "${cmd}"
	test2 "${cmd}"
done



# leave the scratch directory before removing it
cd "${ocwd}"
if ! rmdir "${tmpdir}" ; then
	err_exit "Cannot remove temporary directory ${tmpdir}."
fi

# tests done
exit $((Errors))