1#
2# CDDL HEADER START
3#
4# The contents of this file are subject to the terms of the
5# Common Development and Distribution License (the "License").
6# You may not use this file except in compliance with the License.
7#
8# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9# or http://www.opensolaris.org/os/licensing.
10# See the License for the specific language governing permissions
11# and limitations under the License.
12#
13# When distributing Covered Code, include this CDDL HEADER in each
14# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15# If applicable, add the following below this CDDL HEADER, with the
16# fields enclosed by brackets "[]" replaced with your own identifying
17# information: Portions Copyright [yyyy] [name of copyright owner]
18#
19# CDDL HEADER END
20#
21
22#
23# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24#
25
26#
27# This test checks whether the AST "cut" utility's "-d" option
28# works with multibyte characters
29#
30# This was reported as CR #6904575 ("cut -d with multibyte character no longer works"):
31# ------------ snip ------------
32# cut -d with multibyte char no longer work correctly.
33#
34# $ echo $LANG
35# ja
36# $ od -tx1 mb.eucjp
37# 0000000 a4 a2 a4 a4 a4 a4 a4 a6 a4 a8 0a
38# 0000013
39# $ od -tx1 delim
40# 0000000 a4 a4 0a
41# 0000003
42# $ wc -m mb.eucjp
43#        6 mb.eucjp
44#
45# It has 5 characters (2byte each).
46#
47# $ /usr/bin/cut -d `cat delim` -f1 mb.eucjp | od -tx1
48# 0000000 0a
49# 0000001
50#
51# correct output is
52#
53# 0000000 a4 a2 0a
54# 0000003
55#
56# files are attached.
57# ------------ snip ------------
58#
59
60# test setup
#
# err_exit - report a test failure on stderr and count it
#
# Arguments:
#   $1   - line number of the failing check; supplied automatically via
#          $LINENO by the "err_exit" alias defined right after the function
#   $2.. - words of the failure message
# Globals:
#   Command (read)    - script name, used as the message prefix
#   Errors  (updated) - failure counter; it stops growing at 127
#                       (presumably so it stays usable as an exit status)
#
function err_exit
{
	print -u2 -n "\t"
	# The prefix must be quoted: unquoted, "name[123]:" is a bracket glob
	# and would be replaced by any matching file name in the current
	# directory (and would be field-split if ${Command} had blanks).
	print -u2 -r -- "${Command}[$1]:" "${@:2}"
	(( Errors < 127 && Errors++ ))
}
alias err_exit='err_exit $LINENO'
68
set -o nounset
Command=${0##*/}
integer Errors=0

typeset ocwd
typeset tmpdir
typeset out

# create temporary test directory
ocwd="$PWD"
# Stop right here when no directory could be created: with an empty
# ${tmpdir} the script would otherwise depend on "cd" rejecting an empty
# operand, and could end up creating and deleting the test files in the
# caller's working directory.
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904575_cut_-d_with_multibyte_character_no_longer_works.XXXXXXXX")" || { err_exit "Cannot create temporary directory" ; exit $((Errors)) ; }

# all test files are created relative to the temporary directory
cd "${tmpdir}" || { err_exit "cd ${tmpdir} failed." ; exit $((Errors)) ; }
82
83
84# run tests
85
86
#
# test1 - table-driven check of "cut -d" with single- and multibyte delimiters
#
# Arguments:
#   $1 - the cut command to test
#
# Every test case supplies printf(1) format strings for the input, the
# cut arguments and the expected output, so that "\u[...]" escapes are
# turned into characters of the locale the test case runs in.
# Failures are reported through err_exit; the function itself always
# returns 0.
#
function test1
{
	typeset cut_cmd="$1"
	typeset testid
	typeset out
	typeset testname
	compound saved_locale

	# save locale information
	[[ -v LC_ALL	]] && saved_locale.LC_ALL="${LC_ALL}"
	[[ -v LC_CTYPE	]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
	[[ -v LANG	]] && saved_locale.LANG="${LANG}"

	compound -r -a testcases=(
		(
			typeset name="ascii_plain"
			typeset locale="C"
			typeset input_format='abcdefg'
			typeset -a cut_args_format=( "-f1" "-d" "e" )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' '\u[20ac]' )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain2"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' 'f' )
			typeset output_format='abcd\u[20ac]'
		)
	)

	for testid in "${!testcases[@]}" ; do
		nameref tc=testcases[${testid}]
		testname="${cut_cmd}/${tc.name}"

		if [[ "${tc.locale}" == "<unicode>" ]] ; then
			# Keep a UTF-8 locale that is already active and fall back
			# to en_US.UTF-8 otherwise.  [[ != ]] compares against a
			# shell pattern, not a regular expression: the former
			# pattern ".*.UTF-8" demanded a leading dot, never matched a
			# locale name and so always forced en_US.UTF-8.
			if [[ ! -v LC_ALL || $LC_ALL != *.UTF-8 ]]; then
				export LC_ALL='en_US.UTF-8'
			fi
		else
			export LC_ALL="${tc.locale}"
		fi

		# build "cut_args" array with multibyte characters in the current locale
		typeset -a cut_args
		integer arg_index
		for arg_index in "${!tc.cut_args_format[@]}" ; do
			cut_args+=( "$( printf -- "${tc.cut_args_format[arg_index]}" )" )
		done

		typeset output_format="$( printf -- "${tc.output_format}" )"

		#printf "args=|%q|\n" "${cut_args[@]}"

		# Run the cut command in a child shell; the arguments travel as
		# positional parameters ("dummy" fills $0) so they reach cut
		# unchanged.  stderr is captured together with stdout.
		out="$(printf "${tc.input_format}" | ${SHELL} -c "${cut_cmd} \"\$@\"" dummy "${cut_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
		# the right-hand side is unquoted, i.e. it is matched as a shell pattern
		[[ "${out}" == ${output_format} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${output_format}"), got $(printf "%q\n" "${out}")"

		# cleanup and restore locale settings
		unset cut_args arg_index
		[[ -v saved_locale.LC_ALL	]] && LC_ALL="${saved_locale.LC_ALL}" || unset LC_ALL
		[[ -v saved_locale.LC_CTYPE	]] && LC_CTYPE="${saved_locale.LC_CTYPE}" || unset LC_CTYPE
		[[ -v saved_locale.LANG		]] && LANG="${saved_locale.LANG}" || unset LANG
	done

	return 0
}
159
160
#
# test2 - replay the scenario from the original bug report
#
# Writes the sample data ("mb.eucjp") and the delimiter ("delim") from the
# report into the current directory, runs "<cut command> -d <delimiter> -f1"
# on the data in the ja_JP.eucJP locale and compares the od(1) dump of the
# result with the dump the report lists as correct.
#
# Arguments:
#   $1 - the cut command to test
#
# Failures are reported through err_exit; the function itself always
# returns 0.
#
function test2
{
	typeset cut_under_test="$1"
	typeset label="${cut_under_test}"
	typeset expected_dump=$'0000000 a4 a2 0a\n0000003'
	typeset od_dump
	typeset -i rc

	# create files: the two-byte character line and the two-byte delimiter
	printf "\xa4\xa2\xa4\xa4\xa4\xa4\xa4\xa6\xa4\xa8\x0a" >"mb.eucjp"
	printf "\xa4\xa4\x0a" >"delim"

	# run test in a child shell; pipefail/errexit make a failing cut
	# visible in the exit status even though od is the last pipeline stage
	od_dump=$( LC_ALL=ja_JP.eucJP ${SHELL} -o pipefail -o errexit -c '$1 -d $(cat delim) -f1 "mb.eucjp" | od -tx1' dummy "${cut_under_test}" 2>&1 )
	rc=$?
	if (( rc != 0 )) ; then
		err_exit "${label}: Test failed with exit code ${rc}"
	fi

	if [[ "${od_dump}" != "${expected_dump}" ]] ; then
		err_exit "${label}: Expected \$'0000000 a4 a2 0a\n0000003', got $(printf "%q\n" "${od_dump}")"
	fi

	# cleanup
	rm "mb.eucjp" "delim"

	return 0
}
180
# Run both test functions for every cut command under test.  The variant
# that also covers "/usr/bin/cut" is kept below but disabled.
#for cmd in "/usr/bin/cut" "cut" ; do
for cmd in "cut"
do
	test1 "${cmd}"
	test2 "${cmd}"
done



# go back to the original directory and remove the (by now empty)
# temporary test directory
cd "${ocwd}"
if ! rmdir "${tmpdir}" ; then
	err_exit "Cannot remove temporary directory ${tmpdir}."
fi

# tests done; the number of recorded failures is the exit status
exit $((Errors))
194