1#
2# CDDL HEADER START
3#
4# The contents of this file are subject to the terms of the
5# Common Development and Distribution License (the "License").
6# You may not use this file except in compliance with the License.
7#
8# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9# or http://www.opensolaris.org/os/licensing.
10# See the License for the specific language governing permissions
11# and limitations under the License.
12#
13# When distributing Covered Code, include this CDDL HEADER in each
14# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15# If applicable, add the following below this CDDL HEADER, with the
16# fields enclosed by brackets "[]" replaced with your own identifying
17# information: Portions Copyright [yyyy] [name of copyright owner]
18#
19# CDDL HEADER END
20#
21
22#
23# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24#
25
26#
27# This test checks whether the AST "join" utility works with
# multibyte characters as separator.
29#
30# This was reported as CR #6904878 ("join -t no longer works with multibyte char separator"):
31# ------------ snip ------------
32# join doesn't handle multibyte separator correctly.
33#
34# $ echo $LANG
35# ja
36# $ od -tx1 input1
37# 0000000 66 31 a1 f7 66 32 0a
38# 0000007
39# $ od -tx1 input2
40# 0000000 74 32 a1 f7 66 31 0a
41# 0000007
42# # 0xa1 0xf7 in the file is multibyte character.
43# $ od -tx1 delim
44# 0000000 a1 f7 0a
45# 0000003
46#
47# $ /usr/bin/join -j1 1 -j2 2 -o 1.1 -t `cat delim` input1 input2
48# $
49#
50# It should output "f1".
51#
52# files are attached.
53# ------------ snip ------------
54#
55
56# test setup
#
# err_exit - report a failed check on stderr and count it.
#
# Arguments:
#   $1   - line number of the failing check; supplied automatically by
#          the "err_exit" alias below, which inserts $LINENO
#   $2.. - message words, printed raw (no escape processing)
# Globals:
#   Command (read)   - script name used as the message prefix
#   Errors  (update) - failure counter; it stops growing at 127 so it
#                      stays usable as the script's exit status
# Output:
#   "<TAB>Command[line]: message" on file descriptor 2
# Note:
#   The return status is that of the arithmetic command and carries no
#   meaning; callers should ignore it.
#
function err_exit
{
	print -u2 -n "\t"
	# The prefix must be quoted: unquoted, "[$1]" is a bracket glob and the
	# word could be replaced by a matching file name in the current
	# directory (or be split on IFS characters in the script name).
	print -u2 -r "${Command}[$1]:" "${@:2}"
	(( Errors < 127 && Errors++ ))
}
# Defined after the function so that every later call site passes its own
# line number as the first argument.
alias err_exit='err_exit $LINENO'
64
# Treat expansion of unset variables as an error for the rest of the script.
set -o nounset

# Globals consumed by err_exit: message prefix and failure counter.
Command=${0##*/}
integer Errors=0

typeset ocwd
typeset tmpdir
typeset out

# create temporary test directory
ocwd="$PWD"
# Abort right away when no scratch directory could be created: continuing
# with an empty ${tmpdir} would only produce a second, misleading error
# from the "cd" below.
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904878_join_-t_no_longer_works_with_multibyte_char_separator.XXXXXXXX")" || { err_exit "Cannot create temporary directory" ; exit $((Errors)) ; }

# All test files are created relative to the scratch directory.
cd "${tmpdir}" || { err_exit "cd ${tmpdir} failed." ; exit $((Errors)) ; }
78
79
80# run tests
81
82
#
# test1 - table-driven check of a "join" command under several locales.
#
# Arguments:
#   $1 - the join command to run; it is expanded inside a child ${SHELL}
#        as "<cmd> "$@"" with the prepared argument list
#
# Each entry of the "testcases" table provides:
#   name             - label used in failure messages
#   locale           - value exported as LC_ALL for the test, or the
#                      marker "<unicode>" to request any *.UTF-8 locale
#   input1_format,
#   input2_format    - printf formats written to the files "input1"/"input2"
#   join_args_format - printf formats, one per argument passed to join
#   output_format    - printf format of the expected output; the result is
#                      used as a shell PATTERN when compared with the
#                      actual output
#
# Files "input1" and "input2" are created in the current directory and
# removed again after each test case.  Failures are reported through
# err_exit; the function itself always returns 0.
#
function test1
{
	typeset join_cmd="$1"
	typeset testid
	typeset out
	typeset testname
	compound saved_locale

	# save locale information (only for variables which are actually set,
	# so they can be restored - or unset again - after each test case)
	[[ -v LC_ALL	]] && saved_locale.LC_ALL="${LC_ALL}"
	[[ -v LC_CTYPE	]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
	[[ -v LANG	]] && saved_locale.LANG="${LANG}"

	compound -r -a testcases=(
		(
			typeset name="ascii_simple"
			typeset locale="C"
			typeset input1_format="fish 81 91\n"
			typeset input2_format="fish B A\n"
			typeset -a join_args_format=( "input1" "input2" )
			typeset output_format="fish 81 91 B A"
		)
		(
			typeset name="ja_JP.eucJP_multibyte_delimiter"
			typeset locale="ja_JP.eucJP"
			typeset input1_format="\x66\x31\xa1\xf7\x66\x32\x0a"
			typeset input2_format="\x74\x32\xa1\xf7\x66\x31\x0a"
			typeset -a join_args_format=( "-j1" "1" "-j2" "2" "-o" "1.1" "-t" "\xa1\xf7" "input1" "input2" )
			typeset output_format="f1"
		)
	)

	for testid in "${!testcases[@]}" ; do
		nameref tc=testcases[${testid}]
		testname="${join_cmd}/${tc.name}"

		if [[ "${tc.locale}" == "<unicode>" ]] ; then
			# LC_ALL may be unset here (it is unset again at the end
			# of each iteration when it was not set on entry); the
			# "-" default keeps "set -o nounset" from aborting.
			if [[ "${LC_ALL-}" != *.UTF-8 ]] ; then
				export LC_ALL='en_US.UTF-8'
			fi
		else
			export LC_ALL="${tc.locale}"
		fi

		# build "join_args" array with multibyte characters in the current locale
		typeset -a join_args
		integer arg_index
		for arg_index in "${!tc.join_args_format[@]}" ; do
			join_args+=( "$( printf -- "${tc.join_args_format[arg_index]}" )" )
		done

		typeset output_format="$( printf -- "${tc.output_format}" )"

		#printf "args=|%q|\n" "${join_args[@]}"

		printf "${tc.input1_format}" >"input1"
		printf "${tc.input2_format}" >"input2"

		# The command runs in a child shell so that ${join_cmd} is looked
		# up there; stderr is captured together with stdout.
		out="$(${SHELL} -c "${join_cmd} \"\$@\"" dummy "${join_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
		# right-hand side intentionally unquoted: pattern match
		[[ "${out}" == ${output_format} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${output_format}"), got $(printf "%q\n" "${out}")"

		rm "input1" "input2"

		# cleanup and restore locale settings
		unset join_args arg_index
		if [[ -v saved_locale.LC_ALL ]] ; then
			LC_ALL="${saved_locale.LC_ALL}"
		else
			unset LC_ALL
		fi
		if [[ -v saved_locale.LC_CTYPE ]] ; then
			LC_CTYPE="${saved_locale.LC_CTYPE}"
		else
			unset LC_CTYPE
		fi
		if [[ -v saved_locale.LANG ]] ; then
			LANG="${saved_locale.LANG}"
		else
			unset LANG
		fi
	done

	return 0
}
155
156
#
# test2 - replay the command line from the original bug report.
#
# Arguments:
#   $1 - the join command to run (also used as the test name in messages)
#
# Writes "input1", "input2" and "delim" into the current directory, runs
# join in a child ${SHELL} with LC_ALL=ja_JP.eucJP and the delimiter read
# back via "$(cat delim)", and expects exactly "f1" as combined
# stdout/stderr output.  Failures are reported through err_exit; the
# function always returns 0.
#
function test2
{
	typeset join_cmd="$1"
	typeset label="${join_cmd}"
	typeset result
	typeset -i rc

	# fixtures: two one-line inputs and the two-byte delimiter
	printf "\xa1\xf7\x0a" >"delim"
	printf "\x66\x31\xa1\xf7\x66\x32\x0a" >"input1"
	printf "\x74\x32\xa1\xf7\x66\x31\x0a" >"input2"

	# run the command exactly as given in the bug report
	result=$( LC_ALL=ja_JP.eucJP ${SHELL} -o pipefail -o errexit -c '$1 -j1 1 -j2 2 -o 1.1 -t $(cat delim) input1 input2' dummy "${join_cmd}" 2>&1 )
	rc=$?

	if (( rc != 0 )) ; then
		err_exit "${label}: Test failed with exit code ${rc}"
	fi
	if [[ "${result}" != 'f1' ]] ; then
		err_exit "${label}: Expected 'f1', got $(printf "%q\n" "${result}")"
	fi

	# remove the fixtures again
	rm "input1" "input2" "delim"

	return 0
}
177
# Run both test groups for every join command under test.
# (disabled variant of the list: "/usr/bin/join" "join")
for cmd in "join" ; do
	test1 "${cmd}"
	test2 "${cmd}"
done

# Leave the scratch directory and remove it; it is expected to be empty
# because each test deletes the files it created.
cd "${ocwd}"
if ! rmdir "${tmpdir}" ; then
	err_exit "Cannot remove temporary directory ${tmpdir}."
fi

# tests done - the failure count is the exit status
exit $((Errors))
191