#! /bin/sh
2*e6d6c189SCody Peter Mello# From arnold@f7.net  Sun Apr 22 20:15:25 2007
3*e6d6c189SCody Peter Mello# Date: Thu, 19 Apr 2007 17:09:02 +0300
4*e6d6c189SCody Peter Mello# From: Pekka Pessi <Pekka.Pessi@nokia.com>
5*e6d6c189SCody Peter Mello# X-Face: #V(jdpv[lI!TNUU=2*oh:="#suS*ponXW"yr6G;~L}<xZn_2^0)V{jqdc4y}@2b]ffd}SY#
6*e6d6c189SCody Peter Mello#  :9||1pew85O,WjiYA"6C7bW^zt^+.{b#B{lEE+4$9lrXL(55g}dU>uZ\JfD\"IG#G{j`hZI;=DmT\H
7*e6d6c189SCody Peter Mello#  pfDMyJ`i=:M;BM3R.`[>P^ER8+]i
8*e6d6c189SCody Peter Mello# Subject: UTF-8 locale and \n in regexps
9*e6d6c189SCody Peter Mello# To: bug-gawk@gnu.org
10*e6d6c189SCody Peter Mello# Cc: Pekka.Pessi@nokia.com
11*e6d6c189SCody Peter Mello# Message-id: <pvlkgoh2wx.fsf@nokia.com>
12*e6d6c189SCody Peter Mello# MIME-version: 1.0
13*e6d6c189SCody Peter Mello# Content-type: multipart/mixed; boundary="=-=-="
14*e6d6c189SCody Peter Mello#
15*e6d6c189SCody Peter Mello# --=-=-=
16*e6d6c189SCody Peter Mello#
# Hello,
#
# It looks like regexp with \n in [^] behaves badly if locale has
# a UTF-8 ctype.
#
# It looks like if there is \n and a range without \n, like /\n[^x\n]foo/,
# and first \n ends an even-numbered line within the string, regexp
# does not match.
#
# Please see the attached script for a demonstration.
#
# --Pekka Pessi
29*e6d6c189SCody Peter Mello#
30*e6d6c189SCody Peter Mello#
31*e6d6c189SCody Peter Mello# --=-=-=
32*e6d6c189SCody Peter Mello# Content-Disposition: inline; filename=gawk-test
33*e6d6c189SCody Peter Mello#
34*e6d6c189SCody Peter Mello#! /bin/sh
35*e6d6c189SCody Peter Mello
# The awk implementation under test is supplied by the caller through $AWK;
# without it there is nothing to run, so complain on stderr and stop.
[ -n "$AWK" ] || {
    printf '$AWK must be set\n' >&2
    exit 1
}
40*e6d6c189SCody Peter Mello
# Run the same multi-line regexp checks once per locale.
#
# For each locale the nine-line sample text is piped into $AWK, which tries
# eight patterns combining a literal \n with a bracket expression.  Any
# pattern that fails to match prints "no match N" and makes awk exit
# non-zero; the shell then reports "LC_ALL=<locale> FAILED" and exits 1.
# Otherwise "LC_ALL=<locale> passed" is printed and the next locale is tried.
#
# LC_ALL itself is the loop variable and is exported so that the awk child
# process sees it.  $AWK is intentionally left unquoted.
#
# April 2010: Remove UNKNOWN, causes spurious failures on some systems
for LC_ALL in C POSIX en_US.ISO8859-1 en_US.UTF-8 #UNKNOWN
do
export LC_ALL
cat <<EOF |
line1
line2
line3
line4 
line5
line6
line7
line8
line9
EOF
$AWK '
# RS is set to NUL, presumably so that the whole input arrives as a single
# record and the embedded newlines are visible to match().
BEGIN { RS="\0"; }
{
  # Checks 2-6 and 8 all have the shape \n[^N\n]*N: the negated bracket
  # must exclude newline so the match stays on one line, which is the
  # construct the original report says misbehaves.
  if (match($0, /\n[^2\n]*2/)) { got2=1; } else { print "no match 2"; }
  if (match($0, /\n[^3\n]*3/)) { got3=1; } else { print "no match 3"; }
  if (match($0, /\n[^4\n]*4/)) { got4=1; } else { print "no match 4"; }
  if (match($0, /\n[^5\n]*5/)) { got5=1; } else { print "no match 5"; }
  if (match($0, /\n[^6\n]*6/)) { got6=1; } else { print "no match 6"; }
  if (match($0, /\n[a-z]*7\n/)){ got7=1; } else { print "no match 7"; }
  if (match($0, /\n[^8\n]*8/)) { got8=1; } else { print "no match 8"; }
  if (match($0, /8.[^9\n]+9/)) { got9=1; } else { print "no match 9"; }
}

# Exit status is 0 only when every check matched.
END { exit(!(got2 && got3 && got4 && got5 && got6 && got7 && got8 && got9)); }
' || {
  echo "LC_ALL=$LC_ALL FAILED"
  exit 1
}
echo "LC_ALL=$LC_ALL passed"
done
76*e6d6c189SCody Peter Mello#
77*e6d6c189SCody Peter Mello# --=-=-=--
78*e6d6c189SCody Peter Mello#
79