1# From hankedr@dms.auburn.edu  Sun Jan 28 12:25:43 2001
2# Received: from mail.actcom.co.il [192.114.47.13]
3# 	by localhost with POP3 (fetchmail-5.5.0)
4# 	for arnold@localhost (single-drop); Sun, 28 Jan 2001 12:25:43 +0200 (IST)
5# Received: by actcom.co.il (mbox arobbins)
6#  (with Cubic Circle's cucipop (v1.31 1998/05/13) Sun Jan 28 12:27:08 2001)
7# X-From_: hankedr@dms.auburn.edu Sat Jan 27 15:15:57 2001
8# Received: from lmail.actcom.co.il by actcom.co.il  with ESMTP
9# 	(8.9.1a/actcom-0.2) id PAA23801 for <arobbins@actcom.co.il>;
10# 	Sat, 27 Jan 2001 15:15:55 +0200 (EET)
11# 	(rfc931-sender: lmail.actcom.co.il [192.114.47.13])
12# Received: from billohost.com (www.billohost.com [209.196.35.10])
13# 	by lmail.actcom.co.il (8.9.3/8.9.1) with ESMTP id PAA15998
14# 	for <arobbins@actcom.co.il>; Sat, 27 Jan 2001 15:16:27 +0200
15# Received: from yak.dms.auburn.edu (yak.dms.auburn.edu [131.204.53.2])
16# 	by billohost.com (8.9.3/8.9.3) with ESMTP id IAA00467
17# 	for <arnold@skeeve.com>; Sat, 27 Jan 2001 08:15:52 -0500
18# Received: (from hankedr@localhost)
19# 	by yak.dms.auburn.edu (8.9.3/8.9.3/Debian/GNU) id HAA24441;
20# 	Sat, 27 Jan 2001 07:15:44 -0600
21# Date: Sat, 27 Jan 2001 07:15:44 -0600
22# Message-Id: <200101271315.HAA24441@yak.dms.auburn.edu>
23# From: Darrel Hankerson <hankedr@dms.auburn.edu>
24# To: arnold@skeeve.com
25# Subject: [stolfi@ic.unicamp.br: Bug in [...]* matching with acute-u]
26# Mime-Version: 1.0 (generated by tm-edit 7.106)
27# Content-Type: message/rfc822
28# Status: R
29#
30# From: Jorge Stolfi <stolfi@ic.unicamp.br>
31# To: bug-gnu-utils@gnu.org
32# Subject: Bug in [...]* matching with acute-u
33# MIME-Version: 1.0
34# Reply-To: stolfi@ic.unicamp.br
35# X-MIME-Autoconverted: from 8bit to quoted-printable by grande.dcc.unicamp.br id GAA10716
36# Sender: bug-gnu-utils-admin@gnu.org
37# Errors-To: bug-gnu-utils-admin@gnu.org
38# X-BeenThere: bug-gnu-utils@gnu.org
39# X-Mailman-Version: 2.0
40# Precedence: bulk
41# List-Help: <mailto:bug-gnu-utils-request@gnu.org?subject=help>
42# List-Post: <mailto:bug-gnu-utils@gnu.org>
43# List-Subscribe: <http://mail.gnu.org/mailman/listinfo/bug-gnu-utils>,
44# 	<mailto:bug-gnu-utils-request@gnu.org?subject=subscribe>
45# List-Id: Bug reports for the GNU utilities <bug-gnu-utils.gnu.org>
46# List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/bug-gnu-utils>,
47# 	<mailto:bug-gnu-utils-request@gnu.org?subject=unsubscribe>
48# List-Archive: <http://mail.gnu.org/pipermail/bug-gnu-utils/>
49# Date: Sat, 27 Jan 2001 06:46:11 -0200 (EDT)
50# Content-Transfer-Encoding: 8bit
51# X-MIME-Autoconverted: from quoted-printable to 8bit by manatee.dms.auburn.edu id CAA14936
52# Content-Type: text/plain; charset=iso-8859-1
53# 	<mailto:bug-gnu-utils-request@gnu.org?subject=subscribe>
54# 	<mailto:bug-gnu-utils-request@gnu.org?subject=uns
55# Content-Length: 3137
56#
57#
58#
59# Hi,
60#
61# I think I have run into a bug in gawk's handling of REs of the
62# form [...]* when the bracketed list includes certain 8-bit characters,
63# specifically u-acute (octal \372).
64#
65# The problem occurs in GNU Awk 3.0.4, both under
66# Linux 2.2.14-5.0 (intel i686) and SunOS 5.5 (Sun sparc).
67#
68# Here is a program that illustrates the bug, and its output.
69# The first two lines of the output should be equal, shouldn't they?
70#
71# ----------------------------------------------------------------------
72#! /usr/bin/gawk -f
73
# Reproducer from the bug report quoted in the comments of this file.
# Each test line below does the same three things:
#   1. resets t to the fixed subject string s,
#   2. uses gsub() to replace every match of one regex in t with "AN",
#   3. prints the regex as a label (left-justified, 8 columns wide),
#      two spaces, and then the rewritten string.
# The regexes come in pairs/triples that differ only in whether the
# bracket expression also lists one 8-bit character; the report text
# identifies that character as u-acute (octal \372).  The subject string
# itself contains only the letters shown and spaces, so the report argues
# the extra character should not change the result.
# NOTE(review): the regex literals and their order are the test itself;
# keep them as literals and keep the raw 8-bit byte intact when editing.
74BEGIN {
75  s = "bananas and ananases in canaan";
  # Group 1: bracket expression followed by '*', then a literal n.
  # The report says these first two output lines ought to be equal.
76  t = s; gsub(/[an]*n/, "AN", t);   printf "%-8s  %s\n", "[an]*n", t;
77  t = s; gsub(/[an�]*n/, "AN", t);  printf "%-8s  %s\n", "[an�]*n", t;
  # An empty line separates the groups in the output.
78  print "";
  # Same '*' form, but without n inside the brackets.
79  t = s; gsub(/[a�]*n/, "AN", t);   printf "%-8s  %s\n", "[a�]*n", t;
80  print "";
  # Group 2: bracket expression with no repetition operator.
81  t = s; gsub(/[an]n/, "AN", t);    printf "%-8s  %s\n", "[an]n", t;
82  t = s; gsub(/[a�]n/, "AN", t);    printf "%-8s  %s\n", "[a�]n", t;
83  t = s; gsub(/[an�]n/, "AN", t);   printf "%-8s  %s\n", "[an�]n", t;
84  print "";
  # Group 3: bracket expression followed by '?'.
85  t = s; gsub(/[an]?n/, "AN", t);   printf "%-8s  %s\n", "[an]?n", t;
86  t = s; gsub(/[a�]?n/, "AN", t);   printf "%-8s  %s\n", "[a�]?n", t;
87  t = s; gsub(/[an�]?n/, "AN", t);  printf "%-8s  %s\n", "[an�]?n", t;
88  print "";
  # Group 4: bracket expression followed by '+'.
89  t = s; gsub(/[an]+n/, "AN", t);   printf "%-8s  %s\n", "[an]+n", t;
90  t = s; gsub(/[a�]+n/,  "AN", t);  printf "%-8s  %s\n", "[a�]+n", t;
91  t = s; gsub(/[an�]+n/, "AN", t);  printf "%-8s  %s\n", "[an�]+n", t;
92}
93# ----------------------------------------------------------------------
94# [an]*n    bANas ANd ANases iAN cAN
95# [an�]*n   bananas and ananases in canaan
96#
97# [a�]*n    bANANas ANd ANANases iAN cANAN
98#
99# [an]n     bANANas ANd ANANases in cANaAN
100# [a�]n     bANANas ANd ANANases in cANaAN
101# [an�]n    bANANas ANd ANANases in cANaAN
102#
103# [an]?n    bANANas ANd ANANases iAN cANaAN
104# [a�]?n    bANANas ANd ANANases iAN cANaAN
105# [an�]?n   bANANas ANd ANANases iAN cANaAN
106#
107# [an]+n    bANas ANd ANases in cAN
108# [a�]+n    bANANas ANd ANANases in cANAN
109# [an�]+n   bananas and ananases in canaan
110# ----------------------------------------------------------------------
111#
112# Apparently the problem is specific to u-acute; I've tried several
113# other 8-bit characters and they seem to behave as expected.
114#
115# By comparing the second and third output lines, it would seem that the
116# problem involves backtracking out of a partial match of [...]* in
117# order to match the next sub-expression, when the latter begins with
118# one of the given characters.
119#
120#
121# All the best,
122#
123# --stolfi
124#
125# ------------------------------------------------------------------------
126# Jorge Stolfi | http://www.dcc.unicamp.br/~stolfi | stolfi@dcc.unicamp.br
127# Institute of Computing (formerly DCC-IMECC)      | Wrk +55 (19)3788-5858
128# Universidade Estadual de Campinas (UNICAMP)      |     +55 (19)3788-5840
129# Av. Albert Einstein 1251 - Caixa Postal 6176     | Fax +55 (19)3788-5847
130# 13083-970 Campinas, SP -- Brazil                 | Hom +55 (19)3287-4069
131# ------------------------------------------------------------------------
132#
133# _______________________________________________
134# Bug-gnu-utils mailing list
135# Bug-gnu-utils@gnu.org
136# http://mail.gnu.org/mailman/listinfo/bug-gnu-utils
137#
138#
139