1*e6d6c189SCody Peter Mello# Date: Thu, 27 Apr 2006 20:59:03 +0100
2*e6d6c189SCody Peter Mello# From: Lee Haywood <ljhaywood2@googlemail.com>
3*e6d6c189SCody Peter Mello# Subject: gawk multi-byte support bugs, assertion bug and fix.
4*e6d6c189SCody Peter Mello# To: bug-gawk@gnu.org
5*e6d6c189SCody Peter Mello# Message-id: <60962be00604271259na0d8fdayb9d0c69a853216e8@mail.gmail.com>
6*e6d6c189SCody Peter Mello# MIME-version: 1.0
7*e6d6c189SCody Peter Mello# Content-type: multipart/alternative;
8*e6d6c189SCody Peter Mello#  boundary="----=_Part_10136_920879.1146167943492"
9*e6d6c189SCody Peter Mello# Status: RO
10*e6d6c189SCody Peter Mello#
11*e6d6c189SCody Peter Mello# ------=_Part_10136_920879.1146167943492
12*e6d6c189SCody Peter Mello# Content-Type: text/plain; charset=ISO-8859-1
13*e6d6c189SCody Peter Mello# Content-Transfer-Encoding: quoted-printable
14*e6d6c189SCody Peter Mello# Content-Disposition: inline
15*e6d6c189SCody Peter Mello#
16*e6d6c189SCody Peter Mello#
17*e6d6c189SCody Peter Mello# Firstly, I have been getting the following error from version 3.1.5.
18*e6d6c189SCody Peter Mello#
19*e6d6c189SCody Peter Mello#     awk: node.c:515: unref: Assertion `(tmp->flags & 4096) != 0' failed.
20*e6d6c189SCody Peter Mello#
21*e6d6c189SCody Peter Mello# In mk_number() in node.c the MBS_SUPPORT code is inside the GAWKDEBUG
22*e6d6c189SCody Peter Mello# section - moving it outside explicitly clears the string values, which
23*e6d6c189SCody Peter Mello# prevents the assertion error from occurring.  The corrected version is
24*e6d6c189SCody Peter Mello# shown at the end of this message.
25*e6d6c189SCody Peter Mello#
26*e6d6c189SCody Peter Mello# As an aside, I also noticed that n->wstptr is not cleared by
27*e6d6c189SCody Peter Mello# set_field() and set_record() in field.c when the flags are set to
28*e6d6c189SCody Peter Mello# exclude WSTRCUR.  However, I do not have a test case to show if
29*e6d6c189SCody Peter Mello# changing them makes any difference.
30*e6d6c189SCody Peter Mello#
31*e6d6c189SCody Peter Mello# A second problem also occurs when gawk 3.1.5 is compiled with
32*e6d6c189SCody Peter Mello# multi-byte character support (MBS_SUPPORT).  The following code should
33*e6d6c189SCody Peter Mello# change the index of the substring "bc" from 2 to 3, but it gets
34*e6d6c189SCody Peter Mello# reported as 2 in both cases - which is obviously disastrous.
35*e6d6c189SCody Peter Mello#
36*e6d6c189SCody Peter Mello#     awk 'BEGIN {
37*e6d6c189SCody Peter Mello#             Value = "abc"
38*e6d6c189SCody Peter Mello#
39*e6d6c189SCody Peter Mello#             print "Before <" Value "> ",
40*e6d6c189SCody Peter Mello#                   index( Value, "bc" )
41*e6d6c189SCody Peter Mello#
42*e6d6c189SCody Peter Mello#             sub( /bc/, "bbc", Value )
43*e6d6c189SCody Peter Mello#
44*e6d6c189SCody Peter Mello#             print "After  <" Value ">",
45*e6d6c189SCody Peter Mello#                   index( Value, "bc" )
46*e6d6c189SCody Peter Mello#         }'
47*e6d6c189SCody Peter Mello#
48*e6d6c189SCody Peter Mello# Compiling with MBS_SUPPORT undefined makes these problems go away.
49*e6d6c189SCody Peter Mello#
50*e6d6c189SCody Peter Mello# /* mk_number --- allocate a node with defined number */
51*e6d6c189SCody Peter Mello#
52*e6d6c189SCody Peter Mello# NODE *
53*e6d6c189SCody Peter Mello# mk_number(AWKNUM x, unsigned int flags)
54*e6d6c189SCody Peter Mello# {
55*e6d6c189SCody Peter Mello#         register NODE *r;
56*e6d6c189SCody Peter Mello#
57*e6d6c189SCody Peter Mello#         getnode(r);
58*e6d6c189SCody Peter Mello#         r->type = Node_val;
59*e6d6c189SCody Peter Mello#         r->numbr = x;
60*e6d6c189SCody Peter Mello#         r->flags = flags;
61*e6d6c189SCody Peter Mello# #if defined MBS_SUPPORT
62*e6d6c189SCody Peter Mello#         r->wstptr = NULL;
63*e6d6c189SCody Peter Mello#         r->wstlen = 0;
64*e6d6c189SCody Peter Mello# #endif /* MBS_SUPPORT */
65*e6d6c189SCody Peter Mello# #ifdef GAWKDEBUG
66*e6d6c189SCody Peter Mello#         r->stref = 1;
67*e6d6c189SCody Peter Mello#         r->stptr = NULL;
68*e6d6c189SCody Peter Mello#         r->stlen = 0;
69*e6d6c189SCody Peter Mello# #if defined MBS_SUPPORT
70*e6d6c189SCody Peter Mello#         r->flags &= ~WSTRCUR;
71*e6d6c189SCody Peter Mello# #endif /* MBS_SUPPORT */
72*e6d6c189SCody Peter Mello# #endif /* GAWKDEBUG */
73*e6d6c189SCody Peter Mello#         return r;
74*e6d6c189SCody Peter Mello# }
75*e6d6c189SCody Peter Mello#
76*e6d6c189SCody Peter Mello# Thanks.
77*e6d6c189SCody Peter Mello#
78*e6d6c189SCody Peter Mello# --
79*e6d6c189SCody Peter Mello# Lee Haywood.
80*e6d6c189SCody Peter Mello
# Reproducer for the second problem described in the quoted report:
# index() is called on Value, Value is rewritten by sub(), and index()
# is called again.  The second lookup must reflect the modified string,
# so the reported position of "bc" is expected to move from 2 to 3.
# The statement order (index, sub, index on the same variable) is the
# point of the test and should not be rearranged.
81*e6d6c189SCody Peter MelloBEGIN {
82*e6d6c189SCody Peter Mello	Value = "abc"
83*e6d6c189SCody Peter Mello
	# First lookup, before Value is modified; per the report this
	# prints index 2.
84*e6d6c189SCody Peter Mello	print "Before <" Value "> ", index( Value, "bc" )
85*e6d6c189SCody Peter Mello
	# Rewrite Value in place: the first match of /bc/ becomes "bbc".
86*e6d6c189SCody Peter Mello	sub( /bc/, "bbc", Value )
87*e6d6c189SCody Peter Mello
	# Second lookup, after the substitution; the report expects 3 here
	# and states that gawk 3.1.5 built with MBS_SUPPORT still printed 2.
88*e6d6c189SCody Peter Mello	print "After  <" Value ">", index( Value, "bc" )
89*e6d6c189SCody Peter Mello}
90