aboutsummaryrefslogtreecommitdiff
path: root/contrib/bmake/unit-tests/directive-for-escape.mk
blob: 16df5b1db4e3c79a7acc44feba034df7e46c1099 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# $NetBSD: directive-for-escape.mk,v 1.23 2023/11/19 22:32:44 rillig Exp $
#
# Test escaping of special characters in the iteration values of a .for loop.
# These values get expanded later using the :U variable modifier, and this
# escaping and unescaping must pass all characters and strings unmodified.

.MAKEFLAGS: -df

# Even though the .for loops take quotes into account when splitting the
# string into words, the quotes don't need to be balanced, as of 2020-12-31.
# This could be considered a bug.
ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~


# XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
# the loop.  Not only would it need the escaping for the variable modifier
# ':U' but also the escaping for the line-end comment.
.for chars in ${ASCII}
.  info ${chars}
.endfor
# expect-2: !"

# As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
# as a comment character.  Using 3 backslashes doesn't help either since
# then the situation is essentially the same as with 1 backslash.
# This means that a '#' sign cannot be passed in the value of a .for loop
# at all.
ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
.for chars in ${ASCII.2020-12-31}
.  info ${chars}
.endfor
# expect-2: !"\\

# Cover the code in ExprLen.
#
# XXX: It is unexpected that the variable V gets expanded in the loop body.
# The double '$$' should intuitively prevent exactly this.  Probably nobody
# was adventurous enough to use literal dollar signs in the values of a .for
# loop, allowing this edge case to go unnoticed for years.
#
# See for.c, function ExprLen.
V=		value
VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
.for i in ${VALUES}
.  info $i
.endfor
# expect: .  info ${:U\$}
# expect-3: $
# expect: .  info ${:U${V}}
# expect-5: value
# expect: .  info ${:U${V:=-with-modifier}}
# expect-7: value-with-modifier
# expect: .  info ${:U$(V)}
# expect-9: value
# expect: .  info ${:U$(V:=-with-modifier)}
# expect-11: value-with-modifier
#
# Providing the loop items directly has the same effect.
.for i in $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
.  info $i
.endfor
# expect: .  info ${:U\$}
# expect-3: $
# expect: .  info ${:U${V}}
# expect-5: value
# expect-6: value-with-modifier
# expect-7: value
# expect-8: value-with-modifier

# Try to cover the code for nested '{}' in ExprLen, without success.
#
# The value of the variable VALUES is not meant to be an expression.
# Instead, it is meant to represent literal text, the only escaping mechanism
# being that each '$' is written as '$$'.
VALUES=		$${UNDEF:U\$$\$$ {{}} end}
#
# The .for loop splits ${VALUES} into 3 words, at the space characters, since
# the '$$' is an ordinary character and the spaces are not escaped.
#	Word 1 is '${UNDEF:U\$\$'
#	Word 2 is '{{}}'
#	Word 3 is 'end}'
#
# Each of these words is now inserted in the body of the .for loop.
.for i in ${VALUES}
# $i
.endfor
#
# When these words are injected into the body of the .for loop, each inside a
# '${:U...}' expression, the result is:
#
# expect: For: loop body with i = ${UNDEF:U\$\$:
# expect: # ${:U\${UNDEF\:U\\$\\$}
# expect: For: loop body with i = {{}}:
# expect: # ${:U{{\}\}}
# expect: For: loop body with i = end}:
# expect: # ${:Uend\}}
# expect: For: end for 1
#
# The first of these expressions is the most interesting one, due to its many
# special characters.  This expression is properly balanced:
#
#	Text	Meaning		Explanation
#	\$	$		escaped
#	{	{		ordinary text
#	UNDEF	UNDEF		ordinary text
#	\:	:		escaped
#	U	U		ordinary text
#	\\	\		escaped
#	$\	(expr)		an expression, the variable name is '\'
#	\$	$		escaped
#
# To make the expression '$\' visible, define it to an actual word:
${:U\\}=	backslash
.for i in ${VALUES}
.  info $i
.endfor
#
# expect-3: ${UNDEF:U\backslash$
# expect-4: {{}}
# expect-5: end}
#
# FIXME: There was no expression '$\' in the original text of the variable
# 'VALUES'; that's a surprise in the parser.


# Second try to cover the code for nested '{}' in ExprLen.
#
# XXX: It is not the job of ExprLen to parse an expression, it is naive to
# expect ExprLen to get all the details right in just a few lines of code.
# Each variable modifier has its own inconsistent way of parsing nested
# expressions, braces and parentheses.  (Compare ':M', ':S', and
# ':D' for details.)  The only sensible thing to do is therefore to let
# Var_Parse do all the parsing work.
VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
.for i in ${VALUES}
.  info $i
.endfor
# expect-2: begin<fallback>end

# A single trailing dollar doesn't happen in practice.
# The dollar sign is correctly passed through to the body of the .for loop.
# There, it is expanded by the .info directive, but even there a trailing
# dollar sign is kept as-is.
.for i in ${:U\$}
.  info ${i}
.endfor
# expect-2: $

# Before for.c 1.173 from 2023-05-08, the name of the iteration variable
# could contain colons, which affected expressions having this exact
# modifier.  This possibility was neither intended nor documented.
NUMBERS=	one two three
# expect+1: invalid character ':' in .for loop variable name
.for NUMBERS:M*e in replaced
.  info ${NUMBERS} ${NUMBERS:M*e}
.endfor

# Before for.c 1.173 from 2023-05-08, the name of the iteration variable
# could contain braces, which allowed replacing sequences of
# expressions.  This possibility was neither intended nor documented.
BASENAME=	one
EXT=		.c
# expect+1: invalid character '}' in .for loop variable name
.for BASENAME}${EXT in replaced
.  info ${BASENAME}${EXT}
.endfor

# Demonstrate the various ways to refer to the iteration variable.
i=		outer
i2=		two
i,=		comma
.for i in inner
.  info .        $$i: $i
.  info .      $${i}: ${i}
.  info .   $${i:M*}: ${i:M*}
.  info .      $$(i): $(i)
.  info .   $$(i:M*): $(i:M*)
.  info . $${i$${:U}}: ${i${:U}}
.  info .    $${i\}}: ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
.  info .     $${i2}: ${i2}
.  info .     $${i,}: ${i,}
.  info .  adjacent: $i${i}${i:M*}$i
.endfor
# expect-11: .        $i: inner
# expect-11: .      ${i}: inner
# expect-11: .   ${i:M*}: inner
# expect-11: .      $(i): inner
# expect-11: .   $(i:M*): inner
# expect-11: . ${i${:U}}: outer
# expect-11: .    ${i\}}: inner}
# expect-11: .     ${i2}: two
# expect-11: .     ${i,}: comma
# expect-11: .  adjacent: innerinnerinnerinner

# Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
# since there was no check on valid variable names.  ForLoop_SubstVarShort
# skipped "stupid" variable names, but ForLoop_SubstVarLong naively
# parsed the body of the loop, substituting each '${$}' with an actual
# '${:Udollar}'.
# expect+1: invalid character '$' in .for loop variable name
.for $ in dollar
.  info eight $$$$$$$$ and no cents.
.  info eight ${$}${$}${$}${$} and no cents.
.endfor
# Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
# an expression. The inner '$' is followed by a '}' and is thus a
# silent syntax error; the '$' is skipped. The variable name is thus '', and
# since there is never a variable named '', the whole expression '${$}'
# evaluates to an empty string.
closing-brace=		}		# guard against an
${closing-brace}=	<closing-brace>	# alternative interpretation
# expect+1: eight  and no cents.
.info eight ${$}${$}${$}${$} and no cents.

# What happens if the values from the .for loop contain a literal newline?
# Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
# body of the .for loop, where it was then interpreted as a literal newline,
# leading to syntax errors such as "Unclosed variable expression" in the upper
# line and "Invalid line type" in the lower line.
#
# The error message occurs in the line of the .for loop since that's the place
# where the body of the .for loop is constructed, and at this point the
# newline character gets replaced with a plain space.
# expect+2: newline in .for value
# expect+1: newline in .for value
.for i in "${.newline}"
.  info short: $i
.  info long: ${i}
.endfor
# expect-3: short: " "
# expect-3: long: " "

# No error since the newline character is not actually used.
.for i in "${.newline}"
.endfor

# From for.c 1.161 from 2022-01-08 until before for.c 1.163 from 2022-01-09,
# a newline character in a .for loop value led to a crash: the error message,
# including the stack trace, is printed while the body of the .for loop is
# being assembled, and at that point ForLoop.nextItem had already been
# advanced.
.MAKEFLAGS: -dp
# expect+1: newline in .for value
.for i in "${.newline}"
: $i
.endfor
.MAKEFLAGS: -d0

.MAKEFLAGS: -df
.for i in \# \\\#
# $i
.endfor

.for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
# $i
.endfor

# The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
# target-local variables.  See for.c 1.45 from 2009-01-14.
.for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
# $i
.endfor
# expect: # ${:U${.TARGET}}
# XXX: Why does '$' result in the same text as '$$'?
# expect: # ${:U${.TARGET}}
# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
# expect: # ${:U$${.TARGET\}}
# XXX: Why does '$' result in the same text as '$$'?
# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
# expect: # ${:U$${.TARGET\}}

.for i in ((( {{{ ))) }}}
# $i
.endfor


# When generating the body of a .for loop, recognizing the expressions is done
# using simple heuristics.  These can go wrong in ambiguous cases like this.
# The variable name ',' is unusual as it is not a pronounceable name, but the
# same principle applies for other names as well.  In this case, the text '$,'
# is replaced with the expression '${:U1}', even though the text does not
# represent an expression.
.for , in 1
# $$i $i
# VAR= $$i $i ${a:S,from$,to,}
VAR= $$i $i ${a:S,from$,to,}
.endfor
# expect: # $$i $i
# expect: # VAR= $$i $i ${a:S,from${:U1}to,}
# expect: VAR= $$i $i ${a:S,from${:U1}to,}
#
# When the above variable is evaluated, make will complain about the
# unfinished modifier ':S', as it is missing a comma.