diff options
Diffstat (limited to 'contrib/bmake/unit-tests/directive-for-escape.mk')
-rw-r--r-- | contrib/bmake/unit-tests/directive-for-escape.mk | 183 |
1 files changed, 134 insertions, 49 deletions
diff --git a/contrib/bmake/unit-tests/directive-for-escape.mk b/contrib/bmake/unit-tests/directive-for-escape.mk index 03a7a16b6a7b..913d61831c46 100644 --- a/contrib/bmake/unit-tests/directive-for-escape.mk +++ b/contrib/bmake/unit-tests/directive-for-escape.mk @@ -1,9 +1,8 @@ -# $NetBSD: directive-for-escape.mk,v 1.15 2022/01/27 20:15:14 rillig Exp $ +# $NetBSD: directive-for-escape.mk,v 1.30 2025/06/28 22:39:28 rillig Exp $ # # Test escaping of special characters in the iteration values of a .for loop. # These values get expanded later using the :U variable modifier, and this -# escaping and unescaping must pass all characters and strings effectively -# unmodified. +# escaping and unescaping must pass all characters and strings unmodified. .MAKEFLAGS: -df @@ -12,9 +11,12 @@ # This could be considered a bug. ASCII= !"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~ + # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of # the loop. Not only would it need the escaping for the variable modifier # ':U' but also the escaping for the line-end comment. +# expect+3: Unclosed expression, expecting "}" for modifier "U!"" +# expect+2: !" .for chars in ${ASCII} . info ${chars} .endfor @@ -25,6 +27,8 @@ ASCII= !"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~ # This means that a '#' sign cannot be passed in the value of a .for loop # at all. ASCII.2020-12-31= !"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~ +# expect+3: Unclosed expression, expecting "}" for modifier "U!"\\\\" +# expect+2: !"\\ .for chars in ${ASCII.2020-12-31} . info ${chars} .endfor @@ -39,47 +43,97 @@ ASCII.2020-12-31= !"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~ # See for.c, function ExprLen. V= value VALUES= $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier) +# expect: . info ${:U\$} +# expect+10: $ +# expect: . info ${:U${V}} +# expect+8: value +# expect: . info ${:U${V:=-with-modifier}} +# expect+6: value-with-modifier +# expect: . info ${:U$(V)} +# expect+4: value +# expect: . 
info ${:U$(V:=-with-modifier)} +# expect+2: value-with-modifier .for i in ${VALUES} . info $i .endfor +# +# Providing the loop items directly has the same effect. +# expect: . info ${:U\$} +# expect+7: $ +# expect: . info ${:U${V}} +# expect+5: value +# expect+4: value-with-modifier +# expect+3: value +# expect+2: value-with-modifier +.for i in $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier) +. info $i +.endfor # Try to cover the code for nested '{}' in ExprLen, without success. # -# The value of the variable VALUES is not meant to be a variable expression. +# The value of the variable VALUES is not meant to be an expression. # Instead, it is meant to represent literal text, the only escaping mechanism # being that each '$' is written as '$$'. +VALUES= $${UNDEF:U\$$\$$ {{}} end} # # The .for loop splits ${VALUES} into 3 words, at the space characters, since # the '$$' is an ordinary character and the spaces are not escaped. # Word 1 is '${UNDEF:U\$\$' # Word 2 is '{{}}' # Word 3 is 'end}' -# The first iteration expands the body of the .for loop to: -# expect: . info ${:U\${UNDEF\:U\\$\\$} -# The modifier ':U' unescapes the '\$' to a simple '$'. -# The modifier ':U' unescapes the '\:' to a simple ':'. -# The modifier ':U' unescapes the '\\' to a simple '\'. -# The modifier ':U' resolves the expression '$\' to the word 'backslash', due -# to the following variable definition. +# +# Each of these words is now inserted in the body of the .for loop. +.for i in ${VALUES} +# $i +.endfor +# +# When these words are injected into the body of the .for loop, each inside a +# '${:U...}' expression, the result is: +# +# expect: For: loop body with i = ${UNDEF:U\$\$: +# expect: # ${:U\${UNDEF\:U\\$\\$} +# expect: For: loop body with i = {{}}: +# expect: # ${:U{{\}\}} +# expect: For: loop body with i = end}: +# expect: # ${:Uend\}} +# expect: For: end for 1 +# +# The first of these expressions is the most interesting one, due to its many +# special characters. 
This expression is properly balanced: +# +# Text Meaning Explanation +# \$ $ escaped +# { { ordinary text +# UNDEF UNDEF ordinary text +# \: : escaped +# U U ordinary text +# \\ \ escaped +# $\ (expr) an expression, the variable name is '\' +# \$ $ escaped +# +# To make the expression '$\' visible, define it to an actual word: ${:U\\}= backslash -# FIXME: There was no expression '$\' in the original text of the previous -# line, that's a surprise in the parser. -# The modifier ':U' unescapes the '\$' to a simple '$'. # expect+4: ${UNDEF:U\backslash$ -VALUES= $${UNDEF:U\$$\$$ {{}} end} -# XXX: Where in the code does the '\$\$' get converted into a single '\$'? +# expect+3: {{}} +# expect+2: end} .for i in ${VALUES} . info $i .endfor +# +# FIXME: There was no expression '$\' in the original text of the variable +# 'VALUES', that's a surprise in the parser. -# Second try to cover the code for nested '{}' in ExprLen. + +# The second attempt to cover the code for nested '{}' in ExprLen. # -# XXX: It is wrong that ExprLen requires the braces to be balanced. +# XXX: It is not the job of ExprLen to parse an expression, it is naive to +# expect ExprLen to get all the details right in just a few lines of code. # Each variable modifier has its own inconsistent way of parsing nested -# variable expressions, braces and parentheses. (Compare ':M', ':S', and +# expressions, braces and parentheses. (Compare ':M', ':S', and # ':D' for details.) The only sensible thing to do is therefore to let # Var_Parse do all the parsing work. VALUES= begin<$${UNDEF:Ufallback:N{{{}}}}>end +# expect+2: begin<fallback>end .for i in ${VALUES} . info $i .endfor @@ -88,24 +142,26 @@ VALUES= begin<$${UNDEF:Ufallback:N{{{}}}}>end # The dollar sign is correctly passed through to the body of the .for loop. # There, it is expanded by the .info directive, but even there a trailing # dollar sign is kept as-is. +# expect+2: $ .for i in ${:U\$} . 
info ${i} .endfor -# As of 2020-12-31, the name of the iteration variable can even contain -# colons, which then affects variable expressions having this exact modifier. -# This is clearly an unintended side effect of the implementation. +# Before for.c 1.173 from 2023-05-08, the name of the iteration variable +# could contain colons, which affected expressions having this exact +# modifier. This possibility was neither intended nor documented. NUMBERS= one two three +# expect+1: Invalid character ":" in .for loop variable name .for NUMBERS:M*e in replaced . info ${NUMBERS} ${NUMBERS:M*e} .endfor -# As of 2020-12-31, the name of the iteration variable can contain braces, -# which gets even more surprising than colons, since it allows to replace -# sequences of variable expressions. There is no practical use case for -# this, though. +# Before for.c 1.173 from 2023-05-08, the name of the iteration variable +# could contain braces, which allowed to replace sequences of +# expressions. This possibility was neither intended nor documented. BASENAME= one EXT= .c +# expect+1: Invalid character "}" in .for loop variable name .for BASENAME}${EXT in replaced . info ${BASENAME}${EXT} .endfor @@ -114,34 +170,41 @@ EXT= .c i= outer i2= two i,= comma +# expect+2: inner inner inner inner inner +.for i in inner +. info $i ${i} ${i:M*} $(i) $(i:M*) +.endfor +# expect+2: outer +.for i in inner +. info ${i${:U}} +.endfor +# expect+2: inner} +.for i in inner +. info ${i\}} # XXX: unclear why ForLoop_SubstVarLong needs this +.endfor +# expect+2: two comma innerinnerinnerinner .for i in inner -. info . $$i: $i -. info . $${i}: ${i} -. info . $${i:M*}: ${i:M*} -. info . $$(i): $(i) -. info . $$(i:M*): $(i:M*) -. info . $${i$${:U}}: ${i${:U}} -. info . $${i\}}: ${i\}} # XXX: unclear why ForLoop_SubstVarLong needs this -. info . $${i2}: ${i2} -. info . $${i,}: ${i,} -. info . 
adjacent: $i${i}${i:M*}$i -.endfor - -# The variable name can be a single '$' since there is no check on valid -# variable names. ForLoop_SubstVarShort skips "stupid" variable names though, -# but ForLoop_SubstVarLong naively parses the body of the loop, substituting -# each '${$}' with an actual 'dollar'. +. info ${i2} ${i,} $i${i}${i:M*}$i +.endfor + +# Before for.c 1.173 from 2023-05-08, the variable name could be a single '$' +# since there was no check on valid variable names. ForLoop_SubstVarShort +# skipped "stupid" variable names though, but ForLoop_SubstVarLong naively +# parsed the body of the loop, substituting each '${$}' with an actual +# '${:Udollar}'. +# expect+1: Invalid character "$" in .for loop variable name .for $ in dollar . info eight $$$$$$$$ and no cents. . info eight ${$}${$}${$}${$} and no cents. .endfor # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts -# a variable expression. The inner '$' is followed by a '}' and is thus a +# an expression. The inner '$' is followed by a '}' and is thus a # silent syntax error, the '$' is skipped. The variable name is thus '', and # since there is never a variable named '', the whole expression '${$}' # evaluates to an empty string. closing-brace= } # guard against an ${closing-brace}= <closing-brace> # alternative interpretation +# expect+1: eight and no cents. .info eight ${$}${$}${$}${$} and no cents. # What happens if the values from the .for loop contain a literal newline? @@ -149,12 +212,17 @@ ${closing-brace}= <closing-brace> # alternative interpretation # body of the .for loop, where it was then interpreted as a literal newline, # leading to syntax errors such as "Unclosed variable expression" in the upper # line and "Invalid line type" in the lower line. 
+# +# The error message occurs in the line of the .for loop since that's the place +# where the body of the .for loop is constructed, and at this point the +# newline character gets replaced with a plain space. +# expect+3: newline in .for value +# expect+2: newline in .for value +# expect+2: short: " ", long: " " .for i in "${.newline}" -. info short: $i -. info long: ${i} +. info short: $i, long: ${i} .endfor - -# No error since the newline character is not actually used. +# No error since the newline character is not actually used in the body. .for i in "${.newline}" .endfor @@ -164,6 +232,7 @@ ${closing-brace}= <closing-brace> # alternative interpretation # loop is assembled, and at that point, ForLoop.nextItem had already been # advanced. .MAKEFLAGS: -dp +# expect+1: newline in .for value .for i in "${.newline}" : $i .endfor @@ -195,6 +264,22 @@ ${closing-brace}= <closing-brace> # alternative interpretation .for i in ((( {{{ ))) }}} # $i .endfor -.MAKEFLAGS: -d0 -all: + +# When generating the body of a .for loop, recognizing the expressions is done +# using simple heuristics. These can go wrong in ambiguous cases like this. +# The variable name ',' is unusual as it is not a pronounceable name, but the +# same principle applies for other names as well. In this case, the text '$,' +# is replaced with the expression '${:U1}', even though the text does not +# represent an expression. +.for , in 1 +# $$i $i +# VAR= $$i $i ${a:S,from$,to,} +VAR= $$i $i ${a:S,from$,to,} +.endfor +# expect: # $$i $i +# expect: # VAR= $$i $i ${a:S,from${:U1}to,} +# expect: VAR= $$i $i ${a:S,from${:U1}to,} +# +# When the above variable is evaluated, make will complain about the +# unfinished modifier ':S', as it is missing a comma. |