diff options
Diffstat (limited to 'contrib/tzdata/zishrink.awk')
-rw-r--r-- | contrib/tzdata/zishrink.awk | 130 |
1 files changed, 100 insertions, 30 deletions
diff --git a/contrib/tzdata/zishrink.awk b/contrib/tzdata/zishrink.awk index 1947c7c519b9..c98dc6ae786d 100644 --- a/contrib/tzdata/zishrink.awk +++ b/contrib/tzdata/zishrink.awk @@ -23,7 +23,7 @@ function record_hash(n, name) function gen_rule_name(name, \ n) { - # Use a simple memonic: the first two letters. + # Use a simple mnemonic: the first two letters. n = substr(name, 1, 2) record_hash(n, name) # printf "# %s = %s\n", n, name @@ -150,10 +150,19 @@ function prehash_rule_names( \ } } +function make_line(n, field, \ + f, r) +{ + r = field[1] + for (f = 2; f <= n; f++) + r = r " " field[f] + return r +} + # Process the input line LINE and save it for later output. function process_input_line(line, \ - field, end, i, n, startdef, \ + f, field, end, n, outline, r, \ linkline, ruleline, zoneline) { # Remove comments, normalize spaces, and append a space to each line. @@ -190,8 +199,10 @@ function process_input_line(line, \ } # Abbreviate "max", "min", "only" and month names. - gsub(/ max /, " ma ", line) - gsub(/ min /, " mi ", line) + # Although "max" and "min" can both be abbreviated to just "m", + # the longer forms "ma" and "mi" are needed with zic 2023d and earlier. + gsub(/ max /, dataform == "vanguard" ? " m " : " ma ", line) + gsub(/ min /, dataform == "vanguard" ? " m " : " mi ", line) gsub(/ only /, " o ", line) gsub(/ Jan /, " Ja ", line) gsub(/ Feb /, " F ", line) @@ -218,46 +229,103 @@ function process_input_line(line, \ n = split(line, field) - # Abbreviate rule names. - i = zoneline ? 4 : linkline ? 0 : 2 - if (i && field[i] ~ /^[^-+0-9]/) { - if (!rule[field[i]]) - rule[field[i]] = gen_rule_name(field[i]) - field[i] = rule[field[i]] + # Record which rule names are used, and generate their abbreviations. + f = zoneline ? 4 : linkline || ruleline ? 0 : 2 + r = field[f] + if (r ~ /^[^-+0-9]/) { + rule_used[r] = 1 } - # If this zone supersedes an earlier one, delete the earlier one - # from the saved output lines. - startdef = "" if (zoneline) zonename = startdef = field[2] else if (linkline) zonename = startdef = field[3] else if (ruleline) zonename = "" - if (startdef) { - i = zonedef[startdef] - if (i) { - do - output_line[i - 1] = "" - while (output_line[i++] ~ /^[-+0-9]/); - } + + # Save the information for later output. + outline = make_line(n, field) + if (ruleline) + rule_output_line[nrule_out++] = outline + else if (linkline) { + # In vanguard format with Gawk, links are output sorted by destination. + if (dataform == "vanguard" && PROCINFO["version"]) + linkdef[zonename] = field[2] + else + link_output_line[nlink_out++] = outline + }else + zonedef[zonename] = (zoneline ? "" : zonedef[zonename] "\n") outline +} + +function omit_unused_rules( \ + i, field) +{ + for (i = 0; i < nrule_out; i++) { + split(rule_output_line[i], field) + if (!rule_used[field[2]]) + rule_output_line[i] = "" } - zonedef[zonename] = nout + 1 +} - # Save the line for later output. - line = field[1] - for (i = 2; i <= n; i++) - line = line " " field[i] - output_line[nout++] = line +function abbreviate_rule_names( \ + abbr, f, field, i, n, newdef, newline, r, \ + zoneline, zonelines, zonename) +{ + for (i = 0; i < nrule_out; i++) { + n = split(rule_output_line[i], field) + if (n) { + r = field[2] + if (r ~ /^[^-+0-9]/) { + abbr = rule[r] + if (!abbr) { + rule[r] = abbr = gen_rule_name(r) + } + field[2] = abbr + rule_output_line[i] = make_line(n, field) + } + } + } + for (zonename in zonedef) { + zonelines = split(zonedef[zonename], zoneline, /\n/) + newdef = "" + for (i = 1; i <= zonelines; i++) { + newline = zoneline[i] + n = split(newline, field) + f = i == 1 ? 4 : 2 + r = rule[field[f]] + if (r) { + field[f] = r + newline = make_line(n, field) + } + newdef = (newdef ? newdef "\n" : "") newline + } + zonedef[zonename] = newdef + } } function output_saved_lines( \ - i) + i, zonename) { - for (i = 0; i < nout; i++) - if (output_line[i]) - print output_line[i] + for (i = 0; i < nrule_out; i++) + if (rule_output_line[i]) + print rule_output_line[i] + + # When using gawk, output zones sorted by name. + # This makes the output a bit more compressible. + PROCINFO["sorted_in"] = "@ind_str_asc" + for (zonename in zonedef) + print zonedef[zonename] + + if (nlink_out) + for (i = 0; i < nlink_out; i++) + print link_output_line[i] + else { + # When using gawk, output links sorted by destination. + # This also helps compressibility a bit. + PROCINFO["sorted_in"] = "@val_type_asc" + for (zonename in linkdef) + printf "L %s %s\n", linkdef[zonename], zonename + } } BEGIN { @@ -314,5 +382,7 @@ BEGIN { } END { + omit_unused_rules() + abbreviate_rule_names() output_saved_lines() } |