aboutsummaryrefslogtreecommitdiff
path: root/lang/perl5.16/files/patch-utf-regcomp
diff options
context:
space:
mode:
Diffstat (limited to 'lang/perl5.16/files/patch-utf-regcomp')
-rw-r--r--lang/perl5.16/files/patch-utf-regcomp95
1 files changed, 0 insertions, 95 deletions
diff --git a/lang/perl5.16/files/patch-utf-regcomp b/lang/perl5.16/files/patch-utf-regcomp
deleted file mode 100644
index 44c9a55de49a..000000000000
--- a/lang/perl5.16/files/patch-utf-regcomp
+++ /dev/null
@@ -1,95 +0,0 @@
---- regcomp.c.orig 2006-01-08 21:59:27.000000000 +0100
-+++ regcomp.c 2007-11-06 22:48:26.000000000 +0100
-@@ -135,7 +135,10 @@ typedef struct RExC_state_t {
- I32 extralen;
- I32 seen_zerolen;
- I32 seen_evals;
-- I32 utf8;
-+ I32 utf8; /* whether the pattern is utf8 or not */
-+ I32 orig_utf8; /* whether the pattern was originally in utf8 */
-+ /* XXX use this for future optimisation of case
-+ * where pattern must be upgraded to utf8. */
- #if ADD_TO_REGEXEC
- char *starttry; /* -Dr: where regtry was called. */
- #define RExC_starttry (pRExC_state->starttry)
-@@ -161,6 +164,7 @@ typedef struct RExC_state_t {
- #define RExC_seen_zerolen (pRExC_state->seen_zerolen)
- #define RExC_seen_evals (pRExC_state->seen_evals)
- #define RExC_utf8 (pRExC_state->utf8)
-+#define RExC_orig_utf8 (pRExC_state->orig_utf8)
-
- #define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
- #define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
-@@ -1749,15 +1753,16 @@ Perl_pregcomp(pTHX_ char *exp, char *xen
- if (exp == NULL)
- FAIL("NULL regexp argument");
-
-- RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
-+ RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
-
-- RExC_precomp = exp;
- DEBUG_r({
- if (!PL_colorset) reginitcolors();
- PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
- PL_colors[4],PL_colors[5],PL_colors[0],
-- (int)(xend - exp), RExC_precomp, PL_colors[1]);
-+ (int)(xend - exp), exp, PL_colors[1]);
- });
-+redo_first_pass:
-+ RExC_precomp = exp;
- RExC_flags = pm->op_pmflags;
- RExC_sawback = 0;
-
-@@ -1783,6 +1788,25 @@ Perl_pregcomp(pTHX_ char *exp, char *xen
- RExC_precomp = Nullch;
- return(NULL);
- }
-+ if (RExC_utf8 && !RExC_orig_utf8) {
-+ /* It's possible to write a regexp in ascii that represents unicode
-+ codepoints outside of the byte range, such as via \x{100}. If we
-+ detect such a sequence we have to convert the entire pattern to utf8
-+ and then recompile, as our sizing calculation will have been based
-+ on 1 byte == 1 character, but we will need to use utf8 to encode
-+ at least some part of the pattern, and therefore must convert the whole
-+ thing.
-+ XXX: somehow figure out how to make this less expensive...
-+ -- dmq */
-+ STRLEN len = xend-exp;
-+ DEBUG_r(PerlIO_printf(Perl_debug_log,
-+ "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
-+ exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
-+ xend = exp + len;
-+ RExC_orig_utf8 = RExC_utf8;
-+ SAVEFREEPV(exp);
-+ goto redo_first_pass;
-+ }
- DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));
-
- /* Small enough for pointer-storage convention?
---- t/op/pat.t.orig 2006-01-07 13:53:32.000000000 +0100
-+++ t/op/pat.t 2007-11-06 21:52:30.000000000 +0100
-@@ -6,7 +6,7 @@
-
- $| = 1;
-
--print "1..1187\n";
-+print "1..1189\n";
-
- BEGIN {
- chdir 't' if -d 't';
-@@ -3395,5 +3395,14 @@ ok(("foba ba$s" =~ qr/(foo|BaSS|bar)/i)
- "# assigning to original string should not corrupt match vars");
- }
-
--# last test 1187
-+{
-+ use warnings;
-+ my @w;
-+ local $SIG{__WARN__}=sub{push @w,"@_"};
-+ my $c=qq(\x{DF});
-+ ok($c=~/${c}|\x{100}/, "ASCII pattern that really is utf8");
-+ ok(@w==0, "ASCII pattern that really is utf8");
-+}
-+
-+# last test 1189
-