-
Notifications
You must be signed in to change notification settings - Fork 3.3k
Add pcre_u guard to functions using u flag #9724
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -97,7 +97,11 @@ function check_comment( $author, $email, $url, $comment, $user_ip, $user_agent, | |
| * Check the comment fields for moderation keywords. If any are found, | ||
| * fail the check for the given field by returning false. | ||
| */ | ||
| $pattern = "#$word#iu"; | ||
| if ( _wp_can_use_pcre_u() ) { | ||
| $pattern = "#$word#iu"; | ||
| } else { | ||
| $pattern = "#$word#i"; | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I suspect this could be another case where it’s okay to remove the UTF-8 flag, because whatever […] it would be good to verify this. One setup would be to have PHP using an […] |
||
| if ( preg_match( $pattern, $author ) ) { | ||
| return false; | ||
| } | ||
|
|
@@ -1415,7 +1419,11 @@ function wp_check_comment_disallowed_list( $author, $email, $url, $comment, $use | |
| // Do some escaping magic so that '#' chars in the spam words don't break things: | ||
| $word = preg_quote( $word, '#' ); | ||
|
|
||
| $pattern = "#$word#iu"; | ||
| if ( _wp_can_use_pcre_u() ) { | ||
| $pattern = "#$word#iu"; | ||
| } else { | ||
| $pattern = "#$word#i"; | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above: is the […] |
||
| if ( preg_match( $pattern, $author ) | ||
| || preg_match( $pattern, $email ) | ||
| || preg_match( $pattern, $url ) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1048,7 +1048,7 @@ | |
| // Valid two-byte code points. | ||
|
|
||
| if ( $b1 >= 0xC2 && $b1 <= 0xDF && $b2 >= 0x80 && $b2 <= 0xBF ) { | ||
| $i++; | ||
| ++$i; | ||
| continue; | ||
| } | ||
|
|
||
|
|
@@ -1289,19 +1289,12 @@ | |
| return $text; | ||
| } | ||
|
|
||
| // Check for support for utf8 in the installed PCRE library once and store the result in a static. | ||
| static $utf8_pcre = null; | ||
| if ( ! isset( $utf8_pcre ) ) { | ||
| // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged | ||
| $utf8_pcre = @preg_match( '/^./u', 'a' ); | ||
| } | ||
| // We can't demand utf8 in the PCRE installation, so just return the string in those cases. | ||
| if ( ! $utf8_pcre ) { | ||
| // Check for support for utf8 in the installed PCRE library. | ||
| if ( ! _wp_can_use_pcre_u() ) { | ||
| return $text; | ||
| } | ||
|
|
||
| // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged -- preg_match fails when it encounters invalid UTF8 in $text. | ||
| if ( 1 === @preg_match( '/^./us', $text ) ) { | ||
| if ( 1 === preg_match( '/^./us', $text ) ) { | ||
| return $text; | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This whole function has been updated in […] |
||
|
|
||
|
|
@@ -4233,7 +4226,15 @@ | |
|
|
||
| if ( str_starts_with( wp_get_word_count_type(), 'characters' ) && preg_match( '/^utf\-?8$/i', get_option( 'blog_charset' ) ) ) { | ||
| $text = trim( preg_replace( "/[\n\r\t ]+/", ' ', $text ), ' ' ); | ||
| preg_match_all( '/./u', $text, $words_array ); | ||
| if ( _wp_can_use_pcre_u() ) { | ||
| preg_match_all( '/./u', $text, $words_array ); | ||
| } else { | ||
| if ( function_exists( 'mb_str_split' ) ) { | ||
| $words_array = array( mb_str_split( $text, 1, 'UTF-8' ) ); | ||
| } else { | ||
| $words_array = array( str_split( $text ) ); | ||
| } | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have this function slated for much bigger updates. I would recommend against updating the PCRE usage here because of that. |
||
| $words_array = array_slice( $words_array[0], 0, $num_words + 1 ); | ||
| $sep = ''; | ||
| } else { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -360,12 +360,20 @@ function wp_maybe_decline_date( $date, $format = '' ) { | |
| $decline = preg_match( '#[dj]\.? F#', $format ); | ||
| } else { | ||
| // If the format is not passed, try to guess it from the date string. | ||
| $decline = preg_match( '#\b\d{1,2}\.? [^\d ]+\b#u', $date ); | ||
| if ( _wp_can_use_pcre_u() ) { | ||
| $decline = preg_match( '#\b\d{1,2}\.? [^\d ]+\b#u', $date ); | ||
| } else { | ||
| $decline = preg_match( '#\b\d{1,2}\.? [^\d ]+\b#', $date ); | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the difference between the […] |
||
| } | ||
|
|
||
| if ( $decline ) { | ||
| foreach ( $months as $key => $month ) { | ||
| $months[ $key ] = '# ' . preg_quote( $month, '#' ) . '\b#u'; | ||
| if ( _wp_can_use_pcre_u() ) { | ||
| $months[ $key ] = '# ' . preg_quote( $month, '#' ) . '\b#u'; | ||
| } else { | ||
| $months[ $key ] = '# ' . preg_quote( $month, '#' ) . '\b#'; | ||
| } | ||
| } | ||
|
|
||
| foreach ( $months_genitive as $key => $month ) { | ||
|
|
@@ -383,12 +391,20 @@ function wp_maybe_decline_date( $date, $format = '' ) { | |
| $decline = preg_match( '#F [dj]#', $format ); | ||
| } else { | ||
| // If the format is not passed, try to guess it from the date string. | ||
| $decline = preg_match( '#\b[^\d ]+ \d{1,2}(st|nd|rd|th)?\b#u', trim( $date ) ); | ||
| if ( _wp_can_use_pcre_u() ) { | ||
| $decline = preg_match( '#\b[^\d ]+ \d{1,2}(st|nd|rd|th)?\b#u', trim( $date ) ); | ||
| } else { | ||
| $decline = preg_match( '#\b[^\d ]+ \d{1,2}(st|nd|rd|th)?\b#', trim( $date ) ); | ||
| } | ||
| } | ||
|
|
||
| if ( $decline ) { | ||
| foreach ( $months as $key => $month ) { | ||
| $months[ $key ] = '#\b' . preg_quote( $month, '#' ) . ' (\d{1,2})(st|nd|rd|th)?([-–]\d{1,2})?(st|nd|rd|th)?\b#u'; | ||
| if ( _wp_can_use_pcre_u() ) { | ||
| $months[ $key ] = '#\b' . preg_quote( $month, '#' ) . ' (\d{1,2})(st|nd|rd|th)?([-–]\d{1,2})?(st|nd|rd|th)?\b#u'; | ||
| } else { | ||
| $months[ $key ] = '#\b' . preg_quote( $month, '#' ) . ' (\d{1,2})(st|nd|rd|th)?([-–]\d{1,2})?(st|nd|rd|th)?\b#'; | ||
| } | ||
| } | ||
|
|
||
| foreach ( $months_genitive as $key => $month ) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -155,7 +155,13 @@ | |
| $unpoified = ''; | ||
| $previous_is_backslash = false; | ||
| foreach ( $lines as $line ) { | ||
| preg_match_all( '/./u', $line, $chars ); | ||
| if ( _wp_can_use_pcre_u() ) { | ||
| preg_match_all( '/./u', $line, $chars ); | ||
| } elseif ( function_exists( 'mb_str_split' ) ) { | ||
| $chars = array( mb_str_split( $line, 1, 'UTF-8' ) ); | ||
| } else { | ||
| $chars = array( str_split( $line ) ); | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This splitting of lines into characters and looking for a backslash is something we can probably do away with entirely: a streaming approach with […] |
||
| $chars = $chars[0]; | ||
| foreach ( $chars as $char ) { | ||
| if ( ! $previous_is_backslash ) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -613,7 +613,11 @@ function get_shortcode_atts_regex() { | |
| function shortcode_parse_atts( $text ) { | ||
| $atts = array(); | ||
| $pattern = get_shortcode_atts_regex(); | ||
| $text = preg_replace( "/[\x{00a0}\x{200b}]+/u", ' ', $text ); | ||
| if ( _wp_can_use_pcre_u() ) { | ||
| $text = preg_replace( "/[\x{00a0}\x{200b}]+/u", ' ', $text ); | ||
| } else { | ||
| $text = str_replace( array( "\xc2\xa0", "\xe2\x80\x8b" ), ' ', $text ); | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If the regex isn’t necessary, it would seem fine to replace it directly with the […]
although it would be good to verify that all supported versions of PHP support that Unicode syntax without any extensions. I think they do. |
||
| if ( preg_match_all( $pattern, $text, $match, PREG_SET_ORDER ) ) { | ||
| foreach ( $match as $m ) { | ||
| if ( ! empty( $m[1] ) ) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PCRE pattern only looks at US-ASCII characters and doesn’t even need the UTF-8 flag. Do you see any reason not to update this by simply removing the flag?