From 9f610ddac3f3a566f7ec672f5741d2314ea63ffc Mon Sep 17 00:00:00 2001
From: azett
Date: Mon, 31 Dec 2018 12:04:25 +0100
Subject: [PATCH] Bugfix: sanitize_title_with_dashes() handles characters >
chr(128) correctly -> fixes
https://github.com/flatpressblog/flatpress/pull/11 by @moortaube in a general
way
---
fp-includes/core/core.wp-formatting.php | 2009 ++++++++++++-----------
1 file changed, 1072 insertions(+), 937 deletions(-)
diff --git a/fp-includes/core/core.wp-formatting.php b/fp-includes/core/core.wp-formatting.php
index f4e9471..c6868d9 100644
--- a/fp-includes/core/core.wp-formatting.php
+++ b/fp-includes/core/core.wp-formatting.php
@@ -1,81 +1,98 @@
)/Us", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
- $stop = count($textarr); $skip = 0; // loop stuff
- for ($i = 0; $i < $stop; $i++) {
- $curl = $textarr[$i];
-
- if (isset($curl{0}) && '<' != $curl{0} && $skip == 0) { // If it's not a tag
- $curl = str_replace('---', '—', $curl);
- $curl = str_replace(' -- ', ' — ', $curl);
- $curl = str_replace('--', '–', $curl);
- $curl = str_replace('xn–', 'xn--', $curl);
- $curl = str_replace('...', '…', $curl);
- $curl = str_replace('``', '“', $curl);
-
-
- // This is a hack, look at this more later. It works pretty well though.
- $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause");
- $cockneyreplace = array("’tain’t","’twere","’twas","’tis","’twill","’til","’bout","’nuff","’round","’cause");
- $curl = str_replace($cockney, $cockneyreplace, $curl);
-
- $curl = preg_replace("/'s/", '’s', $curl);
- $curl = preg_replace("/'(\d\d(?:’|')?s)/", "’$1", $curl);
- $curl = preg_replace('/(\s|\A|")\'/', '$1‘', $curl);
- //$curl = preg_replace('/(\d+)"/', '$1″', $curl);
-
- $curl = preg_replace('/(\s|\A)("|")(?!\s)/', '$1“$3', $curl);
-
- $curl = preg_replace("/(\d+)'/", '$1′', $curl);
- $curl = preg_replace("/(\S)'([^'\s])/", "$1’$2", $curl);
- // $curl = preg_replace('/(\s|\A)"(?!\s)/', '$1“$2', $curl);
-
- $curl = preg_replace('/(\s|\A)("|")(?!\s)/', '$1“$3', $curl);
-
- $curl = preg_replace('/("|")(\s|\S|\Z)/', '”$2', $curl);
- $curl = preg_replace("/'([\s.]|\Z)/", '’$1', $curl);
- $curl = preg_replace("/ \(tm\)/i", ' ™', $curl);
- $curl = str_replace("''", '”', $curl);
-
- $curl = preg_replace('/(\d+)x(\d+)/', "$1×$2", $curl);
- } elseif (strstr($curl, '') || strstr($curl, '/>')) {
- if ($skip > 0) $skip--;
- } elseif (strstr($curl, ')/Us", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+ $stop = count($textarr);
+ $skip = 0; // loop stuff
+ for($i = 0; $i < $stop; $i++) {
+ $curl = $textarr [$i];
+
+ if (isset($curl {0}) && '<' != $curl {0} && $skip == 0) { // If it's not a tag
+ $curl = str_replace('---', '—', $curl);
+ $curl = str_replace(' -- ', ' — ', $curl);
+ $curl = str_replace('--', '–', $curl);
+ $curl = str_replace('xn–', 'xn--', $curl);
+ $curl = str_replace('...', '…', $curl);
+ $curl = str_replace('``', '“', $curl);
+
+ // This is a hack, look at this more later. It works pretty well though.
+ $cockney = array(
+ "'tain't",
+ "'twere",
+ "'twas",
+ "'tis",
+ "'twill",
+ "'til",
+ "'bout",
+ "'nuff",
+ "'round",
+ "'cause"
+ );
+ $cockneyreplace = array(
+ "’tain’t",
+ "’twere",
+ "’twas",
+ "’tis",
+ "’twill",
+ "’til",
+ "’bout",
+ "’nuff",
+ "’round",
+ "’cause"
+ );
+ $curl = str_replace($cockney, $cockneyreplace, $curl);
+
+ $curl = preg_replace("/'s/", '’s', $curl);
+ $curl = preg_replace("/'(\d\d(?:’|')?s)/", "’$1", $curl);
+ $curl = preg_replace('/(\s|\A|")\'/', '$1‘', $curl);
+ // $curl = preg_replace('/(\d+)"/', '$1″', $curl);
+
+ $curl = preg_replace('/(\s|\A)("|")(?!\s)/', '$1“$3', $curl);
+
+ $curl = preg_replace("/(\d+)'/", '$1′', $curl);
+ $curl = preg_replace("/(\S)'([^'\s])/", "$1’$2", $curl);
+ // $curl = preg_replace('/(\s|\A)"(?!\s)/', '$1“$2', $curl);
+
+ $curl = preg_replace('/(\s|\A)("|")(?!\s)/', '$1“$3', $curl);
+
+ $curl = preg_replace('/("|")(\s|\S|\Z)/', '”$2', $curl);
+ $curl = preg_replace("/'([\s.]|\Z)/", '’$1', $curl);
+ $curl = preg_replace("/ \(tm\)/i", ' ™', $curl);
+ $curl = str_replace("''", '”', $curl);
+
+ $curl = preg_replace('/(\d+)x(\d+)/', "$1×$2", $curl);
+ } elseif (strstr($curl, '') || strstr($curl, '/>')) {
+ if ($skip > 0)
+ $skip--;
+ } elseif (strstr($curl, ' 0)
$skip++;
- } else {
- if (isset($curl{0}) && $curl{0} == "<" && $skip > 0) $skip++;
- }
- $curl = preg_replace('/&([^#])(?![a-z12]{1,8};)/', '&$1', $curl);
- $output .= $curl;
}
- return $output;
+ $curl = preg_replace('/&([^#])(?![a-z12]{1,8};)/', '&$1', $curl);
+ $output .= $curl;
}
-
-
-
- function clean_pre($matches) {
- if ( is_array($matches) )
- $text = $matches[1] . $matches[2] . "";
- else
- $text = $matches;
+ return $output;
+}
- /* NWM: a bit hackish? where are the slashes for double quotes added? */
- $text = str_replace('\"', '"', $text);
- $text = str_replace(' ', '', $text);
- $text = str_replace('', "\n", $text);
- $text = str_replace('
', '', $text);
- return $text;
- }
+function clean_pre($matches) {
+ if (is_array($matches))
+ $text = $matches [1] . $matches [2] . "";
+ else
+ $text = $matches;
+ /* NWM: a bit hackish? where are the slashes for double quotes added? */
+ $text = str_replace('\"', '"', $text);
+ $text = str_replace(' ', '', $text);
+ $text = str_replace('', "\n", $text);
+ $text = str_replace('
', '', $text);
+ return $text;
+}
+
/**
* Replaces double line-breaks with paragraph elements.
*
@@ -85,14 +102,15 @@
* or 'false'.
*
* @since 0.71
- *
- * @param string $pee The text which has to be formatted.
- * @param int|bool $br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true.
+ *
+ * @param string $pee
+ * The text which has to be formatted.
+ * @param int|bool $br
+ * Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true.
* @return string Text which has been converted into correct paragraph tags.
*/
function wpautop($pee, $br = 1) {
-
- if ( trim($pee) === '' )
+ if (trim($pee) === '')
return '';
$pee = $pee . "\n"; // just to make things a little easier, pad the end
$pee = preg_replace('| \s* |', "\n\n", $pee);
@@ -100,16 +118,19 @@ function wpautop($pee, $br = 1) {
$allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|input|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)';
$pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee);
$pee = preg_replace('!(' . $allblocks . '>)!', "$1\n\n", $pee);
- $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines
- if ( strpos($pee, ']*)>\s*|', " ", $pee); // no pee inside object/embed
$pee = preg_replace('|\s*\s*|', '', $pee);
}
$pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates
- // make paragraphs, including one at the end
+ // make paragraphs, including one at the end
$pees = preg_split('/\n\s*\n/', $pee, -1, PREG_SPLIT_NO_EMPTY);
$pee = '';
- foreach ( $pees as $tinkle )
+ foreach ($pees as $tinkle)
$pee .= '' . trim($tinkle, "\n") . "
\n";
$pee = preg_replace('|\s*
|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace
$pee = preg_replace('!([^<]+)(div|address|form)>!', "
$1
$2>", $pee);
@@ -127,9 +148,9 @@ function wpautop($pee, $br = 1) {
$pee = preg_replace('!(?' . $allblocks . '[^>]*>)\s* !', "$1", $pee);
$pee = preg_replace('! (\s*?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $pee);
if (strpos($pee, ']*>)(.*?) !is', 'clean_pre', $pee );
- $pee = preg_replace( "|\n
$|", '', $pee );
-
+ $pee = preg_replace_callback('!(]*>)(.*?) !is', 'clean_pre', $pee);
+ $pee = preg_replace("|\n$|", '', $pee);
+
return $pee;
}
@@ -138,259 +159,380 @@ function wpautop($pee, $br = 1) {
*
* @since 3.1.0
* @access private
- * @param array $matches preg_replace_callback matches array
+ * @param array $matches
+ * preg_replace_callback matches array
* @returns string
*/
-function _autop_newline_preservation_helper( $matches ) {
- return str_replace("\n", " ", $matches[0]);
+function _autop_newline_preservation_helper($matches) {
+ return str_replace("\n", " ", $matches [0]);
}
-
- function seems_utf8($Str) { # by bmorel at ssi dot fr
- for ($i=0; $i', '>', $text);
+ if ($quotes) {
+ $text = str_replace('"', '"', $text);
+ $text = str_replace("'", ''', $text);
+ }
+
+ return $text;
+}
+
+function utf8_uri_encode($utf8_string) {
+ $unicode = '';
+ $values = array();
+ $num_octets = 1;
+
+ for($i = 0; $i < strlen($utf8_string); $i++) {
+
+ $value = ord($utf8_string [$i]);
+
+ if ($value < 128) {
+ $unicode .= chr($value);
+ } else {
+ if (count($values) == 0)
+ $num_octets = ($value < 224) ? 2 : 3;
+
+ $values [] = $value;
+
+ if (count($values) == $num_octets) {
+ if ($num_octets == 3) {
+ $unicode .= '%' . dechex($values [0]) . '%' . dechex($values [1]) . '%' . dechex($values [2]);
+ } else {
+ $unicode .= '%' . dechex($values [0]) . '%' . dechex($values [1]);
+ }
+
+ $values = array();
+ $num_octets = 1;
}
}
- return true;
}
+
+ return $unicode;
+}
- function fmt_escape_separator($text, $sep='|') {
-
- return str_replace( '|', '|', $text );
-
- }
-
- function fmt_unescape_separator($text, $sep='|') {
-
- return str_replace('|', '|', $text );
-
- }
-
- function wp_specialchars( $text, $quotes = 0 ) {
- // Like htmlspecialchars except don't double-encode HTML entities
-
-
- $text = preg_replace('/&([^#])(?![a-z12]{1,8};)/', '&$1', $text);
- $text = str_replace('<', '<', $text);
- $text = str_replace('>', '>', $text);
- if ( $quotes ) {
- $text = str_replace('"', '"', $text);
- $text = str_replace("'", ''', $text);
- }
-
- return $text;
- }
-
- function utf8_uri_encode( $utf8_string ) {
- $unicode = '';
- $values = array();
- $num_octets = 1;
-
- for ($i = 0; $i < strlen( $utf8_string ); $i++ ) {
-
- $value = ord( $utf8_string[ $i ] );
-
- if ( $value < 128 ) {
- $unicode .= chr($value);
- } else {
- if ( count( $values ) == 0 ) $num_octets = ( $value < 224 ) ? 2 : 3;
-
- $values[] = $value;
-
- if ( count( $values ) == $num_octets ) {
- if ($num_octets == 3) {
- $unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]) . '%' . dechex($values[2]);
- } else {
- $unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]);
- }
-
- $values = array();
- $num_octets = 1;
- }
- }
- }
-
- return $unicode;
- }
-
- function remove_accents($string) {
- if (seems_utf8($string)) {
- $chars = array(
+function remove_accents($string) {
+ if (seems_utf8($string)) {
+ $chars = array(
// Decompositions for Latin-1 Supplement
- chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
- chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
- chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
- chr(195).chr(135) => 'C', chr(195).chr(136) => 'E',
- chr(195).chr(137) => 'E', chr(195).chr(138) => 'E',
- chr(195).chr(139) => 'E', chr(195).chr(140) => 'I',
- chr(195).chr(141) => 'I', chr(195).chr(142) => 'I',
- chr(195).chr(143) => 'I', chr(195).chr(145) => 'N',
- chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
- chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
- chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
- chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
- chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
- chr(195).chr(159) => 's', chr(195).chr(160) => 'a',
- chr(195).chr(161) => 'a', chr(195).chr(162) => 'a',
- chr(195).chr(163) => 'a', chr(195).chr(164) => 'a',
- chr(195).chr(165) => 'a', chr(195).chr(167) => 'c',
- chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
- chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
- chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
- chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
- chr(195).chr(177) => 'n', chr(195).chr(178) => 'o',
- chr(195).chr(179) => 'o', chr(195).chr(180) => 'o',
- chr(195).chr(181) => 'o', chr(195).chr(182) => 'o',
- chr(195).chr(182) => 'o', chr(195).chr(185) => 'u',
- chr(195).chr(186) => 'u', chr(195).chr(187) => 'u',
- chr(195).chr(188) => 'u', chr(195).chr(189) => 'y',
- chr(195).chr(191) => 'y',
+ chr(195) . chr(128) => 'A',
+ chr(195) . chr(129) => 'A',
+ chr(195) . chr(130) => 'A',
+ chr(195) . chr(131) => 'A',
+ chr(195) . chr(132) => 'A',
+ chr(195) . chr(133) => 'A',
+ chr(195) . chr(135) => 'C',
+ chr(195) . chr(136) => 'E',
+ chr(195) . chr(137) => 'E',
+ chr(195) . chr(138) => 'E',
+ chr(195) . chr(139) => 'E',
+ chr(195) . chr(140) => 'I',
+ chr(195) . chr(141) => 'I',
+ chr(195) . chr(142) => 'I',
+ chr(195) . chr(143) => 'I',
+ chr(195) . chr(145) => 'N',
+ chr(195) . chr(146) => 'O',
+ chr(195) . chr(147) => 'O',
+ chr(195) . chr(148) => 'O',
+ chr(195) . chr(149) => 'O',
+ chr(195) . chr(150) => 'O',
+ chr(195) . chr(153) => 'U',
+ chr(195) . chr(154) => 'U',
+ chr(195) . chr(155) => 'U',
+ chr(195) . chr(156) => 'U',
+ chr(195) . chr(157) => 'Y',
+ chr(195) . chr(159) => 's',
+ chr(195) . chr(160) => 'a',
+ chr(195) . chr(161) => 'a',
+ chr(195) . chr(162) => 'a',
+ chr(195) . chr(163) => 'a',
+ chr(195) . chr(164) => 'a',
+ chr(195) . chr(165) => 'a',
+ chr(195) . chr(167) => 'c',
+ chr(195) . chr(168) => 'e',
+ chr(195) . chr(169) => 'e',
+ chr(195) . chr(170) => 'e',
+ chr(195) . chr(171) => 'e',
+ chr(195) . chr(172) => 'i',
+ chr(195) . chr(173) => 'i',
+ chr(195) . chr(174) => 'i',
+ chr(195) . chr(175) => 'i',
+ chr(195) . chr(177) => 'n',
+ chr(195) . chr(178) => 'o',
+ chr(195) . chr(179) => 'o',
+ chr(195) . chr(180) => 'o',
+ chr(195) . chr(181) => 'o',
+ chr(195) . chr(182) => 'o',
+ chr(195) . chr(182) => 'o',
+ chr(195) . chr(185) => 'u',
+ chr(195) . chr(186) => 'u',
+ chr(195) . chr(187) => 'u',
+ chr(195) . chr(188) => 'u',
+ chr(195) . chr(189) => 'y',
+ chr(195) . chr(191) => 'y',
// Decompositions for Latin Extended-A
- chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
- chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
- chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
- chr(196).chr(134) => 'C', chr(196).chr(134) => 'c',
- chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
- chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
- chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
- chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
- chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
- chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
- chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
- chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
- chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
- chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
- chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
- chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
- chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
- chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
- chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
- chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
- chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
- chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
- chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
- chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
- chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
- chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
- chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
- chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
- chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
- chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
- chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
- chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
- chr(197).chr(128) => 'l', chr(196).chr(129) => 'L',
- chr(197).chr(130) => 'l', chr(196).chr(131) => 'N',
- chr(197).chr(132) => 'n', chr(196).chr(133) => 'N',
- chr(197).chr(134) => 'n', chr(196).chr(135) => 'N',
- chr(197).chr(136) => 'n', chr(196).chr(137) => 'N',
- chr(197).chr(138) => 'n', chr(196).chr(139) => 'N',
- chr(197).chr(140) => 'O', chr(196).chr(141) => 'o',
- chr(197).chr(142) => 'O', chr(196).chr(143) => 'o',
- chr(197).chr(144) => 'O', chr(196).chr(145) => 'o',
- chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
- chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
- chr(197).chr(150) => 'R',chr(197).chr(151) => 'r',
- chr(197).chr(152) => 'R',chr(197).chr(153) => 'r',
- chr(197).chr(154) => 'S',chr(197).chr(155) => 's',
- chr(197).chr(156) => 'S',chr(197).chr(157) => 's',
- chr(197).chr(158) => 'S',chr(197).chr(159) => 's',
- chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
- chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
- chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
- chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
- chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
- chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
- chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
- chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
- chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
- chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
- chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
- chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
- chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
- chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
- chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
- chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
+ chr(196) . chr(128) => 'A',
+ chr(196) . chr(129) => 'a',
+ chr(196) . chr(130) => 'A',
+ chr(196) . chr(131) => 'a',
+ chr(196) . chr(132) => 'A',
+ chr(196) . chr(133) => 'a',
+ chr(196) . chr(134) => 'C',
+ chr(196) . chr(134) => 'c',
+ chr(196) . chr(136) => 'C',
+ chr(196) . chr(137) => 'c',
+ chr(196) . chr(138) => 'C',
+ chr(196) . chr(139) => 'c',
+ chr(196) . chr(140) => 'C',
+ chr(196) . chr(141) => 'c',
+ chr(196) . chr(142) => 'D',
+ chr(196) . chr(143) => 'd',
+ chr(196) . chr(144) => 'D',
+ chr(196) . chr(145) => 'd',
+ chr(196) . chr(146) => 'E',
+ chr(196) . chr(147) => 'e',
+ chr(196) . chr(148) => 'E',
+ chr(196) . chr(149) => 'e',
+ chr(196) . chr(150) => 'E',
+ chr(196) . chr(151) => 'e',
+ chr(196) . chr(152) => 'E',
+ chr(196) . chr(153) => 'e',
+ chr(196) . chr(154) => 'E',
+ chr(196) . chr(155) => 'e',
+ chr(196) . chr(156) => 'G',
+ chr(196) . chr(157) => 'g',
+ chr(196) . chr(158) => 'G',
+ chr(196) . chr(159) => 'g',
+ chr(196) . chr(160) => 'G',
+ chr(196) . chr(161) => 'g',
+ chr(196) . chr(162) => 'G',
+ chr(196) . chr(163) => 'g',
+ chr(196) . chr(164) => 'H',
+ chr(196) . chr(165) => 'h',
+ chr(196) . chr(166) => 'H',
+ chr(196) . chr(167) => 'h',
+ chr(196) . chr(168) => 'I',
+ chr(196) . chr(169) => 'i',
+ chr(196) . chr(170) => 'I',
+ chr(196) . chr(171) => 'i',
+ chr(196) . chr(172) => 'I',
+ chr(196) . chr(173) => 'i',
+ chr(196) . chr(174) => 'I',
+ chr(196) . chr(175) => 'i',
+ chr(196) . chr(176) => 'I',
+ chr(196) . chr(177) => 'i',
+ chr(196) . chr(178) => 'IJ',
+ chr(196) . chr(179) => 'ij',
+ chr(196) . chr(180) => 'J',
+ chr(196) . chr(181) => 'j',
+ chr(196) . chr(182) => 'K',
+ chr(196) . chr(183) => 'k',
+ chr(196) . chr(184) => 'k',
+ chr(196) . chr(185) => 'L',
+ chr(196) . chr(186) => 'l',
+ chr(196) . chr(187) => 'L',
+ chr(196) . chr(188) => 'l',
+ chr(196) . chr(189) => 'L',
+ chr(196) . chr(190) => 'l',
+ chr(196) . chr(191) => 'L',
+ chr(197) . chr(128) => 'l',
+ chr(196) . chr(129) => 'L',
+ chr(197) . chr(130) => 'l',
+ chr(196) . chr(131) => 'N',
+ chr(197) . chr(132) => 'n',
+ chr(196) . chr(133) => 'N',
+ chr(197) . chr(134) => 'n',
+ chr(196) . chr(135) => 'N',
+ chr(197) . chr(136) => 'n',
+ chr(196) . chr(137) => 'N',
+ chr(197) . chr(138) => 'n',
+ chr(196) . chr(139) => 'N',
+ chr(197) . chr(140) => 'O',
+ chr(196) . chr(141) => 'o',
+ chr(197) . chr(142) => 'O',
+ chr(196) . chr(143) => 'o',
+ chr(197) . chr(144) => 'O',
+ chr(196) . chr(145) => 'o',
+ chr(197) . chr(146) => 'OE',
+ chr(197) . chr(147) => 'oe',
+ chr(197) . chr(148) => 'R',
+ chr(197) . chr(149) => 'r',
+ chr(197) . chr(150) => 'R',
+ chr(197) . chr(151) => 'r',
+ chr(197) . chr(152) => 'R',
+ chr(197) . chr(153) => 'r',
+ chr(197) . chr(154) => 'S',
+ chr(197) . chr(155) => 's',
+ chr(197) . chr(156) => 'S',
+ chr(197) . chr(157) => 's',
+ chr(197) . chr(158) => 'S',
+ chr(197) . chr(159) => 's',
+ chr(197) . chr(160) => 'S',
+ chr(197) . chr(161) => 's',
+ chr(197) . chr(162) => 'T',
+ chr(197) . chr(163) => 't',
+ chr(197) . chr(164) => 'T',
+ chr(197) . chr(165) => 't',
+ chr(197) . chr(166) => 'T',
+ chr(197) . chr(167) => 't',
+ chr(197) . chr(168) => 'U',
+ chr(197) . chr(169) => 'u',
+ chr(197) . chr(170) => 'U',
+ chr(197) . chr(171) => 'u',
+ chr(197) . chr(172) => 'U',
+ chr(197) . chr(173) => 'u',
+ chr(197) . chr(174) => 'U',
+ chr(197) . chr(175) => 'u',
+ chr(197) . chr(176) => 'U',
+ chr(197) . chr(177) => 'u',
+ chr(197) . chr(178) => 'U',
+ chr(197) . chr(179) => 'u',
+ chr(197) . chr(180) => 'W',
+ chr(197) . chr(181) => 'w',
+ chr(197) . chr(182) => 'Y',
+ chr(197) . chr(183) => 'y',
+ chr(197) . chr(184) => 'Y',
+ chr(197) . chr(185) => 'Z',
+ chr(197) . chr(186) => 'z',
+ chr(197) . chr(187) => 'Z',
+ chr(197) . chr(188) => 'z',
+ chr(197) . chr(189) => 'Z',
+ chr(197) . chr(190) => 'z',
+ chr(197) . chr(191) => 's',
// Euro Sign
- chr(226).chr(130).chr(172) => 'E');
-
- $string = strtr($string, $chars);
- } else {
- // Assume ISO-8859-1 if not UTF-8
- $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
- .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
- .chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
- .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
- .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
- .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
- .chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235)
- .chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243)
- .chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251)
- .chr(252).chr(253).chr(255);
-
- $chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";
-
- $string = strtr($string, $chars['in'], $chars['out']);
- $double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254));
- $double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th');
- $string = str_replace($double_chars['in'], $double_chars['out'], $string);
- }
-
- return $string;
+ chr(226) . chr(130) . chr(172) => 'E'
+ );
+
+ $string = strtr($string, $chars);
+ } else {
+ // Assume ISO-8859-1 if not UTF-8
+ $chars ['in'] = chr(128) . chr(131) . chr(138) . chr(142) . chr(154) . chr(158) . chr(159) . chr(162) . chr(165) . chr(181) . chr(192) . chr(193) . chr(194) . chr(195) . chr(196) . chr(197) . chr(199) . chr(200) . chr(201) . chr(202) . chr(203) . chr(204) . chr(205) . chr(206) . chr(207) . chr(209) . chr(210) . chr(211) . chr(212) . chr(213) . chr(214) . chr(216) . chr(217) . chr(218) . chr(219) . chr(220) . chr(221) . chr(224) . chr(225) . chr(226) . chr(227) . chr(228) . chr(229) . chr(231) . chr(232) . chr(233) . chr(234) . chr(235) . chr(236) . chr(237) . chr(238) . chr(239) . chr(241) . chr(242) . chr(243) . chr(244) . chr(245) . chr(246) . chr(248) . chr(249) . chr(250) . chr(251) . chr(252) . chr(253) . chr(255);
+
+ $chars ['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";
+
+ $string = strtr($string, $chars ['in'], $chars ['out']);
+ $double_chars ['in'] = array(
+ chr(140),
+ chr(156),
+ chr(198),
+ chr(208),
+ chr(222),
+ chr(223),
+ chr(230),
+ chr(240),
+ chr(254)
+ );
+ $double_chars ['out'] = array(
+ 'OE',
+ 'oe',
+ 'AE',
+ 'DH',
+ 'TH',
+ 'ss',
+ 'ae',
+ 'dh',
+ 'th'
+ );
+ $string = str_replace($double_chars ['in'], $double_chars ['out'], $string);
}
- function sanitize_title($title, $fallback_title = '') {
- $title = strip_tags($title);
- $title = apply_filters('sanitize_title', $title);
+ return $string;
+}
+
+function sanitize_title($title, $fallback_title = '') {
+ $title = strip_tags($title);
- if (empty($title)) {
- $title = $fallback_title;
- }
+ $title = apply_filters('sanitize_title', $title);
- return $title;
+ if (empty($title)) {
+ $title = $fallback_title;
}
- function sanitize_title_with_dashes($title) {
- $title = strip_tags($title);
- // Preserve escaped octets.
- $title = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title);
- // Remove percent signs that are not part of an octet.
- $title = str_replace('%', '', $title);
- // Restore octets.
- $title = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title);
+ return $title;
+}
+
+function sanitize_title_with_dashes($title) {
+ $title = strip_tags($title);
- $title = remove_accents($title);
- if (seems_utf8($title)) {
- if (function_exists('mb_strtolower')) {
- $title = mb_strtolower($title, 'UTF-8');
- }
- $title = utf8_uri_encode($title);
+ if (seems_utf8($title)) {
+ if (function_exists('mb_strtolower')) {
+ $title = mb_strtolower($title, 'UTF-8');
}
-
- $title = strtolower($title);
- $title = preg_replace('/&.+?;/', '', $title); // kill entities
- $title = preg_replace('/[^%a-z0-9 _-]/', '', $title);
- $title = preg_replace('/\s+/', '-', $title);
- $title = preg_replace('|-+|', '-', $title);
- $title = trim($title, '-');
-
- return $title;
+ $title = utf8_uri_encode($title);
}
- function convert_chars($content, $flag = 'obsolete') {
- // Translation of invalid Unicode references range to valid range
- $wp_htmltranswinuni = array(
+ // Preserve escaped octets.
+ $title = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title);
+ // Remove percent signs that are not part of an octet.
+ $title = str_replace('%', '', $title);
+ // Restore octets.
+ $title = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title);
+ // and finally: Kill octets
+ $title = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '', $title);
+
+ // remove accents
+ $title = remove_accents($title);
+
+ // title is in lower case always
+ $title = strtolower($title);
+
+ // kill entities
+ $title = preg_replace('/&.+?;/', '', $title);
+
+ // kill special chars
+ $title = preg_replace('/[^%a-z0-9 _-]/', '', $title);
+
+ // replace spaces by dash
+ $title = preg_replace('/\s+/', '-', $title);
+ // Kill multiple dashes
+ $title = preg_replace('|-+|', '-', $title);
+ // kill dashes at beginning and end of string
+ $title = trim($title, '-');
+
+ return $title;
+}
+
+function convert_chars($content, $flag = 'obsolete') {
+ // Translation of invalid Unicode references range to valid range
+ $wp_htmltranswinuni = array(
'' => '€', // the Euro sign
'' => '',
'' => '‚', // these are Windows CP1252 specific characters
- '' => 'ƒ', // they would look weird on non-Windows browsers
+ '' => 'ƒ', // they would look weird on non-Windows browsers
'' => '„',
'
' => '…',
'' => '†',
@@ -419,660 +561,653 @@ function _autop_newline_preservation_helper( $matches ) {
'' => '',
'' => '',
'' => 'Ÿ'
- );
+ );
- // Remove metadata tags
- $content = preg_replace('/(.+?)<\/title>/','',$content);
- $content = preg_replace('/(.+?)<\/category>/','',$content);
+ // Remove metadata tags
+ $content = preg_replace('/(.+?)<\/title>/', '', $content);
+ $content = preg_replace('/(.+?)<\/category>/', '', $content);
- // Converts lone & characters into & (a.k.a. &)
- $content = preg_replace('/&([^#])(?![a-z]{1,8};)/i', '&$1', $content);
+ // Converts lone & characters into & (a.k.a. &)
+ $content = preg_replace('/&([^#])(?![a-z]{1,8};)/i', '&$1', $content);
- // Fix Word pasting
- $content = strtr($content, $wp_htmltranswinuni);
+ // Fix Word pasting
+ $content = strtr($content, $wp_htmltranswinuni);
- // Just a little XHTML help
- $content = str_replace(' ', ' ', $content);
- $content = str_replace(' ', ' ', $content);
+ // Just a little XHTML help
+ $content = str_replace(' ', ' ', $content);
+ $content = str_replace(' ', ' ', $content);
- return $content;
- }
+ return $content;
+}
+
+function funky_javascript_fix($text) {
+ // Fixes for browsers' javascript bugs
+ global $is_macIE, $is_winIE;
- function funky_javascript_fix($text) {
- // Fixes for browsers' javascript bugs
- global $is_macIE, $is_winIE;
-
- if ( $is_winIE || $is_macIE )
- $text = preg_replace("/\%u([0-9A-F]{4,4})/e", "''.base_convert('\\1',16,10).';'", $text);
-
- return $text;
- }
+ if ($is_winIE || $is_macIE)
+ $text = preg_replace("/\%u([0-9A-F]{4,4})/e", "''.base_convert('\\1',16,10).';'", $text);
+
+ return $text;
+}
+
+/*
+ * balanceTags
+ *
+ * Balances Tags of string using a modified stack.
+ *
+ * @param text Text to be balanced
+ * @return Returns balanced text
+ * @author Leonard Lin (leonard@acm.org)
+ * @version v1.1
+ * @date November 4, 2001
+ * @license GPL v2.0
+ * @notes
+ * @changelog
+ * --- Modified by Scott Reilly (coffee2code) 02 Aug 2004
+ * 1.2 ***TODO*** Make better - change loop condition to $text
+ * 1.1 Fixed handling of append/stack pop order of end text
+ * Added Cleaning Hooks
+ * 1.0 First Version
+ */
+function balanceTags($text, $is_comment = 0) {
/*
- balanceTags
-
- Balances Tags of string using a modified stack.
-
- @param text Text to be balanced
- @return Returns balanced text
- @author Leonard Lin (leonard@acm.org)
- @version v1.1
- @date November 4, 2001
- @license GPL v2.0
- @notes
- @changelog
- --- Modified by Scott Reilly (coffee2code) 02 Aug 2004
- 1.2 ***TODO*** Make better - change loop condition to $text
- 1.1 Fixed handling of append/stack pop order of end text
- Added Cleaning Hooks
- 1.0 First Version
- */
- function balanceTags($text, $is_comment = 0) {
-
- /*
- if (get_settings('use_balanceTags') == 0) {
- return $text;
- }
- */
-
- $tagstack = array(); $stacksize = 0; $tagqueue = ''; $newtext = '';
+ * if (get_settings('use_balanceTags') == 0) {
+ * return $text;
+ * }
+ */
+ $tagstack = array();
+ $stacksize = 0;
+ $tagqueue = '';
+ $newtext = '';
- # WP bug fix for comments - in case you REALLY meant to type '< !--'
- $text = str_replace('< !--', '< !--', $text);
- # WP bug fix for LOVE <3 (and other situations with '<' before a number)
- $text = preg_replace('#<([0-9]{1})#', '<$1', $text);
+ // WP bug fix for comments - in case you REALLY meant to type '< !--'
+ $text = str_replace('< !--', '< !--', $text);
+ // WP bug fix for LOVE <3 (and other situations with '<' before a number)
+ $text = preg_replace('#<([0-9]{1})#', '<$1', $text);
- while (preg_match("/<(\/?\w*)\s*([^>]*)>/",$text,$regex)) {
- $newtext .= $tagqueue;
-
- $i = strpos($text,$regex[0]);
- $l = strlen($regex[0]);
-
- // clear the shifter
- $tagqueue = '';
- // Pop or Push
- if ($regex[1][0] == "/") { // End Tag
- $tag = strtolower(substr($regex[1],1));
- // if too many closing tags
- if($stacksize <= 0) {
- $tag = '';
- //or close to be safe $tag = '/' . $tag;
- }
- // if stacktop value = tag close value then pop
- else if ($tagstack[$stacksize - 1] == $tag) { // found closing tag
- $tag = '' . $tag . '>'; // Close Tag
- // Pop
- array_pop ($tagstack);
- $stacksize--;
- } else { // closing tag not at top, search for it
- for ($j=$stacksize-1;$j>=0;$j--) {
- if ($tagstack[$j] == $tag) {
- // add tag to tagqueue
- for ($k=$stacksize-1;$k>=$j;$k--){
- $tagqueue .= '' . array_pop ($tagstack) . '>';
- $stacksize--;
- }
- break;
- }
- }
- $tag = '';
- }
- } else { // Begin Tag
- $tag = strtolower($regex[1]);
-
- // Tag Cleaning
-
- // If self-closing or '', don't do anything.
- if((substr($regex[2],-1) == '/') || ($tag == '')) {
- }
- // ElseIf it's a known single-entity tag but it doesn't close itself, do so
- elseif ($tag == 'br' || $tag == 'img' || $tag == 'hr' || $tag == 'input') {
- $regex[2] .= '/';
- } else { // Push the tag onto the stack
- // If the top of the stack is the same as the tag we want to push, close previous tag
- if (($stacksize > 0) && ($tag != 'div') && ($tagstack[$stacksize - 1] == $tag)) {
- $tagqueue = '' . array_pop ($tagstack) . '>';
- $stacksize--;
- }
- $stacksize = array_push ($tagstack, $tag);
- }
-
- // Attributes
- $attributes = $regex[2];
- if($attributes) {
- $attributes = ' '.$attributes;
- }
- $tag = '<'.$tag.$attributes.'>';
- //If already queuing a close tag, then put this tag on, too
- if ($tagqueue) {
- $tagqueue .= $tag;
- $tag = '';
- }
- }
- $newtext .= substr($text,0,$i) . $tag;
- $text = substr($text,$i+$l);
- }
-
- // Clear Tag Queue
+ while (preg_match("/<(\/?\w*)\s*([^>]*)>/", $text, $regex)) {
$newtext .= $tagqueue;
-
- // Add Remaining text
- $newtext .= $text;
-
- // Empty Stack
- while($x = array_pop($tagstack)) {
- $newtext .= '' . $x . '>'; // Add remaining tags to close
- }
-
- // WP fix for the bug with HTML comments
- $newtext = str_replace("< !--","