From aa008542eff581c8a0754761efeec0357495ff92 Mon Sep 17 00:00:00 2001
From: Joao Inacio
Date: Tue, 26 Jul 2016 15:52:11 +0100
Subject: [PATCH] EZP-26058: update script for non-breaking spaces in ezxmltext

---
 Review note (text in this section is ignored by "git am"): the blob id on the
 "index" line below predates the review changes and is stale.

 update/common/scripts/updatenbxmlcontents.php | 184 ++++++++++++++++++
 1 file changed, 184 insertions(+)
 create mode 100644 update/common/scripts/updatenbxmlcontents.php

diff --git a/update/common/scripts/updatenbxmlcontents.php b/update/common/scripts/updatenbxmlcontents.php
new file mode 100644
index 00000000000..603a3eefee1
--- /dev/null
+++ b/update/common/scripts/updatenbxmlcontents.php
@@ -0,0 +1,184 @@
+<?php
+/**
+ * Update script for EZP-26058: converts the "&nbsp;" entity stored in ezxmltext
+ * attributes into a real UTF-8 non-breaking space (bytes 0xC2 0xA0).
+ *
+ * Matching attributes are processed in batches, one transaction per batch.
+ *
+ * NOTE(review): the start of this file (everything before the 'description'
+ * value) could not be recovered and was reconstructed from the way $cli and
+ * $script are used below. Confirm the bootstrap lines and restore the
+ * project's standard license header.
+ */
+
+require 'autoload.php';
+
+$cli = eZCLI::instance();
+$script = eZScript::instance(
+    array(
+        'description' => "Updates non-break space encoding in ezxml contents. See issue EZP-18220\n",
+        'use-session' => true,
+        'use-modules' => false,
+        'use-extensions' => true
+    )
+);
+$script->startup();
+
+$options = $script->getOptions(
+    "[dry-run][n][v][iteration-sleep:][iteration-limit:]",
+    "",
+    array(
+        'dry-run' => 'Dry run',
+        'iteration-sleep' => 'Sleep duration between batches, in seconds (default: 1)',
+        'iteration-limit' => 'Batch size (default: 100)',
+        'n' => 'Do not wait 30 seconds before starting',
+    )
+);
+$optDryRun = (bool)$options['dry-run'];
+$optIterationSleep = $options['iteration-sleep'] ? max( 0, (int)$options['iteration-sleep'] ) : 1;
+$optIterationLimit = $options['iteration-limit'] ? (int)$options['iteration-limit'] : 100;
+if ( $optIterationLimit < 1 )
+{
+    // a non-numeric or non-positive batch size would make the batch loop meaningless
+    $optIterationLimit = 100;
+}
+$verboseLevel = $script->verboseOutputLevel();
+
+// Every batch is read from offset 0: the main loop moves forward by filtering
+// on the last (id, version) it has handled, not by increasing the offset.
+$limit = array(
+    "offset" => 0,
+    "limit" => $optIterationLimit,
+);
+
+$script->initialize();
+$db = eZDB::instance();
+
+if ( $optDryRun )
+{
+    $cli->warning( "dry-run mode" );
+}
+
+/**
+ * Updates non-breaking spaces from existing "&nbsp;" to proper "\xC2\xA0"
+ * inside the paragraph and header elements of one ezxmltext attribute row.
+ *
+ * Rows in which nothing is replaced are left untouched, as are rows for which
+ * the regular expression engine reports an error.
+ *
+ * @param array $attribute Row with the keys 'id', 'contentobject_id', 'version' and 'data_text'
+ * @param bool $optDryRun When true, nothing is written to the database
+ * @param mixed $verbose When truthy, one line is printed per updated row
+ */
+function updateEzxmlNonbreakSpaces( $attribute, $optDryRun, $verbose )
+{
+    $id = (int)$attribute['id'];
+    $contentId = (int)$attribute['contentobject_id'];
+    $version = (int)$attribute['version'];
+    $originalXml = $attribute['data_text'];
+
+    $matchTags = implode( '|', array( 'paragraph', 'header' ) );
+    // The named group "tag" (group 2) captures the element name so that the
+    // closing tag must match it through the (?P=tag) back-reference.
+    $pattern = '/(<(?P<tag>' . $matchTags . ')[^>]*\>)(.*)&nbsp;(.*)(<\/(?P=tag)>)/';
+    $replace = "\\1\\3\xC2\xA0\\4\\5";
+
+    // Both ".*" are greedy, so one pass replaces a single entity per match:
+    // repeat until a pass replaces nothing.
+    $xmlData = $originalXml;
+    do
+    {
+        $xmlData = preg_replace( $pattern, $replace, $xmlData, -1, $countReplaced );
+    } while ( $xmlData !== null && $countReplaced > 0 );
+
+    if ( $xmlData === null )
+    {
+        // preg_replace() returns null on error; never store that as content
+        eZCLI::instance()->warning(
+            "Skipping content #$contentId (ver. $version): PCRE error code " . preg_last_error()
+        );
+        return;
+    }
+    if ( $xmlData === $originalXml )
+    {
+        // "&nbsp;" only occurs outside of the handled elements
+        return;
+    }
+
+    if ( $verbose )
+    {
+        eZCLI::instance()->output( "Updating data for content #$contentId (ver. $version) ..." );
+    }
+    if ( !$optDryRun )
+    {
+        // Attribute ids are shared by all versions of an object, so the row has
+        // to be addressed by id AND version. The XML is escaped because it may
+        // contain quotes.
+        $db = eZDB::instance();
+        $db->query(
+            "UPDATE ezcontentobject_attribute " .
+            "SET data_text='" . $db->escapeString( $xmlData ) . "' " .
+            "WHERE id=$id AND version=$version"
+        );
+    }
+}
+
+if ( !$options['n'] )
+{
+    $cli->output();
+    $cli->warning( "You have 30 seconds to break the script before actual processing starts (press Ctrl-C)." );
+    $cli->warning( "Execute the script with '-n' switch to skip this delay." );
+    sleep( 30 );
+}
+
+$attributeCount = $db->arrayQuery(
+    "SELECT count(id) as count " .
+    "FROM ezcontentobject_attribute attr " .
+    "WHERE data_type_string='ezxmltext' AND data_text LIKE '%&nbsp;%' "
+);
+$attributeCount = $attributeCount[0]['count'];
+
+$cli->output( "Number of xml attributes to update: " . $attributeCount );
+
+// main loop
+// Updated rows stop matching the LIKE filter, so paging with a growing offset
+// would skip rows. Instead, each batch starts after the last (id, version)
+// handled, which also terminates when some rows can never be converted.
+$lastId = 0;
+$lastVersion = 0;
+do
+{
+    $rows = $db->arrayQuery(
+        "SELECT id, contentobject_id, version, data_text " .
+        "FROM ezcontentobject_attribute attr " .
+        "WHERE data_type_string='ezxmltext' AND data_text LIKE '%&nbsp;%' " .
+        "AND ( id > $lastId OR ( id = $lastId AND version > $lastVersion ) ) " .
+        "ORDER BY id ASC, version ASC",
+        $limit
+    );
+    if ( !is_array( $rows ) )
+    {
+        $rows = array();
+    }
+
+    $db->begin();
+    foreach ( $rows as $attribute )
+    {
+        updateEzxmlNonbreakSpaces( $attribute, $optDryRun, $verboseLevel );
+        $lastId = (int)$attribute['id'];
+        $lastVersion = (int)$attribute['version'];
+    }
+    $db->commit();
+
+    $cli->output( "." );
+
+    $hasMoreRows = count( $rows ) == $optIterationLimit;
+    if ( $hasMoreRows )
+    {
+        sleep( $optIterationSleep );
+    }
+} while ( $hasMoreRows );
+
+$cli->output( "Update has been completed." );
+
+$script->shutdown();