forked from kannera/TermBank
-
Notifications
You must be signed in to change notification settings - Fork 0
/
TemplateParser.php
190 lines (167 loc) · 4.92 KB
/
TemplateParser.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
<?php
/**
* Semantic pages consist mostly of templates. The semantic data is stored in
* those templates inside the wiki pages, so updating the data means modifying the pages.
*
* @author Niklas Laxstrom
* @copyright Copyright © 2012, Niklas Laxström
* @license Public domain
* @file
*/
/**
* With this class you can parse a page containing semantic templates into
* structured array based data, compare and change some fields and regenerate
* the page text otherwise unchanged.
*/
class TemplateParser {
	/**
	 * Holds the original page text.
	 * @var String
	 */
	protected $text;

	/**
	 * Holds the page text with templates replaced by placeholders.
	 * Only populated after extractTemplates() has been called.
	 * @var String
	 */
	protected $layoutTemplate;

	/**
	 * Holds the parsed templates.
	 * Array keys are the placeholder strings.
	 * Each item is an array, which contains:
	 * - text: the original template text
	 * - name: the name of the template
	 * - params: associative array of parameter keys and values
	 *
	 * @var Array
	 */
	protected $holders;

	/**
	 * Instances are created through newFromText() or newFromTitle().
	 * @param String $text Raw page text
	 */
	protected function __construct( $text ) {
		$this->text = $text;
	}

	/**
	 * Construct a new instance from text.
	 *
	 * Static, because the constructor is protected and there is no other
	 * way to obtain the first instance.
	 *
	 * @param String $text
	 * @return TemplateParser
	 */
	public static function newFromText( $text ) {
		return new self( $text );
	}

	/**
	 * Construct a new instance from a title, using the raw text of the page.
	 * @param Title $title
	 * @return TemplateParser
	 */
	public static function newFromTitle( Title $title ) {
		$page = new WikiPage( $title );
		$text = $page->getText( Revision::RAW );
		return new self( $text );
	}

	/**
	 * Returns a random string that can be used as a placeholder.
	 * A per-request counter is appended to the random part.
	 * @return String
	 */
	protected static function placeholder() {
		static $i = 0;
		return "\x7fUNIQ" . dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ) . '|' . $i++;
	}

	/**
	 * Replaces part of $string between $start and $end with $rep.
	 * @param String $string Full text
	 * @param String $rep Replacement text
	 * @param Int $start Start index (byte offset) of text to replace
	 * @param Int $end End index (byte offset, exclusive) of text to replace
	 * @return String
	 */
	protected static function index_replace( $string, $rep, $start, $end ) {
		return substr( $string, 0, $start ) . $rep . substr( $string, $end );
	}

	/**
	 * Parses the page.
	 *
	 * Every block that starts with "{{" at the beginning of a line and ends
	 * with "}}" alone on a line is replaced by a placeholder in the layout
	 * and parsed with parseTemplate().
	 *
	 * @return Array Map of placeholder => parsed template
	 * @throws RuntimeException If the regular expression engine fails
	 */
	public function extractTemplates() {
		$re = '~^{{.*\n}}$~smU';
		$copy = $this->text;
		$holders = array();
		$offset = 0;

		while ( true ) {
			$matches = array();
			$ok = preg_match( $re, $copy, $matches, PREG_OFFSET_CAPTURE, $offset );

			if ( $ok === false ) {
				// Without this check a PCRE failure would loop forever,
				// prepending placeholders to the text on every iteration.
				throw new RuntimeException(
					'Template extraction failed, PCRE error code ' . preg_last_error()
				);
			}

			if ( $ok === 0 ) {
				break; // No more templates
			}

			// Swap the whole template for a placeholder
			$content = $matches[0][0];
			$start = $matches[0][1];
			$end = $start + strlen( $content );
			$ph = self::placeholder();

			$copy = self::index_replace( $copy, $ph, $start, $end );
			$holders[$ph] = self::parseTemplate( $content );

			// Nothing before the placeholder can match anymore, so continue
			// right after it instead of rescanning from the beginning.
			$offset = $start + strlen( $ph );
		}

		$this->layoutTemplate = $copy;
		$this->holders = $holders;

		return $holders;
	}

	/**
	 * Uses the holders returned by extractTemplates to reconstruct
	 * the page text. If you change param fields, unset the text field.
	 * To remove the template, set text field to empty string.
	 * To add new template, append new placeholder to the array with
	 * numerical index (like $holders[] = ...).
	 *
	 * extractTemplates() must have been called first, because the layout
	 * it produces is the basis for the reconstruction.
	 *
	 * @param Array $holders
	 * @return String|null The new page text, or null if it is identical to
	 *  the original text.
	 */
	public function updateText( Array $holders ) {
		$copy = $this->layoutTemplate;

		foreach ( $holders as $placeholder => $template ) {
			$templateString = self::formatTemplate( $template );

			// Integer keys mark newly added templates: append to the end
			if ( is_int( $placeholder ) ) {
				$copy .= "\n" . $templateString;
				continue;
			}

			if ( $templateString === '' ) {
				// Avoid build-up of whitespace when removing templates:
				// take the following newline away together with the
				// placeholder. The plain replacement below still handles a
				// placeholder that is the very last thing in the text.
				$copy = str_replace( $placeholder . "\n", '', $copy );
			}

			$copy = str_replace( $placeholder, $templateString, $copy );
		}

		if ( $copy !== $this->text ) {
			return $copy;
		}

		// Nothing changed
		return null;
	}

	/**
	 * Given one template as string, parses it to easily modifiable format.
	 *
	 * The template name is everything between "{{" and the first newline.
	 * Each parameter starts on its own line with "|key=value"; a value
	 * continues until the next line that starts with "|".
	 *
	 * @param String $text Template text including the surrounding braces
	 * @return Array With keys text, name and params. Params is always an
	 *  array, empty if the template has no parameters.
	 */
	protected static function parseTemplate( $text ) {
		$name = '';
		$paramtext = '';

		$m = array();
		if ( preg_match( '~^{{(.*)\n(.*)}}$~sU', $text, $m ) ) {
			$name = $m[1];
			$paramtext = $m[2];
		}

		// Must be initialised: templates without parameters never enter
		// the loop below.
		$params = array();

		$p = array();
		preg_match_all( '~^\|(.*)=(.*)((?=\n\|)|\Z)~smU', $paramtext, $p, PREG_SET_ORDER );
		foreach ( $p as $match ) {
			$params[$match[1]] = $match[2];
		}

		return array(
			'text' => $text,
			'name' => $name,
			'params' => $params,
		);
	}

	/**
	 * Same as parseTemplate but in reverse.
	 * @param Array $ph Parsed template, see parseTemplate()
	 * @return String
	 */
	protected static function formatTemplate( $ph ) {
		// Shortcut, if template was not modified, just return the original text
		if ( isset( $ph['text'] ) ) {
			return $ph['text'];
		}

		$name = isset( $ph['name'] ) ? $ph['name'] : '';
		$fields = isset( $ph['params'] ) ? $ph['params'] : array();

		$params = '';
		foreach ( $fields as $key => $value ) {
			// If no params, the new line never gets added and we get {{daa}}
			if ( $params === '' ) {
				$params = "\n";
			}
			$params .= "|$key=$value\n";
		}

		return '{{' . $name . $params . '}}';
	}
}
/*$a = new TemplateParser( file_get_contents( 'mallikäsite.wiki' ) );
$holders = $a->extractTemplates();
echo $a->updateText( $holders );*/