-
Notifications
You must be signed in to change notification settings - Fork 1
/
Unescaper.php
133 lines (124 loc) · 3.6 KB
/
Unescaper.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
<?php
namespace Frosit\Component\Yar;
use Frosit\Component\Yar\Exception\ParseException;
/**
* Unescaper encapsulates unescaping rules for single and double-quoted
* YAML strings.
*
* @author Matthew Lewinski <[email protected]>
*
* @internal
*/
class Unescaper
{
/**
* Regex fragment that matches an escaped character in a double quoted string.
*/
const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';
/**
* Unescapes a single quoted string.
*
* @param string $value A single quoted string
*
* @return string The unescaped string
*/
public function unescapeSingleQuotedString($value)
{
return str_replace('\'\'', '\'', $value);
}
/**
* Unescapes a double quoted string.
*
* @param string $value A double quoted string
*
* @return string The unescaped string
*/
public function unescapeDoubleQuotedString($value)
{
$callback = function ($match) {
return $this->unescapeCharacter($match[0]);
};
// evaluate the string
return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
}
/**
* Unescapes a character that was found in a double-quoted string.
*
* @param string $value An escaped character
*
* @return string The unescaped character
*/
private function unescapeCharacter($value)
{
switch ($value[1]) {
case '0':
return "\x0";
case 'a':
return "\x7";
case 'b':
return "\x8";
case 't':
return "\t";
case "\t":
return "\t";
case 'n':
return "\n";
case 'v':
return "\xB";
case 'f':
return "\xC";
case 'r':
return "\r";
case 'e':
return "\x1B";
case ' ':
return ' ';
case '"':
return '"';
case '/':
return '/';
case '\\':
return '\\';
case 'N':
// U+0085 NEXT LINE
return "\xC2\x85";
case '_':
// U+00A0 NO-BREAK SPACE
return "\xC2\xA0";
case 'L':
// U+2028 LINE SEPARATOR
return "\xE2\x80\xA8";
case 'P':
// U+2029 PARAGRAPH SEPARATOR
return "\xE2\x80\xA9";
case 'x':
return self::utf8chr(hexdec(substr($value, 2, 2)));
case 'u':
return self::utf8chr(hexdec(substr($value, 2, 4)));
case 'U':
return self::utf8chr(hexdec(substr($value, 2, 8)));
default:
throw new ParseException(sprintf('Found unknown escape character "%s".', $value));
}
}
/**
* Get the UTF-8 character for the given code point.
*
* @param int $c The unicode code point
*
* @return string The corresponding UTF-8 character
*/
private static function utf8chr($c)
{
if (0x80 > $c %= 0x200000) {
return chr($c);
}
if (0x800 > $c) {
return chr(0xC0 | $c >> 6).chr(0x80 | $c & 0x3F);
}
if (0x10000 > $c) {
return chr(0xE0 | $c >> 12).chr(0x80 | $c >> 6 & 0x3F).chr(0x80 | $c & 0x3F);
}
return chr(0xF0 | $c >> 18).chr(0x80 | $c >> 12 & 0x3F).chr(0x80 | $c >> 6 & 0x3F).chr(0x80 | $c & 0x3F);
}
}