-
Notifications
You must be signed in to change notification settings - Fork 2
/
canonical-text-stream.dylan
301 lines (254 loc) · 10.3 KB
/
canonical-text-stream.dylan
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
module: canonical-text-stream
author: Dustin Voss
// TODO: Not sure, but I think rather than inheriting from <basic-wrapper-stream>,
// this should inherit from <wrapper-stream> and the inner stream should be wrapped
// in <basic-wrapper-stream> if necessary.
/**
Class: <canonical-text-stream>
------------------------------
A text stream that detabs, removes or replaces control characters, standardizes
line endings, and tracks row and column.
NOTE: 'inner-stream' will return a <replacing-stream>. That stream's inner stream
will be the 'inner-stream'.
Make keywords:
tabstop-size - Size of a tab stop. Defaults to 8.
end-of-line - Canonical end-of-line sequence. Defaults to "\n".
control-chars - Replacement for control characters (0-31 and 127), or #f to
leave control characters alone. Defaults to "".
*/
define class <canonical-text-stream> (<basic-wrapper-stream>, <positionable-stream>)
constant slot tabstop-size :: <integer> = 8, init-keyword: #"tabstop-size";
constant slot eol :: <string> = "\n", init-keyword: #"end-of-line";
constant slot control :: false-or(<string>) = "", init-keyword: #"control-chars";
slot line-positions = make(<stretchy-vector>);
slot tabstop-fillers :: <vector>;
slot unchecked-position :: <integer> = 0;
slot checked-final-eol? :: <boolean> = #f;
keyword inner-stream:, type: <positionable-stream>;
end class;
define method make (cts-class == <canonical-text-stream>, #rest keys,
#key inner-stream)
=> (object :: <object>)
// TODO: I shouldn't really do this. The inner-stream returned by <c-t-s>
// won't be what the user expects.
let replacing-stream = make(<replacing-stream>, inner-stream: inner-stream);
apply(next-method, cts-class, inner-stream:, replacing-stream, keys);
end method;
define method initialize (cts :: <canonical-text-stream>, #key) => ()
next-method();
assert(subtype?(cts.inner-stream.stream-element-type, <character>),
"Element type of inner stream must be <character>");
cts.line-positions[0] := 0;
cts.tabstop-fillers := make(<vector>, size: cts.tabstop-size);
for (i from 0 below cts.tabstop-size)
let filler-size = cts.tabstop-size - i;
cts.tabstop-fillers[i] := make(<string>, size: filler-size, fill: ' ');
end for;
end method;
define method read-element
(cts :: <canonical-text-stream>, #rest keys, #key on-end-of-stream)
=> (elem :: <object>)
check-elements-to-stream-position(cts);
apply(read-element, cts.inner-stream, keys);
end method;
define method peek
(cts :: <canonical-text-stream>, #rest keys, #key on-end-of-stream)
=> (elem :: <object>)
check-elements-to-stream-position(cts);
apply(peek, cts.inner-stream, keys);
end method;
define method write-element (cts :: <canonical-text-stream>, elem :: <object>)
=> ()
check-elements-to-stream-position(cts);
write-element(cts.inner-stream, elem);
end method;
define method stream-position-setter
(position :: <integer>, cts :: <canonical-text-stream>)
=> (position :: <integer>)
if (position >= cts.inner-stream.stream-size)
cts.inner-stream.stream-position := #"end";
else
cts.inner-stream.stream-position := position;
end if;
check-elements-to-stream-position(cts);
cts.inner-stream.stream-position := position
end method;
define method stream-position-setter
(position == #"start", cts :: <canonical-text-stream>)
=> (position :: <integer>)
cts.inner-stream.stream-position := #"start";
cts.inner-stream.stream-position
end method;
define method stream-position-setter
(position == #"end", cts :: <canonical-text-stream>)
=> (position :: <integer>)
cts.inner-stream.stream-position := #"end";
check-elements-to-stream-position(cts);
cts.inner-stream.stream-position := #"end";
cts.inner-stream.stream-position
end method;
define method adjust-stream-position
(cts :: <canonical-text-stream>, delta :: <integer>,
#key from :: one-of(#"current", #"start", #"end") = #"current")
=> (new-position :: <integer>)
adjust-stream-position(cts.inner-stream, delta, from: from);
check-elements-to-stream-position(cts);
end method;
define method stream-size (cts :: <canonical-text-stream>)
=> (sz :: <integer>)
let saved = cts.stream-position;
cts.stream-position := #"end";
let sz = next-method();
cts.stream-position := saved;
sz
end method;
define method stream-contents
(cts :: <canonical-text-stream>, #key clear-contents?)
=> (contents :: <sequence>)
let saved = cts.stream-position;
cts.stream-position := #"end";
let cont = next-method();
cts.stream-position := saved;
cont
end method;
define method stream-contents-as
(type :: <type>, cts :: <canonical-text-stream>, #key clear-contents?)
=> (contents :: <sequence>)
let saved = cts.stream-position;
cts.stream-position := #"end";
let cont = next-method();
cts.stream-position := saved;
cont
end method;
/// Synopsis: Returns current line and column position.
/// Arguments:
/// cts - An instance of <canonical-text-stream>.
/// at: - An instance of false or <integer>. The line and column of the
/// current (if false) or given (if <integer>) stream position will
/// be returned. Defaults to #f.
/// Values:
/// line - An instance of <integer>. Line number, starting with 1.
/// col - An instance of <integer>. Column number, starting with 1.
define method line-col-position
(cts :: <canonical-text-stream>, #key at :: false-or(<integer>) = #f)
=> (line :: <integer>, col :: <integer>)
local method current-line-col-position (cts :: <canonical-text-stream>)
=> (line :: <integer>, col :: <integer>)
let pos = check-elements-to-stream-position(cts);
let line = find-last-key(cts.line-positions, rcurry(\<=, pos));
values(line + 1, pos - cts.line-positions[line] + 1);
end method;
if (at)
let saved-pos = cts.stream-position;
block ()
cts.stream-position := at;
cts.current-line-col-position;
cleanup
cts.stream-position := saved-pos;
end block;
else
cts.current-line-col-position;
end if;
end method;
define function check-elements-to-stream-position (cts :: <canonical-text-stream>)
=> (new-pos :: <integer>)
let inner = cts.inner-stream;
let position = inner.stream-position;
when (position >= cts.unchecked-position)
// We make canonical replacements of \t, \r, \n and control characters up to
// and including current stream position. If current stream position is also
// eos, we ensure the stream is terminated by eol. In scanning the stream, we
// must change the stream position. We save the original stream position,
// scan the stream but do not actually perform any replacements, restore the
// original stream position, and then do all the replacements at once. The
// replacing automatically adjusts the stream position, saving us from having
// to figure out the new position ourselves.
let replacements = make(<stretchy-vector>);
local method add-replacement (start-pos, end-pos, str, orig)
let new-rep = make(<vector>, size: 4);
new-rep[0] := start-pos;
new-rep[1] := end-pos;
new-rep[2] := str;
new-rep[3] := (str ~= orig); // #t if replacement actually needed
replacements := add!(replacements, new-rep);
end method;
let cur-col = cts.unchecked-position - cts.line-positions.last;
let cur-pos = cts.unchecked-position;
inner.stream-position := cts.unchecked-position;
// Plan replacements.
while (cur-pos <= position & ~inner.stream-at-end?)
let elem = read-element(inner);
case
elem = '\t' =>
let filler = cts.tabstop-fillers[modulo(cur-col, cts.tabstop-size)];
add-replacement(cur-pos, cur-pos + 1, filler, #f);
cur-pos := cur-pos + 1;
cur-col := cur-col + filler.size;
elem = '\n' =>
add-replacement(cur-pos, cur-pos + 1, cts.eol, "\n");
cur-pos := cur-pos + 1;
cur-col := 0;
elem = '\r' =>
let new-pos = cur-pos + 1;
let orig-eol =
if (peek(inner, on-end-of-stream: #f) == '\n')
read-element(inner);
new-pos := new-pos + 1;
"\r\n";
else
"\r";
end if;
add-replacement(cur-pos, new-pos, cts.eol, orig-eol);
cur-pos := new-pos;
cur-col := 0;
cts.control & (elem < '\<20>' | elem = '\<7F>') =>
add-replacement(cur-pos, cur-pos + 1, cts.control,
make(<string>, size: 1, fill: elem));
cur-pos := cur-pos + 1;
cur-col := cur-col + cts.control.size;
otherwise =>
cur-pos := cur-pos + 1;
cur-col := cur-col + 1;
end case;
end while;
// If we scanned to end of stream, ensure that it has a preceding eol.
// We cannot reasonably check the last few bytes of the stream, and we
// cannot rely on unchecked-position because eos always qualifies as a
// position to check and thus eol would always be added, so we just use
// a flag to indicate we already checked it and it is good.
when (~cts.checked-final-eol? & inner.stream-at-end?)
let needs-eol = replacements.empty? |
~(replacements.last[2] == cts.eol &
replacements.last[1] == inner.stream-limit);
when (needs-eol)
add-replacement(inner.stream-limit, inner.stream-limit, cts.eol, #f);
end when;
cts.checked-final-eol? := #t;
end when;
// Do replacements in a batch. Update line positions and track cumulative
// offsets (so we can adjust replacement start positions) as we go.
inner.stream-position := position;
let offset = 0;
for (rep in replacements)
let start-pos = rep[0] + offset;
let end-pos = rep[1] + offset;
let rep-str = rep[2];
let rep-end =
if (rep[3])
let (new-start, new-end) = add-replacement-contents
(inner, rep-str, start: start-pos, end: end-pos);
offset := offset + (rep-str.size - (end-pos - start-pos));
new-end
else
end-pos
end if;
cts.unchecked-position := rep-end;
// Was it a new line?
if (rep-str == cts.eol)
cts.line-positions := add!(cts.line-positions, rep-end);
end if;
end for;
position := inner.stream-position;
end when;
position
end function;