mirror of https://github.com/Wilfred/difftastic/
112 lines
3.2 KiB
Plaintext
112 lines
3.2 KiB
Plaintext
use encoding::utf8;
|
|
|
|
// Iteration state for walking a string one rune at a time. Create one with
// [iter] or [riter], then advance it with [next] or [prev].
export type iterator = struct {
	// UTF-8 decoder over the string being iterated.
	dec: utf8::decoder,
	// A rune handed back through [push] that [next] has not yet returned,
	// or void when nothing is pending.
	push: (rune | void),
};
|
|
|
|
// Creates a string iterator positioned at the beginning of src, with no
// pushed-back rune pending.
export fn iter(src: str) iterator = {
	return iterator {
		dec = utf8::decode(src),
		push = void,
	};
};
|
|
|
|
// Creates a string iterator positioned at the end of src, with no pushed-back
// rune pending. Use [prev] to walk the string backwards from there.
export fn riter(src: str) iterator = {
	// Move the decoder's offset to the end of the string before wrapping
	// it in the iterator.
	let decoder = utf8::decode(src);
	decoder.offs = len(src);
	return iterator {
		dec = decoder,
		push = void,
	};
};
|
|
|
|
// Returns the next rune from the iterator, or void once the end of the string
// has been reached. A rune supplied through [push] is returned first and is
// consumed by this call.
//
// Be aware that a single rune is not necessarily the smallest unit of written
// language in a Unicode string. If you use these runes to construct a new
// string, then reordering, editing, or omitting any of them without careful
// discretion may cause linguistic errors to arise. To avoid this, you may need
// to use [unicode::graphiter] instead.
export fn next(iter: *iterator) (rune | void) = {
	// A pushed-back rune takes priority over the underlying string.
	if (iter.push is rune) {
		const pending = iter.push as rune;
		iter.push = void;
		return pending;
	};

	const decoded = utf8::next(&iter.dec);
	return match (decoded) {
		void => void,
		r: rune => r,
		(utf8::more | utf8::invalid) =>
			abort("Invalid UTF-8 string (this should not happen)"),
	};
};
|
|
|
|
// Returns the rune preceding the iterator's current position, or void when the
// iterator is already at the start of the string. Aborts if a rune pushed with
// [push] has not yet been consumed by [next].
export fn prev(iter: *iterator) (rune | void) = {
	assert(iter.push is void);

	const decoded = utf8::prev(&iter.dec);
	return match (decoded) {
		void => void,
		r: rune => r,
		(utf8::more | utf8::invalid) =>
			abort("Invalid UTF-8 string (this should not happen)"),
	};
};
|
|
|
|
// Un-reads a rune: the next call to [next] returns r instead of decoding from
// the string. Only one rune may be pending at a time, and the next call made
// with this iterator *must* be [next]; every other function aborts the program
// until the pushed rune has been consumed. The underlying string is not
// modified, so later calls to functions like [prev] or [iter_str] behave as if
// push had never been called.
export fn push(iter: *iterator, r: rune) void = {
	// Refuse to overwrite a rune that is still waiting to be read.
	assert(!(iter.push is rune));
	iter.push = r;
};
|
|
|
|
// Returns the portion of the string that has not been iterated yet, from the
// next rune through to the end of the string. Aborts if a rune pushed with
// [push] has not yet been consumed by [next].
export fn iter_str(iter: *iterator) str = {
	assert(iter.push is void);

	// Everything from the decoder's current offset onwards is still unread.
	const rest = iter.dec.src[iter.dec.offs..];
	return fromutf8(rest);
};
|
|
|
|
@test fn iter() void = {
	// A fresh forward iterator has nothing before its starting position.
	let it = iter("こんにちは");
	assert(prev(&it) is void);

	// Each `as rune` aborts the test if the iterator ends early.
	const head = ['こ', 'ん'];
	for (let n = 0z; n < len(head); n += 1) {
		assert(next(&it) as rune == head[n]);
	};
	assert(iter_str(&it) == "にちは");

	// Stepping back one rune makes it the first rune read going forward.
	assert(prev(&it) as rune == 'ん');
	const tail = ['ん', 'に', 'ち', 'は'];
	for (let n = 0z; n < len(tail); n += 1) {
		assert(next(&it) as rune == tail[n]);
	};

	// Reading past the end keeps returning void.
	assert(next(&it) is void);
	assert(next(&it) is void);

	// A pushed rune is returned once and leaves the position untouched.
	push(&it, 'q');
	assert(next(&it) as rune == 'q');
	assert(prev(&it) as rune == 'は');

	// A reverse iterator starts at the end and walks backwards.
	it = riter("にちは");
	const reversed = ['は', 'ち', 'に'];
	for (let n = 0z; n < len(reversed); n += 1) {
		assert(prev(&it) as rune == reversed[n]);
	};
	assert(prev(&it) is void);
	assert(next(&it) as rune == 'に');
};
|