mirror of https://github.com/Wilfred/difftastic/
Merge commit '7dc4fb60390218b09bc351062eeede7dcdbb4d9f'
commit
4e77f83dd9
@ -1,2 +1,2 @@
|
||||
win_* text eol=crlf
|
||||
win_* eol=crlf
|
||||
* eol=lf
|
||||
|
||||
@ -0,0 +1 @@
|
||||
*.txt
|
||||
@ -0,0 +1,34 @@
|
||||
# fuzz test
|
||||
|
||||
This directory contains scripts that fuzz-test the implementation to make sure that a symbol is never parsed as a number, or vice versa.
|
||||
|
||||
## Resource
|
||||
|
||||
* ~6 minutes to run
|
||||
* ~300M generated files
|
||||
|
||||
## Run
|
||||
|
||||
```shell
|
||||
$ cd fuzztest
|
||||
# 30s
|
||||
$ racket gen_cases.rkt
|
||||
cpu time: 26531 real time: 26835 gc time: 1069
|
||||
5114173 cases generated
|
||||
$ cd ..
|
||||
$ tree-sitter generate
|
||||
# 3 minutes
|
||||
$ tree-sitter parse fuzztest/case.txt > fuzztest/res1.txt
|
||||
# 2 minutes
|
||||
$ cd fuzztest && racket postprocess.rkt
|
||||
# should show nothing
|
||||
$ sdiff -s <(cat -n expect.txt) <(cat -n res.txt)
|
||||
|
||||
# If there is some error, run
|
||||
$ sdiff -s <(cat -n expect.txt) <(cat -n res.txt) | less
|
||||
# then print the first failing case, where `N` is its line number
|
||||
$ cat case.txt | sed -n 'Np'
|
||||
```
|
||||
|
||||
You can edit `gen_cases.rkt` to generate fewer cases during development.
|
||||
|
||||
@ -0,0 +1,57 @@
|
||||
#lang racket
|
||||
|
||||
(require racket/extflonum)
|
||||
|
||||
;; All characters that can appear in a valid number/extflonum.
;; Some insignificant parts are removed to improve performance.
;; (define alphabet-char "abdefilnost")
(define alphabet-char "abdefilnostx")
(define special-char "#./@+-")
;; (define numeric-char "0123456789")
(define numeric-char "0179")

;; The full alphabet every generated case is drawn from.
(define all-char
  (string-append alphabet-char special-char numeric-char))

;; Number of cases written so far.
(define cnt 0)
;; Maximum length (in characters) of a generated case.
(define max-len 5)

;; case.txt receives one generated input per line.
(define case-port (open-output-file "case.txt" #:exists 'replace))
;; expect.txt receives the expected kind ("symbol"/"number") of the input
;; written to case.txt at the same moment.
(define expect-port (open-output-file "expect.txt" #:exists 'replace))
|
||||
|
||||
;; Enumerate every string over `all-char` with 1..max-len characters and
;; record the ones that the Racket reader accepts as exactly one datum.
;;
;;   i     : number of characters accumulated so far
;;   chars : the accumulated characters, as a list
;;
;; For each accepted string, the text is written to `case-port`, the
;; expected kind ("symbol" or "number") to `expect-port`, and `cnt` is
;; incremented.
(define (gen i chars)
  (with-handlers ([exn:fail? (lambda _ (void))])
    (when (positive? i)
      (define text (list->string chars))
      ;; ".0@.0" should be a number according to the documentation,
      ;; but the reader actually produces a symbol.
      ;; It's a bug in the Racket reader that will be fixed in a new
      ;; Racket release, so these cases are skipped.
      (unless (string-contains? text "@.")
        ;; a reader failure just means the text is not a usable case
        (with-handlers ([exn:fail? void])
          (with-input-from-string text
            (lambda ()
              (define datum (read))
              (define extra (read))
              ;; only texts that consist of exactly one datum are recorded
              (when (eof-object? extra)
                (define kind
                  (cond [(symbol? datum) "symbol"]
                        [(number? datum) "number"]
                        ;; kept as its own clause for a possible future
                        ;; change that splits extflonum from number
                        [(extflonum? datum) "number"]
                        [else #f]))
                (when kind
                  (set! cnt (add1 cnt))
                  (displayln text case-port)
                  (displayln kind expect-port))))))))
    ;; extend the current prefix with every character of the alphabet
    (when (< i max-len)
      (for ([c (in-string all-char)])
        (gen (add1 i) (cons c chars))))))
|
||||
|
||||
;; Generate all cases, reporting how long the generation took.
(time (gen 0 '()))
;; Close both files explicitly so that every buffered line is written out
;; as soon as generation is done instead of relying on flush-at-exit.
(close-output-port case-port)
(close-output-port expect-port)
(displayln (format "~a cases generated" cnt))
|
||||
@ -0,0 +1,26 @@
|
||||
#lang racket
|
||||
|
||||
;; Read the single S-expression stored in res1.txt.
;; The first four data of that list are dropped (presumably the root node's
;; name and its position range -- confirm against the `tree-sitter parse`
;; output format); every remaining element describes one node.
(define port (open-input-file "res1.txt"))
(define all-result (drop (read port) 4))
;; the whole file has been read, so the port is no longer needed
(close-input-port port)

;; One `(name . row)` pair per node: `car` is the node's first datum and
;; `caadr` the first element of its second datum (presumably the start row
;; of the node -- confirm against the `tree-sitter parse` output format).
(define all-result-line
  (for/list ([r all-result])
    (cons (car r) (caadr r))))

;; Write res.txt: the names of all nodes that share a row are printed on one
;; output line, separated by single spaces.
(with-output-to-file "res.txt"
  #:exists 'replace
  (lambda ()
    ;; line       : row of the output line currently being written
    ;; firstline? : #t until the first name has been printed
    ;; lst        : nodes that still have to be printed
    (let loop ([line 0]
               [firstline? #t]
               [lst all-result-line])
      (match lst
        ['() (void)]
        ;; another node on the row that is currently being written
        [(cons fst rem)
         #:when (= (cdr fst) line)
         (when (not firstline?)
           (display " "))
         (display (car fst))
         ;; Something has been printed now, so the flag must be cleared;
         ;; otherwise further nodes on row 0 are glued together without the
         ;; separating space that every other row gets.
         (loop line #f rem)]
        ;; first node of a different row: start a new output line
        [(cons fst rem)
         (newline)
         (display (car fst))
         ;; Follow the node's actual row rather than assuming it is exactly
         ;; `line + 1`, so the grouping stays correct after a gap in rows.
         (loop (cdr fst) #f rem)]))
    (newline)))
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,153 @@
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <tree_sitter/parser.h>
|
||||
|
||||
// Tokens this external scanner can produce.
// `HERE_STRING_BODY` is used both as the index into `valid_symbols` and as
// the value stored in `lexer->result_symbol`.
enum TokenType {
    HERE_STRING_BODY = 0,
};
|
||||
|
||||
// A hand-written growable byte string.
//
// data[0], data[1], ..., data[len-1] hold the content of the string and
// data[len] is `\0` for compatibility with a typical `char *` string.
// Therefore 0 <= len < cap always holds.
typedef struct {
    size_t len;  // number of content bytes in use
    size_t cap;  // number of bytes allocated for `data`
    char *data;  // heap buffer of `cap` bytes
} String;
|
||||
|
||||
// Abort the process with a message on stderr when an allocation failed.
//
// ptr: result of a calloc/realloc call; NULL is treated as out-of-memory.
static void check_alloc(void *ptr) {
    if (ptr != NULL) {
        return;
    }
    fputs("Scanner: Failed to allocate memory\n", stderr);
    exit(EXIT_FAILURE);
}
|
||||
|
||||
static String string_new(void) {
|
||||
size_t init_len = 16;
|
||||
// (init_len + 1) for null terminator
|
||||
size_t cap = init_len + 1;
|
||||
void *tmp = calloc(1, sizeof(char) * cap);
|
||||
check_alloc(tmp);
|
||||
return (String){.cap = cap, .len = 0, .data = tmp};
|
||||
}
|
||||
|
||||
// Change the capacity of `str` to `new_cap` bytes.
//
// The first `str->len` bytes are preserved and every byte after them is
// zeroed, so the content stays null-terminated. The process exits if the
// reallocation fails (see check_alloc).
static void string_resize(String *str, size_t new_cap) {
    char *grown = realloc(str->data, new_cap * sizeof(char));
    check_alloc(grown);

    // clear everything behind the content, including the new tail
    memset(grown + str->len, 0, (new_cap - str->len) * sizeof(char));

    str->data = grown;
    str->cap = new_cap;
}
|
||||
|
||||
// Append the code point `elem` to `str` as sizeof(int32_t) raw bytes.
//
// NOTE: little-endian/big-endian is not considered here. The characters in
// the string are only used for comparison, so they only need to be stored
// in a consistent way. Because whole int32_t values are stored, the content
// may contain zero bytes.
static void string_push(String *str, int32_t elem) {
    const size_t width = sizeof(elem);
    const size_t new_len = str->len + width;

    if (new_len >= str->cap) {
        // str->cap * 2 + 1 > str->len + sizeof(elem) always holds
        // because str->cap > 16
        string_resize(str, str->cap * 2 + 1);
    }

    memcpy(str->data + str->len, &elem, width);
    str->len = new_len;
}
|
||||
|
||||
// Release the buffer owned by `str` and reset it to the empty state
// (data == NULL, len == 0, cap == 0).
// A String whose buffer is already NULL is left untouched.
static void string_free(String *str) {
    if (str->data == NULL) {
        return;
    }
    free(str->data);
    *str = (String){.len = 0, .cap = 0, .data = NULL};
}
|
||||
|
||||
// Make `str` empty while keeping its buffer and capacity.
// The bytes that were in use are zeroed so the content stays
// null-terminated.
static void string_clear(String *str) {
    const size_t used = str->len;
    str->len = 0;
    memset(str->data, 0, used * sizeof(char));
}
|
||||
|
||||
// Move the lexer to the next character, passing `false` as the second
// argument of `lexer->advance` (tree-sitter's skip flag).
static void advance(TSLexer *lexer) {
    const bool skip_char = false;
    lexer->advance(lexer, skip_char);
}
|
||||
|
||||
// Move the lexer to the next character, passing `true` as the second
// argument of `lexer->advance` (tree-sitter's skip flag).
static void skip(TSLexer *lexer) {
    const bool skip_char = true;
    lexer->advance(lexer, skip_char);
}
|
||||
|
||||
// Return true when `chr` is a line feed.
//
// NOTE: only "\n" is accepted as a newline here,
// which implies that "\r" can be part of a terminator.
static bool isnewline(int32_t chr) {
    const int32_t line_feed = '\n';
    return line_feed == chr;
}
|
||||
|
||||
// `read_line` read strings until a newline or EOF
|
||||
static void read_line(String *str, TSLexer *lexer) {
|
||||
while (!isnewline(lexer->lookahead) && !lexer->eof(lexer)) {
|
||||
string_push(str, lexer->lookahead);
|
||||
advance(lexer);
|
||||
}
|
||||
}
|
||||
|
||||
// Suppose terminator is `T`, newline (\n) is `$`,
|
||||
// It should accept "#<<T$T" or "#<<T$...$T", where `...`
|
||||
// is the string content.
|
||||
static bool scan(TSLexer *lexer, const bool *valid_symbols) {
|
||||
if (!valid_symbols[HERE_STRING_BODY]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
String terminator = string_new();
|
||||
read_line(&terminator, lexer);
|
||||
|
||||
if (lexer->eof(lexer)) {
|
||||
string_free(&terminator);
|
||||
return false;
|
||||
}
|
||||
|
||||
// skip `\n`
|
||||
skip(lexer);
|
||||
|
||||
String current_line = string_new();
|
||||
while (true) {
|
||||
read_line(¤t_line, lexer);
|
||||
if (strcmp(terminator.data, current_line.data) == 0) {
|
||||
lexer->result_symbol = HERE_STRING_BODY;
|
||||
string_free(&terminator);
|
||||
string_free(¤t_line);
|
||||
return true;
|
||||
}
|
||||
if (lexer->eof(lexer)) {
|
||||
string_free(&terminator);
|
||||
string_free(¤t_line);
|
||||
return false;
|
||||
}
|
||||
string_clear(¤t_line);
|
||||
// skip `\n`
|
||||
skip(lexer);
|
||||
}
|
||||
}
|
||||
|
||||
// Create the scanner payload.
// This scanner keeps no state between calls, so there is nothing to
// allocate and NULL is returned.
//
// `(void)` makes this a real prototype; an empty parameter list in C
// declares a function with unspecified parameters.
void *tree_sitter_racket_external_scanner_create(void) {
    return NULL;
}
|
||||
|
||||
// Serialize the scanner state into `buffer`.
// Nothing is written to `buffer`; the returned number of bytes is always 0.
unsigned tree_sitter_racket_external_scanner_serialize(void *payload,
                                                       char *buffer) {
    (void)payload;
    (void)buffer;
    return 0;
}
|
||||
|
||||
// Restore the scanner state from `buffer`.
// This function does nothing; all arguments are ignored.
void tree_sitter_racket_external_scanner_deserialize(void *payload,
                                                     const char *buffer,
                                                     unsigned length) {
    (void)payload;
    (void)buffer;
    (void)length;
}
|
||||
|
||||
bool tree_sitter_racket_external_scanner_scan(void *payload,
|
||||
TSLexer *lexer,
|
||||
const bool *valid_symbols) {
|
||||
return scan(lexer, valid_symbols);
|
||||
}
|
||||
|
||||
// Destroy the scanner payload.
// This function does nothing; `payload` is ignored.
void tree_sitter_racket_external_scanner_destroy(void *payload) {
    (void)payload;
}
|
||||
@ -1,119 +0,0 @@
|
||||
#include <string>
|
||||
|
||||
#include "tree_sitter/parser.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using std::u32string;
|
||||
|
||||
enum TokenType {
|
||||
HERE_STRING_BODY,
|
||||
};
|
||||
|
||||
class optional_str {
|
||||
bool valid;
|
||||
u32string str;
|
||||
|
||||
public:
|
||||
optional_str() : valid(true) {}
|
||||
|
||||
static optional_str none() {
|
||||
optional_str emp;
|
||||
emp.valid = false;
|
||||
return emp;
|
||||
}
|
||||
|
||||
bool is_none() const { return !this->valid; }
|
||||
const u32string &content() const { return this->str; }
|
||||
u32string &content() { return this->str; }
|
||||
};
|
||||
|
||||
// NOTE: only "\n" is allowed as newline here,
|
||||
// It implies that "\r" can also be terminator.
|
||||
inline bool isnewline(int32_t c) {
|
||||
return c == '\n';
|
||||
}
|
||||
|
||||
inline optional_str read_terminator(TSLexer *lexer) {
|
||||
optional_str line;
|
||||
|
||||
while (true) {
|
||||
if (isnewline(lexer->lookahead)) {
|
||||
return line;
|
||||
} else if (lexer->eof(lexer)) {
|
||||
return optional_str::none();
|
||||
} else {
|
||||
line.content().push_back(lexer->lookahead);
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// `read_line` read strings until a newline or EOF
|
||||
inline u32string read_line(TSLexer *lexer) {
|
||||
u32string line;
|
||||
|
||||
while (!isnewline(lexer->lookahead) && !lexer->eof(lexer)) {
|
||||
line.push_back(lexer->lookahead);
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
// Suppose terminator is `T`, newline (\n) is `$`,
|
||||
// It should accept "#<<T$T" or "#<<T$...$T", where `...`
|
||||
// is the string content.
|
||||
bool scan(TSLexer *lexer, const bool *valid_symbols) {
|
||||
if (!valid_symbols[HERE_STRING_BODY]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const optional_str terminator = read_terminator(lexer);
|
||||
|
||||
if (terminator.is_none()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// skip `\n`
|
||||
lexer->advance(lexer, false);
|
||||
while (true) {
|
||||
const u32string line = read_line(lexer);
|
||||
if (line == terminator.content()) {
|
||||
lexer->result_symbol = HERE_STRING_BODY;
|
||||
return true;
|
||||
}
|
||||
if (lexer->eof(lexer)) {
|
||||
return false;
|
||||
}
|
||||
// skip `\n`
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
extern "C" {
|
||||
|
||||
void *tree_sitter_racket_external_scanner_create(void) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void tree_sitter_racket_external_scanner_destroy(void *payload) {
|
||||
}
|
||||
|
||||
unsigned tree_sitter_racket_external_scanner_serialize(void *payload,
|
||||
char *buffer) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void tree_sitter_racket_external_scanner_deserialize(void *payload,
|
||||
const char *buffer,
|
||||
unsigned length) {
|
||||
}
|
||||
|
||||
bool tree_sitter_racket_external_scanner_scan(void *payload,
|
||||
TSLexer *lexer,
|
||||
const bool *valid_symbols) {
|
||||
return scan(lexer, valid_symbols);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue