mirror of https://github.com/Wilfred/difftastic/
Merge branch 'master' into pr-add_lua_parser
commit
eb29b824c3
@ -0,0 +1,314 @@
|
||||
# the "GNU Emacs Lisp Reference Manual" has very useful info
|
||||
# in the code below section names will be mentioned, like:
|
||||
# see "Special Read Syntax"
|
||||
|
||||
# bl - begin line
|
||||
# bc - begin column
|
||||
# el - end line
|
||||
# ec - end column
|
||||
(defn make-attrs
  "Pair the given values, in order, with the keys :bl :bc :el :ec and return the resulting table."
  [& items]
  # begin line, begin column, end line, end column
  (def attr-keys [:bl :bc :el :ec])
  (zipcoll attr-keys items))
|
||||
|
||||
(defn atom-node
  "Return a PEG form that matches peg-form and produces [node-type attrs text]."
  [node-type peg-form]
  (defn to-node
    [& caps]
    # the outer capture puts the matched text last; everything before it
    # is position info (line/column before and after, when peg-form
    # itself captures nothing)
    (def text (last caps))
    (def positions (slice caps 0 -2))
    [node-type (make-attrs ;positions) text])
  ~(cmt (capture (sequence (line) (column)
                           ,peg-form
                           (line) (column)))
        ,to-node))
|
||||
|
||||
(defn reader-macro-node
  "Return a PEG form for sigil followed by a form, producing [node-type attrs ;children]."
  [node-type sigil]
  (defn to-node
    [& caps]
    # caps: begin line/column, child nodes..., end line/column, matched text
    (def begin-pos (slice caps 0 2))
    (def end-pos (slice caps -4 -2))
    (def children (slice caps 2 -4))
    # the matched text (last capture) is intentionally dropped
    [node-type (make-attrs ;begin-pos ;end-pos) ;children])
  ~(cmt (capture (sequence (line) (column)
                           ,sigil
                           (any :non-form)
                           :form
                           (line) (column)))
        ,to-node))
|
||||
|
||||
(defn collection-node
  "Return a PEG form for a delimited collection, producing [node-type attrs ;children]; raises an error if close-delim is missing."
  [node-type open-delim close-delim]
  (defn to-node
    [& caps]
    # caps: begin line/column, child nodes..., end line/column, matched text
    (def begin-pos (slice caps 0 2))
    (def end-pos (slice caps -4 -2))
    (def children (slice caps 2 -4))
    [node-type (make-attrs ;begin-pos ;end-pos) ;children])
  (defn missing-close
    [line col]
    # line / col are where the closing delimiter was expected
    (string/format "line: %p column: %p missing %p for %p"
                   line col close-delim node-type))
  ~(cmt
     (capture
       (sequence
         (line) (column)
         ,open-delim
         (any :input)
         (choice ,close-delim
                 (error (replace (sequence (line) (column))
                                 ,missing-close)))
         (line) (column)))
     ,to-node))
|
||||
|
||||
# PEG grammar for Emacs Lisp source that records, for every node, where it
# begins and ends (see make-attrs).  Matching yields: begin line, begin
# column, one node per top-level input item, end line, end column.
(def loc-grammar
  ~{:main (sequence (line) (column)
                    (some :input)
                    (line) (column))
    #
    :input (choice :non-form
                   :form)
    #
    :non-form (choice :whitespace
                      :comment)
    #
    :whitespace ,(atom-node :whitespace
                            '(choice (some (set " \f\t\v"))
                                     (choice "\r\n"
                                             "\r"
                                             "\n")))
    #
    :comment ,(atom-node :comment
                         '(sequence ";"
                                    (any (if-not (set "\r\n") 1))))
    #
    :form (choice # reader macros
                  :backquote
                  :function
                  :quote
                  :unquote-splice
                  :unquote
                  # collections
                  :list
                  :vector
                  :char-table
                  :sub-char-table
                  :hash-table
                  :record
                  :bytecode
                  :string-text-props
                  # atoms
                  # XXX: might need assertions at end of things before
                  #      symbols.  see the partial job in :integer-10 below
                  :float
                  :integer
                  :char
                  :string
                  :symbol)
    # see "Backquote"
    :backquote ,(reader-macro-node :backquote "`")
    # see "Anonymous Functions"
    :function ,(reader-macro-node :function "#'")
    # see "Quoting"
    :quote ,(reader-macro-node :quote "'")
    # see "Backquote"
    :unquote-splice ,(reader-macro-node :unquote-splice ",@")
    # see "Backquote"
    :unquote ,(reader-macro-node :unquote ",")
    #
    # see "Cons Cell Type"
    :list ,(collection-node :list "(" ")")
    # see "Vectors"
    :vector ,(collection-node :vector "[" "]")
    # see "Char-Table Type"
    :char-table ,(collection-node :char-table "#^[" "]")
    # see "Char-Table Type"
    :sub-char-table ,(collection-node :sub-char-table "#^^[" "]")
    # see "Byte-Code Objects"
    :bytecode ,(collection-node :bytecode "#[" "]")
    # see "Hash Tables"
    :hash-table ,(collection-node :hash-table "#s(hash-table" ")")
    # see "Records"
    :record ,(collection-node :record "#s(" ")")
    # see "Text Props and Strings"
    :string-text-props
    ,(collection-node :string-text-props "#(" ")")
    #
    # see "Float Basics"
    # PEG choice is ordered and does not retry once an alternative has
    # matched, so alternatives that extend another one must come first;
    # otherwise "1.5e3" stops after "1.5" and the infinity / NaN spellings
    # stop after "1.0" / "0.0"
    :float ,(atom-node :float
                       '(choice :float-inf
                                :float-nan
                                :float-both
                                :float-exp
                                :float-dec))
    # exponent part; the exponent may carry its own sign, e.g. 1e-3
    :float-exponent (sequence (choice "e" "E")
                              (opt (choice "+" "-"))
                              :d+)
    #
    :float-dec (sequence (opt (choice "+" "-"))
                         :d*
                         "."
                         :d+)
    # digits are required before the exponent, otherwise a symbol such
    # as e5 would be taken for a float
    :float-exp (sequence (opt (choice "+" "-"))
                         :d+
                         :float-exponent)
    #
    :float-both (sequence (opt (choice "+" "-"))
                          :d*
                          "."
                          :d+
                          :float-exponent)
    #
    :float-inf (sequence (opt "-")
                         "1.0"
                         (choice "e" "E")
                         "+INF")
    #
    :float-nan (sequence (opt "-")
                         "0.0"
                         (choice "e" "E")
                         "+NaN")
    # see "Integer Basics"
    :integer ,(atom-node :integer
                         '(choice :integer-10
                                  :integer-base))
    #
    :integer-10 (sequence (opt (choice "+" "-"))
                          :d+
                          (opt ".")
                          # XXX: hack?
                          # keeps things like 1+ and 1- from being split
                          # into an integer followed by a symbol
                          (not (set "+-")))
    #
    :integer-base (sequence "#"
                            (choice "b"
                                    "o"
                                    "x"
                                    # XXX: found in xml.el, but docs...(?)
                                    "X"
                                    (sequence :d+ "r"))
                            # XXX: docs contradict this(?), but works...
                            (opt (choice "+" "-"))
                            (some (choice :a :d)))
    # see "Basic Char Syntax"
    :char ,(atom-node :char
                      '(sequence "?"
                                 (choice :char-octal
                                         :char-hex
                                         :char-uni-name
                                         #:char-uni-val
                                         :char-uni-val-low
                                         :char-uni-val-up
                                         :char-meta-octal
                                         :char-key
                                         :char-basic)))
    # see "General Escape Syntax"
    :char-octal (sequence "\\" (3 (range "07")))
    :char-hex (sequence "\\x" :h+)
    :char-uni-name (sequence "\\N{" (thru "}"))
    #:char-uni-val (sequence "\\N{U+" :h+ "}")
    :char-uni-val-low (sequence "\\u" (4 :h))
    :char-uni-val-up (sequence "\\U" (8 :h))
    # see "Meta-Char Syntax"
    :char-meta-octal (sequence "\\M-" :char-octal)
    # see "Ctl-Char Syntax"
    # see "Other Char Bits"
    :char-key
    (sequence (some (sequence "\\"
                              (choice (sequence (set "ACHMSs") "-")
                                      "^")))
              # XXX: not strictly correct?
              (choice :char-octal
                      :char-hex
                      :char-uni-name
                      #:char-uni-val
                      :char-uni-val-low
                      :char-uni-val-up
                      :char-meta-octal
                      :char-basic))
    # XXX: not strictly correct, but perhaps it's ok?
    :char-basic (choice (sequence "\\" 1)
                        1)
    # see "Syntax for Strings"
    # XXX: escaped newline and escaped space in "Syntax for Strings"?
    :string
    ,(atom-node :string
                '(sequence "\""
                           (any (choice :escape
                                        (if-not "\"" 1)))
                           "\""))
    # XXX: is this complete?
    :escape (sequence "\\" (set "0abdefnrstvx\"\\"))
    # see "Symbol Type"
    :symbol
    ,(atom-node :symbol
                '(choice (sequence :sym-char-head
                                   (any :sym-char-rest))
                         # XXX: some below not really symbols
                         # see "Circular Objects"
                         (sequence "#" :d+ "=")
                         (sequence "#" :d+ "#")
                         # see "Special Read Syntax"
                         #(sequence "#" :d+)
                         # see "Documentation Strings and Compilation"
                         "#$"
                         # see "Symbol Type"
                         "##"))
    # unescaped whitespace of any kind (not only space / newline) ends a
    # symbol; the same characters make up :whitespace above
    :sym-char-head (choice :sym-char-esc
                           # don't start with
                           #(if-not (set " \"#'(),.;?[]`") 1)) # allow .
                           (if-not (set " \f\n\r\t\v\"#'(),;?[]`") 1))
    #
    :sym-char-rest (choice :sym-char-esc
                           # . and ? are allowed "inside"
                           (if-not (set " \f\n\r\t\v\"#'(),;[]`") 1))
    # need to be escaped
    :sym-char-esc (sequence "\\" (set " \"#'(),;?[]`"))
    })
|
||||
|
||||
# usage examples; each expression is followed by `# =>` and its expected value
(comment

  (get (peg/match loc-grammar " ") 2)
  # =>
  '(:whitespace @{:bl 1 :bc 1 :el 1 :ec 2} " ")

  (get (peg/match loc-grammar "; hi there") 2)
  # =>
  '(:comment @{:bl 1 :bc 1 :el 1 :ec 11} "; hi there")

  (get (peg/match loc-grammar "8.3") 2)
  # =>
  '(:float @{:bl 1 :bc 1 :el 1 :ec 4} "8.3")

  (get (peg/match loc-grammar "printf") 2)
  # =>
  '(:symbol @{:bl 1 :bc 1 :el 1 :ec 7} "printf")

  (get (peg/match loc-grammar ":smile") 2)
  # =>
  '(:symbol @{:bl 1 :bc 1 :el 1 :ec 7} ":smile")

  (get (peg/match loc-grammar `"fun"`) 2)
  # =>
  '(:string @{:bl 1 :bc 1 :el 1 :ec 6} "\"fun\"")

  (get (peg/match loc-grammar "[8]") 2)
  # =>
  '(:vector @{:bl 1 :bc 1 :el 1 :ec 4}
            (:integer @{:bl 1 :bc 2 :el 1 :ec 3} "8"))

  (get (peg/match loc-grammar "(1+ 1)") 2)
  # =>
  '(:list @{:bl 1 :bc 1 :el 1 :ec 7}
          (:symbol @{:bl 1 :bc 2 :el 1 :ec 4} "1+")
          (:whitespace @{:bl 1 :bc 4 :el 1 :ec 5} " ")
          (:integer @{:bl 1 :bc 5 :el 1 :ec 6} "1"))

  (get (peg/match loc-grammar "`x") 2)
  # =>
  '(:backquote @{:bl 1 :bc 1 :el 1 :ec 3}
               (:symbol @{:bl 1 :bc 2 :el 1 :ec 3} "x"))

  (try
    (peg/match loc-grammar "(+ 1")
    ([e]
      e))
  # =>
  `line: 1 column: 5 missing ")" for :list`

  )
|
||||
|
||||
@ -0,0 +1,320 @@
|
||||
# the "GNU Emacs Lisp Reference Manual" has very useful info
|
||||
# in the code below section names will be mentioned, like:
|
||||
# see "Special Read Syntax"
|
||||
|
||||
# bl - begin line
|
||||
# bc - begin column
|
||||
# el - end line
|
||||
# ec - end column
|
||||
(defn make-attrs
  "Pair the given values, in order, with the keys :bl :bc :el :ec and return the resulting table."
  [& items]
  # begin line, begin column, end line, end column
  (def attr-keys [:bl :bc :el :ec])
  (zipcoll attr-keys items))
|
||||
|
||||
(defn atom-node
  "Return a PEG form that matches peg-form and produces [node-type attrs text]."
  [node-type peg-form]
  (defn to-node
    [& caps]
    # the outer capture puts the matched text last; everything before it
    # is position info (line/column before and after, when peg-form
    # itself captures nothing)
    (def text (last caps))
    (def positions (slice caps 0 -2))
    [node-type (make-attrs ;positions) text])
  ~(cmt (capture (sequence (line) (column)
                           ,peg-form
                           (line) (column)))
        ,to-node))
|
||||
|
||||
(defn reader-macro-node
  "Return a PEG form for sigil followed by a form, producing [node-type attrs ;children]."
  [node-type sigil]
  (defn to-node
    [& caps]
    # caps: begin line/column, child nodes..., end line/column, matched text
    (def begin-pos (slice caps 0 2))
    (def end-pos (slice caps -4 -2))
    (def children (slice caps 2 -4))
    # the matched text (last capture) is intentionally dropped
    [node-type (make-attrs ;begin-pos ;end-pos) ;children])
  ~(cmt (capture (sequence (line) (column)
                           ,sigil
                           (any :non-form)
                           :form
                           (line) (column)))
        ,to-node))
|
||||
|
||||
(defn collection-node
  "Return a PEG form for a delimited collection, producing [node-type attrs ;children]; raises an error if close-delim is missing."
  [node-type open-delim close-delim]
  (defn to-node
    [& caps]
    # caps: begin line/column, child nodes..., end line/column, matched text
    (def begin-pos (slice caps 0 2))
    (def end-pos (slice caps -4 -2))
    (def children (slice caps 2 -4))
    [node-type (make-attrs ;begin-pos ;end-pos) ;children])
  (defn missing-close
    [line col]
    # line / col are where the closing delimiter was expected
    (string/format "line: %p column: %p missing %p for %p"
                   line col close-delim node-type))
  ~(cmt
     (capture
       (sequence
         (line) (column)
         ,open-delim
         (any :input)
         (choice ,close-delim
                 (error (replace (sequence (line) (column))
                                 ,missing-close)))
         (line) (column)))
     ,to-node))
|
||||
|
||||
# PEG grammar for Emacs Lisp source that records, for every node, where it
# begins and ends (see make-attrs).  Matching yields: begin line, begin
# column, one node per top-level input item, end line, end column.
(def loc-grammar
  ~{:main (sequence (line) (column)
                    (some :input)
                    (line) (column))
    #
    :input (choice :non-form
                   :form)
    #
    :non-form (choice :whitespace
                      :comment)
    #
    :whitespace ,(atom-node :whitespace
                            '(choice (some (set " \f\t\v"))
                                     (choice "\r\n"
                                             "\r"
                                             "\n")))
    # for reference, the atom-node call above written out by hand:
    # :whitespace
    # (cmt (capture (sequence (line) (column)
    #                         (choice (some (set " \f\t\v"))
    #                                 (choice "\r\n"
    #                                         "\r"
    #                                         "\n"))
    #                         (line) (column)))
    #      ,|[:whitespace (make-attrs ;(slice $& 0 -2)) (last $&)])
    #
    :comment ,(atom-node :comment
                         '(sequence ";"
                                    (any (if-not (set "\r\n") 1))))
    #
    :form (choice # reader macros
                  :backquote
                  :function
                  :quote
                  :unquote-splice
                  :unquote
                  # collections
                  :list
                  :vector
                  :char-table
                  :sub-char-table
                  :hash-table
                  :record
                  :bytecode
                  :string-text-props
                  # atoms
                  # XXX: might need assertions at end of things before
                  #      symbols.  see the partial job in :integer-10 below
                  :float
                  :integer
                  :char
                  :string
                  :symbol)
    # see "Backquote"
    :backquote ,(reader-macro-node :backquote "`")
    # for reference, the reader-macro-node call above written out by hand:
    # :backquote
    # (cmt (capture (sequence (line) (column)
    #                         "`"
    #                         (any :non-form)
    #                         :form
    #                         (line) (column)))
    #      ,|[:backquote (make-attrs ;(slice $& 0 2) ;(slice $& -4 -2))
    #         ;(slice $& 2 -4)])
    # see "Anonymous Functions"
    :function ,(reader-macro-node :function "#'")
    # see "Quoting"
    :quote ,(reader-macro-node :quote "'")
    # see "Backquote"
    :unquote-splice ,(reader-macro-node :unquote-splice ",@")
    # see "Backquote"
    :unquote ,(reader-macro-node :unquote ",")
    #
    # see "Cons Cell Type"
    :list ,(collection-node :list "(" ")")
    # for reference, the collection-node call above written out by hand:
    # :list
    # (cmt
    #   (capture
    #     (sequence
    #       (line) (column)
    #       "("
    #       (any :input)
    #       (choice ")"
    #               (error
    #                 (replace (sequence (line) (column))
    #                          ,|(string/format
    #                              "line: %p column: %p missing %p for %p"
    #                              $0 $1 ")" :list))))
    #       (line) (column)))
    #   ,|[:list (make-attrs ;(slice $& 0 2) ;(slice $& -4 -2))
    #      ;(slice $& 2 -4)])
    # see "Vectors"
    :vector ,(collection-node :vector "[" "]")
    # see "Char-Table Type"
    :char-table ,(collection-node :char-table "#^[" "]")
    # see "Char-Table Type"
    :sub-char-table ,(collection-node :sub-char-table "#^^[" "]")
    # see "Byte-Code Objects"
    :bytecode ,(collection-node :bytecode "#[" "]")
    # see "Hash Tables"
    :hash-table ,(collection-node :hash-table "#s(hash-table" ")")
    # see "Records"
    :record ,(collection-node :record "#s(" ")")
    # see "Text Props and Strings"
    :string-text-props
    ,(collection-node :string-text-props "#(" ")")
    #
    # see "Float Basics"
    # PEG choice is ordered and does not retry once an alternative has
    # matched, so alternatives that extend another one must come first;
    # otherwise "1.5e3" stops after "1.5" and the infinity / NaN spellings
    # stop after "1.0" / "0.0"
    :float ,(atom-node :float
                       '(choice :float-inf
                                :float-nan
                                :float-both
                                :float-exp
                                :float-dec))
    # exponent part; the exponent may carry its own sign, e.g. 1e-3
    :float-exponent (sequence (choice "e" "E")
                              (opt (choice "+" "-"))
                              :d+)
    #
    :float-dec (sequence (opt (choice "+" "-"))
                         :d*
                         "."
                         :d+)
    # digits are required before the exponent, otherwise a symbol such
    # as e5 would be taken for a float
    :float-exp (sequence (opt (choice "+" "-"))
                         :d+
                         :float-exponent)
    #
    :float-both (sequence (opt (choice "+" "-"))
                          :d*
                          "."
                          :d+
                          :float-exponent)
    #
    :float-inf (sequence (opt "-")
                         "1.0"
                         (choice "e" "E")
                         "+INF")
    #
    :float-nan (sequence (opt "-")
                         "0.0"
                         (choice "e" "E")
                         "+NaN")
    # see "Integer Basics"
    :integer ,(atom-node :integer
                         '(choice :integer-10
                                  :integer-base))
    #
    :integer-10 (sequence (opt (choice "+" "-"))
                          :d+
                          (opt ".")
                          # XXX: hack?
                          # keeps things like 1+ and 1- from being split
                          # into an integer followed by a symbol
                          (not (set "+-")))
    #
    :integer-base (sequence "#"
                            (choice "b"
                                    "o"
                                    "x"
                                    # XXX: found in xml.el, but docs...(?)
                                    "X"
                                    (sequence :d+ "r"))
                            # XXX: docs contradict this(?), but works...
                            (opt (choice "+" "-"))
                            (some (choice :a :d)))
    # see "Basic Char Syntax"
    :char ,(atom-node :char
                      '(sequence "?"
                                 (choice :char-octal
                                         :char-hex
                                         :char-uni-name
                                         #:char-uni-val
                                         :char-uni-val-low
                                         :char-uni-val-up
                                         :char-meta-octal
                                         :char-key
                                         :char-basic)))
    # see "General Escape Syntax"
    :char-octal (sequence "\\" (3 (range "07")))
    :char-hex (sequence "\\x" :h+)
    :char-uni-name (sequence "\\N{" (thru "}"))
    #:char-uni-val (sequence "\\N{U+" :h+ "}")
    :char-uni-val-low (sequence "\\u" (4 :h))
    :char-uni-val-up (sequence "\\U" (8 :h))
    # see "Meta-Char Syntax"
    :char-meta-octal (sequence "\\M-" :char-octal)
    # see "Ctl-Char Syntax"
    # see "Other Char Bits"
    :char-key
    (sequence (some (sequence "\\"
                              (choice (sequence (set "ACHMSs") "-")
                                      "^")))
              # XXX: not strictly correct?
              (choice :char-octal
                      :char-hex
                      :char-uni-name
                      #:char-uni-val
                      :char-uni-val-low
                      :char-uni-val-up
                      :char-meta-octal
                      :char-basic))
    # XXX: not strictly correct, but perhaps it's ok?
    :char-basic (choice (sequence "\\" 1)
                        1)
    # see "Syntax for Strings"
    # XXX: escaped newline and escaped space in "Syntax for Strings"?
    :string
    ,(atom-node :string
                '(sequence "\""
                           (any (choice :escape
                                        (if-not "\"" 1)))
                           "\""))
    # XXX: is this complete?
    :escape (sequence "\\" (set "0abdefnrstvx\"\\"))
    # see "Symbol Type"
    :symbol
    ,(atom-node :symbol
                '(choice (sequence :sym-char-head
                                   (any :sym-char-rest))
                         # XXX: some below not really symbols
                         # see "Circular Objects"
                         (sequence "#" :d+ "=")
                         (sequence "#" :d+ "#")
                         # see "Special Read Syntax"
                         #(sequence "#" :d+)
                         # see "Documentation Strings and Compilation"
                         "#$"
                         # see "Symbol Type"
                         "##"))
    # unescaped whitespace of any kind (not only space / newline) ends a
    # symbol; the same characters make up :whitespace above
    :sym-char-head (choice :sym-char-esc
                           # don't start with
                           #(if-not (set " \"#'(),.;?[]`") 1)) # allow .
                           (if-not (set " \f\n\r\t\v\"#'(),;?[]`") 1))
    #
    :sym-char-rest (choice :sym-char-esc
                           # . and ? are allowed "inside"
                           (if-not (set " \f\n\r\t\v\"#'(),;[]`") 1))
    # need to be escaped
    :sym-char-esc (sequence "\\" (set " \"#'(),;?[]`"))
    })
|
||||
|
||||
# usage examples; each expression is followed by `# =>` and its expected value
(comment

  (get (peg/match loc-grammar " ") 2)
  # =>
  '(:whitespace @{:bl 1 :bc 1 :el 1 :ec 2} " ")

  (get (peg/match loc-grammar "8.3") 2)
  # =>
  '(:float @{:bl 1 :bc 1 :el 1 :ec 4} "8.3")

  (get (peg/match loc-grammar "printf") 2)
  # =>
  '(:symbol @{:bl 1 :bc 1 :el 1 :ec 7} "printf")

  (get (peg/match loc-grammar ":smile") 2)
  # =>
  '(:symbol @{:bl 1 :bc 1 :el 1 :ec 7} ":smile")

  (get (peg/match loc-grammar "[8]") 2)
  # =>
  '(:vector @{:bl 1 :bc 1 :el 1 :ec 4}
            (:integer @{:bl 1 :bc 2 :el 1 :ec 3} "8"))

  (get (peg/match loc-grammar "`x") 2)
  # =>
  '(:backquote @{:bl 1 :bc 1 :el 1 :ec 3}
               (:symbol @{:bl 1 :bc 2 :el 1 :ec 3} "x"))

  )
|
||||
|
||||
@ -0,0 +1 @@
|
||||
../tree-sitter-janet-simple/queries/highlights.scm
|
||||
@ -0,0 +1 @@
|
||||
tree-sitter-janet-simple/src
|
||||
@ -0,0 +1,4 @@
|
||||
node_modules
|
||||
bin
|
||||
build
|
||||
*.log
|
||||
@ -0,0 +1,25 @@
|
||||
[package]
|
||||
name = "tree-sitter-janet-simple"
|
||||
description = "janet grammar for the tree-sitter parsing library"
|
||||
version = "0.0.3"
|
||||
keywords = ["incremental", "parsing", "janet"]
|
||||
categories = ["parsing", "text-editors"]
|
||||
repository = "https://github.com/sogaiu/tree-sitter-janet-simple"
|
||||
edition = "2018"
|
||||
|
||||
build = "bindings/rust/build.rs"
|
||||
include = [
|
||||
"bindings/rust/*",
|
||||
"grammar.js",
|
||||
"queries/*",
|
||||
"src/*",
|
||||
]
|
||||
|
||||
[lib]
|
||||
path = "bindings/rust/lib.rs"
|
||||
|
||||
[dependencies]
|
||||
tree-sitter = "0.19.3"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.0"
|
||||
@ -0,0 +1,114 @@
|
||||
# tree-sitter-janet-simple
|
||||
|
||||
## Status
|
||||
|
||||
Subject to change, grammar still evolving.
|
||||
|
||||
Coincidentally, it appears [another effort by GrayJack](https://github.com/GrayJack/tree-sitter-janet/) was started at about the same time.
|
||||
|
||||
The main difference between the two is that GrayJack's grammar supports higher-level constructs (e.g. `def` is recognized by the grammar).
|
||||
|
||||
There might end up being different trade-offs in either approach and my belief is that there is room in the world for multiple attempts (especially for lisp-like languages).
|
||||
|
||||
## Prerequisites
|
||||
|
||||
* [emsdk](https://emscripten.org/docs/getting_started/downloads.html#installation-instructions) -- emscripten via homebrew seems to work for macos
|
||||
* node >= 12 (nvm recommended) -- recently tested 12.9.1, 12.16.1
|
||||
|
||||
## Fine Print
|
||||
|
||||
* The instructions below assume emsdk has been installed, but `emcc` (tool that can be used to compile to wasm) is not necessarily on one's `PATH`. If an appropriate `emcc` is on one's `PATH` (e.g. emscripten installed via homebrew), the emsdk steps (e.g. `source ~/src/emsdk/emsdk_env.sh`) below may be ignored.
|
||||
|
||||
* `node-gyp` (tool for compiling native addon modules for Node.js) may fail on machines upgraded to macos Catalina. [This document](https://github.com/nodejs/node-gyp/blob/master/macOS_Catalina.md) may help cope with such a situation.
|
||||
|
||||
## Initial Setup
|
||||
|
||||
Suppose typical development sources are stored under `~/src`.
|
||||
|
||||
```
|
||||
# clone repository
|
||||
cd ~/src
|
||||
git clone https://github.com/sogaiu/tree-sitter-janet-simple
|
||||
cd tree-sitter-janet-simple
|
||||
|
||||
# create / populate
|
||||
# `node_modules` with dependencies
|
||||
# `src` with tree-sitter .c goodness
|
||||
# `build`
|
||||
# `build/Release` and build `tree_sitter_janet_simple_binding.node`
|
||||
npm install
|
||||
|
||||
# included in previous command
|
||||
#npx tree-sitter generate
|
||||
#npx node-gyp configure
|
||||
#npx node-gyp rebuild
|
||||
```
|
||||
|
||||
## Grammar Development
|
||||
|
||||
Hack on grammar and interactively test.
|
||||
|
||||
```
|
||||
# prepare emsdk (specifically emcc) for building .wasm
|
||||
source ~/src/emsdk/emsdk_env.sh
|
||||
|
||||
# edit grammar.js using some editor
|
||||
|
||||
# rebuild tree-sitter stuff and invoke web-ui for interactive testing
|
||||
npx tree-sitter generate && \
|
||||
npx node-gyp rebuild && \
|
||||
npx tree-sitter build-wasm && \
|
||||
npx tree-sitter web-ui
|
||||
|
||||
# in appropriate browser window, paste code in left pane
|
||||
|
||||
# examine results in right pane -- can even click on nodes
|
||||
|
||||
# find errors and loop back to edit step above...
|
||||
```
|
||||
|
||||
Parse individual files.
|
||||
|
||||
```
|
||||
# create and populate sample code file for parsing named `sample.janet`
|
||||
|
||||
# parse sample file
|
||||
npx tree-sitter parse sample.janet
|
||||
|
||||
# examine output similar to web-ui, but less convenient
|
||||
```
|
||||
|
||||
## Measure Performance
|
||||
|
||||
```
|
||||
# single measurement
|
||||
npx tree-sitter parse --time sample.janet
|
||||
|
||||
# multiple measurements with `multitime`
|
||||
multitime -n10 -s1 npx tree-sitter parse --time --quiet sample.janet
|
||||
```
|
||||
|
||||
## Build .wasm
|
||||
|
||||
Assuming emsdk is installed appropriately under `~/src/emsdk`.
|
||||
|
||||
```
|
||||
# prepare emsdk (specifically emcc) for use
|
||||
source ~/src/emsdk/emsdk_env.sh
|
||||
|
||||
# create `tree-sitter-janet-simple.wasm`
|
||||
npx tree-sitter build-wasm
|
||||
```
|
||||
|
||||
## Resources
|
||||
|
||||
* [Guide to your first Tree-sitter grammar](https://gist.github.com/Aerijo/df27228d70c633e088b0591b8857eeef)
|
||||
* [tree-sitter](http://tree-sitter.github.io/tree-sitter/)
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
* 314eter - handling null characters
|
||||
* Aerijo - Guide to your first Tree-sitter grammar
|
||||
* bakpakin - janet
|
||||
* GrayJack - tree-sitter-janet
|
||||
* maxbrunsfeld - tree-sitter and related
|
||||
@ -0,0 +1,19 @@
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"target_name": "tree_sitter_janet_simple_binding",
|
||||
"include_dirs": [
|
||||
"<!(node -e \"require('nan')\")",
|
||||
"src"
|
||||
],
|
||||
"sources": [
|
||||
"src/parser.c",
|
||||
"bindings/node/binding.cc",
|
||||
"src/scanner.c"
|
||||
],
|
||||
"cflags_c": [
|
||||
"-std=c99",
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -0,0 +1,28 @@
|
||||
#include "tree_sitter/parser.h"
|
||||
#include <node.h>
|
||||
#include "nan.h"
|
||||
|
||||
using namespace v8;
|
||||
|
||||
extern "C" TSLanguage * tree_sitter_janet_simple();
|
||||
|
||||
namespace {
|
||||
|
||||
NAN_METHOD(New) {}
|
||||
|
||||
void Init(Local<Object> exports, Local<Object> module) {
|
||||
Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
|
||||
tpl->SetClassName(Nan::New("Language").ToLocalChecked());
|
||||
tpl->InstanceTemplate()->SetInternalFieldCount(1);
|
||||
|
||||
Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
|
||||
Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
|
||||
Nan::SetInternalFieldPointer(instance, 0, tree_sitter_janet_simple());
|
||||
|
||||
Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("janet_simple").ToLocalChecked());
|
||||
Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
|
||||
}
|
||||
|
||||
NODE_MODULE(tree_sitter_janet_simple_binding, Init)
|
||||
|
||||
} // namespace
|
||||
@ -0,0 +1,19 @@
|
||||
// True when `err` reports that the required module does not exist.
function isMissingModule(err) {
  return err.code === 'MODULE_NOT_FOUND';
}

// Load the native binding, preferring the Release build and falling back to
// the Debug build.  Any failure other than "module not found" is rethrown
// as-is; if neither build exists, the Release error is the one reported.
function loadBinding() {
  try {
    return require("../../build/Release/tree_sitter_janet_simple_binding");
  } catch (releaseError) {
    if (!isMissingModule(releaseError)) {
      throw releaseError;
    }
    try {
      return require("../../build/Debug/tree_sitter_janet_simple_binding");
    } catch (debugError) {
      if (!isMissingModule(debugError)) {
        throw debugError;
      }
      throw releaseError;
    }
  }
}

module.exports = loadBinding();

// Node type info is optional: any failure to load it is ignored.
try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}
|
||||
@ -0,0 +1,38 @@
|
||||
fn main() {
|
||||
let src_dir = std::path::Path::new("src");
|
||||
|
||||
let mut c_config = cc::Build::new();
|
||||
c_config.include(&src_dir);
|
||||
c_config
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-Wno-unused-but-set-variable")
|
||||
.flag_if_supported("-Wno-trigraphs");
|
||||
let parser_path = src_dir.join("parser.c");
|
||||
c_config.file(&parser_path);
|
||||
|
||||
// If your language uses an external scanner written in C,
|
||||
// then include this block of code:
|
||||
|
||||
let scanner_path = src_dir.join("scanner.c");
|
||||
c_config.file(&scanner_path);
|
||||
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
|
||||
|
||||
c_config.compile("parser");
|
||||
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
|
||||
|
||||
// If your language uses an external scanner written in C++,
|
||||
// then include this block of code:
|
||||
|
||||
/*
|
||||
let mut cpp_config = cc::Build::new();
|
||||
cpp_config.cpp(true);
|
||||
cpp_config.include(&src_dir);
|
||||
cpp_config
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-Wno-unused-but-set-variable");
|
||||
let scanner_path = src_dir.join("scanner.cc");
|
||||
cpp_config.file(&scanner_path);
|
||||
cpp_config.compile("scanner");
|
||||
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
|
||||
*/
|
||||
}
|
||||
@ -0,0 +1,52 @@
|
||||
//! This crate provides janet_simple language support for the [tree-sitter][] parsing library.
|
||||
//!
|
||||
//! Typically, you will use the [language][language func] function to add this language to a
|
||||
//! tree-sitter [Parser][], and then use the parser to parse some code:
|
||||
//!
|
||||
//! ```
|
||||
//! let code = "";
|
||||
//! let mut parser = tree_sitter::Parser::new();
|
||||
//! parser.set_language(tree_sitter_janet_simple::language()).expect("Error loading janet_simple grammar");
|
||||
//! let tree = parser.parse(code, None).unwrap();
|
||||
//! ```
|
||||
//!
|
||||
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
|
||||
//! [language func]: fn.language.html
|
||||
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
|
||||
//! [tree-sitter]: https://tree-sitter.github.io/
|
||||
|
||||
use tree_sitter::Language;
|
||||
|
||||
extern "C" {
|
||||
fn tree_sitter_janet_simple() -> Language;
|
||||
}
|
||||
|
||||
/// Get the tree-sitter [Language][] for this grammar.
|
||||
///
|
||||
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
|
||||
pub fn language() -> Language {
|
||||
unsafe { tree_sitter_janet_simple() }
|
||||
}
|
||||
|
||||
/// The content of the [`node-types.json`][] file for this grammar.
|
||||
///
|
||||
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
|
||||
pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json");
|
||||
|
||||
// Uncomment these to include any queries that this grammar contains
|
||||
|
||||
// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm");
|
||||
// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm");
|
||||
// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm");
|
||||
// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm");
|
||||
|
||||
#[cfg(test)]
mod tests {
    /// The grammar must be accepted by a freshly created parser.
    #[test]
    fn test_can_load_grammar() {
        let language = super::language();
        let mut parser = tree_sitter::Parser::new();
        let outcome = parser.set_language(language);
        outcome.expect("Error loading janet_simple language");
    }
}
|
||||
@ -0,0 +1,228 @@
|
||||
// numbers
const SIGN = choice('-', '+');
const DIGIT = /[0-9]/;
const HEX_DIGIT = /[0-9A-Fa-f]/;
// the radix prefixes '2' through '36', in ascending order
const RADIX = choice(...Array.from({length: 35}, (_, i) => String(i + 2)));
const ALPHA_NUM = /[a-zA-Z0-9]/;

// symbols and keywords
// janet/tools/symcharsgen.c
const SYM_CHAR_NO_DIGIT_NO_COLON = /[a-zA-Z!$%&*+\-./<?=>@^_]/;
const SYM_CHAR = /[0-9:a-zA-Z!$%&*+\-./<?=>@^_]/;

// strings
// body of a double-quoted string: any run of non-backslash, non-quote
// characters and backslash escapes (a backslash followed by any character,
// newline included) -- thanks to tree-sitter-haskell
const STRING_DOUBLE_QUOTE_CONTENT = repeat(choice(/[^\\"]/, /\\(.|\n)/));
|
||||
|
||||
module.exports = grammar({
|
||||
name: 'janet_simple',
|
||||
|
||||
// mdn says \s is:
|
||||
//
|
||||
// [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]
|
||||
//
|
||||
// but that doesn't seem to match what tree-sitter thinks as it appears that
|
||||
// for example, leaving out \x0b, \x0c, or \x00 from the following yields
|
||||
// different behavior (other stuff may also differ)
|
||||
extras: $ => [
|
||||
/\s|\x0b|\x0c|\x00/,
|
||||
$.comment
|
||||
],
|
||||
|
||||
externals: $ => [
|
||||
$.long_buf_lit,
|
||||
$.long_str_lit
|
||||
],
|
||||
|
||||
rules: {
|
||||
// THIS MUST BE FIRST -- even though this doesn't look like it matters
|
||||
source: $ =>
|
||||
repeat($._lit),
|
||||
|
||||
comment: $ =>
|
||||
/#.*/,
|
||||
|
||||
_lit: $ =>
|
||||
choice($.bool_lit,
|
||||
$.buf_lit,
|
||||
$.kwd_lit,
|
||||
$.long_buf_lit,
|
||||
$.long_str_lit,
|
||||
$.nil_lit,
|
||||
$.num_lit,
|
||||
$.str_lit,
|
||||
$.sym_lit,
|
||||
//
|
||||
$.par_arr_lit,
|
||||
$.sqr_arr_lit,
|
||||
$.struct_lit,
|
||||
$.tbl_lit,
|
||||
$.par_tup_lit,
|
||||
$.sqr_tup_lit,
|
||||
//
|
||||
$.qq_lit,
|
||||
$.quote_lit,
|
||||
$.short_fn_lit,
|
||||
$.splice_lit,
|
||||
$.unquote_lit),
|
||||
|
||||
// simplest things
|
||||
|
||||
bool_lit: $ =>
|
||||
// XXX: without the token here, false and true are exposed as
|
||||
// anonymous nodes it seems...
|
||||
// yet, the same does not happen for nil...strange
|
||||
token(choice('false',
|
||||
'true')),
|
||||
|
||||
kwd_lit: $ =>
|
||||
prec(2, token(seq(':',
|
||||
repeat(SYM_CHAR)))),
|
||||
|
||||
nil_lit: $ =>
|
||||
'nil',
|
||||
|
||||
num_lit: $ =>
|
||||
prec(5, choice($._dec,
|
||||
$._hex,
|
||||
$._radix)),
|
||||
|
||||
_dec: $ =>
|
||||
token(seq(optional(SIGN),
|
||||
choice(seq(repeat1(DIGIT),
|
||||
repeat('_'),
|
||||
optional('.'),
|
||||
repeat('_'),
|
||||
repeat(DIGIT),
|
||||
repeat('_')),
|
||||
seq(repeat(DIGIT),
|
||||
repeat('_'),
|
||||
optional('.'),
|
||||
repeat('_'),
|
||||
repeat1(DIGIT),
|
||||
repeat('_'))),
|
||||
optional(seq(choice('e', 'E'),
|
||||
optional(SIGN),
|
||||
repeat1(DIGIT))))),
|
||||
|
||||
_hex: $ =>
|
||||
token(seq(optional(SIGN),
|
||||
'0',
|
||||
'x',
|
||||
choice(seq(repeat1(HEX_DIGIT),
|
||||
repeat('_'),
|
||||
optional('.'),
|
||||
repeat('_'),
|
||||
repeat(HEX_DIGIT),
|
||||
repeat('_')),
|
||||
seq(repeat(HEX_DIGIT),
|
||||
repeat('_'),
|
||||
optional('.'),
|
||||
repeat('_'),
|
||||
repeat1(HEX_DIGIT),
|
||||
repeat('_'))))),
|
||||
|
||||
_radix: $ =>
|
||||
token(seq(optional(SIGN),
|
||||
seq(RADIX,
|
||||
choice('r', 'R'),
|
||||
ALPHA_NUM,
|
||||
repeat(choice(repeat(ALPHA_NUM),
|
||||
repeat('_'))),
|
||||
optional(seq('&',
|
||||
optional(SIGN),
|
||||
repeat1(DIGIT)))))),
|
||||
|
||||
str_lit: $ =>
|
||||
token(seq('"',
|
||||
STRING_DOUBLE_QUOTE_CONTENT,
|
||||
'"')),
|
||||
|
||||
buf_lit: $ =>
|
||||
token(seq('@"',
|
||||
STRING_DOUBLE_QUOTE_CONTENT,
|
||||
'"')),
|
||||
|
||||
sym_lit: $ =>
|
||||
token(seq(SYM_CHAR_NO_DIGIT_NO_COLON,
|
||||
repeat(SYM_CHAR))),
|
||||
|
||||
// collection-ish things
|
||||
|
||||
par_arr_lit: $ =>
|
||||
seq('@(',
|
||||
repeat($._lit),
|
||||
')'),
|
||||
|
||||
sqr_arr_lit: $ =>
|
||||
seq('@[',
|
||||
repeat($._lit),
|
||||
']'),
|
||||
|
||||
struct_lit: $ =>
|
||||
seq('{',
|
||||
repeat($._lit),
|
||||
'}'),
|
||||
|
||||
tbl_lit: $ =>
|
||||
seq('@{',
|
||||
repeat($._lit),
|
||||
'}'),
|
||||
|
||||
par_tup_lit: $ =>
|
||||
seq('(',
|
||||
repeat($._lit),
|
||||
')'),
|
||||
|
||||
sqr_tup_lit: $ =>
|
||||
seq('[',
|
||||
repeat($._lit),
|
||||
']'),
|
||||
|
||||
// macro-related
|
||||
|
||||
qq_lit: $ =>
|
||||
seq('~',
|
||||
$._lit),
|
||||
|
||||
quote_lit: $ =>
|
||||
seq("'",
|
||||
$._lit),
|
||||
|
||||
// following all work at the repl..
|
||||
// |8, ||8, |||8, etc.
|
||||
// |~(:x)
|
||||
// |{:a 1}
|
||||
// |[1 2]
|
||||
// |"a"
|
||||
// |:w
|
||||
// |a-sym
|
||||
// |@[8 9]
|
||||
// |(= $ 1)
|
||||
// XXX: |() doesn't work...but don't bother disallowing
|
||||
short_fn_lit: $ =>
|
||||
seq('|',
|
||||
$._lit),
|
||||
|
||||
// XXX: ?
|
||||
splice_lit: $ =>
|
||||
seq(';',
|
||||
$._lit),
|
||||
|
||||
unquote_lit: $ =>
|
||||
seq(',',
|
||||
$._lit),
|
||||
|
||||
}
|
||||
});
|
||||
@ -0,0 +1,19 @@
|
||||
{
|
||||
"name": "tree-sitter-janet-simple",
|
||||
"version": "0.0.3",
|
||||
"lockfileVersion": 1,
|
||||
"requires": true,
|
||||
"dependencies": {
|
||||
"nan": {
|
||||
"version": "2.14.2",
|
||||
"resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz",
|
||||
"integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ=="
|
||||
},
|
||||
"tree-sitter-cli": {
|
||||
"version": "0.19.3",
|
||||
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.19.3.tgz",
|
||||
"integrity": "sha512-UlntGxLrlkQCKVrhm7guzfi+ovM4wDLVCCu3z5jmfDgFNoUoKa/23ddaQON5afD5jB9a02xv4N5MXJfCx+/mpw==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,27 @@
|
||||
{
|
||||
"name": "tree-sitter-janet-simple",
|
||||
"version": "0.0.3",
|
||||
"description": "Janet grammar for tree-sitter",
|
||||
"main": "bindings/node",
|
||||
"scripts": {
|
||||
"build": "npx tree-sitter generate && npx node-gyp build",
|
||||
"fresh-build": "npx tree-sitter generate && npx node-gyp configure && npx node-gyp rebuild",
|
||||
"install": "npx tree-sitter generate && npx node-gyp configure && npx node-gyp rebuild"
|
||||
},
|
||||
"author": "",
|
||||
"license": "",
|
||||
"dependencies": {
|
||||
"nan": "2.14.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"tree-sitter-cli": "0.19.3"
|
||||
},
|
||||
"tree-sitter": [
|
||||
{
|
||||
"scope": "source.janet",
|
||||
"file-types": [
|
||||
"janet"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -0,0 +1,25 @@
|
||||
(num_lit) @number
|
||||
|
||||
[
|
||||
(buf_lit)
|
||||
(long_buf_lit)
|
||||
(long_str_lit)
|
||||
(str_lit)
|
||||
] @string
|
||||
|
||||
[
|
||||
(bool_lit)
|
||||
(nil_lit)
|
||||
] @constant.builtin
|
||||
|
||||
(kwd_lit) @constant
|
||||
|
||||
(comment) @comment
|
||||
|
||||
;; Treat quasiquotation as operators for the purpose of highlighting.
|
||||
|
||||
[
|
||||
"'"
|
||||
"~"
|
||||
","
|
||||
] @operator
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,111 @@
|
||||
#include <tree_sitter/parser.h>
|
||||
#include <wctype.h>
|
||||
|
||||
// Kinds of token this external scanner can produce. The values are used as
// indices into the `valid_symbols` array passed to the scan function and are
// stored in `lexer->result_symbol`.
// NOTE(review): the order presumably has to match the `externals` list in
// grammar.js -- confirm before reordering.
enum TokenType {
  LONG_BUF_LIT,  // long buffer literal: '@' followed by a backtick-delimited body
  LONG_STR_LIT   // long string literal: backtick-delimited body
};
|
||||
|
||||
// Create the scanner payload.
//
// Nothing is allocated: NULL is returned as the payload.
void *tree_sitter_janet_simple_external_scanner_create(void)
{
  return NULL;
}
|
||||
|
||||
// Release the scanner payload.
//
// Intentionally a no-op: `payload` is ignored and nothing is freed.
void tree_sitter_janet_simple_external_scanner_destroy(void *payload)
{
  (void)payload;
}
|
||||
|
||||
// Reset the scanner payload.
//
// Intentionally a no-op: `payload` is ignored and no state is touched.
void tree_sitter_janet_simple_external_scanner_reset(void *payload)
{
  (void)payload;
}
|
||||
|
||||
// Serialize the scanner state into `buffer`.
//
// Nothing is written to `buffer`; the function reports that zero bytes
// were stored.
//
// payload: scanner payload (ignored).
// buffer:  destination buffer (left untouched).
// returns: number of bytes written, always 0.
unsigned tree_sitter_janet_simple_external_scanner_serialize(
  void *payload,
  char *buffer
)
{
  (void)payload;
  (void)buffer;
  return 0;
}
|
||||
|
||||
// Restore the scanner state from `buffer`.
//
// Intentionally a no-op: all three arguments are ignored.
//
// payload: scanner payload (ignored).
// buffer:  previously serialized bytes (ignored).
// length:  number of bytes in `buffer` (ignored).
void tree_sitter_janet_simple_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
)
{
  (void)payload;
  (void)buffer;
  (void)length;
}
|
||||
|
||||
bool tree_sitter_janet_simple_external_scanner_scan(
|
||||
void *payload,
|
||||
TSLexer *lexer,
|
||||
const bool *valid_symbols
|
||||
)
|
||||
{
|
||||
// skip a bit brother
|
||||
while (iswspace(lexer->lookahead)) {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
// there can be only...two?
|
||||
if (valid_symbols[LONG_BUF_LIT] || valid_symbols[LONG_STR_LIT]) {
|
||||
// so which one was it?
|
||||
if (lexer->lookahead == '@') {
|
||||
lexer->result_symbol = LONG_BUF_LIT;
|
||||
lexer->advance(lexer, false);
|
||||
} else {
|
||||
lexer->result_symbol = LONG_STR_LIT;
|
||||
}
|
||||
// long strings start with one or more backticks
|
||||
// consume the first backtick
|
||||
if (lexer->lookahead != '`') {
|
||||
return false;
|
||||
}
|
||||
// getting here means a backtick was encountered
|
||||
lexer->advance(lexer, false);
|
||||
uint32_t n_backticks = 1;
|
||||
// arrive at a total number of backticks
|
||||
for (;;) {
|
||||
if (lexer->lookahead == 0) {
|
||||
return false;
|
||||
}
|
||||
// found one!
|
||||
if (lexer->lookahead == '`') {
|
||||
n_backticks++;
|
||||
lexer->advance(lexer, false);
|
||||
continue;
|
||||
} else { // nope, time to bail
|
||||
lexer->advance(lexer, false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// getting here means the last character examined was NOT a backtick.
|
||||
// now keep looking until n_backticks are found
|
||||
uint32_t cbt = 0; // consecutive backticks
|
||||
for (;;) {
|
||||
if (lexer->lookahead == 0) {
|
||||
return false;
|
||||
}
|
||||
// found one!
|
||||
if (lexer->lookahead == '`') {
|
||||
cbt++;
|
||||
// are we there yet?
|
||||
if (cbt == n_backticks) {
|
||||
lexer->advance(lexer, false);
|
||||
return true;
|
||||
}
|
||||
} else { // nope, better reset the count
|
||||
cbt = 0;
|
||||
}
|
||||
// next!
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -0,0 +1,223 @@
|
||||
#ifndef TREE_SITTER_PARSER_H_
|
||||
#define TREE_SITTER_PARSER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define ts_builtin_sym_error ((TSSymbol)-1)
|
||||
#define ts_builtin_sym_end 0
|
||||
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
|
||||
|
||||
typedef uint16_t TSStateId;
|
||||
|
||||
#ifndef TREE_SITTER_API_H_
|
||||
typedef uint16_t TSSymbol;
|
||||
typedef uint16_t TSFieldId;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
TSFieldId field_id;
|
||||
uint8_t child_index;
|
||||
bool inherited;
|
||||
} TSFieldMapEntry;
|
||||
|
||||
typedef struct {
|
||||
uint16_t index;
|
||||
uint16_t length;
|
||||
} TSFieldMapSlice;
|
||||
|
||||
typedef struct {
|
||||
bool visible;
|
||||
bool named;
|
||||
bool supertype;
|
||||
} TSSymbolMetadata;
|
||||
|
||||
typedef struct TSLexer TSLexer;
|
||||
|
||||
struct TSLexer {
|
||||
int32_t lookahead;
|
||||
TSSymbol result_symbol;
|
||||
void (*advance)(TSLexer *, bool);
|
||||
void (*mark_end)(TSLexer *);
|
||||
uint32_t (*get_column)(TSLexer *);
|
||||
bool (*is_at_included_range_start)(const TSLexer *);
|
||||
bool (*eof)(const TSLexer *);
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
TSParseActionTypeShift,
|
||||
TSParseActionTypeReduce,
|
||||
TSParseActionTypeAccept,
|
||||
TSParseActionTypeRecover,
|
||||
} TSParseActionType;
|
||||
|
||||
typedef union {
|
||||
struct {
|
||||
uint8_t type;
|
||||
TSStateId state;
|
||||
bool extra;
|
||||
bool repetition;
|
||||
} shift;
|
||||
struct {
|
||||
uint8_t type;
|
||||
uint8_t child_count;
|
||||
TSSymbol symbol;
|
||||
int16_t dynamic_precedence;
|
||||
uint16_t production_id;
|
||||
} reduce;
|
||||
uint8_t type;
|
||||
} TSParseAction;
|
||||
|
||||
typedef struct {
|
||||
uint16_t lex_state;
|
||||
uint16_t external_lex_state;
|
||||
} TSLexMode;
|
||||
|
||||
typedef union {
|
||||
TSParseAction action;
|
||||
struct {
|
||||
uint8_t count;
|
||||
bool reusable;
|
||||
} entry;
|
||||
} TSParseActionEntry;
|
||||
|
||||
struct TSLanguage {
|
||||
uint32_t version;
|
||||
uint32_t symbol_count;
|
||||
uint32_t alias_count;
|
||||
uint32_t token_count;
|
||||
uint32_t external_token_count;
|
||||
uint32_t state_count;
|
||||
uint32_t large_state_count;
|
||||
uint32_t production_id_count;
|
||||
uint32_t field_count;
|
||||
uint16_t max_alias_sequence_length;
|
||||
const uint16_t *parse_table;
|
||||
const uint16_t *small_parse_table;
|
||||
const uint32_t *small_parse_table_map;
|
||||
const TSParseActionEntry *parse_actions;
|
||||
const char **symbol_names;
|
||||
const char **field_names;
|
||||
const TSFieldMapSlice *field_map_slices;
|
||||
const TSFieldMapEntry *field_map_entries;
|
||||
const TSSymbolMetadata *symbol_metadata;
|
||||
const TSSymbol *public_symbol_map;
|
||||
const uint16_t *alias_map;
|
||||
const TSSymbol *alias_sequences;
|
||||
const TSLexMode *lex_modes;
|
||||
bool (*lex_fn)(TSLexer *, TSStateId);
|
||||
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
|
||||
TSSymbol keyword_capture_token;
|
||||
struct {
|
||||
const bool *states;
|
||||
const TSSymbol *symbol_map;
|
||||
void *(*create)(void);
|
||||
void (*destroy)(void *);
|
||||
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
|
||||
unsigned (*serialize)(void *, char *);
|
||||
void (*deserialize)(void *, const char *, unsigned);
|
||||
} external_scanner;
|
||||
};
|
||||
|
||||
/*
|
||||
* Lexer Macros
|
||||
*/
|
||||
|
||||
#define START_LEXER() \
|
||||
bool result = false; \
|
||||
bool skip = false; \
|
||||
bool eof = false; \
|
||||
int32_t lookahead; \
|
||||
goto start; \
|
||||
next_state: \
|
||||
lexer->advance(lexer, skip); \
|
||||
start: \
|
||||
skip = false; \
|
||||
lookahead = lexer->lookahead;
|
||||
|
||||
#define ADVANCE(state_value) \
|
||||
{ \
|
||||
state = state_value; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define SKIP(state_value) \
|
||||
{ \
|
||||
skip = true; \
|
||||
state = state_value; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ACCEPT_TOKEN(symbol_value) \
|
||||
result = true; \
|
||||
lexer->result_symbol = symbol_value; \
|
||||
lexer->mark_end(lexer);
|
||||
|
||||
#define END_STATE() return result;
|
||||
|
||||
/*
|
||||
* Parse Table Macros
|
||||
*/
|
||||
|
||||
// Index of a state within the small parse table. Fully parenthesized so the
// subtraction keeps its meaning when the macro is used inside a larger
// expression or is given a compound argument.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
|
||||
|
||||
#define STATE(id) id
|
||||
|
||||
#define ACTIONS(id) id
|
||||
|
||||
#define SHIFT(state_value) \
|
||||
{{ \
|
||||
.shift = { \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.state = state_value \
|
||||
} \
|
||||
}}
|
||||
|
||||
#define SHIFT_REPEAT(state_value) \
|
||||
{{ \
|
||||
.shift = { \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.state = state_value, \
|
||||
.repetition = true \
|
||||
} \
|
||||
}}
|
||||
|
||||
#define SHIFT_EXTRA() \
|
||||
{{ \
|
||||
.shift = { \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.extra = true \
|
||||
} \
|
||||
}}
|
||||
|
||||
#define REDUCE(symbol_val, child_count_val, ...) \
|
||||
{{ \
|
||||
.reduce = { \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.symbol = symbol_val, \
|
||||
.child_count = child_count_val, \
|
||||
__VA_ARGS__ \
|
||||
}, \
|
||||
}}
|
||||
|
||||
#define RECOVER() \
|
||||
{{ \
|
||||
.type = TSParseActionTypeRecover \
|
||||
}}
|
||||
|
||||
#define ACCEPT_INPUT() \
|
||||
{{ \
|
||||
.type = TSParseActionTypeAccept \
|
||||
}}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_PARSER_H_
|
||||
Loading…
Reference in New Issue