add temp code for parser

This commit is contained in:
Michael Gran 2021-11-18 08:18:38 -08:00
parent 2cdb174189
commit 8aeef1d8eb
9 changed files with 602 additions and 2 deletions

@ -208,3 +208,10 @@ target file, based on the filename extensions.
as a single space-separated string as a single space-separated string
$$? the prerequisites that are files newer than the target file $$? the prerequisites that are files newer than the target file
as a scheme list of strings as a scheme list of strings
## POSIX Makefile Parser
Recipes can contain the following parser function
(parse ...) reads a standard Makefile and creates
rules based on its contents.

28
make/main.scm Normal file

@ -0,0 +1,28 @@
(define-module (make main)
#:use-module (ice-9 getopt-long)
#:export (main)
)
(define EXIT_SUCCESS 0)
(define EXIT_NOT_UP_TO_DATE 1)
(define EXIT_FAILURE 2)
(define option-spec
'((environment-overrides (single-char #\e))
(makefile (single-char #\f) (value #t))
(ignore-errors (single-char #\i))
(keep-going (single-char #\k))
(dry-run (single-char #\n))
(print-data-base (single-char #\p))
(question (single-char #\q))
(no-builtin-rules (single-char #\r))
(stop (single-char #\S))
(silent (single-char #\s))
(touch (single-char #\t))))
(define (main args)
(let ((options (getopt-long args option-spec)))
(write options)
(newline)
EXIT_SUCCESS))

3
pmake Executable file

@ -0,0 +1,3 @@
#!/usr/bin/env sh
exec guile -L . -e '(@ (make main) main)' -s "$0" "$@"
!#

@ -1,4 +1,4 @@
(define-module (potato exceptions) -(define-module (potato exceptions)
#:use-module (ice-9 exceptions) #:use-module (ice-9 exceptions)
#:export (bad-key-type #:export (bad-key-type
bad-value-type bad-value-type

@ -9,10 +9,16 @@
#:use-module (potato text) #:use-module (potato text)
#:export( #:export(
read-line-handle-escaped-newline read-line-handle-escaped-newline
string-collapse-continuations
string-count-backslashes-at-end string-count-backslashes-at-end
string-count-matching-chars-at-end string-count-matching-chars-at-end
string-find-char-unquote string-find-char-unquote
string-find-preceding-backslashes
string-find-repeated-chars
string-next-token
string-remove-comments string-remove-comments
string-starts-with?
string-shrink-whitespace
)) ))
(define (read-line-handle-escaped-newline) (define (read-line-handle-escaped-newline)
@ -37,6 +43,42 @@ It returns two values
;; else ;; else
(values (string-append output line "\n") (1+ nline))))))) (values (string-append output line "\n") (1+ nline)))))))
(define* (string-collapse-continuations str #:optional (squash-whitespace? #f))
"Returns a new string where backslash+newline is discarded, and
backslash+backslash+newline becomes backslash+newline. Any whitespace
after the newline may be squashed to a single space, if
squash-whitespace? is #t."
(let loop ((str str)
(newline-index (string-rindex str #\newline)))
(if (not newline-index)
(string-copy str)
;; else
(let* ((backslash-index (string-find-preceding-backslashes str newline-index))
(backslash-count (- newline-index backslash-index)))
(cond
((even? backslash-count)
;; We cut the number of backslashes in half, but, keep the newline.
(loop
(string-append
(substring str 0 backslash-index)
(make-string (quotient backslash-count 2) #\\)
(substring str newline-index))
(string-rindex str #\newline 0 backslash-index)))
(else
;; We cut the number of backslashes in half, remove a backslash
;; and newline, maybe squash any following whitespace.
(loop
(string-append
(substring str 0 backslash-index)
(make-string (quotient backslash-count 2) #\\)
(if squash-whitespace?
(string-shrink-whitespace
(substring str (1+ newline-index))
0)
(substring str (1+ newline-index))))
(string-rindex str #\newline 0 backslash-index))))))))
(define (string-count-backslashes-at-end str) (define (string-count-backslashes-at-end str)
(string-count-matching-chars-at-end str #\\)) (string-count-matching-chars-at-end str #\\))
@ -138,9 +180,75 @@ Returns two values
(values i str2) (values i str2)
(values #f #f)))) (values #f #f))))
(define (string-find-preceding-backslashes str i)
"Given I, a position in a string, this returns a position,
of first of a range sequential backslashes that immediately precedes
that position in the string. If no backslashes precede that position,
I is returned."
(let loop ((j i))
(cond
((= j 0)
0)
((char=? (string-ref str (1- j)) #\\)
(loop (1- j)))
(else
j))))
(define (string-find-repeated-chars str c)
"Given a character c, this finds the position of of first instance
of the character in the string. If the character repeats, it returns,
as a pair, the position of the character, and the position after the
run of characters. If the character is not present, it returns #f"
(let ((i (string-index str c)))
(if (not i)
#f
;; else
(let ((len (string-length str)))
(if (= (1+ i) len)
(cons i (1+ i))
;; else
(let loop ((j (1+ i)))
(if (false-if-exception (char=? (string-ref str j) c))
(loop (1+ j))
;; else
(cons i j))))))))
(define (string-next-token str i)
"Given a position i, this returns the position of the first
non-whitespace character after i. If the end of line is reached,
it returns the length of the string."
(or
(string-index str
(lambda (c)
(char-set-contains? char-set:blank c))
i)
(string-length str)))
(define (string-remove-comments str) (define (string-remove-comments str)
"Returns a copy of str with any '#' comments removed" "Returns a copy of str with any '#' comments removed"
(let ((i (string-find-char-unquote str #\#))) (let ((i (string-find-char-unquote str #\#)))
(if i (if i
(string-take str i) (string-take str i)
(string-copy str)))) (string-copy str))))
(define (string-shrink-whitespace str start)
"Given a string, and a location in a string, this returns a new copy
of string where all the whitespace beginning at location i is replaced
by a single space"
(let ((len (string-length str)))
(if (or (>= start len)
(not (char-set-contains? char-set:blank (string-ref str start))))
(string-copy str)
;; else
(let loop ((end start))
(cond
((>= end len)
(string-append (substring str 0 start) " "))
((char-set-contains? char-set:blank (string-ref str end))
(loop (1+ end)))
(else
(string-append (substring str 0 start) " " (substring str end))))))))
(define (string-starts-with? str c)
(char=? (string-ref str 0) c))

107
potato/parse.scm Normal file

@ -0,0 +1,107 @@
(define-module (potato parse)
#:use-module (srfi srfi-1)
#:use-module (ice-9 receive)
#:use-module (system vm trace)
#:use-module (potato exceptions)
#:use-module (potato makevars)
#:use-module (potato rules)
#:use-module (potato text)
#:use-module (potato parse-lib)
#:export (parse _readline))
;; A makefile can contain rules, macro definitions, include lines,
;; and comments.
(define (parse filename)
(with-input-from-file filename _eval #:guess-encoding #t))
(define (_eval)
(let ((filenames #f)
(ignoring #f)
(commands '()))
(while #t
(receive (line nlines)
(read-line-handle-escaped-newline)
(cond
((zero? nlines)
(break))
((string-starts-with? line #\tab)
;; Shell-command lines
(when filenames
(when ignoring
(continue))
(set! commands (append commands (list line)))))
(else
(display
(string-trim-both
(string-remove-comments
(string-collapse-continuations line #t))))
(newline)))))))
(define (string-parse-variable-definition str i)
"Parse a string as a variable definition."
(let loop ((i (string-next-token str)))
(cond
((= i (string-length str))
(values i 'null))
((char=? (string-ref str i) #\#)
;; Comments aren't variable definitions.
(values i 'null))
((char=? (string-ref str i) #\$)
;; This begins a variable expansion reference.
(let* ((openparen (false-if-exception (string-ref str (1+ i))))
(closeparen (if (eqv? openparen #\()
#\)
(if (eqv? openparen #\{)
#\}
#f))))
(if (not closeparen)
(values i 'null)
;; else, skip over the matching closeparen
(begin
(let ((count 0))
(while #t
(set! i (1+ i))
(when (char=? (string-ref str i) openparen)
(set! count (1+ count)))
(when (char=? (string-ref str i) closeparen)
(set! count (1- count))
(when (zero? count)
(set! i (1+ i))
(break)))))
;; Any whitespace before the operator?
(when (char-set-contains? char-set:blank (string-ref str i))
(set! wspace #t)
(set! i (string-next-token str i)))
(cond
((eqv? (string-ref str i) #\=)
(values (1+ i) 'recursive))
((and (eqv? (string-ref str i) #\:)
(eqv? (string-ref str (1+ i)) #\=))
(values (+ i 2) 'simple))
((and (eqv? (string-ref str i) #\+)
(eqv? (string-ref str (1+ i)) #\=))
(values (+ i 2) 'append))
((and (eqv? (string-ref str i) #\?)
(eqv? (string-ref str (1+ i)) #\=))
(values (+ i 2) 'conditional))
(else
(values i 'null)))))))
(else
(values i 'null)))))
#|
(define (parse-var-assignment line)
(let ((i (string-next-token line 0)))
(if (= i (string-length line))
#f
;; else
(while #t
|#

176
potato/parser.scm Normal file

@ -0,0 +1,176 @@
(define-module (potato parser)
#:use-module (ice-9 peg)
#:use-module (srfi srfi-1)
#:use-module (ice-9 rdelim)
#:use-module (system vm trace)
#:use-module (potato exceptions)
#:use-module (potato makevars)
#:use-module (potato rules)
#:use-module (potato text)
#:export (parse))
;; A makefile can contain rules, macro definitions, include lines,
;; and comments.
(define (parse filename)
(with-input-from-file filename parse-input #:guess-encoding #t))
(define (last-char str)
(string-ref str (1- (string-length str))))
(define (parse-input)
(while #t
(let loop ((line "")
(str (read-line)))
(cond
((eof-object? str)
(break))
((char=? (last-char str) #\\)
(loop (string-append line str) (read-line)))
(else
(parse-line (string-append line str)))))))
;; For include lines
(define-peg-pattern I_TOK none "include")
(define-peg-pattern I_SPACE none (or " " "\t"))
(define-peg-pattern I_FILENAME_CHAR body (or (range #\a #\z)
(range #\A #\Z)
(range #\0 #\9)
"_" "-" "."))
(define-peg-pattern I_FILENAME all (+ I_FILENAME_CHAR))
(define-peg-pattern I_NL none "\n")
(define-peg-pattern I_COMMENT none (and "#" (* peg-any)))
(define-peg-pattern INCLUDE all (and I_TOK
(+ (and (* I_SPACE)
I_FILENAME))
(* I_SPACE)
(? I_COMMENT)))
;; For comment lines
(define-peg-pattern C_SPACE none (or " " "\t"))
(define-peg-pattern C_COMMENT none (and "#" (* peg-any)))
(define-peg-pattern COMMENT none (or C_COMMENT
(and (+ C_SPACE) (not-followed-by peg-any))))
(define (parse-line line)
(write (peg:tree (match-pattern INCLUDE line)))
(newline)
(write (peg:tree (match-pattern COMMENT line)))
(newline)
(cond
((line-is-include? line)
(format #t "INCLUDE: ~S~%" line))
((line-is-comment? line)
(format #t "COMMENT: ~S~%" line))
((line-is-macro? line)
(format #t "MACRO: ~S~%" line))
((line-is-special-target? line)
(format #t "SPECIAL: ~S~%" line))
((line-is-inference-rule? line)
(format #t "INFERENCE: ~S~%" line))
((line-is-rule? line)
(format #t "RULE: ~S~%" line))
(else
(format #t "UNKNOWN: ~S~%" line))))
(define (line-is-include? line)
(and (> (string-length line) 8)
(string= line "include " 0 8)))
(define (line-is-comment? line)
(or (string-null? (string-trim-both line char-set:whitespace))
(char=? (string-ref line 0) #\#)))
(define (line-is-macro? line)
(let ((len (string-length line)))
(let loop ((i 0))
(if (>= i len)
#f
;; else
(let ((c (string-ref line i)))
(cond
((and (zero? i)
(not (char-is-pcs? c)))
#f)
((and (not (zero? i))
(char=? #\= c))
#t)
((not (char-is-pcs-or-space? c))
#f)
(else
(loop (+ i 1)))))))))
(define (line-is-special-target? line)
(or (and (>= (string-length line) 8)
(string= line ".DEFAULT" 0 8))
(and (>= (string-length line) 8)
(string= line ".IGNORE" 0 7))
(and (>= (string-length line) 6)
(string= line ".POSIX"))
(and (>= (string-length line) 9)
(string= line ".PRECIOUS" 0 9))
(and (>= (string-length line) 9)
(string= line ".SCCS_GET" 0 9))
(and (>= (string-length line) 7)
(string= line ".SILENT" 0 7))))
(define (line-is-rule? line)
(let ((len (string-length line)))
(let loop ((i 0))
(if (>= i len)
#f
;; else
(let ((c (string-ref line i)))
(cond
((and (zero? i)
(not (char-is-pcs? c)))
#f)
((and (not (zero? i))
(char=? #\: c))
#t)
((not (char-is-pcs-or-space? c))
#f)
(else
(loop (+ i 1)))))))))
(define (line-is-inference-rule? line)
(let ((len (string-length line)))
(let loop ((i 0)
(dot-count 0))
(if (>= i len)
#f
;; else
(let ((c (string-ref line i)))
(cond
((and (zero? i)
(not (char=? #\. c)))
#f)
((and (not (zero? i))
(char=? #\: c))
(if (or (= dot-count 1)
(= dot-count 2))
#t
#f))
((not (char-is-pcs? c))
#f)
(else
(loop (+ i 1)
(+ dot-count
(if (char=? c #\.)
1
0))))))))))
(define (char-is-pcs? c)
(or (and (char<=? #\a c) (char>=? #\z c))
(and (char<=? #\A c) (char>=? #\Z c))
(and (char<=? #\0 c) (char>=? #\9 c))
(char=? #\. c)
(char=? #\_ c)))
(define (char-is-pcs-or-space? c)
(or (char-is-pcs? c)
(char=? #\space c)))

13
tests/parse.sh Executable file

@ -0,0 +1,13 @@
#!/usr/bin/env sh
exec guile -L . -s "$0" "$@"
!#
(use-modules (potato parse)
;; (potato parse)
(ice-9 receive)
(srfi srfi-1)
(srfi srfi-64))
(parse "../guile/Makefile")
;; Local Variables:
;; mode: scheme
;; End:

@ -2,7 +2,7 @@
exec guile -L . -s "$0" "$@" exec guile -L . -s "$0" "$@"
!# !#
(use-modules (potato parse-lib) (use-modules (potato parse-lib)
(potato parse) ;; (potato parse)
(ice-9 receive) (ice-9 receive)
(srfi srfi-1) (srfi srfi-1)
(srfi srfi-64)) (srfi srfi-64))
@ -210,7 +210,165 @@ exec guile -L . -s "$0" "$@"
(test-end "read-line-handle-escaped-newline") (test-end "read-line-handle-escaped-newline")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(test-begin "string-shrink-whitespace")
(test-equal "no whitespace at location"
"abc"
(string-shrink-whitespace "abc" 0))
(test-equal "single space at beginning of line"
" abc"
(string-shrink-whitespace " abc" 0))
(test-equal "single tab at beginning of line"
" abc"
(string-shrink-whitespace "\tabc" 0))
(test-equal "multiple spaces at beginning of line"
" abc"
(string-shrink-whitespace " abc" 0))
(test-equal "multiple tabs at beginning of line"
" abc"
(string-shrink-whitespace "\t\t\tabc" 0))
(test-equal "single space in middle of line"
"ab cd"
(string-shrink-whitespace "ab cd" 2))
(test-equal "single tab in middle of line"
"ab cd"
(string-shrink-whitespace "ab\tcd" 2))
(test-equal "multiple spaces in middle of line"
"ab cd"
(string-shrink-whitespace "ab cd" 2))
(test-equal "multiple tabs in middle of line"
"ab cd"
(string-shrink-whitespace "ab\t\t\tcd" 2))
(test-equal "single space at end of line"
"ab "
(string-shrink-whitespace "ab " 2))
(test-equal "single tab at end of line"
"ab "
(string-shrink-whitespace "ab\t" 2))
(test-equal "multiple spaces at end of line"
"ab "
(string-shrink-whitespace "ab " 2))
(test-equal "multiple tabs at end of line"
"ab "
(string-shrink-whitespace "ab\t\t\t" 2))
(test-end "string-shrink-whitespace")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(test-begin "string-find-repeated-chars")
(test-equal "empty string"
#f
(string-find-repeated-chars "" #\a))
(test-equal "single-char non-matching string"
#f
(string-find-repeated-chars "a" #\b))
(test-equal "single-char matching string"
'(0 . 1)
(string-find-repeated-chars "a" #\a))
(test-equal "non-matching string"
#f
(string-find-repeated-chars "abcdef" #\g))
(test-equal "matching non-repeating string"
'(2 . 3)
(string-find-repeated-chars "abcdef" #\c))
(test-equal "matching repeating string"
'(2 . 5)
(string-find-repeated-chars "abcccdef" #\c))
(test-end "string-find-repeated-chars")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(test-begin "string-find-preceding-backslashes")
(test-equal "empty string"
0
(string-find-preceding-backslashes "" 0))
(test-equal "single-character string"
0
(string-find-preceding-backslashes "a" 0))
(test-equal "two-character string w/o backslashes"
1
(string-find-preceding-backslashes "ab" 1))
(test-equal "two-character string with a backslash"
0
(string-find-preceding-backslashes "\\b" 1))
(test-equal "three-character string with a backslash"
1
(string-find-preceding-backslashes "a\\b" 2))
(test-equal "three-character string with two backslash"
0
(string-find-preceding-backslashes "\\\\b" 2))
(test-end "string-find-preceding-backslashes")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(test-begin "string-collapse-continuations")
(test-equal "empty string"
""
(string-collapse-continuations ""))
(test-equal "string with no newline"
"abc"
(string-collapse-continuations "abc"))
(test-equal "string with terminal newline"
"abc\n"
(string-collapse-continuations "abc\n"))
(test-equal "string with medial newline"
"abc\ndef"
(string-collapse-continuations "abc\ndef"))
(test-equal "string with terminal continuation"
"abc"
(string-collapse-continuations "abc\\\n"))
(test-equal "string with medial continuation"
"abcdef"
(string-collapse-continuations "abc\\\ndef"))
(test-equal "string with medial continuation and single whitespace"
"abc def"
(string-collapse-continuations "abc\\\n def"))
(test-equal "string with medial continuation and multiple whitespace"
"abc def"
(string-collapse-continuations "abc\\\n def"))
(test-equal "string with medial continuation and multiple whitespace, squash"
"abc def"
(string-collapse-continuations "abc\\\n def" #t))
(test-end "string-collapse-continuations")
;; Local Variables: ;; Local Variables:
;; mode: scheme ;; mode: scheme
;; End: ;; End: