From 8aeef1d8eb2524de447d102c5e0b435d4c71a4d7 Mon Sep 17 00:00:00 2001 From: Michael Gran Date: Thu, 18 Nov 2021 08:18:38 -0800 Subject: [PATCH] add temp code for parser --- README.md | 7 ++ make/main.scm | 28 +++++++ pmake | 3 + potato/exceptions.scm | 2 +- potato/parse-lib.scm | 108 ++++++++++++++++++++++++++ potato/parse.scm | 107 +++++++++++++++++++++++++ potato/parser.scm | 176 ++++++++++++++++++++++++++++++++++++++++++ tests/parse.sh | 13 ++++ tests/parser | 160 +++++++++++++++++++++++++++++++++++++- 9 files changed, 602 insertions(+), 2 deletions(-) create mode 100644 make/main.scm create mode 100755 pmake create mode 100644 potato/parse.scm create mode 100644 potato/parser.scm create mode 100755 tests/parse.sh diff --git a/README.md b/README.md index 3bd06ff..7a6cfe1 100644 --- a/README.md +++ b/README.md @@ -208,3 +208,10 @@ target file, based on the filename extensions. as a single space-separated string $$? the prerequisites that are files newer than the target file as a scheme list of strings + +## POSIX Makefile Parser + + Recipes can contain the following parser function + + (parse ...) reads a standard Makefile and creates + rules based on its contents. diff --git a/make/main.scm b/make/main.scm new file mode 100644 index 0000000..fff1645 --- /dev/null +++ b/make/main.scm @@ -0,0 +1,28 @@ +(define-module (make main) + #:use-module (ice-9 getopt-long) + #:export (main) + ) + +(define EXIT_SUCCESS 0) +(define EXIT_NOT_UP_TO_DATE 1) +(define EXIT_FAILURE 2) + +(define option-spec + '((environment-overrides (single-char #\e)) + (makefile (single-char #\f) (value #t)) + (ignore-errors (single-char #\i)) + (keep-going (single-char #\k)) + (dry-run (single-char #\n)) + (print-data-base (single-char #\p)) + (question (single-char #\q)) + (no-builtin-rules (single-char #\r)) + (stop (single-char #\S)) + (silent (single-char #\s)) + (touch (single-char #\t)))) + +(define (main args) + (let ((options (getopt-long args option-spec))) + (write options) + (newline) + EXIT_SUCCESS)) + diff --git a/pmake b/pmake new file mode 100755 index 0000000..0827bd4 --- /dev/null +++ b/pmake @@ -0,0 +1,3 @@ +#!/usr/bin/env sh +exec guile -L . -e '(@ (make main) main)' -s "$0" "$@" +!# diff --git a/potato/exceptions.scm b/potato/exceptions.scm index e147da6..1eed9f6 100644 --- a/potato/exceptions.scm +++ b/potato/exceptions.scm @@ -1,4 +1,4 @@ -(define-module (potato exceptions) +-(define-module (potato exceptions) #:use-module (ice-9 exceptions) #:export (bad-key-type bad-value-type diff --git a/potato/parse-lib.scm b/potato/parse-lib.scm index 4e739ca..1319bd4 100644 --- a/potato/parse-lib.scm +++ b/potato/parse-lib.scm @@ -9,10 +9,16 @@ #:use-module (potato text) #:export( read-line-handle-escaped-newline + string-collapse-continuations string-count-backslashes-at-end string-count-matching-chars-at-end string-find-char-unquote + string-find-preceding-backslashes + string-find-repeated-chars + string-next-token string-remove-comments + string-starts-with? + string-shrink-whitespace )) (define (read-line-handle-escaped-newline) @@ -37,6 +43,42 @@ It returns two values ;; else (values (string-append output line "\n") (1+ nline))))))) +(define* (string-collapse-continuations str #:optional (squash-whitespace? #f)) + "Returns a new string where backslash+newline is discarded, and +backslash+backslash+newline becomes backslash+newline. Any whitespace +after the newline may be squashed to a single space, if +squash-whitespace? is #t." + (let loop ((str str) + (newline-index (string-rindex str #\newline))) + (if (not newline-index) + (string-copy str) + ;; else + (let* ((backslash-index (string-find-preceding-backslashes str newline-index)) + (backslash-count (- newline-index backslash-index))) + (cond + ((even? backslash-count) + ;; We cut the number of backslashes in half, but, keep the newline. + (loop + (string-append + (substring str 0 backslash-index) + (make-string (quotient backslash-count 2) #\\) + (substring str newline-index)) + (string-rindex str #\newline 0 backslash-index))) + + (else + ;; We cut the number of backslashes in half, remove a backslash + ;; and newline, maybe squash any following whitespace. + (loop + (string-append + (substring str 0 backslash-index) + (make-string (quotient backslash-count 2) #\\) + (if squash-whitespace? + (string-shrink-whitespace + (substring str (1+ newline-index)) + 0) + (substring str (1+ newline-index)))) + (string-rindex str #\newline 0 backslash-index)))))))) + (define (string-count-backslashes-at-end str) (string-count-matching-chars-at-end str #\\)) @@ -138,9 +180,75 @@ Returns two values (values i str2) (values #f #f)))) +(define (string-find-preceding-backslashes str i) + "Given I, a position in a string, this returns a position, +of first of a range sequential backslashes that immediately precedes +that position in the string. If no backslashes precede that position, +I is returned." + (let loop ((j i)) + (cond + ((= j 0) + 0) + ((char=? (string-ref str (1- j)) #\\) + (loop (1- j))) + (else + j)))) + +(define (string-find-repeated-chars str c) + "Given a character c, this finds the position of of first instance +of the character in the string. If the character repeats, it returns, +as a pair, the position of the character, and the position after the +run of characters. If the character is not present, it returns #f" + (let ((i (string-index str c))) + (if (not i) + #f + ;; else + (let ((len (string-length str))) + (if (= (1+ i) len) + (cons i (1+ i)) + ;; else + (let loop ((j (1+ i))) + (if (false-if-exception (char=? (string-ref str j) c)) + (loop (1+ j)) + ;; else + (cons i j)))))))) + +(define (string-next-token str i) + "Given a position i, this returns the position of the first +non-whitespace character after i. If the end of line is reached, +it returns the length of the string." + (or + (string-index str + (lambda (c) + (char-set-contains? char-set:blank c)) + i) + (string-length str))) + (define (string-remove-comments str) "Returns a copy of str with any '#' comments removed" (let ((i (string-find-char-unquote str #\#))) (if i (string-take str i) (string-copy str)))) + +(define (string-shrink-whitespace str start) + "Given a string, and a location in a string, this returns a new copy +of string where all the whitespace beginning at location i is replaced +by a single space" + (let ((len (string-length str))) + (if (or (>= start len) + (not (char-set-contains? char-set:blank (string-ref str start)))) + (string-copy str) + ;; else + (let loop ((end start)) + (cond + ((>= end len) + (string-append (substring str 0 start) " ")) + ((char-set-contains? char-set:blank (string-ref str end)) + (loop (1+ end))) + (else + (string-append (substring str 0 start) " " (substring str end)))))))) + + +(define (string-starts-with? str c) + (char=? (string-ref str 0) c)) diff --git a/potato/parse.scm b/potato/parse.scm new file mode 100644 index 0000000..f4c3a57 --- /dev/null +++ b/potato/parse.scm @@ -0,0 +1,107 @@ +(define-module (potato parse) + #:use-module (srfi srfi-1) + #:use-module (ice-9 receive) + #:use-module (system vm trace) + #:use-module (potato exceptions) + #:use-module (potato makevars) + #:use-module (potato rules) + #:use-module (potato text) + #:use-module (potato parse-lib) + #:export (parse _readline)) + +;; A makefile can contain rules, macro definitions, include lines, +;; and comments. + +(define (parse filename) + (with-input-from-file filename _eval #:guess-encoding #t)) + +(define (_eval) + (let ((filenames #f) + (ignoring #f) + (commands '())) + (while #t + (receive (line nlines) + (read-line-handle-escaped-newline) + (cond + ((zero? nlines) + (break)) + + ((string-starts-with? line #\tab) + ;; Shell-command lines + (when filenames + (when ignoring + (continue)) + (set! commands (append commands (list line))))) + + (else + (display + (string-trim-both + (string-remove-comments + (string-collapse-continuations line #t)))) + (newline))))))) + +(define (string-parse-variable-definition str i) + "Parse a string as a variable definition." + (let loop ((i (string-next-token str))) + (cond + ((= i (string-length str)) + (values i 'null)) + + ((char=? (string-ref str i) #\#) + ;; Comments aren't variable definitions. + (values i 'null)) + + ((char=? (string-ref str i) #\$) + ;; This begins a variable expansion reference. + (let* ((openparen (false-if-exception (string-ref str (1+ i)))) + (closeparen (if (eqv? openparen #\() + #\) + (if (eqv? openparen #\{) + #\} + #f)))) + (if (not closeparen) + (values i 'null) + + ;; else, skip over the matching closeparen + (begin + (let ((count 0)) + (while #t + (set! i (1+ i)) + (when (char=? (string-ref str i) openparen) + (set! count (1+ count))) + (when (char=? (string-ref str i) closeparen) + (set! count (1- count)) + (when (zero? count) + (set! i (1+ i)) + (break))))) + + ;; Any whitespace before the operator? + (when (char-set-contains? char-set:blank (string-ref str i)) + (set! wspace #t) + (set! i (string-next-token str i))) + + (cond + ((eqv? (string-ref str i) #\=) + (values (1+ i) 'recursive)) + ((and (eqv? (string-ref str i) #\:) + (eqv? (string-ref str (1+ i)) #\=)) + (values (+ i 2) 'simple)) + ((and (eqv? (string-ref str i) #\+) + (eqv? (string-ref str (1+ i)) #\=)) + (values (+ i 2) 'append)) + ((and (eqv? (string-ref str i) #\?) + (eqv? (string-ref str (1+ i)) #\=)) + (values (+ i 2) 'conditional)) + (else + (values i 'null))))))) + (else + (values i 'null))))) +#| +(define (parse-var-assignment line) + (let ((i (string-next-token line 0))) + (if (= i (string-length line)) + #f + ;; else + (while #t + +|# diff --git a/potato/parser.scm b/potato/parser.scm new file mode 100644 index 0000000..f164d7b --- /dev/null +++ b/potato/parser.scm @@ -0,0 +1,176 @@ +(define-module (potato parser) + #:use-module (ice-9 peg) + #:use-module (srfi srfi-1) + #:use-module (ice-9 rdelim) + #:use-module (system vm trace) + #:use-module (potato exceptions) + #:use-module (potato makevars) + #:use-module (potato rules) + #:use-module (potato text) + #:export (parse)) + +;; A makefile can contain rules, macro definitions, include lines, +;; and comments. + +(define (parse filename) + (with-input-from-file filename parse-input #:guess-encoding #t)) + +(define (last-char str) + (string-ref str (1- (string-length str)))) + +(define (parse-input) + (while #t + (let loop ((line "") + (str (read-line))) + (cond + ((eof-object? str) + (break)) + ((char=? (last-char str) #\\) + (loop (string-append line str) (read-line))) + (else + (parse-line (string-append line str))))))) + +;; For include lines +(define-peg-pattern I_TOK none "include") +(define-peg-pattern I_SPACE none (or " " "\t")) +(define-peg-pattern I_FILENAME_CHAR body (or (range #\a #\z) + (range #\A #\Z) + (range #\0 #\9) + "_" "-" ".")) +(define-peg-pattern I_FILENAME all (+ I_FILENAME_CHAR)) +(define-peg-pattern I_NL none "\n") +(define-peg-pattern I_COMMENT none (and "#" (* peg-any))) +(define-peg-pattern INCLUDE all (and I_TOK + (+ (and (* I_SPACE) + I_FILENAME)) + (* I_SPACE) + (? I_COMMENT))) + +;; For comment lines +(define-peg-pattern C_SPACE none (or " " "\t")) +(define-peg-pattern C_COMMENT none (and "#" (* peg-any))) +(define-peg-pattern COMMENT none (or C_COMMENT + (and (+ C_SPACE) (not-followed-by peg-any)))) + + +(define (parse-line line) + (write (peg:tree (match-pattern INCLUDE line))) + (newline) + (write (peg:tree (match-pattern COMMENT line))) + (newline) + (cond + ((line-is-include? line) + (format #t "INCLUDE: ~S~%" line)) + ((line-is-comment? line) + (format #t "COMMENT: ~S~%" line)) + ((line-is-macro? line) + (format #t "MACRO: ~S~%" line)) + ((line-is-special-target? line) + (format #t "SPECIAL: ~S~%" line)) + ((line-is-inference-rule? line) + (format #t "INFERENCE: ~S~%" line)) + ((line-is-rule? line) + (format #t "RULE: ~S~%" line)) + (else + (format #t "UNKNOWN: ~S~%" line)))) + + + +(define (line-is-include? line) + (and (> (string-length line) 8) + (string= line "include " 0 8))) + +(define (line-is-comment? line) + (or (string-null? (string-trim-both line char-set:whitespace)) + (char=? (string-ref line 0) #\#))) + +(define (line-is-macro? line) + (let ((len (string-length line))) + (let loop ((i 0)) + (if (>= i len) + #f + ;; else + (let ((c (string-ref line i))) + (cond + ((and (zero? i) + (not (char-is-pcs? c))) + #f) + ((and (not (zero? i)) + (char=? #\= c)) + #t) + ((not (char-is-pcs-or-space? c)) + #f) + (else + (loop (+ i 1))))))))) + +(define (line-is-special-target? line) + (or (and (>= (string-length line) 8) + (string= line ".DEFAULT" 0 8)) + (and (>= (string-length line) 8) + (string= line ".IGNORE" 0 7)) + (and (>= (string-length line) 6) + (string= line ".POSIX")) + (and (>= (string-length line) 9) + (string= line ".PRECIOUS" 0 9)) + (and (>= (string-length line) 9) + (string= line ".SCCS_GET" 0 9)) + (and (>= (string-length line) 7) + (string= line ".SILENT" 0 7)))) + +(define (line-is-rule? line) + (let ((len (string-length line))) + (let loop ((i 0)) + (if (>= i len) + #f + ;; else + (let ((c (string-ref line i))) + (cond + ((and (zero? i) + (not (char-is-pcs? c))) + #f) + ((and (not (zero? i)) + (char=? #\: c)) + #t) + ((not (char-is-pcs-or-space? c)) + #f) + (else + (loop (+ i 1))))))))) + +(define (line-is-inference-rule? line) + (let ((len (string-length line))) + (let loop ((i 0) + (dot-count 0)) + (if (>= i len) + #f + ;; else + (let ((c (string-ref line i))) + (cond + ((and (zero? i) + (not (char=? #\. c))) + #f) + ((and (not (zero? i)) + (char=? #\: c)) + (if (or (= dot-count 1) + (= dot-count 2)) + #t + #f)) + ((not (char-is-pcs? c)) + #f) + (else + (loop (+ i 1) + (+ dot-count + (if (char=? c #\.) + 1 + 0)))))))))) + +(define (char-is-pcs? c) + (or (and (char<=? #\a c) (char>=? #\z c)) + (and (char<=? #\A c) (char>=? #\Z c)) + (and (char<=? #\0 c) (char>=? #\9 c)) + (char=? #\. c) + (char=? #\_ c))) + +(define (char-is-pcs-or-space? c) + (or (char-is-pcs? c) + (char=? #\space c))) + diff --git a/tests/parse.sh b/tests/parse.sh new file mode 100755 index 0000000..89dad3e --- /dev/null +++ b/tests/parse.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env sh +exec guile -L . -s "$0" "$@" +!# +(use-modules (potato parse) + ;; (potato parse) + (ice-9 receive) + (srfi srfi-1) + (srfi srfi-64)) +(parse "../guile/Makefile") + + ;; Local Variables: +;; mode: scheme +;; End: diff --git a/tests/parser b/tests/parser index 1c48443..279d608 100755 --- a/tests/parser +++ b/tests/parser @@ -2,7 +2,7 @@ exec guile -L . -s "$0" "$@" !# (use-modules (potato parse-lib) - (potato parse) + ;; (potato parse) (ice-9 receive) (srfi srfi-1) (srfi srfi-64)) @@ -210,7 +210,165 @@ exec guile -L . -s "$0" "$@" (test-end "read-line-handle-escaped-newline") +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(test-begin "string-shrink-whitespace") + +(test-equal "no whitespace at location" + "abc" + (string-shrink-whitespace "abc" 0)) + +(test-equal "single space at beginning of line" + " abc" + (string-shrink-whitespace " abc" 0)) + +(test-equal "single tab at beginning of line" + " abc" + (string-shrink-whitespace "\tabc" 0)) + +(test-equal "multiple spaces at beginning of line" + " abc" + (string-shrink-whitespace " abc" 0)) + +(test-equal "multiple tabs at beginning of line" + " abc" + (string-shrink-whitespace "\t\t\tabc" 0)) + +(test-equal "single space in middle of line" + "ab cd" + (string-shrink-whitespace "ab cd" 2)) + +(test-equal "single tab in middle of line" + "ab cd" + (string-shrink-whitespace "ab\tcd" 2)) + +(test-equal "multiple spaces in middle of line" + "ab cd" + (string-shrink-whitespace "ab cd" 2)) + +(test-equal "multiple tabs in middle of line" + "ab cd" + (string-shrink-whitespace "ab\t\t\tcd" 2)) + +(test-equal "single space at end of line" + "ab " + (string-shrink-whitespace "ab " 2)) + +(test-equal "single tab at end of line" + "ab " + (string-shrink-whitespace "ab\t" 2)) + +(test-equal "multiple spaces at end of line" + "ab " + (string-shrink-whitespace "ab " 2)) + +(test-equal "multiple tabs at end of line" + "ab " + (string-shrink-whitespace "ab\t\t\t" 2)) + +(test-end "string-shrink-whitespace") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(test-begin "string-find-repeated-chars") + +(test-equal "empty string" + #f + (string-find-repeated-chars "" #\a)) + +(test-equal "single-char non-matching string" + #f + (string-find-repeated-chars "a" #\b)) + +(test-equal "single-char matching string" + '(0 . 1) + (string-find-repeated-chars "a" #\a)) + +(test-equal "non-matching string" + #f + (string-find-repeated-chars "abcdef" #\g)) + +(test-equal "matching non-repeating string" + '(2 . 3) + (string-find-repeated-chars "abcdef" #\c)) + +(test-equal "matching repeating string" + '(2 . 5) + (string-find-repeated-chars "abcccdef" #\c)) + +(test-end "string-find-repeated-chars") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(test-begin "string-find-preceding-backslashes") + +(test-equal "empty string" + 0 + (string-find-preceding-backslashes "" 0)) + +(test-equal "single-character string" + 0 + (string-find-preceding-backslashes "a" 0)) + +(test-equal "two-character string w/o backslashes" + 1 + (string-find-preceding-backslashes "ab" 1)) + +(test-equal "two-character string with a backslash" + 0 + (string-find-preceding-backslashes "\\b" 1)) + +(test-equal "three-character string with a backslash" + 1 + (string-find-preceding-backslashes "a\\b" 2)) + +(test-equal "three-character string with two backslash" + 0 + (string-find-preceding-backslashes "\\\\b" 2)) + +(test-end "string-find-preceding-backslashes") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(test-begin "string-collapse-continuations") + +(test-equal "empty string" + "" + (string-collapse-continuations "")) + +(test-equal "string with no newline" + "abc" + (string-collapse-continuations "abc")) + +(test-equal "string with terminal newline" + "abc\n" + (string-collapse-continuations "abc\n")) + +(test-equal "string with medial newline" + "abc\ndef" + (string-collapse-continuations "abc\ndef")) + +(test-equal "string with terminal continuation" + "abc" + (string-collapse-continuations "abc\\\n")) + +(test-equal "string with medial continuation" + "abcdef" + (string-collapse-continuations "abc\\\ndef")) + +(test-equal "string with medial continuation and single whitespace" + "abc def" + (string-collapse-continuations "abc\\\n def")) + +(test-equal "string with medial continuation and multiple whitespace" + "abc def" + (string-collapse-continuations "abc\\\n def")) + +(test-equal "string with medial continuation and multiple whitespace, squash" + "abc def" + (string-collapse-continuations "abc\\\n def" #t)) + +(test-end "string-collapse-continuations") ;; Local Variables: ;; mode: scheme ;; End: