一个使用 Swift 语言实现的 解析表达式文法 (Parsing Expression Grammar, PEG),用于生成带备忘录功能的 Packrat 解析器,并支持左递归规则。
包含一个自描述文法实现,可用于描述文法,并随后为这些文法生成解析器。
该库假定已预先实现了适用于匹配文法的适当原始词法分析器 (raw tokenizer),定义了 parser.expect("<some token literal>")
和 parser.expect(kind: .someTokenKind)
的结果。 并且存在一组适当的类型来描述所需的 AST (抽象语法树) 表示,以便在 { actions }
中使用,以在解析期间生成所需的 AST 结构。
灵感来自 Python 创始人 Guido van Rossum 的 PEG 解析 系列博客文章。
从左递归数学表达式的文法文件开始
# Define that the parser being generated is called 'TestGrammarParser'. Assumes it already exists as a subclass of PEGParser<RawTokenizer>
@parserName "TestGrammarParser" ; # @meta-properties can be used to signal grammar processors and code generators of certain properties, and must precede all rules in a grammar
@token NAME ; # Identifiers must resolve to rules/tokens; @token meta-property forward-declares token identifiers and is detected by GrammarProcessor
@token NUMBER ;
@token NEWLINE ;
start[TestGrammarAST.Expression]: # Rules can optionally specify their return value within [ square brackets ]
| expr _=NEWLINE? { expr } # Optional code with { curly braces } to be executed and returned when alternative matches
;
expr[TestGrammarAST.Expr]:
# v Tokens can be specified as string literals- no need to forward declare
| expr '+' term { .add(expr, term) }
| expr '-' term { .sub(expr, term) }
| term { .term(term) }
;
term[TestGrammarAST.Term]:
| term '*' factor { .mul(term, factor) }
| term '/' factor { .div(term, factor) }
| factor { .factor(factor) }
;
factor[TestGrammarAST.Factor]:
| '(' expr ')' { .expr(expr) }
| atom { .atom(atom) }
;
atom[TestGrammarAST.Atom]:
| NAME { .name(name) }
| NUMBER { .number(number) }
;
并为文法定义了自定义语法树
indirect enum Expr: CustomStringConvertible {
case add(Self, Term)
case sub(Self, Term)
case term(Term)
var description: String { ... }
}
indirect enum Term: CustomStringConvertible {
case mul(Self, Factor)
case div(Self, Factor)
case factor(Factor)
var description: String { ... }
}
indirect enum Factor: CustomStringConvertible {
case expr(Expr)
case atom(Atom)
var description: String { ... }
}
enum Atom: CustomStringConvertible {
case name(Substring)
case number(Double)
var description: String { ... }
}
可以使用以下代码生成解析器
let tokenizer = GrammarRawTokenizer(source: grammarString)
let parser = GrammarParser(raw: tokenizer)
guard let grammar = try parser.start(), parser.isEOF() else {
throw parser.makeSyntaxError()
}
let processor = GrammarProcessor(delegate: nil)
// Process and validate grammar
let processedGrammar = try processor.process(grammar)
let codeGen = SwiftCodeGen(from: processedGrammar)
print(try codeGen.generateParser())
或者通过命令行使用 generate-parser 插件
swift package --allow-writing-to-package-directory generate-parser generate --grammar-file ./grammar.gram --output TestGrammarParser.swift
生成一个扩展解析器的类型定义,如下所示
extension TestGrammarParser {
/// ```
/// start[TestGrammarAST.Expr]:
/// | expr _=NEWLINE? { expr }
/// ;
/// ```
@memoized("start")
@inlinable
public func __start() throws -> TestGrammarAST.Expr? {
let mark = self.mark()
if
let expr = try self.expr(),
case _ = try self.NEWLINE()
{
return expr
}
self.restore(mark)
return nil
}
...
}
并且与适当实现的词法分析器配对后,可以用于从原始文法解析语法树
let tokenizer = TestGrammarRawTokenizer(source: """
a + b + (c / d) * 10
""")
let parser = TestGrammarParser(raw: tokenizer)
guard let expression = try parser.start(), tokenizer.isEOF else {
throw parser.makeSyntaxError()
}
dump(expression)
打印结果
▿ Optional(a + b + (c / d) * 10.0)
▿ some: a + b + (c / d) * 10.0
▿ add: (2 elements)
▿ .0: a + b
▿ add: (2 elements)
▿ .0: a
▿ term: a
▿ factor: a
▿ atom: a
- name: "a"
▿ .1: b
▿ factor: b
▿ atom: b
- name: "b"
▿ .1: (c / d) * 10.0
▿ mul: (2 elements)
▿ .0: (c / d)
▿ factor: (c / d)
▿ expr: c / d
▿ term: c / d
▿ div: (2 elements)
▿ .0: c
▿ factor: c
▿ atom: c
- name: "c"
▿ .1: d
▿ atom: d
- name: "d"
▿ .1: 10.0
▿ atom: 10.0
- number: 10.0
此示例的完整实现可以在 TestGrammarParser.swift 中找到,该文件在单元测试 <代码>testGenerateParser_fullGrammar 中启动,位于 SwiftCodeGenTests.swift。