SwiftPEG

一个使用 Swift 语言实现的 解析表达式文法 (Parsing Expression Grammar, PEG),用于生成带备忘录功能的 Packrat 解析器,并支持左递归规则。

包含一个自描述文法实现,可用于描述文法,并随后为这些文法生成解析器。

该库假定已预先实现了适用于匹配文法的适当原始词法分析器 (raw tokenizer),定义了 parser.expect("<some token literal>")parser.expect(kind: .someTokenKind) 的结果。 并且存在一组适当的类型来描述所需的 AST (抽象语法树) 表示,以便在 { actions } 中使用,以在解析期间生成所需的 AST 结构。

灵感来自 Python 创始人 Guido van Rossum 的 PEG 解析 系列博客文章。

示例

从左递归数学表达式的文法文件开始

# Define that the parser being generated is called 'TestGrammarParser'. Assumes it already exists as a subclass of PEGParser<RawTokenizer>
@parserName "TestGrammarParser" ; # @meta-properties can be used to signal grammar processors and code generators of certain properties, and must precede all rules in a grammar

@token NAME ;     # Identifiers must resolve to rules/tokens; @token meta-property forward-declares token identifiers and is detected by GrammarProcessor
@token NUMBER ;
@token NEWLINE ;

start[TestGrammarAST.Expression]:  # Rules can optionally specify their return value within [ square brackets ]
    | expr _=NEWLINE? { expr }     # Optional code with { curly braces } to be executed and returned when alternative matches
    ;

expr[TestGrammarAST.Expr]:
          # v Tokens can be specified as string literals- no need to forward declare
    | expr '+' term { .add(expr, term) }
    | expr '-' term { .sub(expr, term) }
    | term { .term(term) }
    ;

term[TestGrammarAST.Term]:
    | term '*' factor { .mul(term, factor) }
    | term '/' factor { .div(term, factor) }
    | factor { .factor(factor) }
    ;

factor[TestGrammarAST.Factor]:
    | '(' expr ')' { .expr(expr) }
    | atom { .atom(atom) }
    ;

atom[TestGrammarAST.Atom]:
    | NAME { .name(name) }
    | NUMBER { .number(number) }
    ;

并为文法定义了自定义语法树

indirect enum Expr: CustomStringConvertible {
    case add(Self, Term)
    case sub(Self, Term)
    case term(Term)

    var description: String { ... }
}

indirect enum Term: CustomStringConvertible {
    case mul(Self, Factor)
    case div(Self, Factor)
    case factor(Factor)

    var description: String { ... }
}

indirect enum Factor: CustomStringConvertible {
    case expr(Expr)
    case atom(Atom)

    var description: String { ... }
}

enum Atom: CustomStringConvertible {
    case name(Substring)
    case number(Double)

    var description: String { ... }
}

可以使用以下代码生成解析器

let tokenizer = GrammarRawTokenizer(source: grammarString)
let parser = GrammarParser(raw: tokenizer)

guard let grammar = try parser.start(), parser.isEOF() else {
    throw parser.makeSyntaxError()
}

let processor = GrammarProcessor(delegate: nil)
// Process and validate grammar
let processedGrammar = try processor.process(grammar)

let codeGen = SwiftCodeGen(from: processedGrammar)
print(try codeGen.generateParser())

或者通过命令行使用 generate-parser 插件

swift package --allow-writing-to-package-directory generate-parser generate --grammar-file ./grammar.gram --output TestGrammarParser.swift

生成一个扩展解析器的类型定义,如下所示

extension TestGrammarParser {
    /// ```
    /// start[TestGrammarAST.Expr]:
    ///     | expr _=NEWLINE? { expr }
    ///     ;
    /// ```
    @memoized("start")
    @inlinable
    public func __start() throws -> TestGrammarAST.Expr? {
        let mark = self.mark()

        if
            let expr = try self.expr(),
            case _ = try self.NEWLINE()
        {
            return expr
        }

        self.restore(mark)
        return nil
    }

    ...
}

并且与适当实现的词法分析器配对后,可以用于从原始文法解析语法树

let tokenizer = TestGrammarRawTokenizer(source: """
a + b + (c / d) * 10
""")
let parser = TestGrammarParser(raw: tokenizer)

guard let expression = try parser.start(), tokenizer.isEOF else {
    throw parser.makeSyntaxError()
}

dump(expression)

打印结果

▿ Optional(a + b + (c / d) * 10.0)
  ▿ some: a + b + (c / d) * 10.0
    ▿ add: (2 elements)
      ▿ .0: a + b
        ▿ add: (2 elements)
          ▿ .0: a
            ▿ term: a
              ▿ factor: a
                ▿ atom: a
                  - name: "a"
          ▿ .1: b
            ▿ factor: b
              ▿ atom: b
                - name: "b"
      ▿ .1: (c / d) * 10.0
        ▿ mul: (2 elements)
          ▿ .0: (c / d)
            ▿ factor: (c / d)
              ▿ expr: c / d
                ▿ term: c / d
                  ▿ div: (2 elements)
                    ▿ .0: c
                      ▿ factor: c
                        ▿ atom: c
                          - name: "c"
                    ▿ .1: d
                      ▿ atom: d
                        - name: "d"
          ▿ .1: 10.0
            ▿ atom: 10.0
              - number: 10.0

此示例的完整实现可以在 TestGrammarParser.swift 中找到,该文件在单元测试 <代码>testGenerateParser_fullGrammar 中启动,位于 SwiftCodeGenTests.swift