🔖 编译原理语法制导翻译计算机

前言

设计一个文法,匹配合法的计算式,并返回正确计算式的结果。

一些定义:

  • 语法制导定义(Syntax-directed definitions, SDD)
  • 语法制导翻译方案(Syntax-directed Translation Schema, SDT)

其它一些编译原理相关的前置知识可以参考: 编译原理-语法分析 | 光和尘

文法

对于一个只支持加减乘除、括号、正负数的计算表达式,不难得到其产生式:

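$$
\begin{aligned}
A &\to BD \mid +BD \mid -BD \\
B &\to CE \\
C &\to \text{digit} \mid (A) \\
D &\to +BD \mid -BD \mid \varepsilon \\
E &\to \times CE \mid \div CE \mid \varepsilon
\end{aligned}
$$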

可求得它的 FIRST 集和 FOLLOW 集为:

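$$
\begin{aligned}
\text{FIRST}(A) &= \{\text{digit}, (, +, -\} & \text{FOLLOW}(A) &= \{\$, )\} \\
\text{FIRST}(B) &= \{\text{digit}, (\} & \text{FOLLOW}(B) &= \{\$, ), +, -\} \\
\text{FIRST}(C) &= \{\text{digit}, (\} & \text{FOLLOW}(C) &= \{\$, ), \times, \div, +, -\} \\
\text{FIRST}(D) &= \{+, -, \varepsilon\} & \text{FOLLOW}(D) &= \{\$, )\} \\
\text{FIRST}(E) &= \{\times, \div, \varepsilon\} & \text{FOLLOW}(E) &= \{\$, ), +, -\}
\end{aligned}
$$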

LL(1) 预测分析表:

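| Token | digit | ( | ) | + | − | × | ÷ | $ |
|-------|-------|---|---|---|---|---|---|---|
| A | $A \to BD$ | $A \to BD$ | | $A \to +BD$ | $A \to -BD$ | | | |
| B | $B \to CE$ | $B \to CE$ | | | | | | |
| C | $C \to \text{digit}$ | $C \to (A)$ | | | | | | |
| D | | | $D \to \varepsilon$ | $D \to +BD$ | $D \to -BD$ | | | $D \to \varepsilon$ |
| E | | | $E \to \varepsilon$ | $E \to \varepsilon$ | $E \to \varepsilon$ | $E \to \times CE$ | $E \to \div CE$ | $E \to \varepsilon$ |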

SDD

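| # | 产生式 | 语义规则 |
|---|--------|----------|
| 0 | $A \to BD$ | (1) $D.inh = B.syn$;(2) $A.syn = D.syn$ |
| 1 | $A \to +BD$ | (3) $D.inh = B.syn$;(4) $A.syn = D.syn$ |
| 2 | $A \to -BD$ | (5) $D.inh = -B.syn$;(6) $A.syn = D.syn$ |
| 3 | $B \to CE$ | (7) $E.inh = C.syn$;(8) $B.syn = E.syn$ |
| 4 | $C \to \text{digit}$ | (9) $C.syn = \text{digit}.lexval$ |
| 5 | $C \to (A)$ | (10) $C.syn = A.syn$ |
| 6 | $D \to +BD_1$ | (11) $D_1.inh = D.inh + B.syn$;(12) $D.syn = D_1.syn$ |
| 7 | $D \to -BD_1$ | (13) $D_1.inh = D.inh - B.syn$;(14) $D.syn = D_1.syn$ |
| 8 | $D \to \varepsilon$ | (15) $D.syn = D.inh$ |
| 9 | $E \to \times CE_1$ | (16) $E_1.inh = E.inh \times C.syn$;(17) $E.syn = E_1.syn$ |
| 10 | $E \to \div CE_1$ | (18) $E_1.inh = E.inh \div C.syn$;(19) $E.syn = E_1.syn$ |
| 11 | $E \to \varepsilon$ | (20) $E.syn = E.inh$ |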

程序实现

  • C++

    calculator.cpp  | 288 lines.
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
    175
    176
    177
    178
    179
    180
    181
    182
    183
    184
    185
    186
    187
    188
    189
    190
    191
    192
    193
    194
    195
    196
    197
    198
    199
    200
    201
    202
    203
    204
    205
    206
    207
    208
    209
    210
    211
    212
    213
    214
    215
    216
    217
    218
    219
    220
    221
    222
    223
    224
    225
    226
    227
    228
    229
    230
    231
    232
    233
    234
    235
    236
    237
    238
    239
    240
    241
    242
    243
    244
    245
    246
    247
    248
    249
    250
    251
    252
    253
    254
    255
    256
    257
    258
    259
    260
    261
    262
    263
    264
    265
    266
    267
    268
    269
    270
    271
    272
    273
    274
    275
    276
    277
    278
    279
    280
    281
    282
    283
    284
    285
    286
    287
    288
    #include <algorithm>
    #include <cctype>
    #include <climits>
    #include <cstdio>
    #include <cstring>
    #include <exception>
    #include <iostream>
    #include <string>
    #include <vector>
    using namespace std;
    // One grammar symbol on the parse path: `id` is its symbol code, `syn` and
    // `inh` carry the synthesized and inherited attribute values of the SDD.
    struct node {
        int id;
        int syn;
        int inh;

        // Every field defaults to 0, so `node()` and `node(code)` both work.
        node(int id = 0, int syn = 0, int inh = 0) {
            this->id = id;
            this->syn = syn;
            this->inh = inh;
        }
    };
    // Character -> symbol code. Terminals get positive codes, the non-terminals
    // A..E get negative codes and 0 means "not part of the grammar". The element
    // type is explicitly signed: plain `char` is unsigned on some platforms, where
    // the negative non-terminal codes would silently turn into large positive ones.
    signed char ll1Idx[128];
    // LL(1) predictive table: [non-terminal number][terminal code] -> production
    // id, or -1 when there is no entry.
    int ll1Table[10][10];
    // Right-hand side of every production as a list of symbol codes. Production
    // ids run from 0 to 11, so the table needs 12 slots (10 was too small and
    // made the writes to ssdTable[10] run past the end of the array).
    std::vector<int> ssdTable[12];
    // Returns the symbol code stored in ll1Idx for character `c`, or 0 when `c`
    // lies outside the table. Going through unsigned char keeps a negative or
    // non-ASCII `char` from indexing before or past the array.
    inline int idx(char c) {
        const unsigned char pos = static_cast<unsigned char>(c);
        if (pos >= sizeof ll1Idx / sizeof ll1Idx[0]) return 0;
        return ll1Idx[pos];
    }
    // Resets and fills the character lookup (ll1Idx) and the LL(1) predictive
    // table (ll1Table).
    inline void initLL1Table() {
        memset(ll1Table, -1, sizeof ll1Table);
        memset(ll1Idx, 0, sizeof ll1Idx);

        // Terminals: every digit shares code 1, the remaining ones are numbered
        // 2..8 in the order they appear in `terminals`.
        for (char digit = '0'; digit <= '9'; ++digit) {
            ll1Idx[static_cast<unsigned char>(digit)] = 1;
        }
        const char terminals[] = "()+-*/$";
        for (int i = 0; terminals[i] != '\0'; ++i) {
            ll1Idx[static_cast<unsigned char>(terminals[i])] = 2 + i;
        }

        // Non-terminals A..E are numbered -1..-5.
        const char nonTerminals[] = "ABCDE";
        for (int i = 0; nonTerminals[i] != '\0'; ++i) {
            ll1Idx[static_cast<unsigned char>(nonTerminals[i])] = -(i + 1);
        }

        // {non-terminal row, terminal column, production id}
        static const int cells[][3] = {
            {1, 1, 0},  {1, 2, 0},               // 0: A --> BD
            {1, 4, 1},                           // 1: A --> +BD
            {1, 5, 2},                           // 2: A --> -BD
            {2, 1, 3},  {2, 2, 3},               // 3: B --> CE
            {3, 1, 4},                           // 4: C --> digit
            {3, 2, 5},                           // 5: C --> (A)
            {4, 4, 6},                           // 6: D --> +BD
            {4, 5, 7},                           // 7: D --> -BD
            {4, 3, 8},  {4, 8, 8},               // 8: D --> epsilon
            {5, 6, 9},                           // 9: E --> *CE
            {5, 7, 10},                          // 10: E --> /CE
            {5, 3, 11}, {5, 4, 11},              // 11: E --> epsilon
            {5, 5, 11}, {5, 8, 11},
        };
        const int cellCount = sizeof cells / sizeof cells[0];
        for (int k = 0; k < cellCount; ++k) {
            ll1Table[cells[k][0]][cells[k][1]] = cells[k][2];
        }
    }
    // Rebuilds ssdTable: entry p holds the symbol codes (via idx) of the
    // right-hand side of production p. Must run after initLL1Table(), because
    // idx() reads the lookup that function fills.
    //
    // NOTE(review): production ids 9 and 10 are written below, so ssdTable has to
    // be declared with more than 10 slots.
    inline void initSSDTable() {
        // Clear every slot the array actually has instead of a hard-coded count,
        // so a second call never appends to stale contents.
        const int slots = sizeof ssdTable / sizeof ssdTable[0];
        for (int i = 0; i < slots; ++i) ssdTable[i].clear();

        // Right-hand sides spelled with one character per symbol; '0' stands for
        // the digit terminal and "" for an epsilon production.
        static const char* const bodies[] = {
            "BD",   // 0: A --> BD
            "+BD",  // 1: A --> +BD
            "-BD",  // 2: A --> -BD
            "CE",   // 3: B --> CE
            "0",    // 4: C --> digit
            "(A)",  // 5: C --> (A)
            "+BD",  // 6: D --> +BD
            "-BD",  // 7: D --> -BD
            "",     // 8: D --> epsilon
            "*CE",  // 9: E --> *CE
            "/CE",  // 10: E --> /CE
            "",     // 11: E --> epsilon
        };
        const int productions = sizeof bodies / sizeof bodies[0];
        for (int p = 0; p < productions; ++p) {
            for (const char* sym = bodies[p]; *sym != '\0'; ++sym) {
                ssdTable[p].push_back(idx(*sym));
            }
        }
    }
    // Parses the run of decimal digits starting at `s` and advances `s` past it.
    // Returns 0 without moving `s` when the first character is not a digit.
    // Throws a `const char*` message when the value does not fit in an int
    // (the unchecked accumulation would be signed overflow, i.e. undefined).
    int getnum(const char*& s) {
        int num = 0;
        // isdigit() is only defined for unsigned char values (and EOF), so the
        // character is converted before the call.
        for (; isdigit(static_cast<unsigned char>(*s)); ++s) {
            const int digit = *s - '0';
            if (num > (INT_MAX - digit) / 10) throw "Number out of range";
            num = num * 10 + digit;
        }
        return num;
    }
    // Symbol code of the end marker '$'. It used to be computed with idx('$')
    // during static initialization, i.e. before initLL1Table() had filled the
    // lookup, which always produced 0. The value must match the code that
    // initLL1Table() assigns to '$'.
    const int endsym = 8;
    // Recursive predictive parser and evaluator.
    //
    // s   - cursor into the whitespace-free, '$'-terminated input; advanced past
    //       everything that is consumed.
    // sy  - symbol to derive. Its `inh` must be set by the caller; its `syn` is
    //       filled in here.
    // cur - recursion depth; passed along but not otherwise used.
    //
    // Throws a `const char*` message on a syntax error or on division by zero.
    void calculate(const char*& s, node& sy, int cur) {
        const int id = idx(*s);
        // Only terminals may appear in the input. The letters A..E map to the
        // negative non-terminal codes and must not be accepted as tokens:
        // otherwise "A" would match the start symbol and a negative code would
        // be used as a column index into ll1Table.
        if (id <= 0) throw "Syntax Error";
        if (sy.id == endsym) return;

        // Terminal that matches the lookahead: consume it.
        if (sy.id == id) {
            if (id == 1) {
                sy.syn = getnum(s);
            } else {
                ++s;
            }
            return;
        }
        // Terminal that does not match the lookahead.
        if (sy.id > 0) throw "Syntax Error!";

        const int Mid = ll1Table[-sy.id][id];
        if (Mid < 0) throw "Syntax Error!";

        // No production has more than three symbols on its right-hand side.
        node sym[4];
        const std::size_t width = ssdTable[Mid].size();
        for (std::size_t i = 0; i < width; ++i) sym[i].id = ssdTable[Mid][i];
        // The first symbol never needs an inherited attribute, so it is derived
        // up front for every non-empty production.
        if (width > 0) calculate(s, sym[0], cur + 1);

        switch (Mid) {
            // 0: A --> BD
            case 0:
                sym[1].inh = sym[0].syn;
                calculate(s, sym[1], cur + 1);
                sy.syn = sym[1].syn;
                break;
            // 1: A --> +BD
            case 1:
                calculate(s, sym[1], cur + 1);
                sym[2].inh = sym[1].syn;
                calculate(s, sym[2], cur + 1);
                sy.syn = sym[2].syn;
                break;
            // 2: A --> -BD
            case 2:
                calculate(s, sym[1], cur + 1);
                sym[2].inh = -sym[1].syn;
                calculate(s, sym[2], cur + 1);
                sy.syn = sym[2].syn;
                break;
            // 3: B --> CE
            case 3:
                sym[1].inh = sym[0].syn;
                calculate(s, sym[1], cur + 1);
                sy.syn = sym[1].syn;
                break;
            // 4: C --> digit
            case 4:
                sy.syn = sym[0].syn;
                break;
            // 5: C --> (A)
            case 5:
                calculate(s, sym[1], cur + 1);
                sy.syn = sym[1].syn;
                calculate(s, sym[2], cur + 1);
                break;
            // 6: D --> +BD
            case 6:
                calculate(s, sym[1], cur + 1);
                sym[2].inh = sy.inh + sym[1].syn;
                calculate(s, sym[2], cur + 1);
                sy.syn = sym[2].syn;
                break;
            // 7: D --> -BD
            case 7:
                calculate(s, sym[1], cur + 1);
                sym[2].inh = sy.inh - sym[1].syn;
                calculate(s, sym[2], cur + 1);
                sy.syn = sym[2].syn;
                break;
            // 8: D --> epsilon
            case 8:
                sy.syn = sy.inh;
                break;
            // 9: E --> *CE
            case 9:
                calculate(s, sym[1], cur + 1);
                sym[2].inh = sy.inh * sym[1].syn;
                calculate(s, sym[2], cur + 1);
                sy.syn = sym[2].syn;
                break;
            // 10: E --> /CE
            case 10:
                calculate(s, sym[1], cur + 1);
                // Integer division by zero is undefined (typically a crash);
                // report it like any other input error instead.
                if (sym[1].syn == 0) throw "Division by zero";
                sym[2].inh = sy.inh / sym[1].syn;
                calculate(s, sym[2], cur + 1);
                sy.syn = sym[2].syn;
                break;
            // 11: E --> epsilon
            case 11:
                sy.syn = sy.inh;
                break;
        }
    }
    // Reads one expression per line from stdin, evaluates it and prints either
    // "<expression> = <value>" followed by a status line, or the error message.
    int main() {
        std::string in;
        std::string coin;
        initLL1Table();
        initSSDTable();
        while (std::getline(std::cin, in)) {
            // '$' marks the end of the input for the parser.
            in.push_back('$');
            const int len = in.length();
            // Strip whitespace so the parser only ever sees grammar characters.
            coin.clear();
            for (int i = 0; i < len; ++i) {
                const char ch = in[i];
                if (ch != ' ' && ch != '\t' && ch != '\n') coin.push_back(ch);
            }
            try {
                node sy = node(idx('A'));
                const char* s = coin.c_str();
                calculate(s, sy, 0);
                // The parser stops as soon as the start symbol is derived. If
                // the cursor is not on the final '$', part of the line was never
                // consumed (e.g. "1)" or "1$2") and the input is invalid.
                if (s[0] != '$' || s[1] != '\0') throw "Syntax Error!";
                const int ans = sy.syn;
                // Echo the line as typed, without the appended '$'.
                for (int i = 0; i < len - 1; ++i) putchar(in[i]);
                printf(" = %d\n", ans);
                printf("succuss!\n");
            } catch (const char* str) {
                puts(str);
            }
        }
        return 0;
    }
  • Typescript (可以直接使用 @algorithm.ts/calculate)

    calculator.ts  | 242 lines.
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
    175
    176
    177
    178
    179
    180
    181
    182
    183
    184
    185
    186
    187
    188
    189
    190
    191
    192
    193
    194
    195
    196
    197
    198
    199
    200
    201
    202
    203
    204
    205
    206
    207
    208
    209
    210
    211
    212
    213
    214
    215
    216
    217
    218
    219
    220
    221
    222
    223
    224
    225
    226
    227
    228
    229
    230
    231
    232
    233
    234
    235
    236
    237
    238
    239
    240
    241
    242
    /**
    * Numeric codes of the grammar symbols.
    * Terminals have positive codes; they are used as column indices of ll1Table.
    * Non-terminals have negative codes; the negated code is the row index of
    * ll1Table.
    */
    export enum TokenSymbol {
    // Terminals.
    DIGIT = 1,
    OPEN_PAREN = 2,
    CLOSE_PAREN = 3,
    PLUS = 4,
    MINUS = 5,
    MULTI = 6,
    DIVIDE = 7,
    END = 8,
    // Non-terminals.
    A = -1,
    B = -2,
    C = -3,
    D = -4,
    E = -5,
    }
    /**
    * Lookup from a single character to its TokenSymbol code: the ten digits all
    * map to DIGIT, each operator / parenthesis / '$' maps to its own terminal,
    * and the letters A..E map to the non-terminals. The object is frozen, so it
    * cannot be modified after creation.
    */
    const ll1IdxMap: Record<string, TokenSymbol> = Object.freeze({
    '0': TokenSymbol.DIGIT,
    '1': TokenSymbol.DIGIT,
    '2': TokenSymbol.DIGIT,
    '3': TokenSymbol.DIGIT,
    '4': TokenSymbol.DIGIT,
    '5': TokenSymbol.DIGIT,
    '6': TokenSymbol.DIGIT,
    '7': TokenSymbol.DIGIT,
    '8': TokenSymbol.DIGIT,
    '9': TokenSymbol.DIGIT,
    '(': TokenSymbol.OPEN_PAREN,
    ')': TokenSymbol.CLOSE_PAREN,
    '+': TokenSymbol.PLUS,
    '-': TokenSymbol.MINUS,
    '*': TokenSymbol.MULTI,
    '/': TokenSymbol.DIVIDE,
    $: TokenSymbol.END,
    A: TokenSymbol.A,
    B: TokenSymbol.B,
    C: TokenSymbol.C,
    D: TokenSymbol.D,
    E: TokenSymbol.E,
    })
    /**
     * Symbol code of the character `c`, read from ll1IdxMap.
     * For a character that is not a key of the map the lookup yields
     * `undefined` at runtime, despite the declared return type.
     */
    export function idx(c: string): number {
      return ll1IdxMap[c]
    }
    /**
     * Right-hand side of every production, indexed by production id. Each body is
     * written with one character per symbol ('0' stands for the digit terminal,
     * '' for an epsilon production) and translated to symbol codes with idx.
     */
    export const sddTable: number[][] = [
      'BD', // 0: A --> BD
      '+BD', // 1: A --> +BD
      '-BD', // 2: A --> -BD
      'CE', // 3: B --> CE
      '0', // 4: C --> digit
      '(A)', // 5: C --> (A)
      '+BD', // 6: D --> +BD
      '-BD', // 7: D --> -BD
      '', // 8: D --> \varepsilon
      '*CE', // 9: E --> *CE
      '/CE', // 10: E --> /CE
      '', // 11: E --> \varepsilon
    ].map(body => {
      const codes: number[] = []
      for (let i = 0; i < body.length; ++i) {
        codes.push(idx(body[i]))
      }
      return codes
    })
    // Number of non-terminals: A, B, C, D, E
    export const MAX_TOKENS = 5
    // Number of terminals: digit, (, ), +, -, *, /, $
    export const MAX_SYMBOLS = 8
    // LL(1) predictive table: ll1Table[non-terminal number][terminal code] is the
    // production id to apply, or -1 when there is no entry. Row 0 and column 0
    // are unused because symbol numbering starts at 1.
    export const ll1Table: Int8Array[] = Array.from(
      { length: MAX_TOKENS + 1 },
      () => new Int8Array(MAX_SYMBOLS + 1).fill(-1)
    )
    // Populate the table from a list of [row, column, production id] cells.
    {
      const cells: ReadonlyArray<[number, number, number]> = [
        // 0: A -> BD
        [1, 1, 0],
        [1, 2, 0],
        // 1: A -> +BD
        [1, 4, 1],
        // 2: A -> -BD
        [1, 5, 2],
        // 3: B --> CE
        [2, 1, 3],
        [2, 2, 3],
        // 4: C --> digit
        [3, 1, 4],
        // 5: C --> (A)
        [3, 2, 5],
        // 6: D --> +BD
        [4, 4, 6],
        // 7: D --> -BD
        [4, 5, 7],
        // 8: D --> \varepsilon
        [4, 3, 8],
        [4, 8, 8],
        // 9: E --> *CE
        [5, 6, 9],
        // 10: E --> /CE
        [5, 7, 10],
        // 11: E --> \varepsilon
        [5, 3, 11],
        [5, 4, 11],
        [5, 5, 11],
        [5, 8, 11],
      ]
      cells.forEach(([row, column, production]) => {
        ll1Table[row][column] = production
      })
    }
    /**
     * Reads the run of decimal digits of `s` that begins at index `start`.
     *
     * @returns `[next, value]`: the index of the first character after the run
     *   and the number the digits spell. When there is no digit at `start`,
     *   `next` equals `start` and `value` is 0.
     */
    export function getNum(s: string, start: number): [number, number] {
      const digit = /\d/
      let pos: number = start
      let value = 0
      while (pos < s.length) {
        const ch = s[pos]
        if (!digit.test(ch)) break
        value = value * 10 + Number(ch)
        pos += 1
      }
      return [pos, value]
    }
    /**
     * Evaluates an arithmetic expression made of non-negative integer literals,
     * `+ - * /`, parentheses and an optional leading sign, using the LL(1) table
     * and the syntax-directed definition of the grammar.
     *
     * @param rawExpression The expression; whitespace is ignored.
     * @returns The value of the expression, or `NaN` when it is not a valid
     *   expression.
     */
    export function calculate(rawExpression: string): number {
      let cur = 0
      const expression = rawExpression.replace(/[\s]+/g, '')
      const result: number = dfs(idx('A'), 0, 0)
      // Anything left over after deriving the start symbol is a syntax error.
      return cur === expression.length ? result : Number.NaN

      /**
       * Derives symbol `id` starting at position `cur`.
       * `syn` is what a matched non-digit terminal returns, `inh` is the
       * inherited attribute of the symbol. Returns the synthesized attribute, or
       * NaN on a syntax error.
       */
      function dfs(id: number, syn: number, inh: number): number {
        if (cur === expression.length) {
          // Only D and E could be parsed as \varepsilon
          if (id === TokenSymbol.D || id === TokenSymbol.E) return inh
          return Number.NaN
        }

        const id0 = idx(expression[cur])

        // Unrecognized symbol. The letters A..E map to the (negative)
        // non-terminal codes; they are not tokens and must be rejected here,
        // otherwise "A" would match the start symbol and a negative code would
        // be used as a column index of ll1Table.
        if (id0 === undefined || id0 <= 0) return Number.NaN

        // Matched a terminal.
        if (id === id0) {
          // Matched digits.
          if (id0 === TokenSymbol.DIGIT) {
            const [nextCur, value] = getNum(expression, cur)
            // No valid digit found.
            if (cur === nextCur) return Number.NaN
            cur = nextCur
            return value
          }
          cur += 1
          return syn
        }

        // Syntax error: a terminal was expected but something else was found.
        if (id > 0) return Number.NaN

        const ssdId = ll1Table[-id][id0]
        if (ssdId < 0) return Number.NaN

        const tokens: ReadonlyArray<number> = sddTable[ssdId]
        // The first symbol never needs an inherited attribute, so it is derived
        // up front for every non-empty production.
        const syn0: number = tokens.length > 0 ? dfs(tokens[0], 0, 0) : 0
        switch (ssdId) {
          // 0: A --> BD
          case 0:
            return dfs(tokens[1], 0, syn0)
          // 1: A --> +BD
          case 1: {
            const val1: number = dfs(tokens[1], 0, 0)
            return dfs(tokens[2], 0, val1)
          }
          // 2: A --> -BD
          case 2: {
            const val1: number = dfs(tokens[1], 0, 0)
            return dfs(tokens[2], 0, -val1)
          }
          // 3: B --> CE
          case 3:
            return dfs(tokens[1], 0, syn0)
          // 4: C --> digit
          case 4:
            return syn0
          // 5: C --> (A)
          case 5: {
            const inner: number = dfs(tokens[1], 0, 0)
            // The closing parenthesis is mandatory. Its match result used to be
            // discarded, so an expression cut off before ')' (e.g. "(1") was
            // accepted.
            const closed: number = dfs(tokens[2], 0, 0)
            return Number.isNaN(closed) ? Number.NaN : inner
          }
          // 6: D --> +BD
          case 6: {
            const val1: number = dfs(tokens[1], 0, 0)
            return dfs(tokens[2], 0, inh + val1)
          }
          // 7: D --> -BD
          case 7: {
            const val1: number = dfs(tokens[1], 0, 0)
            return dfs(tokens[2], 0, inh - val1)
          }
          // 8: D --> \varepsilon
          case 8:
            return inh
          // 9: E --> *CE
          case 9: {
            const val1: number = dfs(tokens[1], 0, 0)
            return dfs(tokens[2], 0, inh * val1)
          }
          // 10: E --> /CE
          case 10: {
            const val1: number = dfs(tokens[1], 0, 0)
            return dfs(tokens[2], 0, inh / val1)
          }
          // 11: E --> \varepsilon
          case 11:
            return inh
        }
        return 0
      }
    }
© 2017-2025 光和尘有花满渚、有酒盈瓯

Comments