编译原理实验二

实验二：语法分析

实验目的

实现算术表达式的语法分析器

实验内容

1、PL/0文法：

在词法规则基础上，引入一组非终结符和产生式集合构成PL/0语言的上下文无关文法。

VN = { program, block, statement, condition, expression, term, factor }

其中：program 为开始符号；

VT = { ident, number, "const", "var", "procedure", "call", "begin", "end", "if", "then", "while", "do", "odd", ".", ",", "=", ";", ":=", "#", "<", ">", "+", "-", "*", "/", "(", ")" }

其中：ident 代表标识符，number 代表数值，双引号括起来的符号是源程序中出现的原始字符串（包括关键字、算符等），每个对应一个单词种别。

下面给出消除左递归和回溯的PL/0的EBNF文法，作为构造递归下降分析程序时的参考。

program → block "."

block → constdecl vardecl procdecl statement

constdecl → ["const" constitem {"," constitem} ";"] 

constitem → ident "=" number

vardecl → ["var" ident {"," ident} ";"] 

procdecl → {"procedure" ident ";" block ";"}

statement → assignstmt 

    | callstmt 

    | compstmt 

    | ifstmt 

    | whilestmt

assignstmt → [ident ":=" expression]

callstmt → ["call" ident]

compstmt → ["begin" statement {";" statement} "end"]

ifstmt → ["if" condition "then" statement]

whilestmt → ["while" condition "do" statement]

condition → "odd" expression 

    | expression ("="|"#"|"<"|">") expression

expression → term { ("+"|"-") term}

term → factor {("*"|"/") factor}

factor → [ "+"|"-"] (ident | number | "(" expression ")")

实验步骤

（1）按照EBNF文法设计语法分析程序的整体结构；

（2）针对每个语法单位编写相应的子程序，完成递归下降分析程序。

实验数据记录

写出给定文法中每个非终结符的FIRST和FOLLOW集。

FIRST集：

program= {const, var, procedure, ident, call, begin, if, while, .}
block={const, var, procedure, ident, call, begin, if, while, ε}
statement={ident, ε,call,begin,if,while}
condition={odd, +, -, ident, number, (}
expression={+, -, ident, number, (}
term={+, -, ident, number, (}
factor={+, -, ident, number, (}
constdecl={const, ε}
vardecl={var, ε}
procdecl={procedure, ε}
constitem={ident}
assignstmt={ident, ε}
callstmt={call, ε}
compstmt={begin, ε}
ifstmt={if, ε}
whilestmt={while, ε}

FOLLOW集：

program={#}
block={.,then,do, = , # ,< , > ,+ , - , ）, * , / , ε, { , } , ; ,end}
constdecl={const,.,then,do, = , # ,< , > ,+ , - , ）, * , / , ε, { , } , ; ,end}
constitem={, , ;}
vardecl={.,then,do, = , # ,< , > ,+ , - , ）, * , / , ε, { , } , ; ,end}
procdecl={procedure ε}
statement={.,then,do, = , # ,< , > ,+ , - , ）, * , / , ε, { , } , ; ,end}
assignstmt={then,do, = , # ,< , > ,+ , - , ）, * , / }
callstmt={ ε}
compstmt={ ε}
ifstmt={.,then,do, = , # ,< , > ,+ , - , ）, * , / , ε, { , } , ; ,end}
whilestmt={.,then,do, = , # ,< , > ,+ , - , ）, * , / , ε, { , } , ; ,end}
condition={then,do,= , # ,< , > ,+ , - , ）, * , / }
expression={then,do, = , # ,< , > ,+ , - , ）, * , / }
term={+ , - ,= , # ,< , > , ) , * , / ,then,do}
factor={+ , - ,= , # ,< , > , ) , * , / ,then,do}

语法分析的结果

写出一段有代表性的程序并加以说明

// constdecl -> ["const" constitem {"," constitem} ";"]
// Parses an optional constant declaration. When the next token is not "const"
// (or no token is left) the declaration is absent and nothing is consumed.
void parse_constdecl()
{
    // True when a token remains and equals the lexeme; never reads past the end of tokens.
    const auto nextIs = [](const char* lexeme) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == lexeme;
    };

    if (!nextIs("const"))
        return;
    match("const");
    parse_constitem();
    while (nextIs(","))     // further items are separated by commas
    {
        match(",");
        parse_constitem();
    }
    match(";");
}
// constitem -> ident "=" number
// Parses one constant definition: a name, the "=" sign, then its integer value.
void parse_constitem()
{
    parse_ident();                  // constant name
    const string equalsSign("=");
    match(equalsSign);
    parse_number();                 // constant value
}
// vardecl -> ["var" ident {"," ident} ";"]
// Parses an optional variable declaration. When the next token is not "var"
// (or no token is left) the declaration is absent and nothing is consumed.
void parse_vardecl()
{
    // True when a token remains and equals the lexeme; never reads past the end of tokens.
    const auto nextIs = [](const char* lexeme) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == lexeme;
    };

    if (!nextIs("var"))
        return;
    match("var");
    parse_ident();
    while (nextIs(","))     // further names are separated by commas
    {
        match(",");
        parse_ident();
    }
    match(";");
}
// procdecl -> {"procedure" ident ";" block ";"}
// Parses zero or more procedure declarations, each with a nested block.
void parse_procdecl()
{
    // True when a token remains and equals the lexeme; never reads past the end of tokens.
    const auto nextIs = [](const char* lexeme) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == lexeme;
    };

    while (nextIs("procedure"))
    {
        match("procedure");
        parse_ident();      // procedure name
        match(";");
        parse_block();      // procedure body (recursive)
        match(";");
    }
}
/*这段程序是一个编译器的语法分析器，用于解析代码中的常量、变量和过程声明。其中包括以下几个函数：
parse_constdecl():用于解析代码中的常量声明。它会首先匹配关键字 "const"，然后循环调用parse_constitem()函数解析每个常量项，每个常量项之间用逗号隔开，并以分号结尾。
parse_constitem():用于解析每个常量项，它首先调用parse_ident()函数解析常量名，然后匹配等号 "="，最后调用 parse_number() 函数解析常量值。
parse_vardecl():用于解析代码中的变量声明。它会首先匹配关键字"var",然后循环调用parse_ident()函数解析每个变量名，每个变量名之间用逗号隔开，并以分号结尾。
parse_procdecl():用于解析代码中的过程声明。它会循环匹配关键字 "procedure"，然后调用parse_ident()函数解析过程名，接着匹配分号 ";"，然后调用 parse_block()函数解析过程体部分，最后再匹配分号";"。
上述几个函数中都调用了其他辅助函数，例如 parse_ident() 用于解析标识符，match() 用于匹配关键字和符号等。这段程序可以作为一个基础框架，可以根据具体语言的语法规则进行适当修改以实现完整的语法分析功能。*/

源代码（仅供参考）

# include<iostream>
# include<string>
# include<fstream>
# include<sstream>
# include<vector>
# include<map>
#include <iostream>
#include<algorithm>
using namespace std;

// Forward declarations of the lexical classification helpers
bool isIdentifier(string s);// identifier
bool isKeywords(string s);  // keyword
bool isDigit(string s);     // integer literal
bool isOperator(string s);  // operator given as a string (may be multi-character)
bool isOperator(char c);    // single-character operator
string result(string s);    // builds the output line that corresponds to s

//函数实现
// Returns true when s is an identifier: it is not a keyword, starts with an
// ASCII letter, and every following character is an ASCII letter or digit.
bool isIdentifier(string s)
{
    const auto isLetter = [](char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); };
    const auto isDecimal = [](char c) { return c >= '0' && c <= '9'; };

    if (isKeywords(s))  // keywords are reserved and never identifiers
        return false;
    if (s.empty() || !isLetter(s[0]))   // the first character must be a letter
        return false;
    return all_of(s.begin() + 1, s.end(),
                  [&](char c) { return isLetter(c) || isDecimal(c); });
}

// Returns true when s is exactly one of the PL/0 keywords (case-sensitive).
bool isKeywords(string s)
{
    static vector<string> keyVec = { "begin" , "end" , "if" , "then" , "while" , "do" , "const" ,"var" , "call" , "procedure" , "odd" };  // PL/0 keyword list
    for (const string& keyword : keyVec)
    {
        if (keyword == s)
            return true;
    }
    return false;
}

// Returns true when s is a non-empty string made only of the characters '0'..'9'.
bool isDigit(string s)
{
    if (s.empty())
        return false;
    for (char c : s)
    {
        if (c < '0' || c > '9')     // any non-digit disqualifies the whole string
            return false;
    }
    return true;
}

// Returns true when s is exactly one of the PL/0 operators or delimiters,
// including the two-character assignment operator ":=".
bool isOperator(string s)
{
    static vector<string> opeVec = { "+" , "-" , "*" , "/" , "=", "#" , "<" , ">" , ":=" , "(" , ")" , "," , "." , ";" };   // PL/0 operator list
    for (const string& candidate : opeVec)
    {
        if (candidate == s)
            return true;
    }
    return false;
}

// Returns true when c is a character that can start or form a PL/0 operator.
// A lone ':' is accepted here because it is the first half of ":=".
bool isOperator(char c)
{
    static const string singleCharOps = "+-*/=#<>(),.;:";
    return singleCharOps.find(c) != string::npos;
}

// Builds the report line for the lexeme s in the form "<category>——<code>,<lexeme>".
// Identifiers use code 12 and integer literals code 13; keywords and operators
// use their individual codes from the table below. Anything else yields "Error".
string result(string s)
{
    // Category codes of all keywords and operators. The table is built once
    // and is read-only, so a lookup can never insert an entry.
    static const map<string, string> WordsDictionary = {
        // keywords
        { "begin", "1" }, { "end", "2" }, { "if", "3" }, { "then", "4" },
        { "while", "5" }, { "do", "6" }, { "const", "7" }, { "var", "8" },
        { "call", "9" }, { "procedure", "10" }, { "odd", "11" },
        // operators and delimiters
        { ":=", "14" }, { "+", "15" }, { "-", "16" }, { "*", "17" }, { "/", "18" },
        { "=", "19" }, { "#", "20" }, { "<", "21" }, { ">", "22" },
        { "(", "23" }, { ")", "24" }, { ",", "25" }, { ".", "26" }, { ";", "27" }
    };
    // Code of a lexeme, or an empty string when the table has no entry for it.
    const auto codeOf = [](const string& lexeme) {
        const auto entry = WordsDictionary.find(lexeme);
        return entry != WordsDictionary.end() ? entry->second : string();
    };

    // 1. identifier
    if (isIdentifier(s))
        return "标识符——12," + s;

    // 2. integer literal
    if (isDigit(s))
        return "整型常量——13," + s;

    // 3. keyword
    if (isKeywords(s))
        return "关键字——" + codeOf(s) + "," + s;

    // 4. operator
    if (isOperator(s))
        return "运算符——" + codeOf(s) + "," + s;
    return "Error";
}

// Splits one whitespace-free word into lexemes. Every operator character
// becomes its own lexeme, except that ":=" is kept together; the runs of
// characters between operators (identifiers, keywords, numbers) are returned
// unchanged. A word without operator characters comes back as a single lexeme.
static vector<string> splitWord(const string& word)
{
    vector<string> pieces;
    string::size_type runStart = 0;     // start of the pending non-operator run
    string::size_type pos = 0;
    while (pos < word.size())
    {
        if (!isOperator(word[pos]))
        {
            ++pos;
            continue;
        }
        if (pos > runStart)     // flush the text that precedes this operator
            pieces.push_back(word.substr(runStart, pos - runStart));
        const string::size_type width = (word.compare(pos, 2, ":=") == 0) ? 2 : 1;
        pieces.push_back(word.substr(pos, width));
        pos += width;
        runStart = pos;
    }
    if (runStart < word.size())     // trailing text after the last operator
        pieces.push_back(word.substr(runStart));
    return pieces;
}

// Runs the lexical analysis and returns every recognised lexeme in source
// order, for use by the parser.
//
// inputPath  - source file to analyse.
// outputPath - report file; the original text and one result() line per
//              lexeme are appended to it. The same lines are echoed to cout.
//
// Comment handling: a stand-alone word "//" discards the rest of its line; a
// stand-alone word "(*" discards the rest of its line and, unless a "*)"
// appears later on that line, every following line up to and including the
// next line that contains "*)".
// If the input file cannot be opened, an error is written to cerr and an
// empty vector is returned.
vector<string> LexicalAnalysis(const string& inputPath = "D:\\Desktop\\in.txt",
                               const string& outputPath = "D:\\Desktop\\out.txt")
{
    vector<string> AllWords;    // every lexeme that was recognised

    ifstream input(inputPath);  // the file's encoding must match the project's, otherwise the echo is garbled
    if (!input)
    {
        cerr << "无法打开输入文件: " << inputPath << endl;
        return AllWords;
    }

    // Read the whole file once; the same text is echoed and then scanned line by line.
    string copy;
    getline(input, copy, '\0');
    input.close();
    cout << copy << endl;

    ofstream output(outputPath, ofstream::app);
    output << "原数据:\n";
    output << copy << endl;
    output << "处理后结果:\n";
    cout << "处理后结果:\n";

    // Records one lexeme and reports its classification to the file and the screen.
    const auto emit = [&](const string& lexeme) {
        const string rel = result(lexeme);
        AllWords.push_back(lexeme);
        output << rel << endl;
        cout << rel << endl;
    };

    istringstream lines(copy);
    string str;
    bool skip = false;  // true while inside a multi-line (* ... *) comment
    while (getline(lines, str))
    {
        if (skip)
        {
            // The line that closes the comment is dropped as a whole.
            if (str.find("*)") != string::npos)
                skip = false;
            continue;
        }
        istringstream strCin(str);
        string words;
        while (strCin >> words)
        {
            if (words == "//")  // line comment
                break;
            if (words == "(*")
            {
                // Keep skipping on later lines only when the comment is not closed on this line.
                const string::size_type opener = str.find("(*");
                skip = (str.find("*)", opener + 2) == string::npos);
                break;
            }
            if (words == "*)")
            {
                // NOTE(review): a stray "*)" starts skip mode, exactly as the
                // previous implementation did; confirm whether that is intended.
                skip = true;
                break;
            }
            // Split the word only after it has been scanned completely, so each
            // lexeme is emitted exactly once and trailing text is not lost.
            for (const string& lexeme : splitWord(words))
                emit(lexeme);
        }
    }
    output.close();
    return AllWords;
}
// Token data and cursor shared by all parse_* functions.
// NOTE(review): a global named `index` may clash with the POSIX index()
// function on some toolchains — confirm on the target compiler.
vector<string> tokens = LexicalAnalysis();  // filled by running the lexer during static initialisation
int index = 0;                              // position of the current token in tokens
bool success = true;                        // set to false by exception_print when an error is reported
string symbolarray[] = { "=", "#", "<", ">" };  // relational operators accepted by match(expected, majority)
int symbollength = 4;                       // number of entries in symbolarray
// Summary of the PL/0 EBNF grammar from the lab handout (left recursion and backtracking removed)
/*
    program:    a block followed by ".".
    block:      constdecl, vardecl, procdecl and statement, in that order.
    constdecl:  optional; "const", one or more comma-separated constitem, then ";".
    constitem:  an identifier and a number joined by "=".
    vardecl:    optional; "var", one or more comma-separated identifiers, then ";".
    procdecl:   zero or more of: "procedure", identifier, ";", block, ";".
    statement:  one of assignstmt, callstmt, compstmt, ifstmt, whilestmt.
    assignstmt: identifier, ":=", expression.
    callstmt:   "call" followed by an identifier.
    compstmt:   "begin", statements separated by ";", then "end".
    ifstmt:     "if", condition, "then", statement.
    whilestmt:  "while", condition, "do", statement.
    condition:  either "odd" expression, or two expressions joined by one of "=", "#", "<", ">".
    expression: terms joined by "+" or "-".
    term:       factors joined by "*" or "/".
    factor:     optional sign, then an identifier, a number, or a parenthesised expression.
*/
// Function list; each comment gives the grammar unit the function handles
void exception_print(int type, string expected);    // prints an error report
void match(string expected);           // matches one specific token
void match(string expected, bool majority);             // matches any token from a set of alternatives
void parse_program();   //program -> block "."
void parse_block();     //block -> constdecl vardecl procdecl statement
void parse_constdecl(); //constdecl ->["const" constitem{ "," constitem } ";"]
void parse_constitem(); //constitem -> ident "=" number
void parse_vardecl();   //vardecl -> ["var" ident {"," ident} ";"]
void parse_procdecl();  //procdecl -> {"procedure" ident ";" block ";"}
void parse_statement(); //statement -> assignstmt | callstmt | compstmt | ifstmt | whilestmt
void parse_assignstmt();//assignstmt ->[ident ":=" expression]
void parse_callstmt();  //callstmt -> ["call" ident]
void parse_compstmt();  //compstmt -> ["begin" statement {";" statement} "end"]
void parse_ifstmt();    //ifstmt -> ["if" condition "then" statement]
void parse_whilestmt(); //whilestmt -> ["while" condition "do" statement]
void parse_condition(); //condition -> "odd" expression | expression ("="|"#"|"<"|">") expression
void parse_expression();//expression -> term { ("+"|"-") term}
void parse_term();      //term -> factor {("*"|"/") factor}
void parse_factor();    //factor -> [ "+"|"-"] (ident | number | "(" expression ")")
void parse_ident();     //ident -> letter {letter | digit}
void parse_number();    //number -> digit {digit}
//函数实现
// Reports a parse error on cout and appends it to Result.txt, then terminates
// the program with exit code 100.
//
// type     - 0: syntax error (expected vs. actual token), 1: malformed
//            identifier, 2: malformed integer. Other values print an empty line.
// expected - description of what the parser expected; only used for type 0.
void exception_print(int type, string expected = "")
{
    // The error may be raised after the last token was consumed; use a
    // placeholder instead of reading past the end of tokens.
    const bool hasToken = index < static_cast<int>(tokens.size());
    const string current = hasToken ? tokens[index] : string("<EOF>");

    string rel = "";
    switch (type)
    {
    case 0: // syntax error
        rel = "语法错误：预期为 " + expected + "，但输入的是 " + current;
        break;
    case 1: // identifier error
        rel = "标识符 " + current + " 格式错误";
        break;
    case 2: // integer error
        rel = "整数 " + current + "格式错误";
        break;
    }
    ofstream output("Result.txt", ofstream::app);
    cout << rel << endl;
    output << rel << endl;
    success = false;
    output.close();     // exit() does not run destructors of local objects
    exit(100);
}
// Consumes the current token when it equals expected; otherwise reports a
// syntax error through exception_print. Running out of tokens counts as a
// mismatch instead of reading past the end of tokens.
void match(string expected)
{
    const bool hasToken = index < static_cast<int>(tokens.size());
    if (hasToken && tokens[index] == expected)
        index++;
    else
        exception_print(0, expected);
}
// Consumes the current token when it is one of the entries of symbolarray;
// otherwise reports a syntax error whose "expected" text is the given prefix
// followed by all alternatives separated by spaces.
// The majority parameter is not read; it only selects this overload.
// Running out of tokens counts as a mismatch instead of reading past the end.
void match(string expected, bool majority)
{
    const bool hasToken = index < static_cast<int>(tokens.size());
    for (int i = 0; i < symbollength; i++)
    {
        if (hasToken && tokens[index] == symbolarray[i])
        {
            index++;
            return;
        }
        // Collect the alternatives for the error message while scanning.
        expected += symbolarray[i];
        if (i != symbollength - 1)
            expected += " ";
    }
    exception_print(0, expected);
}
// program -> block "."
// Entry point of the recursive descent: one block followed by the final period.
void parse_program()
{
    parse_block();
    const string terminator(".");
    match(terminator);
}
// block -> constdecl vardecl procdecl statement
// Runs the four sub-parsers of a block in grammar order.
void parse_block()
{
    void (*const parts[])() = { parse_constdecl, parse_vardecl, parse_procdecl, parse_statement };
    for (auto part : parts)
        part();
}
// constdecl -> ["const" constitem {"," constitem} ";"]
// Parses an optional constant declaration. When the next token is not "const"
// (or no token is left) the declaration is absent and nothing is consumed.
void parse_constdecl()
{
    // True when a token remains and equals the lexeme; never reads past the end of tokens.
    const auto nextIs = [](const char* lexeme) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == lexeme;
    };

    if (!nextIs("const"))
        return;
    match("const");
    parse_constitem();
    while (nextIs(","))     // further items are separated by commas
    {
        match(",");
        parse_constitem();
    }
    match(";");
}
// constitem -> ident "=" number
// Parses one constant definition: a name, the "=" sign, then its integer value.
void parse_constitem()
{
    parse_ident();                  // constant name
    const string equalsSign("=");
    match(equalsSign);
    parse_number();                 // constant value
}
// vardecl -> ["var" ident {"," ident} ";"]
// Parses an optional variable declaration. When the next token is not "var"
// (or no token is left) the declaration is absent and nothing is consumed.
void parse_vardecl()
{
    // True when a token remains and equals the lexeme; never reads past the end of tokens.
    const auto nextIs = [](const char* lexeme) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == lexeme;
    };

    if (!nextIs("var"))
        return;
    match("var");
    parse_ident();
    while (nextIs(","))     // further names are separated by commas
    {
        match(",");
        parse_ident();
    }
    match(";");
}
// procdecl -> {"procedure" ident ";" block ";"}
// Parses zero or more procedure declarations, each with a nested block.
void parse_procdecl()
{
    // True when a token remains and equals the lexeme; never reads past the end of tokens.
    const auto nextIs = [](const char* lexeme) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == lexeme;
    };

    while (nextIs("procedure"))
    {
        match("procedure");
        parse_ident();      // procedure name
        match(";");
        parse_block();      // procedure body (recursive)
        match(";");
    }
}
// statement -> assignstmt | callstmt | compstmt | ifstmt | whilestmt
// Chooses the statement kind from the current token. When the token starts
// none of the alternatives, or no token is left, the statement is empty and
// nothing is consumed.
void parse_statement()
{
    if (index >= static_cast<int>(tokens.size()))
        return;     // token stream exhausted: empty statement, no out-of-range read

    const string& lookahead = tokens[index];
    if (isIdentifier(lookahead))
        parse_assignstmt();
    else if (lookahead == "call")
        parse_callstmt();
    else if (lookahead == "begin")
        parse_compstmt();
    else if (lookahead == "if")
        parse_ifstmt();
    else if (lookahead == "while")
        parse_whilestmt();
}
// assignstmt -> ident ":=" expression
// Parses an assignment: target name, the ":=" operator, then the value expression.
void parse_assignstmt()
{
    parse_ident();                  // assignment target
    const string assignOp(":=");
    match(assignOp);
    parse_expression();             // assigned value
}
// callstmt -> "call" ident
// Parses a procedure call: the "call" keyword followed by the procedure name.
void parse_callstmt()
{
    const string callKeyword("call");
    match(callKeyword);
    parse_ident();      // name of the called procedure
}
// compstmt -> "begin" statement {";" statement} "end"
// Parses a compound statement. A missing "end" at the end of the input is
// reported by match instead of reading past the end of tokens.
void parse_compstmt()
{
    // True when a token remains and equals the lexeme; never reads past the end of tokens.
    const auto nextIs = [](const char* lexeme) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == lexeme;
    };

    match("begin");
    parse_statement();
    while (nextIs(";"))     // statements are separated by semicolons
    {
        match(";");
        parse_statement();
    }
    match("end");
}
// ifstmt -> "if" condition "then" statement
// Parses a conditional statement; there is no else branch in this grammar.
void parse_ifstmt()
{
    const string opening("if");
    const string separator("then");

    match(opening);
    parse_condition();
    match(separator);
    parse_statement();      // statement guarded by the condition
}
// whilestmt -> "while" condition "do" statement
// Parses a loop statement.
void parse_whilestmt()
{
    const string opening("while");
    const string separator("do");

    match(opening);
    parse_condition();
    match(separator);
    parse_statement();      // loop body
}
// condition -> "odd" expression | expression ("="|"#"|"<"|">") expression
// Parses either the unary "odd" test or a comparison of two expressions.
void parse_condition()
{
    // Look ahead only when a token is left, so the end of input is never read past.
    const bool startsWithOdd =
        index < static_cast<int>(tokens.size()) && tokens[index] == "odd";

    if (startsWithOdd)
    {
        match("odd");
        parse_expression();
    }
    else
    {
        parse_expression();
        match("", true);    // any one of the relational operators
        parse_expression();
    }
}
// expression -> term {("+"|"-") term}
// Parses a sequence of terms joined by additive operators.
void parse_expression()
{
    // True when a token remains and equals the lexeme; never reads past the end of tokens.
    const auto nextIs = [](const char* lexeme) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == lexeme;
    };

    parse_term();
    while (nextIs("+") || nextIs("-"))
    {
        match(tokens[index]);   // consume the operator that was just recognised
        parse_term();
    }
}
// term -> factor {("*"|"/") factor}
// Parses a sequence of factors joined by multiplicative operators.
void parse_term()
{
    // True when a token remains and equals the lexeme; never reads past the end of tokens.
    const auto nextIs = [](const char* lexeme) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == lexeme;
    };

    parse_factor();
    while (nextIs("*") || nextIs("/"))
    {
        match(tokens[index]);   // consume the operator that was just recognised
        parse_factor();
    }
}
// factor -> ["+"|"-"] (ident | number | "(" expression ")")
// Parses an optionally signed operand. When the current token is neither a
// number nor an identifier (or no token is left), a parenthesised expression
// is required and match reports the error if "(" is missing.
void parse_factor()
{
    // True while at least one unread token remains.
    const auto hasToken = [] { return index < static_cast<int>(tokens.size()); };

    if (hasToken() && (tokens[index] == "+" || tokens[index] == "-"))
        match(tokens[index]);   // optional sign
    if (hasToken() && isDigit(tokens[index]))
        parse_number();
    else if (hasToken() && isIdentifier(tokens[index]))
        parse_ident();
    else
    {
        match("(");
        parse_expression();
        match(")");
    }
}
// ident -> letter {letter | digit}
// Consumes the current token when it is an identifier; otherwise (including
// when no token is left) reports an identifier error.
void parse_ident()
{
    const bool hasToken = index < static_cast<int>(tokens.size());
    if (hasToken && isIdentifier(tokens[index]))
        match(tokens[index]);
    else
        exception_print(1);
}
// number -> digit {digit}
// Consumes the current token when it is an integer literal; otherwise
// (including when no token is left) reports an integer error.
void parse_number()
{
    const bool hasToken = index < static_cast<int>(tokens.size());
    if (hasToken && isDigit(tokens[index]))
        match(tokens[index]);
    else
        exception_print(2);
}
// Runs the parser over the global token stream and reports the outcome.
// On success the message is printed to cout and appended to 2.txt.
// Returns the value of the global success flag.
// NOTE(review): errors are appended to Result.txt by exception_print while the
// success message goes to 2.txt — confirm which report file is intended.
bool GrammaticalAnalysis()
{
    parse_program();
    ofstream report("2.txt", ofstream::app);
    if (!success)
        return false;

    const string verdict = "语法正确";
    cout << verdict << endl;
    report << verdict << endl;
    return true;
}
// Syntax analyser entry point: prints the table of token category codes,
// runs the grammatical analysis, then waits for a key press via "pause".
int main()
{
    // One row per output line; left column keywords and ":=", "+", "-",
    // right column the remaining operators plus ident (identifier) and digit (integer constant).
    static const char* const codeTable[] = {
        "* 符号        种别码  || 符号        种别码 *\n",
        "* begin       1       || *           17     *\n",
        "* end         2       || /           18     *\n",
        "* if          3       || =           19     *\n",
        "* then        4       || #           20     *\n",
        "* while       5       || <           21     *\n",
        "* do          6       || >           22     *\n",
        "* const       7       || (           23     *\n",
        "* var         8       || )           24     *\n",
        "* call        9       || ,           25     *\n",
        "* procedure   10      || .           26     *\n",
        "* odd         11      || ;           27     *\n",
        "* :=          14      || ident       12     *\n",
        "* +           15      || digit       13     *\n",
        "* -           16      ||                    *\n"
    };
    for (const char* row : codeTable)
        cout << row;

    GrammaticalAnalysis();
    system("pause");
    return 0;
}