当前位置: 首页> 健康> 美食 > 编译原理项目——C++实现C语言编译器输出为gcc级汇编(代码/报告材料)

编译原理项目——C++实现C语言编译器输出为gcc级汇编(代码/报告材料)

时间:2025/9/7 23:17:32来源:https://blog.csdn.net/weixin_45605341/article/details/140086276 浏览次数:0次

完整的代码 材料 见文章末尾 以下为核心内容和部分结果

项目介绍

在这里插入图片描述

function.cpp 实现了共有的函数
lexer.cpp 词法分析器
get_predict_table.cpp获取预测分析表
LR.cpp语法分析
generate.cpp 语义分析 中间代码生成
to_asm.cpp目标代码生成

部分核心代码

LR分析

#include"compile.h"
node::node(const string& type, const string& text) {this->type = type;this->text = text;
}
node::~node() {/*for (node* t : this->child) {delete t;}*/
}
string node::str() {string out= "<" + this->type + ", " + this->text + ">";return out;
}
vector<string> stack_text(const vector<node*>& stack) {vector<string> ss;for (const auto* s : stack) {ss.push_back(s->type);}return ss;
}node* analysis(const vector<unordered_map<string, string>>& word_table,bool show)
{auto predict_table = create_predict_table();vector<node*> stack;node* root = new node("Program");node* end = new node("#");stack.push_back(end);stack.push_back(root);int index = 0;bool success = false;/*分析预测表的三个状态1. cur = #  解析完成2. cur = w  输入的字符表与符号栈中节点匹配3. cur 为非终结符,继续生成子节点4. error*/while (!stack.empty()) {node* cur = stack.back();stack.pop_back();// 状态 1if (cur->type == "#" && stack.empty()) {cout << "分析完成!" << endl;success = true;return root;}// 状态 2else if (cur->type == word_table[index].at("type")) {if (show) {cout << "符号栈:";for (auto& s : stack_text(stack)) {cout << s << " ";}cout << std::endl;cout << "匹配字符: " << word_table[index].at("word") << std::endl;}cur->text = word_table[index].at("word");index++;}// 状态 3else {string w = word_table[index].at("type");if (predict_table[cur->type].count(w) > 0) {if (predict_table[cur->type][w] == "null") {continue;}auto next_pr = split(predict_table[cur->type][w], ' ');if (show) {cout << "\n符号栈:";for (auto& s : stack_text(stack)) {cout << s << " ";}cout << "\n";cout << "产生式: " << cur->type << " -> " << predict_table[cur->type][w] << "\n";}vector<node*> node_list;/*产生式右部符号入栈子节点入栈注意:子节点入栈顺序应该与产生式符号相反*/for (auto np : next_pr) {node_list.push_back( new node(np));//cout << node_list.back() << endl;}for (auto nl : node_list) {//cout << nl << endl;cur->child.push_back(nl);}for (int i = node_list.size() - 1; i >= 0; i--) {stack.push_back(node_list[i]);}}// 状态 4 错误else {cout << "error" << std::endl;success = false;return nullptr;}}}
}

转换GCC汇编

#include"compile.h"
string global_head = "This is a simple compiler for C ;";string code_head =
"\t.cfi_startproc\n"
"\tpushq\t%rbp\n"
"\t.cfi_def_cfa_offset 16\n"
"\t.cfi_offset 6, -16\n"
"\tmovq\t%rsp, %rbp\n"
"\t.cfi_def_cfa_register 6\n";string code_footer =
"\tmovl\t$0, %eax\n"
"\tleave\n"
"\t.cfi_def_cfa 7, 8\n"
"\tret\n"
"\t.cfi_endproc\n"
".LFE6:\n"
"\t.size\tmain, .-main\n"
"\t.ident\t\"SCC: 1.0.0\"\n";int LC = 0;
string re = "";/*
对于数组给予相应长度地址空间
返回值[re, len]
re为变量名地址对照表, len为需要数据栈的高度(这里规定为12的倍数)
*/tuple<unordered_map<string, vector<string>>,int> init_data(vector<unordered_map<string, string>> name_list,unordered_map<string, vector<string>> arrs) 
{//cout << arrs["arr"][0] << "sss " << arrs["arr"][1] << endl;unordered_map<string, vector<string>>re;int i = 0;for (auto n : name_list) {if (n["name"] != "main") {if (n["flag"] == "int") {i += 4;re[n["name"]] = { to_string(i), "int" };}else if (n["flag"] == "char") {i += 1;re[n["name"]] = { to_string(i), "char" };}}}for (auto a : arrs) {if (a.second[1] == "int") {i += stoi(a.second[0]) * 4;re[a.first] = { to_string(i), "int" };}else if (a.second[1] == "char") {i += stoi(a.second[0]);re[a.first] = { to_string(i), "char" };}}for (auto rt : re) {cout << rt.first << " " << rt.second[0] << " " << rt.second[1] << endl;}return { re, (i / 12 + 1) * 12 };
}string init_string(vector<string> strings) {string re;for (int i = 0; i < strings.size(); i++) {re += ".LC" + to_string(i) + ":\n\t.string \"" + strings[i] + "\"\n";}return re;
}
string args(string n, unordered_map<string, vector<string>>& name) {if (name.count(n)) {return "-" + name[n][0] + "(%rbp)";}else if (n.find("[]") != string::npos) {vector<string> ags;ags.push_back(n.substr(0, n.find("[]")));ags.push_back(n.substr(n.find("[]") + 2));if (if_num(ags[1])) {int index = stoi(ags[1]);if (name[ags[0]][1] == "char") {return "-" + to_string(stoi(name[ags[0]][0]) - index) + "(%rbp)";}else if (name[ags[0]][1] == "int") {return "-" + to_string(stoi(name[ags[0]][0]) - index * 4) + "(%rbp)";}}else {re += "\tmovl\t" + args(ags[1], name) + ", %eax\n\tcltq\n";if (name[ags[0]][1] == "char") {return "-" + name[ags[0]][0] + "(%rbp, %rax, 1)";}else if (name[ags[0]][1] == "int") {return "-" + name[ags[0]][0] + "(%rbp, %rax, 4)";}}}else if (n.find("T") != string::npos) {return n + "(%rip)";}else if (if_num(n)) {return "$" + n;}else {return n;}
}
/*
传入参数:1. midcode中间代码(四元式)2. name变量地址参照表
可解析的汇编语句有1. 赋值语句(op,=)2. 四则运算(op,+-*  /)3. 跳转语句(op, j)4. 输出语句(op, print)
*/
string generate_code(vector<m_node>& mid_code,unordered_map<string, vector<string>>& name) {re = "";for (auto& m : mid_code) {string a1 = args(m.arg1, name);string a2 = args(m.arg2, name);string r = args(m.result, name);if (m.op == "=") {if (name.count(m.result) && name[m.result][1] == "char") {re += "\tmovb\t$" + to_string(int(m.arg1[0])) + ", " + r + "\n";}else if ((name.find(m.arg1)!= name.end()) || m.arg1.find("T")!=string::npos || m.arg1.find("[]") != string::npos) {re += "\tmovl\t" + a1 + ", %ecx\n";re += "\tmovl\t%ecx, " + r + "\n";}else {re += "\tmovl\t" + a1 + ", " + r + "\n";}}else if (m.op == "code_block") {re += "." + m.result + ":\n";continue;}else if (m.op.find("j") != string::npos) {if (m.op == "j") {re += "\tjmp\t." + m.result + "\n";}else {re += "\tmovl\t" + a1 + ", %eax\n";re += "\tcmpl\t" + a2 + ", %eax\n";if (m.op.find(">") != string::npos) {re += "\tjg\t." + m.result + "\n";}else if (m.op.find("<") != string::npos) {re += "\tjle\t." + m.result + "\n";}else if (m.op.find("=") != string::npos) {re += "\tje\t." + m.result + "\n";}}}else if (m.op=="+"||m.op=="-") {re += "\tmovl\t" + a1 + ", %edx\n";re += "\tmovl\t" + a2 + ", %eax\n";if (m.op == "+") {re += "\taddl\t%edx, %eax\n";}else {re += "\tsubl\t%edx, %eax\n";}re += "\tmovl\t%eax, " + r + "\n";}else if (m.op == "*" || m.op == "/") {if (name.count(m.arg1)) {re += "\tmovl\t" + a2 + ", %eax\n";re += "\timull\t" + a1 + ", %eax\n";re += "\tmovl\t%eax, " + r + "\n";}else if (name.count(m.arg2) && name.count(m.arg1) == 0) {re += "\tmovl\t" + a2 + ", %eax\n";re += "\timull\t" + a1 + ", %eax, %eax\n";re += "\tmovl\t%eax, " + r + "\n";}else if (name.count(m.arg2) == 0 && name.count(m.arg1) == 0) {int num = stoi(m.arg2) * stoi(m.arg1);re += "\tmovl\t$" + to_string(num) + ", " + r + "\n";}}else if (m.op == "print") {if (m.arg1 != "-1") {if (name.count(m.arg1) && name[m.arg1][1] == "char") {re += "\tmovsbl\t" + a1 + ", %eax\n";}else {re += "\tmovl\t" + a1 + ", %eax\n";}}if (m.arg2 != "-1") {if ( name.count(m.arg2) && name[m.arg2][1] == "char") {re += "\tmovsbl\t" + a2 + ", %edx\n";}else {re += "\tmovl\t" + a2 + ", %edx\n";}}if (m.result != "-1") {if (name.count(m.result) && name[m.result][1] == "char") {re += "\tmovsbl\t" + r + ", %ecx\n";}else {re += "\tmovl\t" + r + ", %ecx\n";}}re += "\tmovl\t%eax, %esi\n";re+="\tleaq\t.LC" + to_string(LC) + "(%rip), %rdi\n";LC += 1;re += "\tmovl\t$0, %eax\n\tcall\tprintf@PLT\n";}}return re;
}
/*
将生成的临时变量,汇编代码,头部,结束部分等一些内容拼接在一起
传入参数:1. tmp 临时变量(其实在代码里作为全局变量)2. strs 字符串变量3. code 主函数汇编代码4. subq 数据栈高度
*/
string connect(int tmp,string strs,string code,int subq) {string data = "";for (int i = 0; i < tmp; i++) {data += "\t.comm\tT" + to_string(i) + ",4,4\n";}string re = "\t.text\n\t.section\t.rodata\n" + data + strs + \"\t.text\n\t.globl\tmain\n\t.type\tmain, @function\nmain:\n" + code_head + \"\tsubq\t$" + to_string(subq) + ", %rsp\n" + code + code_footer;return re;
}

部分输出

LR分析输出语法树

输出语法分析树
第1层
<Program, > 第2层
<type, int> <M, > <C, > <Pro, > 第3层
<name, main> <(, (> <cc, > <), )> <{, {> <Pr, > <}, }> 第4层
<P, > <Pr, > 第5层
<type, int> <L, > <;, ;> <P, > <Pr, > 第6层
<M, > <LM, > <type, int> <L, > <;, ;> <P, > <Pr, > 第7层
<name, arr> <Size, > <AM, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第8层
<[, [> <E, > <], ]> <name, index> <=, => <FE, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第9层
<T, > <ET, > <E, > <name, arr> <Size, > <AM, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第10层
<F, > <TT, > <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <name, arr> <Size, > <AM, > <M, > <LM, > <Pan, > <P, > <Pr, > 第11层
<number, 25> <F, > <TT, > <T, > <ET, > <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <name, arr> <Size, > <AM, > <Ptype, > <P_block, > <Pro, > <printf, printf> <OUT, > <;, ;> 第12层
<number, 0> <F, > <TT, > <F, > <TT, > <T, > <ET, > <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <while, while> <(, (> <Pbc, > <), )> <{, {> <Pr, > <}, }> <(, (> <TXT, > <V, > <), )> 第13层
<number, 0> <number, 1> <F, > <TT, > <F, > <TT, > <T, > <ET, > <T, > <ET, > <E, > <PM, > <P, > <Pr, > <TEXT, finish\n> 第14层
<number, 1> <number, 2> <F, > <TT, > <F, > <TT, > <T, > <ET, > <Cmp, <> <E, > <type, int> <L, > <;, ;> <P, > <Pr, > 第15层
<number, 2> <number, 3> <F, > <TT, > <T, > <ET, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第16层
<M, > <MS, > <F, > <TT, > <name, b> <=, => <FE, > <M, > <LM, > <printf, printf> <OUT, > <;, ;> <P, > <Pr, > 第17层
<name, index> <number, 10> <*, *> <F, > <TT, > <E, > <name, arr> <Size, > <AM, > <(, (> <TXT, > <V, > <), )> <L, > <;, ;> 第18层
<number, 2> <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <TEXT, f(%d)=%d\n> <,, ,> <E, > <VV, > <M, > <LM, > 第19层
<F, > <TT, > <T, > <ET, > <T, > <ET, > <T, > <ET, > <,, ,> <E, > <VV, > <name, index> <=, => <FE, > 第20层
<M, > <MS, > <F, > <TT, > <+, +> <T, > <ET, > <F, > <TT, > <+, +> <T, > <ET, > <F, > <TT, > <T, > <ET, > <E, > 第21层
<name, arr> <Size, > <M, > <MS, > <F, > <TT, > <M, > <MS, > <F, > <TT, > <M, > <MS, > <F, > <TT, > <T, > <ET, > 第22层
<[, [> <E, > <], ]> <name, index> <number, 2> <name, arr> <Size, > <M, > <MS, > <name, index> <M, > <MS, > <F, > <TT, > <+, +> <T, > <ET, > 第23层
<T, > <ET, > <[, [> <E, > <], ]> <name, b> <name, b> <M, > <MS, > <F, > <TT, > 第24层
<F, > <TT, > <T, > <ET, > <name, index> <number, 1> 第25层
<M, > <MS, > <F, > <TT, > <+, +> <T, > <ET, > 第26层
<name, index> <M, > <MS, > <F, > <TT, > 第27层
<name, index> <number, 1> 

输出预测表

输出预测分析表
Lname: M LM
Programtype: type M C Pro
LM=: = FE;: null[: Size AM
C(: ( cc )
Pro{: { Pr }
Prtype: P Prwhile: P Prif: P Prname: P Prprintf: P Pr}: null
Ptype: type L ;while: Panif: Panname: L ;printf: printf OUT ;
TXTTEXT: TEXT
FEnumber: E(: ETEXT: TEXTname: ECHAR: CHAR
Mname: name
Enumber: T ET(: T ETname: T ET
Size[: [ E ]
ET;: null+: + T ET]: null-: - T ET): null,: nullCmp: null
Tnumber: F TT(: F TTname: F TT
TT*: * F TT/: / F TT): null,: nullCmp: null]: null+: null;: null-: null
Fnumber: number(: BRAname: M MS
MS+: null;: null[: Size): null,: nullCmp: null-: null]: null*: null/: null
Pbcnumber: E PM(: E PMname: E PM
BRA(: ( E )
OUT(: ( TXT V )
V,: , E VV): null
VV,: , E VV): null
Panwhile: Ptype P_block Proif: Ptype P_block Pro
Ptypewhile: whileif: if
P_block(: ( Pbc )
PMCmp: Cmp E): null
AM=: = E;: null
cc): null

结果展示

输入文件为:

int main(){int arr[25];int index = 0;// 求0~20的斐波那契数列arr[0] = 1;arr[1] = 2;arr[2] = 3;while(index < 10*2 ){int b = arr[index];arr[index+2]=arr[index+1] + b;printf("f(%d)=%d\n",index,b);index = index +1;}printf("finish\n");
}

输出的gcc汇编为

	.text.section	.rodata.comm	T0,4,4.comm	T1,4,4.comm	T2,4,4.comm	T3,4,4
.LC0:.string "f(%d)=%d\n"
.LC1:.string "fnish\n".text.globl	main.type	main, @function
main:.cfi_startprocpushq	%rbp.cfi_def_cfa_offset 16.cfi_offset 6, -16movq	%rsp, %rbp.cfi_def_cfa_register 6subq	$120, %rspmovl	$0, -8(%rbp)movl	$1, -112(%rbp)movl	$2, -108(%rbp)movl	$3, -104(%rbp)
.W4:movl	-8(%rbp), %eaxcmpl	$20, %eaxjle	.code6jmp	.block6
.code6:movl	-8(%rbp), %eaxcltqmovl	-112(%rbp, %rax, 4), %ecxmovl	%ecx, -12(%rbp)movl	-8(%rbp), %edxmovl	$1, %eaxaddl	%edx, %eaxmovl	%eax, T0(%rip)movl	T0(%rip), %eaxcltqmovl	-112(%rbp, %rax, 4), %edxmovl	-12(%rbp), %eaxaddl	%edx, %eaxmovl	%eax, T1(%rip)movl	-8(%rbp), %edxmovl	$2, %eaxaddl	%edx, %eaxmovl	%eax, T2(%rip)movl	T2(%rip), %eaxcltqmovl	T1(%rip), %ecxmovl	%ecx, -112(%rbp, %rax, 4)movl	-8(%rbp), %eaxmovl	-12(%rbp), %edxmovl	%eax, %esileaq	.LC0(%rip), %rdimovl	$0, %eaxcall	printf@PLTmovl	-8(%rbp), %edxmovl	$1, %eaxaddl	%edx, %eaxmovl	%eax, T3(%rip)movl	T3(%rip), %ecxmovl	%ecx, -8(%rbp)jmp	.W4
.block6:movl	%eax, %esileaq	.LC1(%rip), %rdimovl	$0, %eaxcall	printf@PLTmovl	$0, %eaxleave.cfi_def_cfa 7, 8ret.cfi_endproc
.LFE6:.size	main, .-main.ident	"PCC: 1.0.0"

生成汇编代码.s文件,后续的链接等工作将交给gcc,可以调用gcc将汇编编译为可执行文件

数组下标可以用变量表示,并且print语句中可以有表达式存在

在这里插入图片描述
在这里插入图片描述

While 语句也可以循环嵌套

在这里插入图片描述

在这里插入图片描述

获取方式

点这里 只需要一点点辛苦费,不需要你写材料 报告 ppt

在这里插入图片描述

在这里插入图片描述

关键字:编译原理项目——C++实现C语言编译器输出为gcc级汇编(代码/报告材料)

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com

责任编辑: