编译原理生成中间代码(flex和bison版)

大大源码 • 2023年4月24日 am12:25 • 其他

简单说明

有些地方懒得再输入一遍，所以从课设报告上截了不少图
最好有一定的flex和bison基础

flex和bison并不难可以看看如下视频，大概花一个小时熟悉一下就可以了

Flex和bison视频讲解1

Flex和bison视频讲解2

题目要求

在这里插入图片描述
题目来源：陈火旺《编译原理》P218习题7
实验环境：Ubuntu 20.04
前置环境：sudo apt-get install flex bison安装flex和bison

翻译模式

第一步当然是设计翻译模式，按照书上给定的翻译模式进行组合，得到如下翻译模式，当然如下翻译模式并不完美，存在移进/规约冲突，后续可以改进.

对于不同文法需要设计不同的翻译模式，但是对于书上课后题通常只需要进行简单修改即可

在这里插入图片描述

顺便把语法树画了一下
在这里插入图片描述

源代码

一共要有三个核心文件

lex.l //词法分析器
yacc.y //语法分析器
xu.h //头文件

在这里插入图片描述

lex.l

这里只需要掌握说明部分视频里的知识即可
简要说明 :

delim 空格回车换行
ws 多个空格回车换行

其他的应该没问题

再第二部分使用了filloperator 函数，这个函数的定义再xu.h，对于RELOP关系运算符通过filloperator 函数来进行传递.

%{
	#include "yacc.tab.h"
%}
delim	[ \t\n\r]
ws	[delim]+
letter	[A-za-z]
digit        [0-9]
id        {letter}({letter}|{digit})*
number        {digit}+

%%
if	{ return( IF ); }
else	{ return( ELSE ); }
then	{ return( THEN ); }
while	{ return( WHILE ); }
do	{ return( DO ); }
and	{return( AND ); }
"+"    { return( '+' ); }
"-"    { return( '-' ); }
"*"    { return( '*' ); }
"/"    { return( '/' ); }
":="    { filloperator(&yylval, yytext); return( ASSIGN ); }
"<"|"<="|">"|">="|"!="|"="    { filloperator(&yylval, yytext); return( RELOP ); }
";"    { return( ';' ); }
{ws}    { }
{id}    { filllexeme(&yylval, yytext);  return( ID ); }
{number}    { filllexeme(&yylval, yytext); return( NUMBER ); }
%%

int yywrap() 
{ 
return (1); 
}

yacc.y

只是实现了翻译模式中的语法规则，对于语法规则的实现在说明部分的视频中也有提到，我这里说几个重要一点的函数和变量

变量 $n e x t q u a d$ ,它指向下一条将要生成但尚未生成的四元式地址。 $n e x t q u a d$ 的初值为1，每当执行一次 $G e n$ 之后， $n e x t q u a d$ 自动加一
函数 $m a k e l i s t (i)$ ,它创建一个仅包含i的新链表，其中i是四元式数组的一个下标，函数返回指向这个链的指
函数 $merge(p_1,p_2)$ ,把以 $p_1$ 和 $P_2$ 为链首的两条链合并为一，作为函数值，回送合并后的链首。
函数 $b a c k p a t c h (p, t)$ 其功能是完成“回填”，把p所链接的每个四元式的第四区都填为t。

其余函数在xu.h中实现

%{
	#include "xu.h"
	#define YYSTYPE node
	#include "yacc.tab.h"
	
	int yyerror();
	int yyerror(char* msg);
	extern int yylex();    
	codelist* list;
%}

%token IF ELSE THEN
%token WHILE DO
%token RELOP
%token NUMBER ID
%token AND
%left AND
%left '+' '-'
%left '*' '/'
%right '='
%right ASSIGN

%%
S	:	IF E THEN M S	
		{ 	
			backpatch(list, $3.truelist, $$.instr); 
			$$.nextlist = merge($2.falselist, $5.nextlist); 
		}

		|IF E THEN M S ELSE N M S	
		{ 	
			backpatch(list, $2.truelist, $4.instr);    
			backpatch(list, $2.falselist, $8.instr);
                  	$5.nextlist = merge($5.nextlist, $7.nextlist);    
                  	$$.nextlist = merge($5.nextlist, $9.nextlist); 
          } 
            
		|WHILE M E DO M S
		{ 	
			backpatch(list, $6.nextlist, $2.instr);    
			backpatch(list, $3.truelist, $5.instr);
                 	$$.nextlist = $3.falselist;	
                 	gen_goto(list, $2.instr); 
          }
		
		
		|OP ASSIGN E
		{
			copyaddr(&$1, $1.lexeme); 
			gen_assignment(list, $1, $3);
		}
		
		|L
		{
			$$.nextlist = $1.nextlist;
		}
		|	{}
		;
		
L	:	L ';' M S
		{
			backpatch(list, $1.nextlist, $3.instr);
			$$.nextlist = $4.nextlist;
		}
		|S
		{
			$$.nextlist = $1.nextlist;
		}
		;

E	:	E AND M E    
		{ 	
			backpatch(list, $1.truelist, $3.instr);
		  	$$.truelist = $4.truelist; 
		  	$$.falselist = merge($1.falselist, $4.falselist); 
		}
		
		|OP RELOP OP
		{	
			$$.truelist = new_instrlist(nextinstr(list));
                  $$.falselist = new_instrlist(nextinstr(list)+1);
                  gen_if(list, $1, $2.oper, $3);
                  gen_goto_blank(list); 
          }
          
		|OP
		{
			copyaddr_fromnode(&$$, $1);
          }
		;
		
OP	:	OP '+' OP { new_temp(&$$, get_temp_index(list)); gen_3addr(list, $$, $1, "+", $3); }
		|NUMBER {copyaddr(&$$, $1.lexeme);}
		|ID {copyaddr(&$$, $1.lexeme);}
		;
		

M	:	{ $$.instr = nextinstr(list); }
		;

N	:	{
			$$.nextlist = new_instrlist(nextinstr(list));
                 	gen_goto_blank(list);
		}
          ;
          
%%

int yyerror(char* msg)
{
printf("\nERROR with message: %s\n", msg);
return 0;
}

xu.h

这里只有拉链-回填会比较难理解
在这里插入图片描述
为了实现拉链-回填的过程需要定义如下结构体和函数，listele代表链表，其中的instrno代表四元式的标号，对于构造的链表结构体instrlist有一个头指针指向链首，一个尾指针指向链尾，对于指向相同出口的链表进行合并，在回填过程中就可以实现对于同一出口的跳转语句进行一次回填即可。

其实对于中间代码生成这里基本不用修改，只需要看懂都可以拿去用，只需要修改Gen等函数中的printf函数就可以变成生成三元式的程序了

#ifndef CP_H
#define CP_H

#include <stdio.h>
#include <string.h>
#include <malloc.h>

typedef struct listele
{
 int instrno;
 struct listele *next;
}listele;

 listele* new_listele(int no)
 {
     listele* p = (listele*)malloc(sizeof(listele));
     p->instrno = no;
     p->next = NULL;
     return p;
 }


typedef struct instrlist
{
 listele *first,*last;
}instrlist;

 instrlist* new_instrlist(int instrno)
 {
     instrlist* p = (instrlist*)malloc(sizeof(instrlist));
     p->first = p->last = new_listele(instrno);
     return p;
 }

 instrlist* merge(instrlist *list1, instrlist *list2)
 {
     instrlist *p;
     if (list1 == NULL) p = list2;
     else
     {
         if (list2!=NULL)
         {
             if (list1->last == NULL)
             {
                 list1->first = list2->first;
                 list1->last =list2->last;
             }else list1->last->next = list2->first;
             list2->first = list2->last = NULL;
             free(list2);
         }
         p = list1;
     }
     return p;
 }

typedef struct node
{
  instrlist *truelist, *falselist, *nextlist;
 char addr[256];
 char lexeme[256];
 char oper[3];
 int instr;
}node;

 int filloperator(node *dst, char *src)
 {
     strcpy(dst->oper, src);
     return 0;
 }    
 int filllexeme(node *dst, char *yytext)
 {
     strcpy(dst->lexeme, yytext);
     return 0;
 }
 int copyaddr(node *dst, char *src)
 {
     strcpy(dst->addr, src);
     return 0;
 }
 int new_temp(node *dst, int index)
 {
     sprintf(dst->addr, "t%d", index-100);
     return 0;
 }
 int copyaddr_fromnode(node *dst, node src)
 {
     strcpy(dst->addr, src.addr);
     return 0;
 }

typedef struct codelist
{
 int linecnt, capacity;
 int temp_index;
 char **code;
}codelist;

   codelist* newcodelist()
 {
     codelist* p = (codelist*)malloc(sizeof(codelist));
     p->linecnt = 100;
     p->capacity = 1024;
     p->temp_index = 100;
     p->code = (char**)malloc(sizeof(char*)*1024);
     return p;
 }

 int get_temp_index(codelist* dst)
 {
     return dst->temp_index++;
 }

 int nextinstr(codelist *dst) { return dst->linecnt; }
 
   int Gen(codelist *dst, char *str)
 {

     if (dst->linecnt >= dst->capacity)
     {
         dst->capacity += 1024;
         dst->code = (char**)realloc(dst->code, sizeof(char*)*dst->capacity);
         if (dst->code == NULL)
         {
             printf("short of memeory\n");
             return 0;
         }
     }
     dst->code[dst->linecnt] = (char*)malloc(strlen(str)+20);
     strcpy(dst->code[dst->linecnt], str);
     dst->linecnt++;
     return 0;
 }

 char tmp[1024];

 int gen_goto_blank(codelist *dst)
 {
     Gen(dst, tmp);
     return 0;
 }

 int gen_goto(codelist *dst, int instrno)
 {
     sprintf(tmp, "(j, -, -, %d)", instrno);
     Gen(dst, tmp);
     return 0;
 }

 int gen_if(codelist *dst, node left, char* op, node right)
 {
     sprintf(tmp, "(j%s, %s, %s,", op, left.addr, right.addr);
     Gen(dst, tmp);
     return 0;
 }

 int gen_1addr(codelist *dst, node left, char* op)
 {
     sprintf(tmp, "%s %s", left.addr, op);
     Gen(dst, tmp);
     return 0;
 }

 int gen_2addr(codelist *dst, node left, char* op, node right)
 {
     sprintf(tmp, "(%s, %s, -, %s)", op, right.addr, left.addr);
     Gen(dst, tmp);
     return 0;
 }

 int gen_3addr(codelist *dst, node left, node op1, char* op, node op2)
 {
     sprintf(tmp, "(%s, %s, %s, %s) ", op, op1.addr, op2.addr, left.addr);
     Gen(dst, tmp);
     return 0;
 }

 int gen_assignment(codelist *dst, node left, node right)
 {
     gen_2addr(dst, left, "-", right);
     return 0;
 }

 int backpatch(codelist *dst, instrlist *list, int instrno)
 {
     if (list!=NULL)
     {
         listele *p=list->first;
         char tmp[20];
     
         sprintf(tmp, " %d)", instrno);
         while (p!=NULL)
         {
             if (p->instrno < dst->linecnt)
                 strcat(dst->code[p->instrno], tmp);
             p=p->next;
         }
     }
     return 0;
 }

 int print(codelist* dst)
 {
     int i;
     
     for (i=100; i <= dst->linecnt; i++)
         printf("%5d:  %s\n", i, dst->code[i]);
     return 0;
 }

#endif

编译过程

命令行窗口输入如下三行命令就可以完成编译了，建议把这三行写入txt文件中，在重新编译时只需要bash xxx.txt就可以了，当然写makefile也是可以的

bison -d yacc.y
flex lex.l
gcc yacc.tab.c lex.yy.c -olm

原文链接：https://blog.csdn.net/weixin_45537232/article/details/125610062

THE END

大大搜源码

搜索内容