编译原理(十) SLR文法分析法(算法原理和C++实现)
最后更新于:2022-04-01 14:15:32
## 前情提要
因为SLR文法分析法就是对LR(0)的一种优化,它提供了一种解决冲突的方法,所以很多之前在LR(0)提及的东西,在此只提供一个引用。
[LR(0)文法分析法](http://blog.csdn.net/qq_24451605/article/details/50152029)
## 算法描述
**SLR文法构造分析表的主要思想是:**许多冲突性的动作都可能通过考察有关非终结符的FOLLOW集而获解决。
**解决冲突的方法:**解决冲突的方法是分析所有含A和B的句型,考察集合FOLLOW(A)和FOLLOW(B),如果这两个集合不相交,而且也不包含b,那么当状态I面临输入符号a时,我们可以使用如下策略:
- 若a=b,则移进。
- 若a∈FOLLOW(A),则用产生式A→α进行归约;
- 若a∈FOLLOW(B),则用产生式B→α进行归约;
- 此外,报错***
**SLR的基本算法:**
- 假定LR(0)规范族的一个项目集I中含有m个移进项目
A1→α•a1β1,A2→α•a2β2,…,Am→α•amβm;
同时含有n个归约项目
B1→α•,B2→α•,…,B3→α•,
- 如果集合{ a1,…, am},FOLLOW(B1),…,FOLLOW(Bn)两两不相交(包括不得有两个FOLLOW集合有#),则隐含在I中的动作冲突可以通过检查现行输入符号a属于上述n+1个集合中的哪个集合而活的解决:
- 若a是某个ai,i=1,2,…,m,则移进。
- 若a∈FOLLOW(Bi),i=1,2,…,m,则用产生式Bi→α进行归约;
- 此外,报错
这种冲突的解决方法叫做***SLR(1)解决办法***。
**SLR语法分析表的构造方法:**
首先把G拓广为G’,对G’构造LR(0)项目集规范族C和活前缀识别自动机的状态转换函数GO。函数ACTION和GOTO可按如下方法构造:
- 若项目A→α•bβ属于Ik,GO(Ik,a)= Ij,a为终结符,置ACTION[k,a]为“把状态j和符号a移进栈”,简记为“sj”;
- 若项目A→α•属于Ik,那么,对任何非终结符a,a∈FOLLOW(A),置ACTION[k,a]为“用产生式A→α进行归约”,简记为“rj”;其中,假定A→α为文法G’的第j个产生式
- 若项目S’→S•属于Ik,则置ACTION[k,#]为可“接受”,简记为“acc”;
- 若GO(Ik, A)= Ij,A为非终结符,则置GOTO[k, A]=j;
分析表中凡不能用规则1至4填入信息的空白格均填上“出错标志”。
语法分析器的初始状态是包含S’ →•S的项目集合的状态
***SLR解决的冲突只是移进-规约冲突和规约-规约冲突***
## 代码实现
~~~
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cstring>
#include <cctype>
#include <vector>
#include <string>
#include <queue>
#include <map>
#include <set>
#include <sstream>
#define MAX 507
#define DEBUG
/*Author : byj*/
using namespace std;
class WF
{
public:
string left,right;
int back;
int id;
WF ( char s1[] , char s2[] , int x , int y )
{
left = s1;
right = s2;
back = x;
id = y;
}
WF ( const string& s1 , const string& s2 , int x , int y )
{
left = s1;
right = s2;
back = x;
id = y;
}
bool operator < ( const WF& a ) const
{
if ( left == a.left )
return right < a.right;
return left < a.left;
}
bool operator == ( const WF& a ) const
{
return ( left == a.left )&& ( right == a.right );
}
void print ( )
{
printf ( "%s->%s\n" , left.c_str() , right.c_str() );
}
};
class Closure
{
public:
vector<WF> element;
void print ( string str )
{
printf ( "%-15s%-15s\n" , "" , str.c_str());
for ( int i = 0 ; i < element.size() ; i++ )
element[i].print();
}
bool operator == ( const Closure& a ) const
{
if ( a.element.size() != element.size() ) return false;
for ( int i = 0 ; i < a.element.size() ; i++ )
if ( element[i] == a.element[i] ) continue;
else return false;
return true;
}
};
struct Content
{
int type;
int num;
string out;
Content(){ type = -1; }
Content ( int a , int b )
:type(a),num(b){}
};
vector<WF> wf;
map<string,vector<int> > dic;
map<string,vector<int> > VN_set;
map<string,bool> vis;
string start = "S";
vector<Closure> collection;
vector<WF> items;
char CH = '$';
int go[MAX][MAX];
int to[MAX];
vector<char> V;
bool used[MAX];
Content action[MAX][MAX];
int Goto[MAX][MAX];
map<string,set<char> > first;
map<string,set<char> > follow;
void make_item ( )
{
memset ( to , -1 , sizeof ( -1 ) );
for ( int i = 0 ; i < wf.size() ; i++ )
VN_set[wf[i].left].push_back ( i );
for ( int i = 0 ; i < wf.size() ; i++ )
for ( int j = 0 ; j <= wf[i].right.length() ; j++ )
{
string temp = wf[i].right;
temp.insert ( temp.begin()+j , CH );
dic[wf[i].left].push_back ( items.size() );
if ( j )
to[items.size()-1] = items.size();
items.push_back ( WF ( wf[i].left , temp , i , items.size()) );
}
#ifdef DEBUG
puts("-------------------------项目表-------------------------");
for ( int i = 0 ; i < items.size() ; i++ )
printf ( "%s->%s back:%d id:%d\n" , items[i].left.c_str() , items[i].right.c_str() , items[i].back , items[i].id );
puts("--------------------------------------------------------");
#endif
}
void dfs ( const string& x )
{
if ( vis[x] ) return;
vis[x] = 1;
vector<int>& id = VN_set[x];
for ( int i = 0 ; i < id.size() ; i++ )
{
string& left = wf[id[i]].left;
string& right = wf[id[i]].right;
for ( int j = 0 ; j < right.length() ; j++ )
if ( isupper(right[j] ) )
{
dfs ( right.substr(j,1) );
set<char>& temp = first[right.substr(j,1)];
set<char>::iterator it = temp.begin();
bool flag = true;
for ( ; it != temp.end() ; it++ )
{
if ( *it == '~' ) flag = false;
first[left].insert (*it );
}
if ( flag ) break;
}
else
{
first[left].insert ( right[j] );
break;
}
}
}
void make_first ( )
{
vis.clear();
map<string,vector<int> >::iterator it2 = dic.begin();
for ( ; it2 != dic.end() ; it2++ )
if ( vis[it2->first] ) continue;
else dfs ( it2->first );
#ifdef DEBUG
puts ("****************FIRST集***************************");
map<string,set<char> >::iterator it = first.begin();
for ( ; it != first.end() ; it++ )
{
printf ( "FIRST(%s)={" , it->first.c_str() );
set<char> & temp = it->second;
set<char>::iterator it1 = temp.begin();
bool flag = false;
for ( ; it1 != temp.end() ; it1++ )
{
if ( flag ) printf ( "," );
printf ( "%c" , *it1 );
flag = true;
}
puts ("}" );
}
#endif
}
void append ( const string& str1 , const string& str2 )
{
set<char>& from = follow[str1];
set<char>& to = follow[str2];
set<char>::iterator it = from.begin();
for ( ; it != from.end() ; it++ )
to.insert ( *it );
}
bool _check ( const vector<int>& id, const string str )
{
for ( int i = 0 ; i < id.size() ; i++ )
{
int x = id[i];
if ( wf[x].right == str ) return true;
}
return false;
}
void make_follow ( )
{
while ( true )
{
bool goon = false;
map<string,vector<int> >::iterator it2 = VN_set.begin();
for ( ; it2 != VN_set.end() ; it2++ )
{
vector<int>& id = it2->second;
for ( int i = 0 ; i < id.size() ; i++ )
{
bool flag = true;
WF& tt = wf[id[i]];
string& left = tt.left;
const string& right = tt.right;
for ( int j = right.length()-1 ; j >= 0 ; j-- )
if ( isupper( right[j] ) )
{
if ( flag )
{
int tx = follow[right.substr(j,1)].size();
append( left , right.substr(j,1) );
int tx1 = follow[right.substr(j,1)].size();
if ( tx1 > tx ) goon = true;
if ( _check ( id , "~" ) )
flag = false;
}
for ( int k = j+1 ; k < right.length() ; k++ )
if ( isupper(right[k] ) )
{
string idd = right.substr(k,1);
set<char>& from = first[idd];
set<char>& to = follow[right.substr(j,1)];
set<char>::iterator it1 = from.begin();
int tx = follow[right.substr(j,1)].size();
for ( ; it1 != from.end() ; it1++ )
if ( *it1 != '~' )
to.insert ( *it1 );
int tx1 = follow[right.substr(j,1)].size();
if ( tx1 > tx ) goon = true;
if ( _check ( id , "~" ) )
break;
}
else
{
int tx = follow[right.substr(j,1)].size();
follow[right.substr(j,1)].insert ( right[k] );
int tx1 = follow[right.substr(j,1)].size();
if ( tx1 > tx ) goon = true;
break;
}
}
else flag = false;
}
}
if ( !goon ) break;
}
#ifdef DEBUG
puts ("***************FOLLOW集*******************");
map<string,set<char> >::iterator it = follow.begin();
for ( ; it != follow.end() ; it++ )
{
printf ( "FOLLOW(%s)={" , it->first.c_str() );
set<char> & temp = it->second;
//if ( it->first[0] == 'S' )
temp.insert ( '#' );
set<char>::iterator it1 = temp.begin();
bool flag = false;
for ( ; it1 != temp.end() ; it1++ )
{
if ( flag ) printf ( "," );
printf ( "%c" , *it1 );
flag = true;
}
puts ("}");
}
#endif
}
void make_set ( )
{
bool has[MAX];
for ( int i = 0 ; i < items.size() ; i++ )
if ( items[i].left[0] == 'S' && items[i].right[0] == CH )
{
Closure temp;
string& str = items[i].right;
vector<WF>& element = temp.element;
element.push_back ( items[i] );
int x = 0;
for ( x = 0 ; x < str.length() ; x++ )
if ( str[x] == CH )
break;
/*if ( x != str.length()-1 )
{
string tt = str.substr(x+1,1);
vector<int>& id = dic[tt];
for ( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
//items[tx].print();
if ( items[tx].right[0] == CH )
element.push_back ( items[tx] );
}
}*/
memset ( has , 0 , sizeof ( has ) );
has[i] = 1;
if ( x != str.length()-1 )
{
queue<string> q;
q.push( str.substr(x+1,1) );
while ( !q.empty() )
{
string u = q.front();
q.pop();
vector<int>& id = dic[u];
for( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
if ( items[tx].right[0] == CH )
{
if ( has[tx] ) continue;
has[tx] = 1;
if ( isupper(items[tx].right[1] ) )
q.push ( items[tx].right.substr(1,1));
element.push_back ( items[tx] );
}
}
}
}
collection.push_back ( temp );
}
for ( int i = 0 ; i < collection.size() ; i++ )
{
map<int,Closure> temp;
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
string str = collection[i].element[j].right;
int x = 0;
for ( ; x < str.length() ; x++ )
if ( str[x] == CH ) break;
if ( x == str.length()-1 )
continue;
int y = str[x+1];
int ii;
//cout << i << "previous: " << str << endl;
str.erase ( str.begin()+x);
str.insert ( str.begin()+x+1 , CH );
//cout << i <<"after: " << str << endl;
WF cmp = WF ( collection[i].element[j].left , str , -1 , -1 );
for ( int k = 0 ; k< items.size() ; k++ )
if ( items[k] == cmp )
{
ii = k;
break;
}
//string& str1 = items[ii].right;
memset ( has , 0 , sizeof ( has ) );
vector<WF>& element = temp[y].element;
element.push_back ( items[ii] );
has[ii] = 1;
x++;
/*if ( x != str.length()-1 )
{
string tt = str.substr(x+1,1);
vector<int>& id = dic[tt];
for ( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
//items[tx].print();
if ( items[tx].right[0] == CH )
element.push_back ( items[tx] );
}
}*/
if ( x != str.length()-1 )
{
queue<string> q;
q.push( str.substr(x+1,1) );
while ( !q.empty() )
{
string u = q.front();
q.pop();
vector<int>& id = dic[u];
for( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
if ( items[tx].right[0] == CH )
{
if ( has[tx] ) continue;
has[tx] = 1;
if ( isupper(items[tx].right[1] ) )
q.push ( items[tx].right.substr(1,1));
element.push_back ( items[tx] );
}
}
}
}
}
map<int,Closure>::iterator it = temp.begin();
for ( ; it != temp.end() ; it++ )
collection.push_back ( it->second );
for ( int i = 0 ; i < collection.size() ; i++ )
sort ( collection[i].element.begin() , collection[i].element.end() );
for ( int i = 0 ; i < collection.size() ; i++ )
for ( int j = i+1 ; j < collection.size() ; j++ )
if ( collection[i] == collection[j] )
collection.erase ( collection.begin()+j );
}
#ifdef DEBUG
puts ("-------------CLOSURE---------------------");
stringstream sin;
for ( int i = 0 ; i < collection.size() ; i++ )
{
sin.clear();
string out;
sin <<"closure-I" << i;
sin >> out;
collection[i].print ( out );
}
puts("");
#endif
}
void make_V ( )
{
memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < wf.size() ; i++ )
{
string& str = wf[i].left;
for ( int j = 0 ; j < str.length() ; j++ )
{
if ( used[str[j]] ) continue;
used[str[j]] = 1;
V.push_back ( str[j] );
}
string& str1 = wf[i].right;
for ( int j = 0 ; j < str1.length() ; j++ )
{
if ( used[str1[j]] ) continue;
used[str1[j]] = 1;
V.push_back ( str1[j] );
}
}
sort ( V.begin() , V.end() );
V.push_back ( '#' );
}
void make_cmp ( vector<WF>& cmp1 , int i , char ch )
{
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
string str = collection[i].element[j].right;
int k;
for ( k = 0 ; k < str.length() ; k++ )
if ( str[k] == CH )
break;
if ( k != str.length() - 1 && str[k+1] == ch )
{
str.erase ( str.begin()+k);
str.insert ( str.begin()+k+1 , CH );
cmp1.push_back ( WF ( collection[i].element[j].left , str , -1 , -1 ) );
}
}
sort ( cmp1.begin() , cmp1.end() );
}
void make_go ( )
{
memset ( go , -1 , sizeof ( go ) );
int m = collection.size();
/*for ( int i = 0 ; i < m ; i++ )
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
string left = collection[i].element[j].left;
string str = collection[i].element[j].right;
int x = 0;
for ( ; x < str.length() ; x++ )
if ( str[x] == CH ) break;
if ( x == str.length()-1 )
continue;
int y = str[x+1];
//cout << "before : " << str << endl;
str.erase ( str.begin()+x);
str.insert ( str.begin()+x+1 , CH );
//cout << "after : " << str << endl;
WF cmp = WF ( collection[i].element[j].left , str , -1 , -1 );
for ( int k = 0 ; k < m ; k++ )
{
bool flag = false;
for ( int t = 0 ; t < collection[k].element.size() ; t++ )
{
if ( cmp == collection[k].element[t] )
{
flag = true;
break;
}
}
if ( flag )
{
go[i][y] = k;
}
}
}*/
for ( int t = 0 ; t < V.size() ; t++ )
{
char ch = V[t];
for ( int i = 0 ; i < m ; i++ )
{
vector<WF> cmp1;
make_cmp ( cmp1 , i , ch );
cout << cmp1.size() << endl;
if ( cmp1.size() == 0 ) continue;
for ( int j = 0 ; j < m ; j++ )
{
vector<WF> cmp2;
for ( int k = 0 ; k < collection[j].element.size() ; k++ )
{
string& str = collection[j].element[k].right;
int x;
for ( x = 0 ; x < str.length() ; x++ )
if ( str[x] == CH )
break;
if ( x && str[x-1] == ch )
cmp2.push_back ( WF( collection[j].element[k].left , str , -1 , -1 ) );
}
sort ( cmp2.begin() , cmp2.end() );
cout << cmp2.size() << endl;
bool flag = true;
if ( cmp2.size() != cmp1.size() ) continue;
cout << cmp1.size() << endl;
for ( int k = 0 ; k < cmp1.size() ; k++ )
if ( cmp1[k] == cmp2[k] ) continue;
else flag = false;
cout << "out " << endl;
if ( flag )
go[i][ch] = j;
}
//cout << "YES" << endl;
}
}
#ifdef DEBUG
puts ("---------------EDGE----------------------");
stringstream sin;
string out;
for ( int i = 0 ; i < m ; i++ )
for ( int j = 0 ; j < m ; j++ )
for ( int k = 0 ; k < MAX ; k++ )
if ( go[i][k] == j )
{
sin.clear();
sin << "I" << i << "--" <<(char)(k)<<"--I"<<j;
sin >> out;
printf ( "%s\n" , out.c_str() );
}
#endif
}
void make_table ( )
{
memset ( Goto , -1 , sizeof ( Goto ) );
/*memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < wf.size() ; i++ )
{
string& str = wf[i].left;
for ( int j = 0 ; j < str.length() ; j++ )
{
if ( used[str[j]] ) continue;
used[str[j]] = 1;
V.push_back ( str[j] );
}
string& str1 = wf[i].right;
for ( int j = 0 ; j < str1.length() ; j++ )
{
if ( used[str1[j]] ) continue;
used[str1[j]] = 1;
V.push_back ( str1[j] );
}
}
sort ( V.begin() , V.end() );
V.push_back ( '#' );*/
//write s to the table
for( int i = 0 ; i < collection.size() ; i++ )
for ( int j = 0 ; j < V.size() ; j++ )
{
char ch = V[j];
int x = go[i][ch];
if ( x == -1 ) continue;
if ( !isupper(ch) )
action[i][ch] = Content ( 0 , x );
else
Goto[i][ch] = x;
}
//write r and acc to the table
for ( int i = 0 ; i < collection.size() ; i++ )
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
WF& tt = collection[i].element[j];
if ( tt.right[tt.right.length()-1] == CH )
{
if ( tt.left[0] == 'S' )
action[i]['#'] = Content ( 2 , -1 );
else
for ( int k = 0 ; k < V.size() ; k++ )
{
int y = V[k];
//cout << "YES " << endl;
//cout << tt.left << "->" << tt.right << " " << tt.back << endl;
if ( !follow[tt.left].count( V[k] ) ) continue;
//cout <<tt.left << "->" << tt.right << " " << i << " " << V[k] << " " << tt.back << endl;
action[i][y] = Content ( 1, tt.back );
}
}
}
#ifdef DEBUG
puts ( "------------------------------------------LR(0)分析表--------------------------------------------------------" );
printf ( "%10s%5c%5s" , "|" , V[0] , "|");
for ( int i = 1 ; i < V.size() ; i++ )
printf ( "%5c%5s" , V[i] , "|" );
puts ("");
for ( int i = 0 ; i < (V.size()+1)*10 ; i++ )
printf ( "-" );
puts("");
stringstream sin;
for ( int i = 0 ; i < collection.size() ; i++ )
{
printf ( "%5d%5s" , i , "|" );
for ( int j = 0 ; j < V.size() ; j++ )
{
char ch = V[j];
if ( isupper(ch) )
{
if ( Goto[i][ch] == -1 )
printf ( "%10s" , "|" );
else
printf ( "%5d%5s" , Goto[i][ch] , "|" );
}
else
{
sin.clear();
if ( action[i][ch].type == -1 )
printf ( "%10s" , "|" );
else
{
Content& temp = action[i][ch];
if ( temp.type == 0 )
sin << "S";
if ( temp.type == 1 )
sin << "R";
if ( temp.type == 2 )
sin << "acc";
if ( temp.num != -1 )
sin << temp.num;
sin >> temp.out;
printf ( "%7s%3s" , temp.out.c_str() , "|" );
}
}
}
puts ("");
}
for ( int i = 0 ; i < (V.size()+1)*10 ; i++ )
printf ( "-" );
puts("");
#endif
}
void print ( string s1 , string s2 , string s3 , string s4 , string s5 , string s6 , string s7 )
{
printf ( "%-15s|%-15s%-15s%-20s|%-15s%-15s%-15s\n" , s1.c_str() , s2.c_str() , s3.c_str() ,s4.c_str(),s5.c_str(),
s6.c_str() , s7.c_str() );
}
string get_steps ( int x )
{
stringstream sin;
sin << x;
string ret;
sin >> ret;
return ret;
}
template<class T>
string get_stk ( vector<T> stk )
{
stringstream sin;
for ( int i = 0 ; i < stk.size() ; i++ )
sin << stk[i];
string ret;
sin >> ret;
return ret;
}
string get_shift ( WF& temp )
{
stringstream sin;
sin << "reduce(" << temp.left << "->" << temp.right <<")";
string out;
sin >> out;
return out;
}
void analyse ( string src )
{
print ( "steps","op-stack" ,"input","operation","state-stack" , "ACTION" , "GOTO" );
vector<char> op_stack;
vector<int> st_stack;
src+= "#";
op_stack.push_back ( '#' );
st_stack.push_back ( 0 );
int steps= 1;
for ( int i = 0 ; i < src.length() ; i++ )
{
char u = src[i];
int top = st_stack[st_stack.size()-1];
Content& act = action[top][u];
//cout << "YES : " << i << " " << u << " " << top << " " << act.type << endl;
if ( act.type == 0 )
{
print ( get_steps ( steps++ ) , get_stk ( op_stack ) , src.substr(i), "shift", get_stk( st_stack ) , act.out , "" );
op_stack.push_back ( u );
st_stack.push_back ( act.num );
}
else if ( act.type == 1 )
{
WF& tt = wf[act.num];
int y = st_stack[st_stack.size()-tt.right.length()-1];
int x = Goto[y][tt.left[0]];
//cout << y << " " << tt.left[0] << " " << x << endl;
print ( get_steps ( steps++ ) , get_stk ( op_stack ) , src.substr(i) , get_shift(tt) ,get_stk( st_stack),act.out,get_steps(x));
for ( int j = 0 ; j < tt.right.length() ; j++ )
{
st_stack.pop_back();
op_stack.pop_back();
}
op_stack.push_back ( tt.left[0] );
st_stack.push_back ( x );
i--;
}
else if ( act.type == 2 )
{
print ( get_steps( steps++ ), get_stk( op_stack ) , src.substr(i) , "Accept" , get_stk(st_stack) , act.out , "" );
//i--;
}
else continue;
}
}
int main ( )
{
int n;
char s[MAX];
while ( ~scanf ( "%d" , &n ) )
{
for ( int i = 0 ; i < n ; i++ )
{
scanf ( "%s" , s );
int len = strlen(s),j;
for ( j = 0 ; j < len ; j++ )
if ( s[j] == '-' ) break;
s[j] = 0;
wf.push_back ( WF ( s , s+j+2 ,-1 , -1 ) );
#ifdef DEBUG
wf[wf.size()-1].print();
#endif
}
make_item();
make_first();
make_follow();
make_set();
make_V();
make_go();
make_table();
analyse ( "(i*i)+i" );
}
}
~~~
### Input
7
S->E
E->E+T
E->T
T->T*F
T->F
F->(E)
F->i
### Output
### 生成的项目表
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fac288f8.jpg "")
### 非终结符的follow集合
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fac3dece.jpg "")
### 项目规范族
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fac52d20.jpg "")
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fac65dbd.jpg "")
### 构造出的DFA
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fac772f5.jpg "")
### SLR文法表和一个例子的文法分析过程
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fac8dd37.jpg "")
编译原理(九) LR(0)文法分析法(算法描述和C++代码实现)
最后更新于:2022-04-01 14:15:29
**后期DEBUG发现make_set函数和make_go存在问题,于2015年12月4日更新了代码,见谅**
## 概念梳理
最左推导:每一步替换最左边的非终结符
最右推导:每一步替换最右边的非终结符,最右推导称为规范推导
短语:令G是一个文法,S是文法的开始符号,假定αβδ是文法G的一个句型,如果有
S⇒∗αAδ且A⇒+β
则称 β是相对于非终结符A的, 句型αβδ的***短语***。
直接短语:令G是一个文法,S是文法的开始符号,假定αβδ是文法G的一个句型,如果有
S⇒∗αAδ且A⇒β
则称 β是相对于非终结符A的, 句型αβδ的***直接短语***。
***注意:短语和直接短语的区别在于第二个条件, 直接短语中的第二个条件表示有文法规则 A⇒β ,因此,每个直接短语都是某规则右部。***
句柄:一个句型的最左直接短语称为该句型的***句柄***。
- 句柄特征:
- (1) 它是直接短语,即某规则右部。
- (2) 它具有最左性。
规范归约:文法的最右推导为规范推导,自底向上分析是自顶向下最右推导的逆过程,叫**规范归约**
活前缀:指规范句型的一个前缀,这种前缀不含句柄之后任何符号。之所以称为活前缀,是因为在右边添加一些符号之首,就可以使它称为一个规范句型。
项目:对于一个文法G,我们首先要构造一个NFA,它能识别G的所有活前缀。这个NFA的每一个状态是下面定义的一个“项目”。
项目分类:
- **归约项目**
凡圆点在最右的项目,如A→α•称为一个“归约项目”
- **接受项目**
对文法的开始符号S’的归约项目,如S’→α•称为“接受”项目。
- **移进项目**
形如A→α•aβ的项目,其中a为终结符,称为“移进”项目。
- **待约项目**
形如A→α•Bβ的项目,其中B为非终结符,称为“待约”项目。
项目规范族:假定I是文法G’的任一项目集,定义和构造I的闭包CLOSURE(I)的办法是:
- I的任何项目都属于CLOSURE(I);
- 若A→α•Bβ属于CLOSURE(I),那么,对任何关于B的产生式B→γ,项目B→•γ也属于CLOSURE(I);
LR(0)文法:假如一个文法G的拓广文法G’的活前缀识别自动机的每个状态(项目集)不存在下述情况:
- 既含移进项目又含归约项目。
- 含多个归约项目。
则称G是一个LR(0)文法。换言之,LR(0)文法规范族的每个项目集**不包含任何冲突项目**。
拓广文法:假定文法G是一个以S为开始符号的文法,我们构造一个文法G’,它包含整个G,但它引进了一个不出现在G中的非终结符S’,并加进一个新产生式S’→S,而这个S’是G’的开始符号。那么我们称G’是G的拓广文法。
函数GO(I,X):函数GO(I,X)是一个状态转换函数。
- 第一个变元I是一个项目集,
- 第二个变元X是一个文法符号。
- 函数值GO(I,X)定义为GO(I,X)=CLOSURE(J),其中J={任何形如A→αX•β的项目 | A→α•Xβ属于I}
## 算法描述
### 项目集构造算法
枚举每个规范句型,然后枚举”⋅”的位置,获得所有的项目
### 项目集规范族构造算法
假定I是文法G’的任一项目集,定义和构造I的闭包CLOSURE(I)的办法是:
I的任何项目都属于CLOSURE(I);
若A→α•Bβ属于CLOSURE(I),那么,对任何关于B的产生式B→γ,项目B→•γ也属于CLOSURE(I);
重复执行上述两步骤直至CLOSURE(I)不再增大为止。
### Go(I,a)函数构造算法
遍历所有的项目,如果任意两个项目之间存在边(有向),那么这两个项目所在的项目规范族之间连上对应的有向边。
### LR(0)分析表构造算法
假定项目集规范族C={I0,I1,…,In}。令每一个项目集Ik的下标k作为分析器的状态。分析表的ACTION子表和GOTO子表可按如下方法构造
- 令那个包含项目S’→•S的集合Ik的下标k为分析器的初态。
- 若项目A→α•aβ属于Ik且GO(Ik , a)= Ij,a为终结符,置ACTION[k,a]为“把(j,a)移进栈”,简记为“sj”。
- 若项目A→α•属于Ik,对任何终结符a(或结束符#),置ACTION[k,a]为“用产生式A→α进行归约”,简记为“rj”(假定产生式A→α是文法G’的第j个产生式)。
- 若项目S’→S•属于Ik,则置ACTION[k,#]为“接受”,简记为“acc”。
- 若GO(Ik , A)= Ij,A为非终结符,则置GOTO[k,A]=j。
- 分析表中凡不能用规则1至4填入信息的空白格均填上“报错标志”。
### LR(0)分析法的分析过程
- 遍历输入字符串,对于每一个字符,获取当前状态栈的顶部的状态值,通过查询action表获取的当前的操作是移进、规约还是接受
- 如果当前操作是移进,将新的状态放入状态栈当中,当移入的字符放入符号栈中。
- 如果当前操作是规约,那么将需要规约的部分的状态从状态栈中弹出,将规约后的状态放入状态栈,将规约后的左部放入符号栈,当前的字符不向下推进
- 如果接收,则结束
## 代码实现
~~~
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cstring>
#include <cctype>
#include <vector>
#include <string>
#include <queue>
#include <map>
#include <sstream>
#define MAX 507
#define DEBUG
using namespace std;
class WF
{
public:
string left,right;
int back;
int id;
WF ( char s1[] , char s2[] , int x , int y )
{
left = s1;
right = s2;
back = x;
id = y;
}
WF ( const string& s1 , const string& s2 , int x , int y )
{
left = s1;
right = s2;
back = x;
id = y;
}
bool operator < ( const WF& a ) const
{
if ( left == a.left )
return right < a.right;
return left < a.left;
}
bool operator == ( const WF& a ) const
{
return ( left == a.left )&& ( right == a.right );
}
void print ( )
{
printf ( "%s->%s\n" , left.c_str() , right.c_str() );
}
};
class Closure
{
public:
vector<WF> element;
void print ( string str )
{
printf ( "%-15s%-15s\n" , "" , str.c_str());
for ( int i = 0 ; i < element.size() ; i++ )
element[i].print();
}
bool operator == ( const Closure& a ) const
{
if ( a.element.size() != element.size() ) return false;
for ( int i = 0 ; i < a.element.size() ; i++ )
if ( element[i] == a.element[i] ) continue;
else return false;
return true;
}
};
struct Content
{
int type;
int num;
string out;
Content(){ type = -1; }
Content ( int a , int b )
:type(a),num(b){}
};
vector<WF> wf;
map<string,vector<int> > dic;
string start = "S";
vector<Closure> collection;
vector<WF> items;
char CH = '$';
int go[MAX][MAX];
int to[MAX];
vector<char> V;
bool used[MAX];
Content action[MAX][MAX];
int Goto[MAX][MAX];
void make_item ( )
{
memset ( to , -1 , sizeof ( -1 ) );
for ( int i = 0 ; i < wf.size() ; i++ )
for ( int j = 0 ; j <= wf[i].right.length() ; j++ )
{
string temp = wf[i].right;
temp.insert ( temp.begin()+j , CH );
dic[wf[i].left].push_back ( items.size() );
if ( j )
to[items.size()-1] = items.size();
items.push_back ( WF ( wf[i].left , temp , i , items.size()) );
}
#ifdef DEBUG
puts("-------------------------项目表-------------------------");
for ( int i = 0 ; i < items.size() ; i++ )
printf ( "%s->%s\n" , items[i].left.c_str() , items[i].right.c_str() );
puts("--------------------------------------------------------");
#endif
}
void make_set ( )
{
bool has[MAX];
for ( int i = 0 ; i < items.size() ; i++ )
if ( items[i].left[0] == 'S' && items[i].right[0] == CH )
{
Closure temp;
string& str = items[i].right;
vector<WF>& element = temp.element;
element.push_back ( items[i] );
int x = 0;
for ( x = 0 ; x < str.length() ; x++ )
if ( str[x] == CH )
break;
/*if ( x != str.length()-1 )
{
string tt = str.substr(x+1,1);
vector<int>& id = dic[tt];
for ( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
//items[tx].print();
if ( items[tx].right[0] == CH )
element.push_back ( items[tx] );
}
}*/
memset ( has , 0 , sizeof ( has ) );
has[i] = 1;
if ( x != str.length()-1 )
{
queue<string> q;
q.push( str.substr(x+1,1) );
while ( !q.empty() )
{
string u = q.front();
q.pop();
vector<int>& id = dic[u];
for( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
if ( items[tx].right[0] == CH )
{
if ( has[tx] ) continue;
has[tx] = 1;
if ( isupper(items[tx].right[1] ) )
q.push ( items[tx].right.substr(1,1));
element.push_back ( items[tx] );
}
}
}
}
collection.push_back ( temp );
}
for ( int i = 0 ; i < collection.size() ; i++ )
{
map<int,Closure> temp;
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
string str = collection[i].element[j].right;
int x = 0;
for ( ; x < str.length() ; x++ )
if ( str[x] == CH ) break;
if ( x == str.length()-1 )
continue;
int y = str[x+1];
int ii;
//cout << i << "previous: " << str << endl;
str.erase ( str.begin()+x);
str.insert ( str.begin()+x+1 , CH );
//cout << i <<"after: " << str << endl;
WF cmp = WF ( collection[i].element[j].left , str , -1 , -1 );
for ( int k = 0 ; k< items.size() ; k++ )
if ( items[k] == cmp )
{
ii = k;
break;
}
//string& str1 = items[ii].right;
memset ( has , 0 , sizeof ( has ) );
vector<WF>& element = temp[y].element;
element.push_back ( items[ii] );
has[ii] = 1;
x++;
/*if ( x != str.length()-1 )
{
string tt = str.substr(x+1,1);
vector<int>& id = dic[tt];
for ( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
//items[tx].print();
if ( items[tx].right[0] == CH )
element.push_back ( items[tx] );
}
}*/
if ( x != str.length()-1 )
{
queue<string> q;
q.push( str.substr(x+1,1) );
while ( !q.empty() )
{
string u = q.front();
q.pop();
vector<int>& id = dic[u];
for( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
if ( items[tx].right[0] == CH )
{
if ( has[tx] ) continue;
has[tx] = 1;
if ( isupper(items[tx].right[1] ) )
q.push ( items[tx].right.substr(1,1));
element.push_back ( items[tx] );
}
}
}
}
}
map<int,Closure>::iterator it = temp.begin();
for ( ; it != temp.end() ; it++ )
collection.push_back ( it->second );
for ( int i = 0 ; i < collection.size() ; i++ )
sort ( collection[i].element.begin() , collection[i].element.end() );
for ( int i = 0 ; i < collection.size() ; i++ )
for ( int j = i+1 ; j < collection.size() ; j++ )
if ( collection[i] == collection[j] )
collection.erase ( collection.begin()+j );
}
#ifdef DEBUG
puts ("-------------CLOSURE---------------------");
stringstream sin;
for ( int i = 0 ; i < collection.size() ; i++ )
{
sin.clear();
string out;
sin <<"closure-I" << i;
sin >> out;
collection[i].print ( out );
}
puts("");
#endif
}
void make_V ( )
{
memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < wf.size() ; i++ )
{
string& str = wf[i].left;
for ( int j = 0 ; j < str.length() ; j++ )
{
if ( used[str[j]] ) continue;
used[str[j]] = 1;
V.push_back ( str[j] );
}
string& str1 = wf[i].right;
for ( int j = 0 ; j < str1.length() ; j++ )
{
if ( used[str1[j]] ) continue;
used[str1[j]] = 1;
V.push_back ( str1[j] );
}
}
sort ( V.begin() , V.end() );
V.push_back ( '#' );
}
void make_cmp ( vector<WF>& cmp1 , int i , char ch )
{
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
string str = collection[i].element[j].right;
int k;
for ( k = 0 ; k < str.length() ; k++ )
if ( str[k] == CH )
break;
if ( k != str.length() - 1 && str[k+1] == ch )
{
str.erase ( str.begin()+k);
str.insert ( str.begin()+k+1 , CH );
cmp1.push_back ( WF ( collection[i].element[j].left , str , -1 , -1 ) );
}
}
sort ( cmp1.begin() , cmp1.end() );
}
void make_go ( )
{
memset ( go , -1 , sizeof ( go ) );
int m = collection.size();
/*for ( int i = 0 ; i < m ; i++ )
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
string left = collection[i].element[j].left;
string str = collection[i].element[j].right;
int x = 0;
for ( ; x < str.length() ; x++ )
if ( str[x] == CH ) break;
if ( x == str.length()-1 )
continue;
int y = str[x+1];
//cout << "before : " << str << endl;
str.erase ( str.begin()+x);
str.insert ( str.begin()+x+1 , CH );
//cout << "after : " << str << endl;
WF cmp = WF ( collection[i].element[j].left , str , -1 , -1 );
for ( int k = 0 ; k < m ; k++ )
{
bool flag = false;
for ( int t = 0 ; t < collection[k].element.size() ; t++ )
{
if ( cmp == collection[k].element[t] )
{
flag = true;
break;
}
}
if ( flag )
{
go[i][y] = k;
}
}
}*/
for ( int t = 0 ; t < V.size() ; t++ )
{
char ch = V[t];
for ( int i = 0 ; i < m ; i++ )
{
vector<WF> cmp1;
make_cmp ( cmp1 , i , ch );
cout << cmp1.size() << endl;
if ( cmp1.size() == 0 ) continue;
for ( int j = 0 ; j < m ; j++ )
{
vector<WF> cmp2;
for ( int k = 0 ; k < collection[j].element.size() ; k++ )
{
string& str = collection[j].element[k].right;
int x;
for ( x = 0 ; x < str.length() ; x++ )
if ( str[x] == CH )
break;
if ( x && str[x-1] == ch )
cmp2.push_back ( WF( collection[j].element[k].left , str , -1 , -1 ) );
}
sort ( cmp2.begin() , cmp2.end() );
cout << cmp2.size() << endl;
bool flag = true;
if ( cmp2.size() != cmp1.size() ) continue;
cout << cmp1.size() << endl;
for ( int k = 0 ; k < cmp1.size() ; k++ )
if ( cmp1[k] == cmp2[k] ) continue;
else flag = false;
cout << "out " << endl;
if ( flag )
go[i][ch] = j;
}
//cout << "YES" << endl;
}
}
#ifdef DEBUG
puts ("---------------EDGE----------------------");
stringstream sin;
string out;
for ( int i = 0 ; i < m ; i++ )
for ( int j = 0 ; j < m ; j++ )
for ( int k = 0 ; k < MAX ; k++ )
if ( go[i][k] == j )
{
sin.clear();
sin << "I" << i << "--" <<(char)(k)<<"--I"<<j;
sin >> out;
printf ( "%s\n" , out.c_str() );
}
#endif
}
void make_table ( )
{
memset ( Goto , -1 , sizeof ( Goto ) );
/*memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < wf.size() ; i++ )
{
string& str = wf[i].left;
for ( int j = 0 ; j < str.length() ; j++ )
{
if ( used[str[j]] ) continue;
used[str[j]] = 1;
V.push_back ( str[j] );
}
string& str1 = wf[i].right;
for ( int j = 0 ; j < str1.length() ; j++ )
{
if ( used[str1[j]] ) continue;
used[str1[j]] = 1;
V.push_back ( str1[j] );
}
}
sort ( V.begin() , V.end() );
V.push_back ( '#' );*/
//write s to the table
for( int i = 0 ; i < collection.size() ; i++ )
for ( int j = 0 ; j < V.size() ; j++ )
{
char ch = V[j];
int x = go[i][ch];
if ( x == -1 ) continue;
if ( !isupper(ch) )
action[i][ch] = Content ( 0 , x );
else
Goto[i][ch] = x;
}
//write r and acc to the table
for ( int i = 0 ; i < collection.size() ; i++ )
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
WF& tt = collection[i].element[j];
if ( tt.right[tt.right.length()-1] == CH )
{
if ( tt.left[0] == 'S' )
action[i]['#'] = Content ( 2 , -1 );
else
for ( int k = 0 ; k < V.size() ; k++ )
{
int y = V[k];
//cout << "YES " << endl;
action[i][y] = Content ( 1, tt.back );
}
}
}
#ifdef DEBUG
puts ( "------------------------------------------LR(0)分析表--------------------------------------------------------" );
printf ( "%10s%5c%5s" , "|" , V[0] , "|");
for ( int i = 1 ; i < V.size() ; i++ )
printf ( "%5c%5s" , V[i] , "|" );
puts ("");
for ( int i = 0 ; i < (V.size()+1)*10 ; i++ )
printf ( "-" );
puts("");
stringstream sin;
for ( int i = 0 ; i < collection.size() ; i++ )
{
printf ( "%5d%5s" , i , "|" );
for ( int j = 0 ; j < V.size() ; j++ )
{
char ch = V[j];
if ( isupper(ch) )
{
if ( Goto[i][ch] == -1 )
printf ( "%10s" , "|" );
else
printf ( "%5d%5s" , Goto[i][ch] , "|" );
}
else
{
sin.clear();
if ( action[i][ch].type == -1 )
printf ( "%10s" , "|" );
else
{
Content& temp = action[i][ch];
if ( temp.type == 0 )
sin << "S";
if ( temp.type == 1 )
sin << "R";
if ( temp.type == 2 )
sin << "acc";
if ( temp.num != -1 )
sin << temp.num;
sin >> temp.out;
printf ( "%7s%3s" , temp.out.c_str() , "|" );
}
}
}
puts ("");
}
for ( int i = 0 ; i < (V.size()+1)*10 ; i++ )
printf ( "-" );
puts("");
#endif
}
void print ( string s1 , string s2 , string s3 , string s4 , string s5 , string s6 , string s7 )
{
printf ( "%-15s|%-15s%-15s%-20s|%-15s%-15s%-15s\n" , s1.c_str() , s2.c_str() , s3.c_str() ,s4.c_str(),s5.c_str(),
s6.c_str() , s7.c_str() );
}
string get_steps ( int x )
{
stringstream sin;
sin << x;
string ret;
sin >> ret;
return ret;
}
template<class T>
string get_stk ( vector<T> stk )
{
stringstream sin;
for ( int i = 0 ; i < stk.size() ; i++ )
sin << stk[i];
string ret;
sin >> ret;
return ret;
}
string get_shift ( WF& temp )
{
stringstream sin;
sin << "reduce(" << temp.left << "->" << temp.right <<")";
string out;
sin >> out;
return out;
}
void analyse ( string src )
{
print ( "steps","op-stack" ,"input","operation","state-stack" , "ACTION" , "GOTO" );
vector<char> op_stack;
vector<int> st_stack;
src+= "#";
op_stack.push_back ( '#' );
st_stack.push_back ( 0 );
int steps= 1;
for ( int i = 0 ; i < src.length() ; i++ )
{
char u = src[i];
int top = st_stack[st_stack.size()-1];
Content& act = action[top][u];
//cout << "YES : " << i << " " << u << " " << top << " " << act.type << endl;
if ( act.type == 0 )
{
print ( get_steps ( steps++ ) , get_stk ( op_stack ) , src.substr(i), "shift", get_stk( st_stack ) , act.out , "" );
op_stack.push_back ( u );
st_stack.push_back ( act.num );
}
else if ( act.type == 1 )
{
WF& tt = wf[act.num];
int y = st_stack[st_stack.size()-tt.right.length()-1];
int x = Goto[y][tt.left[0]];
//cout << y << " " << tt.left[0] << " " << x << endl;
print ( get_steps ( steps++ ) , get_stk ( op_stack ) , src.substr(i) , get_shift(tt) ,get_stk( st_stack),act.out,get_steps(x));
for ( int j = 0 ; j < tt.right.length() ; j++ )
{
st_stack.pop_back();
op_stack.pop_back();
}
op_stack.push_back ( tt.left[0] );
st_stack.push_back ( x );
i--;
}
else if ( act.type == 2 )
{
print ( get_steps( steps++ ), get_stk( op_stack ) , src.substr(i) , "Accept" , get_stk(st_stack) , act.out , "" );
//i--;
}
else continue;
}
}
int main ( )
{
int n;
char s[MAX];
while ( ~scanf ( "%d" , &n ) )
{
for ( int i = 0 ; i < n ; i++ )
{
scanf ( "%s" , s );
int len = strlen(s),j;
for ( j = 0 ; j < len ; j++ )
if ( s[j] == '-' ) break;
s[j] = 0;
wf.push_back ( WF ( s , s+j+2 ,-1 , -1 ) );
#ifdef DEBUG
wf[wf.size()-1].print();
#endif
}
make_item();
make_set();
make_V();
make_go();
make_table();
analyse ( "abbcde" );
}
}
~~~
### Input:
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fabc6517.jpg "")
### Output:
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fabd95f9.jpg "")
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fac07fd9.jpg "")
## 后期调试
关于12月4日对于make_set函数和make_go函数的更正:
- 错误主要源自于自己对于概念的模糊,make_set过程中如果加入的新的产生式的右侧”⋅”之后依旧是没有出现过的非终结符,那么也要把以它为左部的”$cdot”在首位的项目加入当前的项目规范族
- make_go是在对规范族之间建边时,一定是通过一个字符的移进,所有与其相关的式子都符合才能建边,而不是只要有两个规范族中有符合要求的规范式就能建边
编译原理(八) 算符优先分析法(分析过程的算法和C++实现)
最后更新于:2022-04-01 14:15:27
## 前情提要
[算符优先分析法(构造算法优先关系表)](http://blog.csdn.net/qq_24451605/article/details/50096487)
## 算法描述
**算符优先关系主要用于界定右句型的句柄:**
> ***<***标记句柄的左端;
***=***出现在句柄的内部;
***>***标记句柄的右端。
**发现句柄的过程:**
- 从左端开始扫描串,直到遇到第一个>为止。
- 向左扫描,跳过所有的=,直到遇到一个<为止。
- 句柄包括从上一步遇到的<右部到第一个>左部之间的所有符号,包括介于期间或者两边的非终结符
**非终结符的处理:**
因为非终结符不能影响语法分析,所以不需要区分它们,于是只用一个占位符来代替它们
**算法的主体思想:**
用栈存储已经看到的输入符号,用优先关系指导移动归约语法分析器的动作
如果栈顶的终结符和下一个输入符之间的优先关系是<或=,则语法分析器移动,表示还没有发现句柄的右端
如果是>关系,就调用归约
**算法描述:**
输入:输入字符串ω和优先关系表
输出:如果ω是语法产生的一个句子,则输出其用来归约的产生式;如果有错误,则转入错误处理
## 代码实现
~~~
#include <iostream>
#include <cstring>
#include <cstdio>
#include <vector>
#include <stack>
#include <map>
#include <set>
#include <algorithm>
#include <string>
#include <cstdlib>
#include <cctype>
#define MAX 507
using namespace std;
class WF
{
public:
string left;
vector<string> right;
WF ( const string& str )
{
left = str;
}
void insert ( char str[] )
{
right.push_back(str);
}
void print ( )
{
printf ( "%s->%s" , left.c_str() , right[0].c_str() );
for ( int i = 1 ; i < right.size() ; i++ )
printf ( "|%s" , right[i].c_str() );
puts("");
}
};
char relation[MAX][MAX];
vector<char> VT;
vector<WF> VN_set;
map<string,int> VN_dic;
set<char> first[MAX];
set<char> last[MAX];
int used[MAX];
int vis[MAX];
void dfs ( int x )
{
if ( vis[x] ) return;
vis[x] = 1;
string& left = VN_set[x].left;
for ( int i = 0 ; i < VN_set[x].right.size() ; i++ )
{
string& str = VN_set[x].right[i];
if ( isupper(str[0]) )
{
int y = VN_dic[str.substr(0,1)]-1;
if ( str.length() > 1 && !isupper(str[1] ) )
first[x].insert ( str[1] );
dfs ( y );
set<char>::iterator it = first[y].begin();
for ( ; it!= first[y].end() ; it++ )
first[x].insert ( *it );
}
else
first[x].insert ( str[0] );
}
}
void make_first ( )
{
memset ( vis , 0 , sizeof ( vis ) );
for ( int i = 0 ; i < VN_set.size() ; i++ )
if ( vis[i] ) continue;
else dfs ( i );
#define DEBUG
#ifdef DEBUG
puts("------------FIRSTVT集-------------------");
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
printf ( "%s : " , VN_set[i].left.c_str() );
set<char>::iterator it = first[i].begin();
for ( ; it!= first[i].end() ; it++ )
printf ( "%c " , *it );
puts ("" );
}
#endif
}
void dfs1 ( int x )
{
if ( vis[x] ) return;
vis[x] = 1;
string& left = VN_set[x].left;
for ( int i = 0 ; i < VN_set[x].right.size() ; i++ )
{
string& str = VN_set[x].right[i];
int n = str.length() -1;
if ( isupper(str[n] ) )
{
int y = VN_dic[str.substr(n,1)]-1;
if ( str.length() > 1 && !isupper(str[n-1]) )
last[x].insert ( str[1] );
dfs1 ( y );
set<char>::iterator it = last[y].begin();
for ( ; it != last[y].end() ; it++ )
last[x].insert ( *it );
}
else
last[x].insert ( str[n] );
}
}
void make_last ( )
{
memset ( vis , 0 , sizeof ( vis ) );
for ( int i = 0 ; i < VN_set.size() ; i++ )
if ( vis[i] ) continue;
else dfs1 ( i );
#define DEBUG
#ifdef DEBUG
puts("--------------LASTVT集---------------------");
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
printf ( "%s : " , VN_set[i].left.c_str() );
set<char>::iterator it = last[i].begin();
for ( ; it!= last[i].end() ; it++ )
printf ( "%c " , *it );
puts ("" );
}
#endif
}
void make_table ( )
{
for ( int i = 0 ; i < MAX ; i++ )
for ( int j = 0 ; j < MAX ; j++ )
relation[i][j] = ' ';
for ( int i = 0 ; i < VN_set.size() ; i++ )
for ( int j = 0 ; j < VN_set[i].right.size() ; j++ )
{
string& str = VN_set[i].right[j];
for ( int k = 0 ; k < str.length()-1 ; k++ )
{
if ( !isupper(str[k]) && !isupper(str[k+1]) )
relation[str[k]][str[k+1]] = '=';
if ( !isupper(str[k]) && isupper(str[k+1]) )
{
int x = VN_dic[str.substr(k+1,1)]-1;
set<char>::iterator it = first[x].begin();
for ( ; it != first[x].end() ; it++ )
relation[str[k]][*it] = '<';
}
if ( isupper(str[k]) && !isupper(str[k+1]) )
{
int x = VN_dic[str.substr(k,1)]-1;
set<char>::iterator it = last[x].begin();
for ( ; it != last[x].end() ; it++ )
relation[*it][str[k+1]] = '>';
}
if ( k > str.length()-2 ) continue;
if ( !isupper(str[k]) && !isupper(str[k+2]) && isupper(str[k+1]) )
relation[str[k]][str[k+2]] = '=';
}
}
#define DEBUG
#ifdef DEBUG
for ( int i = 0 ; i < VT.size()*5 ; i++ )
printf ("-");
printf ( "算符优先关系表" );
for ( int i = 0 ; i < VT.size()*5 ; i++ )
printf ( "-" );
puts("");
printf ( "|%8s|" , "" );
for ( int i = 0 ; i < VT.size() ; i++ )
printf ( "%5c%5s" , VT[i] , "|" );
puts ("");
for ( int i = 0 ; i < (VT.size()+1)*10 ; i++ )
printf ("-");
puts("");
for ( int i = 0 ; i < VT.size() ; i++ )
{
printf ( "|%4c%5s" , VT[i] , "|");
for ( int j = 0 ; j < VT.size() ; j++ )
printf ( "%5c%5s" , relation[VT[i]][VT[j]] , "|" );
puts ("");
for ( int i = 0 ; i < (VT.size()+1)*10 ; i++ )
printf ("-");
puts("");
}
#endif
}
int fa[MAX];
int _find ( int x )
{
return x==fa[x]?x:fa[x] = _find ( fa[x] );
}
bool judge ( char x , char y )
{
if ( _find ( x ) == _find ( y ) )
return true;
return false;
}
void _union ( char x , char y )
{
x = _find(x);
y = _find(y);
fa[x] = y;
}
void print ( string s1 , string s2 , string s3 , string s4 , string s5 , string s6 )
{
printf ( "%-14s|%-15s%-15s%-15s%-15s%-15s\n" , s1.c_str(), s2.c_str(), s3.c_str() ,s4.c_str(),s5.c_str() , s6.c_str() );
}
void init ( )
{
for ( int i = 0 ; i < MAX ; i++ )
fa[i] = i;
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
string& left = VN_set[i].left;
for ( int j = 0 ; j < VN_set[i].right.size() ; j++ )
{
string& str = VN_set[i].right[j];
if ( left.length() == 1 && str.length() == 1 )
{
// cout << left[0] << " " << str[0] << endl;
_union ( left[0] , str[0] );
// cout << _find(left[0]) << " " << _find ( str[0] ) << endl;
}
}
}
print("步骤","栈","优先关系","当前符号","剩余符号","动作");
}
string get_stk ( vector<char>& stk )
{
string ret = "";
for ( int i = 0 ; i < stk.size() ; i++ )
ret += stk[i];
return ret;
}
bool check ( const string& str1 , const string& str2 )
{
if ( str1.length() != str2.length() )
return false;
for ( int i = 0 ; i < str1.length() ; i++ )
if ( isupper(str1[i]) )
{
if ( !judge(str1[i],str2[i])) return false;
}
else
{
if ( str1[i] != str2[i] ) return false;
}
return true;
}
string reduction ( string src )
{
for ( int i = 0 ; i < VN_set.size() ; i++ )
for ( int j = 0 ; j < VN_set[i].right.size() ; j++ )
if ( check ( VN_set[i].right[j] , src ) )
return VN_set[i].left;
return "";
}
void move_reduction ( string src )
{
init ();
//cout <<"flag : " << check ( "P+P" , "E+T" ) << endl;;
vector<char> stk;
int steps= 1;
src += "#";
stk.push_back ( '#' );
for ( int i = 0 ; i < src.length() ; i++ )
{
char top = stk[stk.size()-1];
for ( int j = stk.size()-1 ; j >= 0 ; j-- )
if ( isupper(stk[j]) ) continue;
else
{
top = stk[j];
break;
}
char ch = relation[top][src[i]];
if ( ch == '<' || ch == '=' )
{
string temp = "";
if ( i == src.length() - 1 )
print ( temp+(char)(steps+48) , get_stk( stk ) , temp+ch , temp+src[i] , "" , "移进" );
else
print ( temp+(char)(steps+48) , get_stk( stk ) , temp+ch , temp+src[i] , src.substr(i+1,src.length()-i-1) , "移进" );
stk.push_back ( src[i] );
}
else
{
string temp ="";
string str ="";
int x = stk.size()-2;
if ( i == src.length() )
print ( temp+(char)(steps+48) , get_stk(stk) , temp+ch , temp + src[i] , "" , "归约" );
else
print ( temp+(char)(steps+48) , get_stk(stk) , temp+ch , temp + src[i] , src.substr(i+1,src.length()-i-1) , "归约" );
while ( true )
{
if ( stk.size() == 0 ) break;
if ( !isupper(stk[x] ) &&relation[stk[x]][top] == '<' )
break;
x--;
}
for ( int j = stk.size()-1 ; j > x; j-- )
{
str += stk[j];
stk.pop_back();
}
//cout << "YES : " << i <<" " << str << endl;
str = reduction(str);
//cout << "finish: " << i << " " << str << endl;
for ( int j = 0 ; j < str.length() ; j++ )
stk.push_back ( str[j] );
/*if ( i == src.length() )
print ( temp+(char)(steps+48) , get_stk(stk) , temp+ch , temp + src[i] , "" , "移进" );
else
print ( temp+(char)(steps+48) , get_stk(stk) , temp+ch , temp + src[i] , src.substr(i+1,src.length()-i-1) , "移进" );
stk.push_back ( src[i] );*/
i--;
}
steps++;
}
//string temp ="";
//cout << "result" << endl;
//print ( temp+(char)(steps+48) , get_stk(stk) , "","","","");
}
int main ( )
{
int n;
char s[MAX];
while ( ~scanf ( "%d" , &n ) )
{
memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < n ; i++ )
{
scanf ( "%s" , s );
int len = strlen(s),j;
for ( j = 0 ; j < len ; j++ )
if ( s[j] == '-' )
break;
s[j] = 0;
if ( !VN_dic[s] )
{
VN_set.push_back ( WF(s) );
VN_dic[s] = VN_set.size();
}
int x = VN_dic[s]-1;
VN_set[x].insert ( s+j+2 );
for ( int k = 0 ; k < j; k++ )
if ( !isupper(s[k] ) )
{
if ( used[s[k]] ) continue;
used[s[k]] = 1;
VT.push_back ( s[k] );
}
for ( int k = j+2 ; k < len; k++ )
if ( !isupper(s[k] ) )
{
if ( used[s[k]] ) continue;
VT.push_back ( s[k] );
used[s[k]] = VT.size();
}
}
#define DEBUG
#ifdef DEBUG
puts ("************VT集*******************");
for ( int i = 0 ; i < VT.size() ; i++ )
printf ( "%c " , VT[i] );
puts ("");
puts("*************产生式*****************");
for ( int i = 0 ; i < VN_set.size() ; i++ )
VN_set[i].print();
puts("************************************");
#endif
make_first();
make_last();
make_table();
move_reduction("i+i");
}
}
~~~
### Input:
6
S->#E#
P->i
F->P
T->F
E->T
E->E+T
### Output:
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fabab0a6.jpg "")
编译原理(七) 算符优先分析法(构造算符优先关系表算法及C++实现)
最后更新于:2022-04-01 14:15:25
## 概念简述
移动归约分析法:自底向上的语法分析方法,也称为***移动归约分析法***。
- 最易于实现的一种移动归约分析方法,叫做***算符优先分析法***,
- 而更一般的移动归约分析方法叫做***LR分析法***,LR分析法可以用作许多自动的语法分析器的生成器。
短语:文法G[S],αβδ是文法G的一个句型,S=>*αAδ且A=>+β则称β是句型αβδ相对于非终结符A的短语。
直接短语:若有A ⇒+β则称β是句型αβδ相对于该规则A→β的直接短语。
句柄:一个句型的最左直接短语称为该句型的句柄。
**其实作为一个常年和树打交道的acmer来说,我觉得下面这种定义方法更容易理解….**
短语:一棵子树的所有叶子自左至右排列起来形成一个相对于子树根的短语。
直接短语:仅有父子两代的一棵子树,它的所有叶子自左至右排列起来所形成的符号串。
句柄:一个句型的分析树中最左那棵只有父子两代的子树的所有叶子的自左至右排列。
算符文法的定义:
- 所有产生式的右部都不是ε或两个相邻的非终结符
- 设有一个文法G,如果G中没有形如A→…BC…的产生式,其中B和C为非终结符,则称G为算符文法(Operator Grammar)也称OG文法.
算符优先文法的特点:
- 一旦我们构造了算符优先语法分析器,就可以忽略原来的文法,栈中的非终结符仅仅作为与这些非终结符相关的属性的占位符
- 难以处理像减号这样有不同优先级的符号
- 由于分析的语言的文法和算符优先语法分析器本身的关系不是很紧密,所以不能肯定语法分析器接受的就是所期望的语言
## 算法优先关系构造算法
### 一、首先对于优先关系进行如下定义:
- a的优先级低于b
a < b: 文法中有形如A→…aB…的产生式而B+b…或B+Cb…
- a的优先级等于b
a = b: 文法中有形如A→…ab…或者A→…aBb…的产生式
- a的优先级高于b
a > b: 文法中有形如A…Bb…的产生式,而B+…a或B+…aC
- 算符的优先关系是有序的
- 如果a > b,不能推出b < a
- 如果a > b,有可能b > a
- 如果a > b, b > c,不一定a > c
根据这个大小关系的定义,我们知道为了确定两个终止符的优先关系,我们需要知道它的在所有的产生式中和前后非终止符的关系,那么我们做一个如(二)预处理:
### 二、定义并构造FIRSTVT和LASTVT两个集合
FIRSTVT(P)={a|P⇒+a⋯或P⇒+Qa⋯}
LASTVT(P)={a|P⇒+⋯a或P⇒+⋯aQ}
FIRSTVT(P)直接根据定义递归的构造即可:
-
a) 若有产生式 P→a••• 或 p→Qa•••
则 a∈FIRSTVT(P)
-
b) 若有产生式 P→Q••• ,
若 a∈FIRSTVT(Q)
则 a∈FIRSTVT(P)
LASTVT(P)直接根据定义递归的构造即可:
-
a) 若有产生式 P→•••a 或 p→•••aQ
则 a∈LASTVT(P)
-
b) 若有产生式 P→•••Q ,
若 a∈LASTVT(Q)
则 a∈LASTVT(P)
代码实现见代码部分的make_first和make_last函数,是两个简单的递归实现。
### 三、利用FIRSTVT和LASTVT集合构造算法优先关系表
~~~
FOR 每个产生式 P->X1 X2 ……Xn
DO FOR i:=1 TO n-1 DO
IF X[i]和X[i+1]均为终结符
THEN 置 X[i]=X[i+1]
IF X[i]和X[i+2]均为终结符,X[i+1]为非终结符,i≤n-2,
THEN 置 X[i]=X[i+2]
IF X[i]为终结符, 但X[i+1]为非终结符
THEN FOR FIRSTVT(X[i+1])中的每个a
DO 置 X[i]<a
IF Xi为非终结符, 但X i+1 为终结符
THEN FOR LASTVT(X i )中的每个a
DO 置 a>X[i+1]
~~~
## C++代码实现
~~~
#include <iostream>
#include <cstring>
#include <cstdio>
#include <vector>
#include <stack>
#include <map>
#include <set>
#include <algorithm>
#include <string>
#include <cstdlib>
#include <cctype>
#define MAX 507
using namespace std;
class WF
{
public:
string left;
vector<string> right;
WF ( const string& str )
{
left = str;
}
void insert ( char str[] )
{
right.push_back(str);
}
void print ( )
{
printf ( "%s->%s" , left.c_str() , right[0].c_str() );
for ( int i = 1 ; i < right.size() ; i++ )
printf ( "|%s" , right[i].c_str() );
puts("");
}
};
char relation[MAX][MAX];
vector<char> VT;
vector<WF> VN_set;
map<string,int> VN_dic;
set<char> first[MAX];
set<char> last[MAX];
int used[MAX];
int vis[MAX];
void dfs ( int x )
{
if ( vis[x] ) return;
vis[x] = 1;
string& left = VN_set[x].left;
for ( int i = 0 ; i < VN_set[x].right.size() ; i++ )
{
string& str = VN_set[x].right[i];
if ( isupper(str[0]) )
{
int y = VN_dic[str.substr(0,1)]-1;
if ( str.length() > 1 && !isupper(str[1] ) )
first[x].insert ( str[1] );
dfs ( y );
set<char>::iterator it = first[y].begin();
for ( ; it!= first[y].end() ; it++ )
first[x].insert ( *it );
}
else
first[x].insert ( str[0] );
}
}
void make_first ( )
{
memset ( vis , 0 , sizeof ( vis ) );
for ( int i = 0 ; i < VN_set.size() ; i++ )
if ( vis[i] ) continue;
else dfs ( i );
#define DEBUG
#ifdef DEBUG
puts("------------FIRSTVT集-------------------");
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
printf ( "%s : " , VN_set[i].left.c_str() );
set<char>::iterator it = first[i].begin();
for ( ; it!= first[i].end() ; it++ )
printf ( "%c " , *it );
puts ("" );
}
#endif
}
void dfs1 ( int x )
{
if ( vis[x] ) return;
vis[x] = 1;
string& left = VN_set[x].left;
for ( int i = 0 ; i < VN_set[x].right.size() ; i++ )
{
string& str = VN_set[x].right[i];
int n = str.length() -1;
if ( isupper(str[n] ) )
{
int y = VN_dic[str.substr(n,1)]-1;
if ( str.length() > 1 && !isupper(str[n-1]) )
last[x].insert ( str[1] );
dfs1 ( y );
set<char>::iterator it = last[y].begin();
for ( ; it != last[y].end() ; it++ )
last[x].insert ( *it );
}
else
last[x].insert ( str[n] );
}
}
void make_last ( )
{
memset ( vis , 0 , sizeof ( vis ) );
for ( int i = 0 ; i < VN_set.size() ; i++ )
if ( vis[i] ) continue;
else dfs1 ( i );
#define DEBUG
#ifdef DEBUG
puts("--------------LASTVT集---------------------");
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
printf ( "%s : " , VN_set[i].left.c_str() );
set<char>::iterator it = last[i].begin();
for ( ; it!= last[i].end() ; it++ )
printf ( "%c " , *it );
puts ("" );
}
#endif
}
void make_table ( )
{
for ( int i = 0 ; i < MAX ; i++ )
for ( int j = 0 ; j < MAX ; j++ )
relation[i][j] = ' ';
for ( int i = 0 ; i < VN_set.size() ; i++ )
for ( int j = 0 ; j < VN_set[i].right.size() ; j++ )
{
string& str = VN_set[i].right[j];
for ( int k = 0 ; k < str.length()-1 ; k++ )
{
if ( !isupper(str[k]) && !isupper(str[k+1]) )
relation[str[k]][str[k+1]] = '=';
if ( !isupper(str[k]) && isupper(str[k+1]) )
{
int x = VN_dic[str.substr(k+1,1)]-1;
set<char>::iterator it = first[x].begin();
for ( ; it != first[x].end() ; it++ )
relation[str[k]][*it] = '<';
}
if ( isupper(str[k]) && !isupper(str[k+1]) )
{
int x = VN_dic[str.substr(k,1)]-1;
set<char>::iterator it = last[x].begin();
for ( ; it != last[x].end() ; it++ )
relation[*it][str[k+1]] = '>';
}
if ( k > str.length()-2 ) continue;
if ( !isupper(str[k]) && !isupper(str[k+2]) && isupper(str[k+1]) )
relation[str[k]][str[k+2]] = '=';
}
}
#define DEBUG
#ifdef DEBUG
for ( int i = 0 ; i < VT.size()*5 ; i++ )
printf ("-");
printf ( "算符优先关系表" );
for ( int i = 0 ; i < VT.size()*5 ; i++ )
printf ( "-" );
puts("");
printf ( "|%8s|" , "" );
for ( int i = 0 ; i < VT.size() ; i++ )
printf ( "%5c%5s" , VT[i] , "|" );
puts ("");
for ( int i = 0 ; i < (VT.size()+1)*10 ; i++ )
printf ("-");
puts("");
for ( int i = 0 ; i < VT.size() ; i++ )
{
printf ( "|%4c%5s" , VT[i] , "|");
for ( int j = 0 ; j < VT.size() ; j++ )
printf ( "%5c%5s" , relation[VT[i]][VT[j]] , "|" );
puts ("");
for ( int i = 0 ; i < (VT.size()+1)*10 ; i++ )
printf ("-");
puts("");
}
#endif
}
int main ( )
{
int n;
char s[MAX];
while ( ~scanf ( "%d" , &n ) )
{
memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < n ; i++ )
{
scanf ( "%s" , s );
int len = strlen(s),j;
for ( j = 0 ; j < len ; j++ )
if ( s[j] == '-' )
break;
s[j] = 0;
if ( !VN_dic[s] )
{
VN_set.push_back ( WF(s) );
VN_dic[s] = VN_set.size();
}
int x = VN_dic[s]-1;
VN_set[x].insert ( s+j+2 );
for ( int k = 0 ; k < j; k++ )
if ( !isupper(s[k] ) )
{
if ( used[s[k]] ) continue;
used[s[k]] = 1;
VT.push_back ( s[k] );
}
for ( int k = j+2 ; k < len; k++ )
if ( !isupper(s[k] ) )
{
if ( used[s[k]] ) continue;
VT.push_back ( s[k] );
used[s[k]] = VT.size();
}
}
#define DEBUG
#ifdef DEBUG
puts ("************VT集*******************");
for ( int i = 0 ; i < VT.size() ; i++ )
printf ( "%c " , VT[i] );
puts ("");
puts("*************产生式*****************");
for ( int i = 0 ; i < VN_set.size() ; i++ )
VN_set[i].print();
puts("************************************");
#endif
make_first();
make_last();
make_table();
}
}
~~~
Input:
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fab7ce07.jpg "")
Output:
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fab90ed2.jpg "")
编译原理(六) LL(1)文法分析法(分析过程的C++实现)
最后更新于:2022-04-01 14:15:22
##前情了解
[快速通道](http://blog.csdn.net/qq_24451605/article/details/50086689)
##算法分析
预测分析程序的总控程序在任何时候都是按STACK栈顶符号X和当前的输入符号a行事的。如下图所示,对于任何(X,a),总控程序每次都执行下述三种可能的动作之一:
- 若X = a = ‘#’,则宣布分析成功,停止分析过程。
- 若X = a ≠‘#’,则把X从STACK栈顶弹出,让a指向下一个输入符号。
- 若X是一个非终结符,则查看分析表M。
- 若M[X,a]中存放着关于X的一个产生式,那么,先把X弹出STACK栈顶,然后把产生式的右部符号串按反序一一推进STACK栈(若右部符号为ε,则意味着不推什么东西进栈)。
- 在把产生式的右部符号退进栈的同时应该做这个产生式对应的语义动作(目前暂且不管)。
- 若M[X,a]中存放着“出错标志”,则调用出错诊断程序ERROR。
##代码实现
~~~
#include <iostream>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <cstdlib>
#include <vector>
#include <string>
#include <cctype>
#include <stack>
#include <map>
#include <set>
#define MAX 507
using namespace std;
//大写字母为非终止符(可以多一个'的标号做区分),小写字母为终止符,用~代替epsilon
class WF
{
public:
string left;
set<string> right;
WF ( char s[] )
{
left = s;
}
void print ( )
{
printf ( "%s->" , left.c_str() );
set<string>::iterator it = right.begin();
if ( right.begin()!= right.end() )
{
printf ( "%s" , it->c_str() );
it++;
}
for(; it != right.end() ; it++ )
printf ( "|%s" , it->c_str() );
puts("");
}
void insert ( char s[] )
{
right.insert ( s );
}
};
map<string,set<char> > first;
map<string,set<char> > follow;
map<string,int> VN_dic;
vector<WF> VN_set;
bool used[MAX];
void dfs ( int x )
{
if ( used[x] ) return;
used[x] = 1;
string& left = VN_set[x].left;
set<string>& right = VN_set[x].right;
set<string>::iterator it = right.begin();
for ( ; it!= right.end() ; it++ )
for ( int i = 0 ; i < it->length() ; i++ )
{
if ( !isupper( it->at(i) ) && it->at(i) != '\'' )
{
first[left].insert ( it->at(i) );
break;
}
if ( isupper( it->at(i) ) )
{
int y;
if ( i != it->length()-1 && it->at(i+1) == '\'' )
y = VN_dic[it->substr(i,2)]-1;
else y = VN_dic[it->substr(i,1)]-1;
string& tleft = VN_set[y].left;
dfs ( y );
set<char>& temp = first[tleft];
set<char>::iterator it1 = temp.begin();
bool flag = true;
for ( ; it1 != temp.end() ; it1++ )
{
if ( *it1 == '~' ) flag = false;
first[left].insert( *it1 );
}
if ( flag ) break;
}
else continue;
}
}
void make_first ( )
{
memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < VN_set.size() ; i++ )
dfs ( i );
#define DEBUG
#ifdef DEBUG
puts ("***************FIRST集***********************");
map<string,set<char> >::iterator it = first.begin();
for ( ; it != first.end() ; it++ )
{
printf ( "FIRST(%s)={" , it->first.c_str() );
set<char> & temp = it->second;
set<char>::iterator it1 = temp.begin();
bool flag = false;
for ( ; it1 != temp.end() ; it1++ )
{
if ( flag ) printf ( "," );
printf ( "%c" , *it1 );
flag = true;
}
puts ("}");
}
#endif
}
void append ( const string& str1 , const string& str2 )
{
set<char>& from = follow[str1];
set<char>& to = follow[str2];
set<char>::iterator it = from.begin();
for ( ; it != from.end() ; it++ )
to.insert ( *it );
}
void make_follow ( )
{
while ( true )
{
bool goon = false;
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
string& left = VN_set[i].left;
set<string>& right = VN_set[i].right;
set<string>::iterator it = right.begin();
for ( ; it!= right.end() ; it++ )
{
bool flag = true;
const string& str = *it;
for ( int j = it->length()-1 ; j >= 0 ; j-- )
{
if ( str[j] == '\'' )
{
int x = VN_dic[it->substr(j-1,2)]-1;
if ( flag )
{
int tt = follow[it->substr(j-1,2)].size();
append ( left , it->substr(j-1,2) );
int tt1 = follow[it->substr(j-1,2)].size();
if ( tt1 > tt ) goon = true;
if ( !VN_set[x].right.count("~" ) )
flag = false;
}
for ( int k = j+1 ; k < it->length() ; k++ )
{
if ( isupper(str[k]) )
{
string id;
if ( k != it->length()-1 && str[k+1] == '\'' )
id = it->substr(k,2);
else id = it->substr(k,1);
set<char>& from = first[id];
set<char>& to = follow[it->substr(j-1,2)];
int tt = to.size();
set<char>::iterator it1 = from.begin();
for ( ; it1 != from.end() ; it1++ )
if ( *it1 != '~' )
to.insert ( *it1 );
int tt1 = follow[it->substr(j-1,2)].size();
if ( tt1 > tt ) goon = true;
if ( !VN_set[VN_dic[id]-1].right.count("~") )
break;
}
else if ( str[k] != '\'' )
{
int tt = follow[it->substr(j-1,2)].size();
follow[it->substr(j-1,2)].insert ( str[k] );
int tt1 = follow[it->substr(j-1,2)].size();
if ( tt1 > tt )
goon = true;
break;
}
else continue;
}
j--;
}
else if ( isupper(str[j] ) )
{
int x = VN_dic[it->substr(j,1)]-1;
if ( flag )
{
int tt = follow[it->substr(j,1)].size();
append ( left , it->substr(j,1) );
if ( !VN_set[x].right.count("~") )
flag = false;
int tt1 = follow[it->substr(j,1)].size();
if ( tt1 > tt ) goon = true;
}
for ( int k = j+1 ; k < it->length() ; k++ )
{
if ( isupper( str[k] ) )
{
string id;
if ( k != it->length()-1 && str[k+1] == '\'' )
id = it->substr(k,2);
else id = it->substr(k,1);
set<char>& from = first[id];
set<char>& to = follow[it->substr(j,1)];
set<char>::iterator it1 = from.begin();
int tt = follow[it->substr(j,1)].size();
for ( ; it1 != from.end() ; it1++ )
if ( *it1 != '~' )
to.insert( *it1 );
int tt1 = follow[it->substr(j,1)].size();
if ( tt1 > tt ) goon = true;
if ( !VN_set[VN_dic[id]-1].right.count("~") )
break;
}
else if ( str[k] != '\'' )
{
int tt = follow[it->substr(j,1)].size();
follow[it->substr(j,1)].insert ( str[k] );
int tt1 = follow[it->substr(j,1)].size();
if ( tt1 > tt ) goon = true;
break;
}
else continue;
}
}
else flag = false;
}
}
}
if ( !goon ) break;
}
#define DEBUG
#ifdef DEBUG
puts ("****************FOLLOW集**********************" );
map<string,set<char> >::iterator it = follow.begin();
for ( ; it != follow.end() ; it++ )
{
printf ( "FOLLOW(%s)={" , it->first.c_str() );
set<char> & temp = it->second;
temp.insert('#');
set<char>::iterator it1 = temp.begin();
bool flag = false;
for ( ; it1 != temp.end() ; it1++ )
{
if ( flag ) printf ( "," );
printf ( "%c" , *it1 );
flag = true;
}
puts ("}");
}
#endif
}
vector<map<char,string> > predict_table;
//检查一个字符是否属于一个字符串的FIRST集合
bool check_first ( const string& text , char ch )
{
for ( int i = 0 ; i < text.length() ; i++ )
{
bool hasEmpty = false;
if ( !isupper(text[i]) && text[i] != '\'' )
{
if ( text[i] != ch ) return false;
else return true;
}
else if ( isupper(text[i] ) )
{
string temp;
if ( i != text.length()-1 && text[i+1] == '\'' )
temp = text.substr(i,2);
else
temp = text.substr(i,1);
set<char>& dic = first[temp];
set<char>::iterator it = dic.begin();
for ( ; it != dic.end() ; it++ )
{
if ( *it == '~' ) hasEmpty = true;
if ( *it == ch ) return true;
}
if ( !hasEmpty) break;
}
else continue;
}
return false;
}
//检查一个字符是否属于一个字符串的FOLLOW集合
bool check_follow ( const string& text , char ch )
{
set<char>& dic = follow[text];
set<char>::iterator it = dic.begin();
for ( ; it != dic.end() ; it++ )
if ( *it == ch ) return true;
return false;
}
void make_table ()
{
map<char,string> temp;
vector<char> letter;
bool vis[500];
memset ( vis , 0 , sizeof ( vis ) );
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
set<string>& right = VN_set[i].right;
set<string>::iterator it = right.begin();
for ( ; it != right.end() ; it++ )
for ( int j = 0 ; j < it->length() ; j++ )
if ( !isupper(it->at(j)) && it->at(j) != '\'' )
{
if ( vis[it->at(j)] ) continue;
vis[it->at(j)] = true;
letter.push_back ( it->at(j) );
}
}
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
temp.clear();
string& left = VN_set[i].left;
set<string>& right = VN_set[i].right;
set<string>::iterator it = right.begin();
for ( ; it != right.end() ; it++ )
for ( int j = 0 ; j < letter.size() ; j++ )
{
//cout << *it << " " << letter[j] << endl;
if ( check_first ( *it , letter[j] ) )
{
//cout << "YES" << endl;
temp[letter[j]] = *it;
}
if ( it->at(0) == '~' && check_follow ( left, letter[j] ))
temp[letter[j]] = *it;
}
predict_table.push_back ( temp );
}
#define DEBUG
#ifdef DEBUG
for ( int i = 0 ; i <= (letter.size()+1)*10 ; i++ )
printf ( "-" );
puts ("");
printf ( "|%9s" , "|" );
for ( int i = 0 ; i < letter.size() ; i++ )
printf ( "%5c%5s" , letter[i] , "|" );
puts("");
for ( int i = 0 ; i <= (letter.size()+1)*10 ; i++ )
printf ( "-" );
puts("");
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
printf ( "|%5s%4s" , VN_set[i].left.c_str() , "|" );
for ( int j = 0 ; j < letter.size() ; j ++ )
if ( predict_table[i].count(letter[j] ) )
printf ( "%7s%3s" , predict_table[i][letter[j]].c_str() , "|" );
else printf ( "%10s" , "|" );
puts("");
for ( int i = 0 ; i <= (letter.size()+1)*10 ; i++ )
printf ( "-" );
puts ("");
}
#endif
}
void print ( int steps , stack<string> stk , string src , string wf , int x )
{
printf ( "%-10d" , steps );
string out = "";
while ( !stk.empty() )
{
out = stk.top()+out;
stk.pop();
}
printf ( "#%-9s" , out.c_str() );
out ="";
for ( int i = x ; i < src.length() ; i++ )
out += src[i];
printf ( "%-10s" , (out+"#").c_str() );
printf ( "%-10s\n" , wf.c_str() );
}
void analyse ( const string& src )
{
stack<string> stk;
stk.push ( "E" );
int steps = 0;
int idx = 0;
printf ( "%-10s%-10s%-10s%-10s\n" , "步骤","符号栈","输入串","所用产生式" );
while ( !stk.empty() )
{
string u = stk.top();
string tmp="";
stk.pop();
if ( !isupper(u[0]) )
{
if ( idx == src.length() && u[0] == '~' );
else if ( src[idx] == u[0] )
idx++;
}
else
{
int x = VN_dic[u]-1;
tmp = predict_table[x][src[idx]];
for ( int i = tmp.length()-1 ; i >= 0 ; i-- )
{
if ( tmp[i] == '\'' )
{
string v = tmp.substr(i-1,2);
stk.push ( v );
i--;
}
else
{
string v = tmp.substr(i,1);
stk.push( v );
}
}
tmp = u+"->"+tmp;
}
print ( steps++ , stk , src , tmp , idx );
}
}
int main ( )
{
int n;
char s[MAX];
while ( ~scanf ( "%d" , &n ) )
{
for ( int i = 0 ; i < n ; i++ )
{
scanf ( "%s" , s );
int len = strlen ( s ),j;
for ( j = 0 ; j < len ; j++ )
if ( s[j] == '-' ) break;
s[j] = 0;
if ( !VN_dic[s] )
{
VN_set.push_back ( s );
VN_dic[s] = VN_set.size();
}
int x = VN_dic[s]-1;
VN_set[x].insert ( s+j+2 );
}
make_first();
make_follow();
make_table();
string in = "i*i+i";
analyse( in );
}
}
~~~
### 输入
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fab0fd79.jpg "")
### 输出
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fab5278b.jpg "")
编译原理(五) LL(1)文法分析法(预测分析表的构造算法C++实现)
最后更新于:2022-04-01 14:15:20
## 基本定义
- FIRST(α):
- 令G是一个不含左递归的文法,对G的所有非终结符的每个候选α定义它的终结首符集FIRST(α)为:
FIRST(α)={a | α=>*a…, a∈VT}
- 若α=>*ε,则规定ε∈FIRST(α)
- FIRST(α)是α的所有可能推导的开头终结符或可能的ε
- 如果非终结符A的所有候选首符集两两不相交,即A的任何两个不同候选αi和αj
FIRST(αi) ∩FIRST(αj)=Φ
- 那么当要求A匹配输入串时,A就能根据它所面临的第一个输入符号a,准确的指派某一个候选前去执行任务。这个候选就是那个终结首符集含a的α。
- FOLLOW(A):
- 假定S是文法G的开始符号,对于G的任何非终结符A,我们定义
FOLLOW(A)={a | S=>*…Aa…,a∈VT}
- FOLLOW(A)是所有句型中出现在紧接A之后的终结符或“#”。开始符号的FOLLOW集初始化时加入“#”。
- 当非终结符A面临输入符号a,且a不属于A的任意候选首符集但A的某个候选首符集包含ε时,只有当a∈FOLLOW(A)时,才可能允许A自动匹配。
- LL(1)文法:
- 文法不含左递归
- 对于文法中每一个非终结符A的各个产生式的候选首符集两两不相交。即,若A→α1 |α2 | … |αn,则FIRST(αi)∩FIRST(αj)=Φ (i≠j)
- 对文法中的每个非终结符A,若它存在某个候选首符集包含ε,则,FIRST(A)∩FOLLOW(A)=Φ
如果一个文法G满足以上条件,则称该文法G为***LL(1)文法***。
- LL(1)文法是***不带回溯***的***自顶向下***的文法
- 预测分析表:
- 预测分析表示一个M[A,a]形式的矩阵。其中A为非终结符,a是终结符或‘#’ 。
- 矩阵元素M[A,a]中存放着一条关于A的产生式,指出当A面临输入符号a时所应采用的候选。
M[A,a]中也可能存放一个“出错标志”,指出A根本不该面临输入符号a。
## 预测分析表的构造
### FIRST(α)的构造算法
要构造FIRST(α),根据定义:
α=X1⋯Xn
那么对于从前到后的Xi我们进行分类讨论:
- 如果Xi∈Vt,那么FIRST(α)=FIRST(Xi)={Xi}
- 如果Xi∈Vn,因为不存在左递归,所以Xi=a.......|ϵ,那么FIRST(Xi)={a,ϵ,FIRST(Xi+1)}
- 只要Xi−1不包含ϵ,那么Xi不可能影响FIRST(α)
- 那么我们通过记录每个a∈V,然后进行深度优先记忆化搜索,将所有的状态填满,因为LL(1)文法使不会回溯的,所以能够保证在O(n)的时间完成,采取递归的形式实现
### FIRST(α)的构造实现代码
~~~
#include <iostream>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <cstdlib>
#include <vector>
#include <string>
#include <cctype>
#include <map>
#include <set>
#define MAX 507
using namespace std;
//大写字母为非终止符(可以多一个'的标号做区分),小写字母为终止符,用~代替epsilon
class WF
{
public:
string left;
set<string> right;
WF ( char s[] )
{
left = s;
}
void print ( )
{
printf ( "%s->" , left.c_str() );
set<string>::iterator it = right.begin();
if ( right.begin()!= right.end() )
{
printf ( "%s" , it->c_str() );
it++;
}
for(; it != right.end() ; it++ )
printf ( "|%s" , it->c_str() );
puts("");
}
void insert ( char s[] )
{
right.insert ( s );
}
};
map<string,set<char> > first;
map<string,set<char> > follow;
map<string,int> VN_dic;
vector<WF> VN_set;
bool used[MAX];
void dfs ( int x )
{
if ( used[x] ) return;
used[x] = 1;
string& left = VN_set[x].left;
set<string>& right = VN_set[x].right;
set<string>::iterator it = right.begin();
for ( ; it!= right.end() ; it++ )
for ( int i = 0 ; i < it->length() ; i++ )
{
if ( !isupper( it->at(i) ) && it->at(i) != '\'' )
{
first[left].insert ( it->at(i) );
break;
}
if ( isupper( it->at(i) ) )
{
int y;
if ( i != it->length()-1 && it->at(i+1) == '\'' )
y = VN_dic[it->substr(i,2)]-1;
else y = VN_dic[it->substr(i,1)]-1;
string& tleft = VN_set[y].left;
dfs ( y );
set<char>& temp = first[tleft];
set<char>::iterator it1 = temp.begin();
bool flag = true;
for ( ; it1 != temp.end() ; it1++ )
{
if ( *it1 == '~' ) flag = false;
first[left].insert( *it1 );
}
if ( flag ) break;
}
else continue;
}
}
void make_first ( )
{
memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < VN_set.size() ; i++ )
dfs ( i );
#define DEBUG
#ifdef DEBUG
map<string,set<char> >::iterator it = first.begin();
for ( ; it != first.end() ; it++ )
{
printf ( "FIRST(%s)={" , it->first.c_str() );
set<char> & temp = it->second;
set<char>::iterator it1 = temp.begin();
bool flag = false;
for ( ; it1 != temp.end() ; it1++ )
{
if ( flag ) printf ( "," );
printf ( "%c" , *it1 );
flag = true;
}
puts ("}");
}
#endif
}
int main ( )
{
int n;
char s[MAX];
while ( ~scanf ( "%d" , &n ) )
{
for ( int i = 0 ; i < n ; i++ )
{
scanf ( "%s" , s );
int len = strlen ( s ),j;
for ( j = 0 ; j < len ; j++ )
if ( s[j] == '-' ) break;
s[j] = 0;
if ( !VN_dic[s] )
{
VN_set.push_back ( s );
VN_dic[s] = VN_set.size();
}
int x = VN_dic[s]-1;
VN_set[x].insert ( s+j+2 );
}
make_first();
}
}
~~~
**Input:**
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faa9d383.jpg "")
**Output:**
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faab07e4.jpg "")
### FOLLOW(A)的构造算法
设S,A,B∈Vn,那么连续使用如下规则,直至follow集不再发生变化:
- (1) S为标识符,那么FOLLOW(S)包含“#”
- (2) 若A::=αBβ,那么FOLLOW(B)+=FIRST(B)−{ϵ}
- (3) 若A::=αB或者A::=αBβ,且β⇒∗ϵ,那么FOLLOW(B)+=FOLLOW(A)
### FOLLOW(A)的构造实现代码
~~~
#include <iostream>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <cstdlib>
#include <vector>
#include <string>
#include <cctype>
#include <map>
#include <set>
#define MAX 507
using namespace std;
//大写字母为非终止符(可以多一个'的标号做区分),小写字母为终止符,用~代替epsilon
class WF
{
public:
string left;
set<string> right;
WF ( char s[] )
{
left = s;
}
void print ( )
{
printf ( "%s->" , left.c_str() );
set<string>::iterator it = right.begin();
if ( right.begin()!= right.end() )
{
printf ( "%s" , it->c_str() );
it++;
}
for(; it != right.end() ; it++ )
printf ( "|%s" , it->c_str() );
puts("");
}
void insert ( char s[] )
{
right.insert ( s );
}
};
map<string,set<char> > first;
map<string,set<char> > follow;
map<string,int> VN_dic;
vector<WF> VN_set;
bool used[MAX];
void dfs ( int x )
{
if ( used[x] ) return;
used[x] = 1;
string& left = VN_set[x].left;
set<string>& right = VN_set[x].right;
set<string>::iterator it = right.begin();
for ( ; it!= right.end() ; it++ )
for ( int i = 0 ; i < it->length() ; i++ )
{
if ( !isupper( it->at(i) ) && it->at(i) != '\'' )
{
first[left].insert ( it->at(i) );
break;
}
if ( isupper( it->at(i) ) )
{
int y;
if ( i != it->length()-1 && it->at(i+1) == '\'' )
y = VN_dic[it->substr(i,2)]-1;
else y = VN_dic[it->substr(i,1)]-1;
string& tleft = VN_set[y].left;
dfs ( y );
set<char>& temp = first[tleft];
set<char>::iterator it1 = temp.begin();
bool flag = true;
for ( ; it1 != temp.end() ; it1++ )
{
if ( *it1 == '~' ) flag = false;
first[left].insert( *it1 );
}
if ( flag ) break;
}
else continue;
}
}
void make_first ( )
{
memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < VN_set.size() ; i++ )
dfs ( i );
#define DEBUG
#ifdef DEBUG
puts ("***************FIRST集***********************");
map<string,set<char> >::iterator it = first.begin();
for ( ; it != first.end() ; it++ )
{
printf ( "FIRST(%s)={" , it->first.c_str() );
set<char> & temp = it->second;
set<char>::iterator it1 = temp.begin();
bool flag = false;
for ( ; it1 != temp.end() ; it1++ )
{
if ( flag ) printf ( "," );
printf ( "%c" , *it1 );
flag = true;
}
puts ("}");
}
#endif
}
void append ( const string& str1 , const string& str2 )
{
set<char>& from = follow[str1];
set<char>& to = follow[str2];
set<char>::iterator it = from.begin();
for ( ; it != from.end() ; it++ )
to.insert ( *it );
}
void make_follow ( )
{
while ( true )
{
bool goon = false;
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
string& left = VN_set[i].left;
set<string>& right = VN_set[i].right;
set<string>::iterator it = right.begin();
for ( ; it!= right.end() ; it++ )
{
bool flag = true;
const string& str = *it;
for ( int j = it->length()-1 ; j >= 0 ; j-- )
{
if ( str[j] == '\'' )
{
int x = VN_dic[it->substr(j-1,2)]-1;
if ( flag )
{
int tt = follow[it->substr(j-1,2)].size();
append ( left , it->substr(j-1,2) );
int tt1 = follow[it->substr(j-1,2)].size();
if ( tt1 > tt ) goon = true;
if ( !VN_set[x].right.count("~" ) )
flag = false;
}
for ( int k = j+1 ; k < it->length() ; k++ )
{
if ( isupper(str[k]) )
{
string id;
if ( k != it->length()-1 && str[k+1] == '\'' )
id = it->substr(k,2);
else id = it->substr(k,1);
set<char>& from = first[id];
set<char>& to = follow[it->substr(j-1,2)];
int tt = to.size();
set<char>::iterator it1 = from.begin();
for ( ; it1 != from.end() ; it1++ )
if ( *it1 != '~' )
to.insert ( *it1 );
int tt1 = follow[it->substr(j-1,2)].size();
if ( tt1 > tt ) goon = true;
if ( !VN_set[VN_dic[id]-1].right.count("~") )
break;
}
else if ( str[k] != '\'' )
{
int tt = follow[it->substr(j-1,2)].size();
follow[it->substr(j-1,2)].insert ( str[k] );
int tt1 = follow[it->substr(j-1,2)].size();
if ( tt1 > tt )
goon = true;
break;
}
else continue;
}
j--;
}
else if ( isupper(str[j] ) )
{
int x = VN_dic[it->substr(j,1)]-1;
if ( flag )
{
int tt = follow[it->substr(j,1)].size();
append ( left , it->substr(j,1) );
if ( !VN_set[x].right.count("~") )
flag = false;
int tt1 = follow[it->substr(j,1)].size();
if ( tt1 > tt ) goon = true;
}
for ( int k = j+1 ; k < it->length() ; k++ )
{
if ( isupper( str[k] ) )
{
string id;
if ( k != it->length()-1 && str[k+1] == '\'' )
id = it->substr(k,2);
else id = it->substr(k,1);
set<char>& from = first[id];
set<char>& to = follow[it->substr(j,1)];
set<char>::iterator it1 = from.begin();
int tt = follow[it->substr(j,1)].size();
for ( ; it1 != from.end() ; it1++ )
if ( *it1 != '~' )
to.insert( *it1 );
int tt1 = follow[it->substr(j,1)].size();
if ( tt1 > tt ) goon = true;
if ( !VN_set[VN_dic[id]-1].right.count("~") )
break;
}
else if ( str[k] != '\'' )
{
int tt = follow[it->substr(j,1)].size();
follow[it->substr(j,1)].insert ( str[k] );
int tt1 = follow[it->substr(j,1)].size();
if ( tt1 > tt ) goon = true;
break;
}
else continue;
}
}
else flag = false;
}
}
}
if ( !goon ) break;
}
#define DEBUG
#ifdef DEBUG
puts ("****************FOLLOW集**********************" );
map<string,set<char> >::iterator it = follow.begin();
for ( ; it != follow.end() ; it++ )
{
printf ( "FOLLOW(%s)={" , it->first.c_str() );
set<char> & temp = it->second;
temp.insert('#');
set<char>::iterator it1 = temp.begin();
bool flag = false;
for ( ; it1 != temp.end() ; it1++ )
{
if ( flag ) printf ( "," );
printf ( "%c" , *it1 );
flag = true;
}
puts ("}");
}
#endif
}
int main ( )
{
int n;
char s[MAX];
while ( ~scanf ( "%d" , &n ) )
{
for ( int i = 0 ; i < n ; i++ )
{
scanf ( "%s" , s );
int len = strlen ( s ),j;
for ( j = 0 ; j < len ; j++ )
if ( s[j] == '-' ) break;
s[j] = 0;
if ( !VN_dic[s] )
{
VN_set.push_back ( s );
VN_dic[s] = VN_set.size();
}
int x = VN_dic[s]-1;
VN_set[x].insert ( s+j+2 );
}
make_first();
make_follow();
}
}
~~~
**Input:**
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faac4093.jpg "")
**Output:**
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faad3248.jpg "")
### 预测分析表的构造算法
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faaedc8e.jpg "")
### 预测分析表的构造实现代码
~~~
#include <iostream>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <cstdlib>
#include <vector>
#include <string>
#include <cctype>
#include <map>
#include <set>
#define MAX 507
using namespace std;
//大写字母为非终止符(可以多一个'的标号做区分),小写字母为终止符,用~代替epsilon
class WF
{
public:
string left;
set<string> right;
WF ( char s[] )
{
left = s;
}
void print ( )
{
printf ( "%s->" , left.c_str() );
set<string>::iterator it = right.begin();
if ( right.begin()!= right.end() )
{
printf ( "%s" , it->c_str() );
it++;
}
for(; it != right.end() ; it++ )
printf ( "|%s" , it->c_str() );
puts("");
}
void insert ( char s[] )
{
right.insert ( s );
}
};
map<string,set<char> > first;
map<string,set<char> > follow;
map<string,int> VN_dic;
vector<WF> VN_set;
bool used[MAX];
void dfs ( int x )
{
if ( used[x] ) return;
used[x] = 1;
string& left = VN_set[x].left;
set<string>& right = VN_set[x].right;
set<string>::iterator it = right.begin();
for ( ; it!= right.end() ; it++ )
for ( int i = 0 ; i < it->length() ; i++ )
{
if ( !isupper( it->at(i) ) && it->at(i) != '\'' )
{
first[left].insert ( it->at(i) );
break;
}
if ( isupper( it->at(i) ) )
{
int y;
if ( i != it->length()-1 && it->at(i+1) == '\'' )
y = VN_dic[it->substr(i,2)]-1;
else y = VN_dic[it->substr(i,1)]-1;
string& tleft = VN_set[y].left;
dfs ( y );
set<char>& temp = first[tleft];
set<char>::iterator it1 = temp.begin();
bool flag = true;
for ( ; it1 != temp.end() ; it1++ )
{
if ( *it1 == '~' ) flag = false;
first[left].insert( *it1 );
}
if ( flag ) break;
}
else continue;
}
}
void make_first ( )
{
memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < VN_set.size() ; i++ )
dfs ( i );
#define DEBUG
#ifdef DEBUG
puts ("***************FIRST集***********************");
map<string,set<char> >::iterator it = first.begin();
for ( ; it != first.end() ; it++ )
{
printf ( "FIRST(%s)={" , it->first.c_str() );
set<char> & temp = it->second;
set<char>::iterator it1 = temp.begin();
bool flag = false;
for ( ; it1 != temp.end() ; it1++ )
{
if ( flag ) printf ( "," );
printf ( "%c" , *it1 );
flag = true;
}
puts ("}");
}
#endif
}
void append ( const string& str1 , const string& str2 )
{
set<char>& from = follow[str1];
set<char>& to = follow[str2];
set<char>::iterator it = from.begin();
for ( ; it != from.end() ; it++ )
to.insert ( *it );
}
void make_follow ( )
{
while ( true )
{
bool goon = false;
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
string& left = VN_set[i].left;
set<string>& right = VN_set[i].right;
set<string>::iterator it = right.begin();
for ( ; it!= right.end() ; it++ )
{
bool flag = true;
const string& str = *it;
for ( int j = it->length()-1 ; j >= 0 ; j-- )
{
if ( str[j] == '\'' )
{
int x = VN_dic[it->substr(j-1,2)]-1;
if ( flag )
{
int tt = follow[it->substr(j-1,2)].size();
append ( left , it->substr(j-1,2) );
int tt1 = follow[it->substr(j-1,2)].size();
if ( tt1 > tt ) goon = true;
if ( !VN_set[x].right.count("~" ) )
flag = false;
}
for ( int k = j+1 ; k < it->length() ; k++ )
{
if ( isupper(str[k]) )
{
string id;
if ( k != it->length()-1 && str[k+1] == '\'' )
id = it->substr(k,2);
else id = it->substr(k,1);
set<char>& from = first[id];
set<char>& to = follow[it->substr(j-1,2)];
int tt = to.size();
set<char>::iterator it1 = from.begin();
for ( ; it1 != from.end() ; it1++ )
if ( *it1 != '~' )
to.insert ( *it1 );
int tt1 = follow[it->substr(j-1,2)].size();
if ( tt1 > tt ) goon = true;
if ( !VN_set[VN_dic[id]-1].right.count("~") )
break;
}
else if ( str[k] != '\'' )
{
int tt = follow[it->substr(j-1,2)].size();
follow[it->substr(j-1,2)].insert ( str[k] );
int tt1 = follow[it->substr(j-1,2)].size();
if ( tt1 > tt )
goon = true;
break;
}
else continue;
}
j--;
}
else if ( isupper(str[j] ) )
{
int x = VN_dic[it->substr(j,1)]-1;
if ( flag )
{
int tt = follow[it->substr(j,1)].size();
append ( left , it->substr(j,1) );
if ( !VN_set[x].right.count("~") )
flag = false;
int tt1 = follow[it->substr(j,1)].size();
if ( tt1 > tt ) goon = true;
}
for ( int k = j+1 ; k < it->length() ; k++ )
{
if ( isupper( str[k] ) )
{
string id;
if ( k != it->length()-1 && str[k+1] == '\'' )
id = it->substr(k,2);
else id = it->substr(k,1);
set<char>& from = first[id];
set<char>& to = follow[it->substr(j,1)];
set<char>::iterator it1 = from.begin();
int tt = follow[it->substr(j,1)].size();
for ( ; it1 != from.end() ; it1++ )
if ( *it1 != '~' )
to.insert( *it1 );
int tt1 = follow[it->substr(j,1)].size();
if ( tt1 > tt ) goon = true;
if ( !VN_set[VN_dic[id]-1].right.count("~") )
break;
}
else if ( str[k] != '\'' )
{
int tt = follow[it->substr(j,1)].size();
follow[it->substr(j,1)].insert ( str[k] );
int tt1 = follow[it->substr(j,1)].size();
if ( tt1 > tt ) goon = true;
break;
}
else continue;
}
}
else flag = false;
}
}
}
if ( !goon ) break;
}
#define DEBUG
#ifdef DEBUG
puts ("****************FOLLOW集**********************" );
map<string,set<char> >::iterator it = follow.begin();
for ( ; it != follow.end() ; it++ )
{
printf ( "FOLLOW(%s)={" , it->first.c_str() );
set<char> & temp = it->second;
temp.insert('#');
set<char>::iterator it1 = temp.begin();
bool flag = false;
for ( ; it1 != temp.end() ; it1++ )
{
if ( flag ) printf ( "," );
printf ( "%c" , *it1 );
flag = true;
}
puts ("}");
}
#endif
}
vector<map<char,string> > predict_table;
//检查一个字符是否属于一个字符串的FIRST集合
bool check_first ( const string& text , char ch )
{
for ( int i = 0 ; i < text.length() ; i++ )
{
bool hasEmpty = false;
if ( !isupper(text[i]) && text[i] != '\'' )
{
if ( text[i] != ch ) return false;
else return true;
}
else if ( isupper(text[i] ) )
{
string temp;
if ( i != text.length()-1 && text[i+1] == '\'' )
temp = text.substr(i,2);
else
temp = text.substr(i,1);
set<char>& dic = first[temp];
set<char>::iterator it = dic.begin();
for ( ; it != dic.end() ; it++ )
{
if ( *it == '~' ) hasEmpty = true;
if ( *it == ch ) return true;
}
if ( !hasEmpty) break;
}
else continue;
}
return false;
}
//检查一个字符是否属于一个字符串的FOLLOW集合
bool check_follow ( const string& text , char ch )
{
set<char>& dic = follow[text];
set<char>::iterator it = dic.begin();
for ( ; it != dic.end() ; it++ )
if ( *it == ch ) return true;
return false;
}
void make_table ()
{
map<char,string> temp;
vector<char> letter;
bool vis[500];
memset ( vis , 0 , sizeof ( vis ) );
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
set<string>& right = VN_set[i].right;
set<string>::iterator it = right.begin();
for ( ; it != right.end() ; it++ )
for ( int j = 0 ; j < it->length() ; j++ )
if ( !isupper(it->at(j)) && it->at(j) != '\'' )
{
if ( vis[it->at(j)] ) continue;
vis[it->at(j)] = true;
letter.push_back ( it->at(j) );
}
}
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
temp.clear();
string& left = VN_set[i].left;
set<string>& right = VN_set[i].right;
set<string>::iterator it = right.begin();
for ( ; it != right.end() ; it++ )
for ( int j = 0 ; j < letter.size() ; j++ )
{
//cout << *it << " " << letter[j] << endl;
if ( check_first ( *it , letter[j] ) )
{
//cout << "YES" << endl;
temp[letter[j]] = *it;
}
if ( it->at(0) == '~' && check_follow ( left, letter[j] ))
temp[letter[j]] = *it;
}
predict_table.push_back ( temp );
}
#define DEBUG
#ifdef DEBUG
for ( int i = 0 ; i <= (letter.size()+1)*10 ; i++ )
printf ( "-" );
puts ("");
printf ( "|%9s" , "|" );
for ( int i = 0 ; i < letter.size() ; i++ )
printf ( "%5c%5s" , letter[i] , "|" );
puts("");
for ( int i = 0 ; i <= (letter.size()+1)*10 ; i++ )
printf ( "-" );
puts("");
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
printf ( "|%5s%4s" , VN_set[i].left.c_str() , "|" );
for ( int j = 0 ; j < letter.size() ; j ++ )
if ( predict_table[i].count(letter[j] ) )
printf ( "%7s%3s" , predict_table[i][letter[j]].c_str() , "|" );
else printf ( "%10s" , "|" );
puts("");
for ( int i = 0 ; i <= (letter.size()+1)*10 ; i++ )
printf ( "-" );
puts ("");
}
#endif
}
int main ( )
{
int n;
char s[MAX];
while ( ~scanf ( "%d" , &n ) )
{
for ( int i = 0 ; i < n ; i++ )
{
scanf ( "%s" , s );
int len = strlen ( s ),j;
for ( j = 0 ; j < len ; j++ )
if ( s[j] == '-' ) break;
s[j] = 0;
if ( !VN_dic[s] )
{
VN_set.push_back ( s );
VN_dic[s] = VN_set.size();
}
int x = VN_dic[s]-1;
VN_set[x].insert ( s+j+2 );
}
make_first();
make_follow();
make_table();
}
}
~~~
**Input:**
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fab0fd79.jpg "")
**Output:**
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fab228be.jpg "")
编译原理(四) 提取左因子法消除回溯
最后更新于:2022-04-01 14:15:18
## 概念简述
回溯:分析工作部分地或全部地退回到之前的一个阶段,在当前阶段采取与之前不同的决策重新推进工作 FIRST(α):令G是一个不含左递归的文法,对G的所有非终结符的每个候选α定义它的终结首符集FIRST(α)为:
- FIRST(α)={a | α=>*a…, a∈VT}
- 若α=>*ε,则规定ε∈FIRST(α)
- FIRST(α)是α的所有可能推导的开头终结符或可能的ε
## 消除回溯
- 回溯的问题
回溯带来的问题就是低效率
- 回溯的条件
文法中,对于某个非终结符的规则其右部有多个选择,并根据所面临的输入字符不能准确的判断所要的选择,那么就需要搜索,就会导致回溯。
- 避免回溯的要求
FIRST(αi)∩FIRST(αj)=ϕ
令G是一个***不含左递归***的文法,对G的所有非终结符的每个候选α定义它的终结首符集FIRST(α)为:
- FIRST(α)={a | α=>*a…, a∈VT}
若α=>*ε,则规定ε∈FIRST(α)
FIRST(α)是α的所有可能推导的开头终结符或可能的ε
- 如果非终结符A的所有候选首符集两两不相交,即A的任何两个不同候选αi和αj
FIRST(αi) ∩FIRST(αj)=Φ
- 那么当要求A匹配输入串时,A就能根据它所面临的第一个输入符号a,准确的指派某一个候选前去执行任务。这个候选就是那个终结首符集含a的α。
- 提取左因子法消除回溯
- 假定A的规则是:
A→δβ1 |δβ2 | … |δβn |γ1 |γ2 | … |γm(其中,每个γ不以δ开头)
那么这些规则可以改写为:
A→δA’ |γ1 |γ2 | … |γm
A’→β1 |β2 | … |βn
- 经过反复提取左因子,就能够把每个非终结符(包括新引进者)的所有候选首符集便成为两两不相交。我们为此要***付出的代价是大量引进新的非终结符和ε产生式***。
- 实现过于简单,故不再实现代码
编译原理(三) 消除文法的左递归
最后更新于:2022-04-01 14:15:16
## 算法的功能
对于任意上下文无关的文法消除左递归
### 问题分析
### 一、产生式直接消除左递归
形如P→Pα|β可以通过直接消除转化为:
P→βP′P′→αP′|ϵ
### 二、产生式间接消除左递归
有时候虽然形式上产生式没有递归,但是因为形成了环,所以导致进行闭包运算后出现左递归,如下:
S→Qc|cQ→Rb|bR→Sa|a
虽不具有左递归,但S、Q、R都是左递归的,因为经过若干次推导有
- SQcRbcSabc
- QRbSabQcab
- RSaQcaRbca
就显现出其左递归性了,这就是间接左递归文法。
**消除间接左递归的方法**是:
把间接左递归文法改写为直接左递归文法,然后用消除直接左递归的方法改写文法。
如果一个文法不含有回路,即形如PP的推导,也不含有以ε为右部的产生式,那么就可以采用下述算法消除文法的所有左递归。
消除左递归算法:
- (1) 把文法G的所有非终结符按任一顺序排列,例如,A1,A2,…,An。
- (2)
~~~
for (i=1;i<=n;i++)
for (j=1;j<=i-1;j++)
{ 把形如Ai→Ajγ的产生式改写成Ai→δ1γ /δ2γ /…/δkγ
其中Aj→δ1 /δ2 /…/δk是关于的Aj全部规则;
消除Ai规则中的直接左递归;
}
~~~
- (3) 化简由(2)所得到的文法,即去掉多余的规则。
利用此算法可以将上述文法进行改写,来消除左递归。
首先,令非终结符的排序为R、Q、S。对于R,不存在直接左递归。把R代入到Q中的相关规则中,则Q的规则变为Q→Sab/ ab/ b。
代换后的Q不含有直接左递归,将其代入S,S的规则变为S→Sabc/ abc/ bc/ c。
此时,S存在直接左递归。在消除了S的直接左递归后,得到整个文法为:
S→abcS′|bcS′|cS′S′→abcS′|εQ→Sab|ab|bR→Sa|a
可以看到从文法开始符号S出发,永远无法达到Q和R,所以关于Q和R的规则是多余的,将其删除并化简,最后得到文法G[S]为:
S→abcS′|bcS′|cS′S′→abcS′|ε
当然如果对文法非终结符排序的不同,最后得到的文法在形式上可能不一样,但它们都是等价的。例如,如果对上述非终结符排序选为S、Q、R,那么最后得到的文法G[R]为:
R→bcaR′|caR′|aR′R′→bcaR′|ε
容易证明上述两个文法是等价的。
## 代码实现
~~~
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cstring>
#include <string>
#include <vector>
#include <queue>
#include <cctype>
#include <map>
#include <set>
#define MAX 507
using namespace std;
class WF
{
public:
string left;
set<string> right;
WF ( const string& temp )
{
left = temp;
right.clear();
}
void print ( )
{
printf ( "%s::=" , left.c_str() );
set<string>::iterator it = right.begin();
printf ( "%s" , it->c_str());
it++;
for ( ; it!= right.end() ; it++ )
printf ( "|%s" , it->c_str() );
puts("");
}
void insert ( const string& temp )
{
right.insert(temp);
}
};
map<string,int> VN_dic;
vector<WF> VN_set;
string start;
bool used[MAX];
//消除间接左递归
void remove1 ( )
{
for ( int i = 0 ; i < VN_set.size() ; i++ )
for ( int j = 0 ; j < i ; j++ )
{
vector<string> cont;
set<string>& right1 = VN_set[i].right;
set<string>& right2 = VN_set[j].right;
char ch = VN_set[j].left[0];
set<string>::iterator it1 = right1.begin();
set<string>::iterator it2;
for ( ; it1 != right1.end() ; it1++ )
if ( it1->at(0) == ch )
for ( it2 = right2.begin() ; it2 != right2.end() ; it2++ )
cont.push_back ( *it2 + it1->substr(1) );
int nn = right1.size();
while ( nn-- )
{
if ( right1.begin()->at(0) == ch )
right1.erase ( right1.begin() );
else
{
cont.push_back ( *right1.begin() );
right1.erase ( right1.begin() );
}
}
for ( int i = 0 ; i < cont.size() ; i++ )
right1.insert ( cont[i] );
}
#define DEBUG
#ifdef DEBUG
for ( int i = 0 ; i < VN_set.size() ; i++ )
VN_set[i].print();
#endif
}
//消除直接左递归
void remove2 ( )
{
for ( int i = 0 ; i < VN_set.size() ; i++ )
{
char ch = VN_set[i].left[0];
set<string>& temp = VN_set[i].right;
set<string>::iterator it = temp.begin();
string tt = VN_set[i].left.substr(0,1)+"\'";
bool flag = true;
for ( ; it != temp.end() ; it++ )
if ( it->at(0) == ch )
{
VN_set.push_back ( WF(tt));
VN_dic[tt] = VN_set.size();
flag = false;
break;
}
int x = VN_dic[tt]-1;
if ( flag ) continue;
vector<string> cont;
set<string>& ss = VN_set[x].right;
ss.insert ( "~" );
while (!temp.empty())
{
if ( temp.begin()->at(0) == ch )
ss.insert(temp.begin()->substr(1)+tt);
else
{
//cout << "YES : " << temp.begin()->substr(1)+tt;
cont.push_back (temp.begin()->substr(0)+tt);
}
temp.erase(temp.begin());
}
puts ("");
for ( int i = 0 ; i < cont.size() ; i++ )
{
//cout << cont[i] << endl;
temp.insert ( cont[i] );
}
}
#define DEBUG
#ifdef DEBUG
for ( int i = 0 ; i < VN_set.size() ; i++ )
VN_set[i].print();
#endif
}
void dfs ( int x )
{
if ( used[x] ) return;
used[x] = 1;
set<string>::iterator it = VN_set[x].right.begin();
for ( ; it != VN_set[x].right.end(); it++ )
for ( int i = 0 ; i < it->length() ; i++ )
if ( isupper(it->at(i)) )
{
if ( it->length() > i+1 && it->at(i+1) == '\'' )
dfs ( VN_dic[it->substr(i,2)]-1 );
else
dfs ( VN_dic[it->substr(i,1)]-1 );
}
}
//化简
void simplify ( )
{
memset ( used , 0 , sizeof ( used ) );
int x = VN_dic[start]-1;
dfs ( x );
puts ( "finish" );
vector<WF> res;
for ( int i = 0 ; i < VN_set.size() ; i++ )
if ( used[i] )
res.push_back ( VN_set[i] );
VN_set.clear();
VN_set = res;
}
void print ()
{
puts("**********消除左递归后的结果********");
for ( int i = 0 ; i < VN_set.size() ; i++ )
VN_set[i].print();
puts("");
}
int main ( )
{
char buf[MAX];
int n;
VN_dic.clear();
VN_set.clear();
start="S";
puts ("请输入文法G[S]的产生式数量");
while ( ~scanf ("%d" , &n ) )
{
scanf ( "%d" , &n );
while ( n-- )
{
scanf ( "%s" , buf );
int len = strlen ( buf ),i;
for ( i = 0 ; i < len ; i++ )
if ( buf[i] == ':' )
{
buf[i] = 0;
break;
}
string temp = buf;
if ( !VN_dic[temp] )
{
VN_set.push_back ( WF(temp));
VN_dic[temp] = VN_set.size();
}
int x = VN_dic[temp]-1;
temp = buf+i+3;
//cout <<"the right : " << temp << endl;
VN_set[x].insert(temp);
}
remove1();
remove2();
simplify();
print();
//puts ("请输入文法G[S]的产生式数量");
}
}
~~~
## 测试
测试样例:
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faa6587f.jpg "")
测试结果:
![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faa7a8bd.jpg "")
编译原理(二) NFA的确定化及DFA的最小化的算法及C++实现
最后更新于:2022-04-01 14:15:13
## 1. NFA的确定化
### 1.1. 明确NFA的定义
一个非确定的有穷自动机(NFA)M是一个五元式:
- M=(S,∑,δ,S0,F)
- S是一个有限集,它额每个元素称为一个状态。
- ∑是一个有穷字母表,它的每个元素称为一个输入字符
- δ是一个从S×∑∗至S子集额单值映射。即:δ:S×∑∗→2⋅S
- S0⊆S,是一个非空的初态集
- F⊂ S , 是一个终态集(可空)
### 1.2. 定义运算
定义对状态集合I的几个有关运算:
- 状态集合I的ε-闭包,表示为ε-closure(I),定义为一状态集,是状态集I中的任何状态s经任意条ε弧而能到达的状态的集合。状态集合I的任何状态s都属于ε-closure(I)。
- 状态集合I的a弧转换,表示为move(I,a)定义为状态集合J,其中J是所有那些可从I的某一状态经过一条a弧而到达的状态的全体。
定义Ia = ε-closure(move(I,a))
### 1.3. 算法描述
- 每次从队头取出一个集合,**(开始队列内只有初态集合I的ε-闭包(I) )**,然后得到它对于任意一个字符a的Ia=ε−closure(move(I,a))
- 然后如果当前状态之前没有出现过,那么当前状态作为一个新的状态I,放入队列。
- 一直做如上操作,直到队列为空
### 1.4. 代码如下:
~~~
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <fstream>
#include <queue>
#include <vector>
#include <string>
#include <cstring>
#include <set>
#define MAX 500
#define M 1000007
using namespace std;
struct Set
{
set<int> elements;
int state;
}I[MAX];
vector<int> e[MAX];
vector<int> edge[MAX];
vector<int> val1[MAX];
vector<int> val2[MAX];
vector<int> hash[M];
vector<int> types;
int vis[MAX][MAX];
int cntp,cnts,start,finish,used[MAX];
void _clear( );
void clear( );
void _add( int u , int v , int x );
void add( int u , int v , int x );
void dfs ( set<int>& elements , int u , int d , int flag );
void ensure();
void minimize();
int get_hash( set<int>& item );
//#define DEBUG
int main ( )
{
int m;
puts("给出NFA的边的条数:");
while ( ~scanf ( "%d" , &m ) )
{
_clear();
clear();
puts("给出NFA的始态和终态:");
scanf ( "%d%d" , &start , &finish );
puts("给出所有的边,每行一条:");
for ( int i = 0 ; i < m ; i++ )
{
int u,v,x;
scanf ( "%d%d%d" , &u , &v , &x );
_add ( u , v , x );
}
#ifdef DEBUG
set<int> temp;
int num[]={2,3,6,7,8};
//for ( int i = 2 ; i < 6 ; i++ )
// dfs ( temp , i , 0 , 2 );
for ( int i = 0 ; i < 5 ; i++ )
dfs ( temp , num[i] , 0 , 1 );
set<int>::iterator it = temp.begin();
for ( ; it != temp.end() ; it++ )
printf ( "%d " , *it);
puts("");
#else
ensure();
puts("计算结果如下:");
printf ( "%d\n" , cnts );
for ( int i = 0 ; i < cnts ; i++ )
for ( int j = 0 ; j < edge[i].size() ; j++ )
printf ( "edges : %d %d %d\n" , i , edge[i][j] , val2[i][j] );
puts("\n给出DFA的边的条数:");
#endif
}
}
void _clear()
{
for ( int i = 0 ; i < MAX ; i++ )
{
e[i].clear();
val1[i].clear();
}
types.clear();
memset ( used , 0 , sizeof ( used ) );
}
void clear()
{
for ( int i = 0 ; i < MAX ; i++ )
{
edge[i].clear();
val2[i].clear();
}
}
void _add ( int u , int v , int x )
{
e[u].push_back ( v );
val1[u].push_back ( x );
if ( !used[x] )
{
types.push_back ( x );
used[x] = types.size();
}
}
int get_hash ( set<int>& item )
{
int sum = 0;
set<int>::iterator it = item.begin(),it1,it2;
for ( ; it!= item.end() ; it++ )
{
sum += (*it)*(*it)%M;
sum %= M;
}
for ( int i = 0 ; i < hash[sum].size() ; i++ )
{
int x = hash[sum][i];
set<int>& temp = I[x].elements;
it = temp.begin();
bool flag = true;
if ( temp.size() != item.size() ) continue;
for ( it1 = temp.begin() , it2 = item.begin(); it2!= item.end() ; it1++,it2++ )
if ( *it1 != *it2 )
{
flag = false;
break;
}
if ( flag ) return x;
}
return -1;
}
int make_hash ( set<int>& item )
{
int sum = 0;
set<int>::iterator it = item.begin();
for ( ; it!= item.end() ; it++ )
{
sum += (*it)*(*it)%M;
sum %= M;
}
hash[sum].push_back ( cnts );
return cnts++;
}
void add ( int u , int v , int x )
{
edge[u].push_back ( v );
val2[u].push_back ( x );
}
void dfs ( set<int>& elements , int u , int d , int flag )
{
if ( vis[u][d] ) return;
vis[u][d] = 1;
if ( d == 1 ) elements.insert ( u );
if ( d == 2 ) return;
int len = e[u].size();
for ( int i = 0 ; i < len ; i++ )
{
int v = e[u][i],dd;
int x = val1[u][i];
if ( x == flag ) dd = d+1;
else if ( x == 0 ) dd = d;
else continue;
dfs ( elements , v , dd , flag );
}
}
void ensure ( )
{
I[0].elements.insert(start);
cnts = 1;
for ( int i = 0 ; i < cnts ; i++ )
{
set<int>& cur = I[i].elements;
for ( int j = 0 ; j < types.size() ; j++ )
{
if ( types[j] == 0 ) continue;
memset ( vis , 0 , sizeof ( vis ) );
set<int>& temp = I[cnts].elements;
temp.clear();
set<int>::iterator it = cur.begin();
for ( ; it != cur.end() ; it++ )
dfs ( temp , *it , 0 , types[j] );
if ( temp.empty() ) continue;
int x = get_hash ( temp );
if ( x == -1 )
x = make_hash ( temp );
add ( i , x , types[j] );
}
set<int>::iterator it = cur.begin();
printf ( "I%d :" , i );
for ( ; it!= cur.end() ; it++ )
printf ( "%d " , *it );
puts ("");
}
}
~~~
## 2. DFA的最小化
### 2.1. 明确DFA的定义
一个确定的有穷自动机(DFA)M是一个五元式:
- M=(S, ∑, δ, s0, F)其中
- S是一个有限集,它的每个元素称为一个状态。
- ∑是一个有穷字母表,它的每个元素称为一个输入字符
-δ是一个从S×∑至S的单值映射。δ(s,a)=s’意味着:当现行状态-为s、输入字符为a时,将转换到下一个状态s’。我们称s’为s的一个后继状态。
- s0∈S,是唯一的初态。
- F ⊂ S,是一个终态集(可空)
### 2.2 算法描述
- DFA M =(K,∑,f, k0,, kt),最小状态DFA M’
- 1.构造状态的初始划分∏0:终态kt 和非终态K- kt两组
- 2.对∏施用传播性原则 构造新划分∏new
- 3.如∏new=∏,则令∏new=∏并继续步骤4,否则∏:=∏new重复2
- 4.为∏final中的每一组选一代表,这些代表构成M’的状态。若k是一代表且f(k,a)=t,令r是t组的代表,则M’中有一转换f’(k,a)=r M’ 的开始状态是含有K0的那组的代表 M’的终态是含有Kt的那组的代表
- 5.去掉M’中的死状态.
### 2.3 代码实现
~~~
#include <iostream>
#include <algorithm>
#include <cstring>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
#include <map>
#include <queue>
#include <fstream>
#define MAX 507
using namespace std;
struct Node
{
int x;
int type;
Node(){}
Node( int a , int b )
:x(a),type(b){}
bool operator < ( const Node& a ) const
{
return type < a.type;
}
};
int mp[MAX][MAX];
vector<int> _edge[MAX];
vector<int> _val[MAX];
vector<int> edge[MAX];
vector<int> val[MAX];
vector<int> point;
vector<pair<int,int> > ans1;
vector<int> ans2;
vector<Node> col;
int fa[MAX];
int type[MAX];
int state[MAX];
int used[MAX];
int kinds;
int groups_num;
int n;
int m;
void init ();
int _find( int x );
void _union ( int x , int y );
void _add ( int u , int v , int x );
void add ( int u , int v , int x );
void minimize ( );
void make_csv();
int main ( )
{
init ( );
scanf ( "%d%d" , &n , &m );
for ( int i = 1 ; i <= n ; i++ )
scanf ( "%d" , &type[i] );
for ( int i = 0 ; i < m ; i++ )
{
int u,v,x;
scanf ( "%d%d%d" , &u , &v , &x );
_add ( u ,v , x );
}
minimize();
//最小化后的点的保留结果
puts ("points : " );
printf ( "%d\n" , (int)point.size() );
for ( int i = 0 ; i < point.size(); i++ )
printf ( "%d " , point[i] );
puts("");
//最小化后的边的保留结果
puts ( "edge: ");
m = ans1.size();
for ( int i = 0 ; i < ans1.size() ; i++ )
printf ( "%d %d %d\n" , ans1[i].first , ans1[i].second , ans2[i] );
puts("");
make_csv();
}
void init ( )
{
for ( int i = 0 ; i < MAX ; i++ )
{
edge[i].clear();
_edge[i].clear();
}
point.clear();
memset ( type , 255 , sizeof ( type ) );
memset ( used , 0 , sizeof ( used ) );
kinds = 0;
groups_num = 2;
col.clear();
for ( int i = 0 ; i < MAX ; i++ )
fa[i] = i;
}
void _add ( int u , int v , int x )
{
_edge[u].push_back ( v );
_val[u].push_back ( x );
if ( used[x] ) return ;
used[x] = x;
kinds++;
}
void add ( int u , int v , int x )
{
edge[u].push_back ( v );
val[u].push_back ( x );
}
int _find ( int x )
{
return x == fa[x]? x: fa[x] = _find ( fa[x] );
}
void _union ( int x , int y )
{
x = _find ( x );
y = _find ( y );
fa[x] = y;
}
void minimize ( )
{
queue<vector<Node> > q[2];
col.clear();
for ( int i = 1 ; i <= n ; i++ )
{
if ( type[i] == 0 )
col.push_back ( Node( i , 0 ) );
}
q[1].push ( col );
col.clear();
for ( int i = 1 ; i <= n ;i++ )
{
if ( type[i] == 1 )
col.push_back ( Node ( i , 1 ) );
}
q[1].push( col );
col.clear();
for ( int i = 1 ; i <= kinds ; i++ )
{
int cur = i%2;
int next = (i+1)%2;
while ( !q[next].empty() ) q[next].pop();
while ( !q[cur].empty() )
{
vector<Node> front = q[cur].front();
q[cur].pop();
for ( int j = 0 ; j < front.size() ; j++ )
{
Node& temp = front[j];
int u = temp.x;
for ( int k = 0 ; k < _edge[u].size() ; k++ )
{
int v = _edge[u][k];
int x = _val[u][k];
if ( x != i ) continue;
temp.type = type[v];
}
}
sort ( front.begin() , front.end() );
if ( front[0].type == front[front.size()-1].type )
q[next].push ( front );
else
{
col.clear();
col.push_back ( front[0] );
for ( int j = 1 ; j < front.size() ; j++ )
{
if ( front[j].type != front[j-1].type )
{
q[cur].push ( col );
col.clear();
groups_num++;
}
type[front[j].x] = groups_num;
col.push_back ( front[j] );
}
q[cur].push( col );
}
}
}
//#define DEBUG
#ifdef DEBUG
int id = (kinds+1)%2;
int num = 1;
while ( !q[id].empty() )
{
vector<Node> temp = q[id].front();
q[id].pop();
printf ( "%d : ", num++ );
for ( int i = 0 ; i < temp.size() ; i++ )
printf ( "%d " , temp[i].x );
puts("");
}
#else
int id = (kinds+1)%2;
while ( !q[id].empty() )
{
vector<Node> temp = q[id].front();
q[id].pop();
for ( int i = 1 ; i < temp.size() ; i++ )
_union ( temp[i].x , temp[i-1].x);
}
memset ( used , 0 , sizeof ( used ) );
memset ( mp , 0 , sizeof ( mp ) );
for ( int i = 1 ; i <= n ;i++ )
{
int x = _find(i);
if ( used[x] ) continue;
used[x] = 1;
point.push_back ( x );
}
ans1.clear();
for ( int i = 1 ; i <= n ; i++ )
for ( int j = 0 ; j < _edge[i].size() ; j++ )
{
int v = _edge[i][j];
int x = _val[i][j];
int a = _find(i);
int b = _find(v);
if ( mp[a][b] ) continue;
mp[a][b] = 1;
add ( a , b , x );
ans1.push_back ( make_pair ( a , b ) );
ans2.push_back ( x );
}
#endif
}
void make_csv ( )
{
freopen ( "g1.csv" , "w" , stdout );
printf ( "%d,%d, \n" , n , m );
for ( int i = 0 ; i < point.size() ; i++ )
if ( i == 0 ) printf ( "%d" , point[i] );
else printf ( ",%d" , point[i] );
puts("");
for ( int i = 0 ; i < m ; i++ )
printf ( "%d,%d,%d\n" , ans1[i].first , ans1[i].second , ans2[i] );
fclose(stdout);
}
~~~
编译原理(一) Chomsky文法的判断方法及C++代码实现
最后更新于:2022-04-01 14:15:11
### 一、明确定义
- 0型文法:对任一产生式α→β,都有α∈(VN∪VT)+, β∈(VN∪VT)*
- 1型文法:对任一产生式α→β,都有|β|≥|α|, 仅仅 α→ε除外
- 2型文法:对任一产生式α→β,都有α∈VN , β∈(VN∪VT)*
- 3型文法:任一产生式α→β的形式都为A→aB或A→a,其中A∈VN,B∈VN,a∈VT。上述叫做右线性文法,另有左线性文法,二者等价。
### 二、基本思路
- 0型文法
- 首先字符串α的是(Vn⋃Vt)+,是全符号集的一个正闭包,那么包含符号集中所有符号的一个任一组合,但不包含ε元素。
- 字符串β的是(Vn⋃Vt)∗,是全符号集的一个闭包,那么它比α会多一个ε元素。
- 那么我们想要判断一个文法是否为0型文法,只需要判断***左侧非空即可***
- 任何0型语言都是递归可枚举的,故0型语言又称***递归可枚举集***
- 1型文法
- 首先1型文法必须是0型文法
- 1型文法除了α→ε这一个特例外,其他情况都满足***β的长度大于α的长度***
- 1型文法也叫作***上下文相关文法***
- 2型文法
- 首先2型文法必须是1型文法
- 2型文法左边***必须***是一个***非终结字符***
- 2型文法也叫做***上下文无关文法***
- 3型文法
- 首先3型文法必须是2型文法
- 3型文法必须是***线性文法***
- 也就是在A,B为***非终结符***,a是***终结符***的情况下,产生式只满足如下两种形式(如下为右线性的例子):
- A→aB
- A→a
### 三、代码实现:
- 提供两个实现方案:
- 根据产生式,自己判断Vn,Vt,然后判断文法的类型,支持产生式的缩写版本,是***最早实现***的版本,可能代码的安排上有些混乱,冗余代码也比较多
~~~
#include<iostream>
#include<string>
#include <cstdlib>
#include <cstdio>
#include <map>
#include <vector>
#include <cctype>
using namespace std;
typedef struct CSS
{
string left;
string right;//定义产生式的右部
}CSS;
bool Zero (CSS *p, int n)
{
int i,j;
for(i=0;i<n;i++)//循环n次,即遍历所有产生式
{
for(j=0;j<p[i].left.length();j++)//遍历产生式左部每一个字符
{
if(p[i].left[j]>='A'&&p[i].left[j]<='Z')//判断字符是否是非终结符
break;
}
if(j==p[i].left.length())
{
cout<<"该文法不是0型文法"<<endl;
return 0;
break;
}
}
if(i==n)
return 1;//如果每个产生时都能找到非终结符
}
bool First(CSS *p , int n )//判断1型文法
{
int i;
if(Zero(p,n)) //先判断是否是0型文法
{
for(i=0;i<n;i++)
{
if((p[i].left.length()>p[i].right.length())&&p[i].right.length()!=NULL)//判断产生式左部长度是否大于右部
break;
}
if (i == n )
return 1;
else
{
cout<<"该文法是0型文法"<<endl;
return 0;
}
}
else
return 0;
}
bool Second( CSS*p,int n)//判断2型文法
{
int i;
if(First(p,n))//同上,先判断低级文法是否成立
{
for(i=0;i<n;i++)//同上,遍历所有文法产生式
{
if((p[i].left.length()!=1)||!(p[i].left[0]>='A'&&p[i].left[0]<='Z'))
break;
}
if(i==n)
return 1;
else
{
cout<<"该文法是1型文法"<<endl;
return 0;
}
}
else
return 0;
}
void Third(CSS *p,int n)//判断3型文法
{
int i;
if(Second(p,n))//同上,先判断是否是2型文法
{
for(i=0;i<n;i++)//同上,遍历文法所有的产生式
{
if((p[i].right.length()==0)||(p[i].right.length()>=3)||(p[i].right[0]>='A'&&p[i].right[0]<='Z'))//判断产生式右部字符个数是否在12之间,判断右部第一个字符是否是非终结符
break;
}
if(i==n)
{
for(i=0;i<n;i++)
{
if(p[i].right.length()==2)
{
if(!(p[i].right[1]>='A'&&p[i].right[1]<='Z'))
break;
}
}
if(i==n)
{
cout<<"该文法属于3型文法"<<endl;
}
else
cout<<"该文法属于2型文法"<<endl;
}
else
cout<<"该文法属于2型文法"<<endl;
}
else
cout<<"结束"<<endl;
}
int main ( )
{
CSS *p = new CSS[100];
map<char,bool> dic;
map<char,bool> dic2;
vector<char> VN;
vector<char> VT;
string input1,input2,input3;
cout <<"请输入文法:"<<endl;
cin >> input1;
cout << "请输入VN: "<<endl;
cin >> input2;
for ( int i = 0 ; i < input2.length() ; i++ )
if ( isalnum ( input2[i] ) )
{
VN.push_back ( input2[i] );
dic[input2[i]]=true;
}
cout <<"请输入产生式规则的个数:"<<endl;
int n;
cin >> n;
cout <<"请输入产生式规则:"<<endl;
int cnt = 0;
for ( int i = 0 ; i < n ; i++ )
{
input3.erase ();
cin >> input3;
bool flag = false;
for ( int j = 0 ; j < input3.length() ; j++ )
if ( input3[j] =='|' ) flag = true;
if ( flag )
{
string temp;
int j;
for ( j = 0 ; j < input3.length(); j++ )
{
if ( input3[j] ==':' )
{
temp = input3.substr(0,j);
j = j+3;
break;
}
}
for ( int k =j ; k < input3.length() ; k++ )
{
if ( isalnum ( input3[k] ) )
{
p[cnt].left = temp;
int tt = k;
for ( ;tt < input3.length(); tt++ )
if ( input3[tt]=='|' ) break;
if ( input3[tt] == '|' ) tt--;
p[cnt].right = input3.substr( k , tt-k+1 );
if ( dic[input3[k]] == false )
{
VT.push_back ( input3[k] );
dic[input3[k]] = true;
}
cnt++;
k = tt;
}
}
continue;
}
for ( int j = 0 ; j < input3.length() ; j++ )
{
if ( input3[j]== ':' )
{
p[cnt].left=input3.substr(0,j);
p[cnt].right=input3.substr(j+3,input3.length());
cnt++;
break;
}
}
for ( int j = 0 ; j < input3.length() ; j++ )
{
if ( isalnum( input3[j] ) )
{
if ( dic[input3[j]] ) continue;
VT.push_back ( input3[j] );
dic[input3[j]] = true;
}
}
}
cout << input1 << " = ( {";
for ( int i = 0 ; i < VN.size()-1 ; i++ )
cout << VN[i] << ",";
cout << VN[VN.size()-1] <<"},{";
for ( int i = 0 ; i < VT.size()-1 ; i++ )
cout << VT[i] << ",";
cout << VT[VT.size()-1] << "},";
cout << "P," << input1[2] << " )"<<endl;
cout << "P : " << endl;
vector<string> output;
vector<string> head[500];
string pre[500];
for ( int i = 0 ; i < cnt ; i++ )
{
int x = p[i].left[0];
head[x].push_back ( p[i].right );
pre[x] = p[i].left;
}
for ( int i = 0 ; i < 500 ; i++ )
{
if ( head[i].size() == 0 ) continue;
string temp = pre[i]+" ::= ";
for ( int j = 0 ; j < head[i].size() ; j++ )
{
temp += head[i][j];
if ( j != head[i].size() - 1 ) temp += " | ";
}
output.push_back ( temp );
}
for ( int i = 0 ; i < output.size() ; i ++ )
cout << output[i] << endl;
Third ( p , cnt );
}
~~~
- 第二个版本则是代码和注释风格比较清楚的实现版本,是周末整理之后的一个缩减的版本,不支持缩写,需要提前设定字符集,但是给出了一个更良好的实现方案,适合理解这4种文法类型。
~~~
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <string>
#include <vector>
#include <cstring>
#include <cctype>
using namespace std;
const int VSIZE= 300;
class Principle
{
public:
string left;
string right;
Principle ( const char* , const char* );
};
//只考虑不包含varepsilon的情况,且所有元素均只包含一个字母
vector<char> VN;//非终结符集
vector<char> VT;//终结符集
vector<Principle> principle;//产生式的集合
int type[VSIZE];//每个字符的类型
void init();//清理工作
int get_type(char);//1代表是非终结符,2代表是终结符
bool set_type(char,int);//设置一个字符的类型
int get_result ( );//获得输入的文法的类型
int main ( )
{
char buf[1000];
char ** elements;
while ( true )
{
puts("输入VN:");
gets( buf );
for ( int i = 0 ; i < strlen(buf); i++ )
{
char ch = buf[i];
if ( !isupper(ch) ) continue;
if ( get_type(ch) ) continue;
VN.push_back ( ch );
set_type(ch,1);
}
puts("输入VT:");
gets( buf );
for ( int i = 0 ; i < strlen(buf); i++ )
{
char ch = buf[i];
if ( !islower(ch) ) continue;
if ( get_type(ch) ) continue;
VT.push_back ( ch );
set_type(ch,2);
}
puts("输入产生式:(格式为[A::=...a...]), 输入\"exit\"作为结束");
while ( true )
{
gets ( buf );
if ( !strcmp(buf , "exit" ) ) break;
int i;
for ( i = 0 ; i < strlen(buf) ; i++ )
if ( buf[i] == ':' )
{
buf[i] = 0;
i = i+3;
break;
}
principle.push_back ( Principle( buf , buf+i ) );
printf ( "%s|%s|\n" , buf , buf+i );
}
int flag = get_result();
switch ( flag )
{
case -1:
puts("产生式中出现未知字符");
break;
case 0:
puts("该文法为0型文法");
break;
case 1:
puts("该文法为1型文法");
break;
case 2:
puts("该文法为2型文法");
break;
case 3:
puts("该文法为左线性型文法");
break;
case 4:
puts("该文法为右线性型文法");
break;
}
}
return 0;
}
Principle::Principle ( const char*l , const char* r )
{
left = l;
right = r;
}
//判断字符串是否包含未知字符
bool hasError ( const string& s )
{
for ( int i = 0 ; i < s.length() ; i++ )
if ( !get_type(s[i]) ) return true;
return false;
}
//判断是否为0型文法
bool isZero ( )
{
for ( int i = 0 ; i < principle.size() ; i++ )
if ( hasError(principle[i].left) ) return false;
else if ( hasError(principle[i].right)) return false;
return true;
}
//判断一个0型文法是否为1型文法
bool isOne ( )
{
for ( int i = 0 ; i < principle.size(); i++ )
if ( principle[i].left.length() > principle[i].right.length() )
return false;
return true;
}
//判断一个1型文法是否为2型文法
bool isTwo ( )
{
for ( int i = 0 ; i < principle.size() ; i++ )
{
string left = principle[i].left;
if ( left.size() != 1 ) return false;
if ( get_type(left[0]) != 1 ) return false;
}
return true;
}
//判断一个2型文法是否为左线性文法
bool isLeftThree ()
{
for ( int i = 0 ; i < principle.size() ; i++ )
{
string right = principle[i].right;
for ( int j = 1; j < right.length() ; j++ )
if ( get_type(right[j]) != 2 ) return false;
}
return true;
}
//判断一个2型文法是否为右线性文法
bool isRightThree ()
{
for ( int i = 0 ; i < principle.size() ; i++ )
{
string right = principle[i].right;
for ( int j = 0 ; j < right.length()-1; j++ )
if ( get_type(right[j]) != 2 )
return false;
}
return true;
}
int get_result ( )
{
if ( !isZero() ) return -1;
if ( !isOne() ) return 0;
if ( !isTwo() ) return 1;
if ( isLeftThree() ) return 3;
if ( isRightThree() ) return 4;
return 2;
}
void init ( )
{
VN.clear();
VT.clear();
principle.clear();
memset ( type , 0 , sizeof ( type ) );
}
int get_type ( char ch )
{
return type[ch];
}
bool set_type ( char ch , int x )
{
type[ch] = x;
return true;
}
~~~
前言
最后更新于:2022-04-01 14:15:09
> 原文出处:[编译原理算法实现](http://blog.csdn.net/column/details/compile-principle.html)
作者:[qq_24451605](http://blog.csdn.net/qq_24451605)
**本系列文章经作者授权在看云整理发布,未经作者允许,请勿转载!**
# 编译原理算法实现
> 一些编译原理算法的实现,一些文法分析器的实现