编译原理(五) LL(1)文法分析法(预测分析表的构造算法C++实现)

最后更新于:2022-04-01 14:15:20

## 基本定义 - FIRST(α): - 令G是一个不含左递归的文法,对G的所有非终结符的每个候选α定义它的终结首符集FIRST(α)为: FIRST(α)={a | α=>*a…, a∈VT} - 若α=>*ε,则规定ε∈FIRST(α) - FIRST(α)是α的所有可能推导的开头终结符或可能的ε - 如果非终结符A的所有候选首符集两两不相交,即A的任何两个不同候选αi和αj FIRST(αi) ∩FIRST(αj)=Φ - 那么当要求A匹配输入串时,A就能根据它所面临的第一个输入符号a,准确的指派某一个候选前去执行任务。这个候选就是那个终结首符集含a的α。 - FOLLOW(A): - 假定S是文法G的开始符号,对于G的任何非终结符A,我们定义 FOLLOW(A)={a | S=>*…Aa…,a∈VT} - FOLLOW(A)是所有句型中出现在紧接A之后的终结符或“#”。开始符号的FOLLOW集初始化时加入“#”。 - 当非终结符A面临输入符号a,且a不属于A的任意候选首符集但A的某个候选首符集包含ε时,只有当a∈FOLLOW(A)时,才可能允许A自动匹配。 - LL(1)文法: - 文法不含左递归 - 对于文法中每一个非终结符A的各个产生式的候选首符集两两不相交。即,若A→α1 |α2 | … |αn,则FIRST(αi)∩FIRST(αj)=Φ (i≠j) - 对文法中的每个非终结符A,若它存在某个候选首符集包含ε,则,FIRST(A)∩FOLLOW(A)=Φ 如果一个文法G满足以上条件,则称该文法G为***LL(1)文法***。 - LL(1)文法是***不带回溯***的***自顶向下***的文法 - 预测分析表: - 预测分析表示一个M[A,a]形式的矩阵。其中A为非终结符,a是终结符或‘#’ 。 - 矩阵元素M[A,a]中存放着一条关于A的产生式,指出当A面临输入符号a时所应采用的候选。 M[A,a]中也可能存放一个“出错标志”,指出A根本不该面临输入符号a。 ## 预测分析表的构造 ### FIRST(α)的构造算法 要构造FIRST(α),根据定义: α=X1⋯Xn 那么对于从前到后的Xi我们进行分类讨论: - 如果Xi∈Vt,那么FIRST(α)=FIRST(Xi)={Xi} - 如果Xi∈Vn,因为不存在左递归,所以Xi=a.......|ϵ,那么FIRST(Xi)={a,ϵ,FIRST(Xi+1)} - 只要Xi−1不包含ϵ,那么Xi不可能影响FIRST(α) - 那么我们通过记录每个a∈V,然后进行深度优先记忆化搜索,将所有的状态填满,因为LL(1)文法使不会回溯的,所以能够保证在O(n)的时间完成,采取递归的形式实现 ### FIRST(α)的构造实现代码 ~~~ #include <iostream> #include <cstring> #include <cstdio> #include <algorithm> #include <cstdlib> #include <vector> #include <string> #include <cctype> #include <map> #include <set> #define MAX 507 using namespace std; //大写字母为非终止符(可以多一个'的标号做区分),小写字母为终止符,用~代替epsilon class WF { public: string left; set<string> right; WF ( char s[] ) { left = s; } void print ( ) { printf ( "%s->" , left.c_str() ); set<string>::iterator it = right.begin(); if ( right.begin()!= right.end() ) { printf ( "%s" , it->c_str() ); it++; } for(; it != right.end() ; it++ ) printf ( "|%s" , it->c_str() ); puts(""); } void insert ( char s[] ) { right.insert ( s ); } }; map<string,set<char> > first; map<string,set<char> > follow; map<string,int> VN_dic; vector<WF> VN_set; bool used[MAX]; void dfs ( int x ) { if ( used[x] ) return; used[x] = 1; string& left = VN_set[x].left; set<string>& right = VN_set[x].right; set<string>::iterator it = right.begin(); for ( ; it!= right.end() ; it++ ) for ( int i = 0 ; i < it->length() ; i++ ) { if ( !isupper( it->at(i) ) && it->at(i) != '\'' ) { first[left].insert ( it->at(i) ); break; } if ( isupper( it->at(i) ) ) { int y; if ( i != it->length()-1 && it->at(i+1) == '\'' ) y = VN_dic[it->substr(i,2)]-1; else y = VN_dic[it->substr(i,1)]-1; string& tleft = VN_set[y].left; dfs ( y ); set<char>& temp = first[tleft]; set<char>::iterator it1 = temp.begin(); bool flag = true; for ( ; it1 != temp.end() ; it1++ ) { if ( *it1 == '~' ) flag = false; first[left].insert( *it1 ); } if ( flag ) break; } else continue; } } void make_first ( ) { memset ( used , 0 , sizeof ( used ) ); for ( int i = 0 ; i < VN_set.size() ; i++ ) dfs ( i ); #define DEBUG #ifdef DEBUG map<string,set<char> >::iterator it = first.begin(); for ( ; it != first.end() ; it++ ) { printf ( "FIRST(%s)={" , it->first.c_str() ); set<char> & temp = it->second; set<char>::iterator it1 = temp.begin(); bool flag = false; for ( ; it1 != temp.end() ; it1++ ) { if ( flag ) printf ( "," ); printf ( "%c" , *it1 ); flag = true; } puts ("}"); } #endif } int main ( ) { int n; char s[MAX]; while ( ~scanf ( "%d" , &n ) ) { for ( int i = 0 ; i < n ; i++ ) { scanf ( "%s" , s ); int len = strlen ( s ),j; for ( j = 0 ; j < len ; j++ ) if ( s[j] == '-' ) break; s[j] = 0; if ( !VN_dic[s] ) { VN_set.push_back ( s ); VN_dic[s] = VN_set.size(); } int x = VN_dic[s]-1; VN_set[x].insert ( s+j+2 ); } make_first(); } } ~~~ **Input:** ![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faa9d383.jpg "") **Output:** ![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faab07e4.jpg "") ### FOLLOW(A)的构造算法 设S,A,B∈Vn,那么连续使用如下规则,直至follow集不再发生变化: - (1) S为标识符,那么FOLLOW(S)包含“#” - (2) 若A::=αBβ,那么FOLLOW(B)+=FIRST(B)−{ϵ} - (3) 若A::=αB或者A::=αBβ,且β⇒∗ϵ,那么FOLLOW(B)+=FOLLOW(A) ### FOLLOW(A)的构造实现代码 ~~~ #include <iostream> #include <cstring> #include <cstdio> #include <algorithm> #include <cstdlib> #include <vector> #include <string> #include <cctype> #include <map> #include <set> #define MAX 507 using namespace std; //大写字母为非终止符(可以多一个'的标号做区分),小写字母为终止符,用~代替epsilon class WF { public: string left; set<string> right; WF ( char s[] ) { left = s; } void print ( ) { printf ( "%s->" , left.c_str() ); set<string>::iterator it = right.begin(); if ( right.begin()!= right.end() ) { printf ( "%s" , it->c_str() ); it++; } for(; it != right.end() ; it++ ) printf ( "|%s" , it->c_str() ); puts(""); } void insert ( char s[] ) { right.insert ( s ); } }; map<string,set<char> > first; map<string,set<char> > follow; map<string,int> VN_dic; vector<WF> VN_set; bool used[MAX]; void dfs ( int x ) { if ( used[x] ) return; used[x] = 1; string& left = VN_set[x].left; set<string>& right = VN_set[x].right; set<string>::iterator it = right.begin(); for ( ; it!= right.end() ; it++ ) for ( int i = 0 ; i < it->length() ; i++ ) { if ( !isupper( it->at(i) ) && it->at(i) != '\'' ) { first[left].insert ( it->at(i) ); break; } if ( isupper( it->at(i) ) ) { int y; if ( i != it->length()-1 && it->at(i+1) == '\'' ) y = VN_dic[it->substr(i,2)]-1; else y = VN_dic[it->substr(i,1)]-1; string& tleft = VN_set[y].left; dfs ( y ); set<char>& temp = first[tleft]; set<char>::iterator it1 = temp.begin(); bool flag = true; for ( ; it1 != temp.end() ; it1++ ) { if ( *it1 == '~' ) flag = false; first[left].insert( *it1 ); } if ( flag ) break; } else continue; } } void make_first ( ) { memset ( used , 0 , sizeof ( used ) ); for ( int i = 0 ; i < VN_set.size() ; i++ ) dfs ( i ); #define DEBUG #ifdef DEBUG puts ("***************FIRST集***********************"); map<string,set<char> >::iterator it = first.begin(); for ( ; it != first.end() ; it++ ) { printf ( "FIRST(%s)={" , it->first.c_str() ); set<char> & temp = it->second; set<char>::iterator it1 = temp.begin(); bool flag = false; for ( ; it1 != temp.end() ; it1++ ) { if ( flag ) printf ( "," ); printf ( "%c" , *it1 ); flag = true; } puts ("}"); } #endif } void append ( const string& str1 , const string& str2 ) { set<char>& from = follow[str1]; set<char>& to = follow[str2]; set<char>::iterator it = from.begin(); for ( ; it != from.end() ; it++ ) to.insert ( *it ); } void make_follow ( ) { while ( true ) { bool goon = false; for ( int i = 0 ; i < VN_set.size() ; i++ ) { string& left = VN_set[i].left; set<string>& right = VN_set[i].right; set<string>::iterator it = right.begin(); for ( ; it!= right.end() ; it++ ) { bool flag = true; const string& str = *it; for ( int j = it->length()-1 ; j >= 0 ; j-- ) { if ( str[j] == '\'' ) { int x = VN_dic[it->substr(j-1,2)]-1; if ( flag ) { int tt = follow[it->substr(j-1,2)].size(); append ( left , it->substr(j-1,2) ); int tt1 = follow[it->substr(j-1,2)].size(); if ( tt1 > tt ) goon = true; if ( !VN_set[x].right.count("~" ) ) flag = false; } for ( int k = j+1 ; k < it->length() ; k++ ) { if ( isupper(str[k]) ) { string id; if ( k != it->length()-1 && str[k+1] == '\'' ) id = it->substr(k,2); else id = it->substr(k,1); set<char>& from = first[id]; set<char>& to = follow[it->substr(j-1,2)]; int tt = to.size(); set<char>::iterator it1 = from.begin(); for ( ; it1 != from.end() ; it1++ ) if ( *it1 != '~' ) to.insert ( *it1 ); int tt1 = follow[it->substr(j-1,2)].size(); if ( tt1 > tt ) goon = true; if ( !VN_set[VN_dic[id]-1].right.count("~") ) break; } else if ( str[k] != '\'' ) { int tt = follow[it->substr(j-1,2)].size(); follow[it->substr(j-1,2)].insert ( str[k] ); int tt1 = follow[it->substr(j-1,2)].size(); if ( tt1 > tt ) goon = true; break; } else continue; } j--; } else if ( isupper(str[j] ) ) { int x = VN_dic[it->substr(j,1)]-1; if ( flag ) { int tt = follow[it->substr(j,1)].size(); append ( left , it->substr(j,1) ); if ( !VN_set[x].right.count("~") ) flag = false; int tt1 = follow[it->substr(j,1)].size(); if ( tt1 > tt ) goon = true; } for ( int k = j+1 ; k < it->length() ; k++ ) { if ( isupper( str[k] ) ) { string id; if ( k != it->length()-1 && str[k+1] == '\'' ) id = it->substr(k,2); else id = it->substr(k,1); set<char>& from = first[id]; set<char>& to = follow[it->substr(j,1)]; set<char>::iterator it1 = from.begin(); int tt = follow[it->substr(j,1)].size(); for ( ; it1 != from.end() ; it1++ ) if ( *it1 != '~' ) to.insert( *it1 ); int tt1 = follow[it->substr(j,1)].size(); if ( tt1 > tt ) goon = true; if ( !VN_set[VN_dic[id]-1].right.count("~") ) break; } else if ( str[k] != '\'' ) { int tt = follow[it->substr(j,1)].size(); follow[it->substr(j,1)].insert ( str[k] ); int tt1 = follow[it->substr(j,1)].size(); if ( tt1 > tt ) goon = true; break; } else continue; } } else flag = false; } } } if ( !goon ) break; } #define DEBUG #ifdef DEBUG puts ("****************FOLLOW集**********************" ); map<string,set<char> >::iterator it = follow.begin(); for ( ; it != follow.end() ; it++ ) { printf ( "FOLLOW(%s)={" , it->first.c_str() ); set<char> & temp = it->second; temp.insert('#'); set<char>::iterator it1 = temp.begin(); bool flag = false; for ( ; it1 != temp.end() ; it1++ ) { if ( flag ) printf ( "," ); printf ( "%c" , *it1 ); flag = true; } puts ("}"); } #endif } int main ( ) { int n; char s[MAX]; while ( ~scanf ( "%d" , &n ) ) { for ( int i = 0 ; i < n ; i++ ) { scanf ( "%s" , s ); int len = strlen ( s ),j; for ( j = 0 ; j < len ; j++ ) if ( s[j] == '-' ) break; s[j] = 0; if ( !VN_dic[s] ) { VN_set.push_back ( s ); VN_dic[s] = VN_set.size(); } int x = VN_dic[s]-1; VN_set[x].insert ( s+j+2 ); } make_first(); make_follow(); } } ~~~ **Input:** ![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faac4093.jpg "") **Output:** ![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faad3248.jpg "") ### 预测分析表的构造算法 ![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171faaedc8e.jpg "") ### 预测分析表的构造实现代码 ~~~ #include <iostream> #include <cstring> #include <cstdio> #include <algorithm> #include <cstdlib> #include <vector> #include <string> #include <cctype> #include <map> #include <set> #define MAX 507 using namespace std; //大写字母为非终止符(可以多一个'的标号做区分),小写字母为终止符,用~代替epsilon class WF { public: string left; set<string> right; WF ( char s[] ) { left = s; } void print ( ) { printf ( "%s->" , left.c_str() ); set<string>::iterator it = right.begin(); if ( right.begin()!= right.end() ) { printf ( "%s" , it->c_str() ); it++; } for(; it != right.end() ; it++ ) printf ( "|%s" , it->c_str() ); puts(""); } void insert ( char s[] ) { right.insert ( s ); } }; map<string,set<char> > first; map<string,set<char> > follow; map<string,int> VN_dic; vector<WF> VN_set; bool used[MAX]; void dfs ( int x ) { if ( used[x] ) return; used[x] = 1; string& left = VN_set[x].left; set<string>& right = VN_set[x].right; set<string>::iterator it = right.begin(); for ( ; it!= right.end() ; it++ ) for ( int i = 0 ; i < it->length() ; i++ ) { if ( !isupper( it->at(i) ) && it->at(i) != '\'' ) { first[left].insert ( it->at(i) ); break; } if ( isupper( it->at(i) ) ) { int y; if ( i != it->length()-1 && it->at(i+1) == '\'' ) y = VN_dic[it->substr(i,2)]-1; else y = VN_dic[it->substr(i,1)]-1; string& tleft = VN_set[y].left; dfs ( y ); set<char>& temp = first[tleft]; set<char>::iterator it1 = temp.begin(); bool flag = true; for ( ; it1 != temp.end() ; it1++ ) { if ( *it1 == '~' ) flag = false; first[left].insert( *it1 ); } if ( flag ) break; } else continue; } } void make_first ( ) { memset ( used , 0 , sizeof ( used ) ); for ( int i = 0 ; i < VN_set.size() ; i++ ) dfs ( i ); #define DEBUG #ifdef DEBUG puts ("***************FIRST集***********************"); map<string,set<char> >::iterator it = first.begin(); for ( ; it != first.end() ; it++ ) { printf ( "FIRST(%s)={" , it->first.c_str() ); set<char> & temp = it->second; set<char>::iterator it1 = temp.begin(); bool flag = false; for ( ; it1 != temp.end() ; it1++ ) { if ( flag ) printf ( "," ); printf ( "%c" , *it1 ); flag = true; } puts ("}"); } #endif } void append ( const string& str1 , const string& str2 ) { set<char>& from = follow[str1]; set<char>& to = follow[str2]; set<char>::iterator it = from.begin(); for ( ; it != from.end() ; it++ ) to.insert ( *it ); } void make_follow ( ) { while ( true ) { bool goon = false; for ( int i = 0 ; i < VN_set.size() ; i++ ) { string& left = VN_set[i].left; set<string>& right = VN_set[i].right; set<string>::iterator it = right.begin(); for ( ; it!= right.end() ; it++ ) { bool flag = true; const string& str = *it; for ( int j = it->length()-1 ; j >= 0 ; j-- ) { if ( str[j] == '\'' ) { int x = VN_dic[it->substr(j-1,2)]-1; if ( flag ) { int tt = follow[it->substr(j-1,2)].size(); append ( left , it->substr(j-1,2) ); int tt1 = follow[it->substr(j-1,2)].size(); if ( tt1 > tt ) goon = true; if ( !VN_set[x].right.count("~" ) ) flag = false; } for ( int k = j+1 ; k < it->length() ; k++ ) { if ( isupper(str[k]) ) { string id; if ( k != it->length()-1 && str[k+1] == '\'' ) id = it->substr(k,2); else id = it->substr(k,1); set<char>& from = first[id]; set<char>& to = follow[it->substr(j-1,2)]; int tt = to.size(); set<char>::iterator it1 = from.begin(); for ( ; it1 != from.end() ; it1++ ) if ( *it1 != '~' ) to.insert ( *it1 ); int tt1 = follow[it->substr(j-1,2)].size(); if ( tt1 > tt ) goon = true; if ( !VN_set[VN_dic[id]-1].right.count("~") ) break; } else if ( str[k] != '\'' ) { int tt = follow[it->substr(j-1,2)].size(); follow[it->substr(j-1,2)].insert ( str[k] ); int tt1 = follow[it->substr(j-1,2)].size(); if ( tt1 > tt ) goon = true; break; } else continue; } j--; } else if ( isupper(str[j] ) ) { int x = VN_dic[it->substr(j,1)]-1; if ( flag ) { int tt = follow[it->substr(j,1)].size(); append ( left , it->substr(j,1) ); if ( !VN_set[x].right.count("~") ) flag = false; int tt1 = follow[it->substr(j,1)].size(); if ( tt1 > tt ) goon = true; } for ( int k = j+1 ; k < it->length() ; k++ ) { if ( isupper( str[k] ) ) { string id; if ( k != it->length()-1 && str[k+1] == '\'' ) id = it->substr(k,2); else id = it->substr(k,1); set<char>& from = first[id]; set<char>& to = follow[it->substr(j,1)]; set<char>::iterator it1 = from.begin(); int tt = follow[it->substr(j,1)].size(); for ( ; it1 != from.end() ; it1++ ) if ( *it1 != '~' ) to.insert( *it1 ); int tt1 = follow[it->substr(j,1)].size(); if ( tt1 > tt ) goon = true; if ( !VN_set[VN_dic[id]-1].right.count("~") ) break; } else if ( str[k] != '\'' ) { int tt = follow[it->substr(j,1)].size(); follow[it->substr(j,1)].insert ( str[k] ); int tt1 = follow[it->substr(j,1)].size(); if ( tt1 > tt ) goon = true; break; } else continue; } } else flag = false; } } } if ( !goon ) break; } #define DEBUG #ifdef DEBUG puts ("****************FOLLOW集**********************" ); map<string,set<char> >::iterator it = follow.begin(); for ( ; it != follow.end() ; it++ ) { printf ( "FOLLOW(%s)={" , it->first.c_str() ); set<char> & temp = it->second; temp.insert('#'); set<char>::iterator it1 = temp.begin(); bool flag = false; for ( ; it1 != temp.end() ; it1++ ) { if ( flag ) printf ( "," ); printf ( "%c" , *it1 ); flag = true; } puts ("}"); } #endif } vector<map<char,string> > predict_table; //检查一个字符是否属于一个字符串的FIRST集合 bool check_first ( const string& text , char ch ) { for ( int i = 0 ; i < text.length() ; i++ ) { bool hasEmpty = false; if ( !isupper(text[i]) && text[i] != '\'' ) { if ( text[i] != ch ) return false; else return true; } else if ( isupper(text[i] ) ) { string temp; if ( i != text.length()-1 && text[i+1] == '\'' ) temp = text.substr(i,2); else temp = text.substr(i,1); set<char>& dic = first[temp]; set<char>::iterator it = dic.begin(); for ( ; it != dic.end() ; it++ ) { if ( *it == '~' ) hasEmpty = true; if ( *it == ch ) return true; } if ( !hasEmpty) break; } else continue; } return false; } //检查一个字符是否属于一个字符串的FOLLOW集合 bool check_follow ( const string& text , char ch ) { set<char>& dic = follow[text]; set<char>::iterator it = dic.begin(); for ( ; it != dic.end() ; it++ ) if ( *it == ch ) return true; return false; } void make_table () { map<char,string> temp; vector<char> letter; bool vis[500]; memset ( vis , 0 , sizeof ( vis ) ); for ( int i = 0 ; i < VN_set.size() ; i++ ) { set<string>& right = VN_set[i].right; set<string>::iterator it = right.begin(); for ( ; it != right.end() ; it++ ) for ( int j = 0 ; j < it->length() ; j++ ) if ( !isupper(it->at(j)) && it->at(j) != '\'' ) { if ( vis[it->at(j)] ) continue; vis[it->at(j)] = true; letter.push_back ( it->at(j) ); } } for ( int i = 0 ; i < VN_set.size() ; i++ ) { temp.clear(); string& left = VN_set[i].left; set<string>& right = VN_set[i].right; set<string>::iterator it = right.begin(); for ( ; it != right.end() ; it++ ) for ( int j = 0 ; j < letter.size() ; j++ ) { //cout << *it << " " << letter[j] << endl; if ( check_first ( *it , letter[j] ) ) { //cout << "YES" << endl; temp[letter[j]] = *it; } if ( it->at(0) == '~' && check_follow ( left, letter[j] )) temp[letter[j]] = *it; } predict_table.push_back ( temp ); } #define DEBUG #ifdef DEBUG for ( int i = 0 ; i <= (letter.size()+1)*10 ; i++ ) printf ( "-" ); puts (""); printf ( "|%9s" , "|" ); for ( int i = 0 ; i < letter.size() ; i++ ) printf ( "%5c%5s" , letter[i] , "|" ); puts(""); for ( int i = 0 ; i <= (letter.size()+1)*10 ; i++ ) printf ( "-" ); puts(""); for ( int i = 0 ; i < VN_set.size() ; i++ ) { printf ( "|%5s%4s" , VN_set[i].left.c_str() , "|" ); for ( int j = 0 ; j < letter.size() ; j ++ ) if ( predict_table[i].count(letter[j] ) ) printf ( "%7s%3s" , predict_table[i][letter[j]].c_str() , "|" ); else printf ( "%10s" , "|" ); puts(""); for ( int i = 0 ; i <= (letter.size()+1)*10 ; i++ ) printf ( "-" ); puts (""); } #endif } int main ( ) { int n; char s[MAX]; while ( ~scanf ( "%d" , &n ) ) { for ( int i = 0 ; i < n ; i++ ) { scanf ( "%s" , s ); int len = strlen ( s ),j; for ( j = 0 ; j < len ; j++ ) if ( s[j] == '-' ) break; s[j] = 0; if ( !VN_dic[s] ) { VN_set.push_back ( s ); VN_dic[s] = VN_set.size(); } int x = VN_dic[s]-1; VN_set[x].insert ( s+j+2 ); } make_first(); make_follow(); make_table(); } } ~~~ **Input:** ![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fab0fd79.jpg "") **Output:** ![这里写图片描述](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/2016-04-20_57171fab228be.jpg "")
';