diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ca01e7a --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/file1.txt +/CMakeLists.txt +/cmake-build-debug/ +/.idea/ \ No newline at end of file diff --git a/README.md b/README.md index 76a7a86..8a016b5 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,12 @@ -# regexRecognition +# regex recognition c++实现的正则表达式识别器。包含正则转换NFA和NFA转换DFA过程的展示。 +需要编译环境:c++ 11 +打开lA2.exe可直接使用本程序。 + +## 注意事项 + ++ 时间原因,正则表达式不支持转义符号,也不支持 . 来表示全体字符。因此该程序不支持对 | ( ) +等字符的识别。 ++ 设计原因,正则表达式中或的优先级高于连接符,如果需要表示 aa|bb,需要使用(aa)|(bb)的形式 ++ 使用时若需表示空串,使用`替代,因此本程序不支持对该字符的识别 + +正则表达式示例:(a|b)*bb(((>|<)(=|`))|((!|=)=))1 diff --git a/lA2.exe b/lA2.exe new file mode 100644 index 0000000..d97f108 Binary files /dev/null and b/lA2.exe differ diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..63add61 --- /dev/null +++ b/main.cpp @@ -0,0 +1,512 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define EPSILON '`' +using namespace std; + +class NFANode +{ + int state; + map> nextMap; +public: + NFANode(int state) + { + this->state=state; + } + void addNext(char c,NFANode* next) + { + this->nextMap[c].insert(next); + } + void setNext(map> existNextMap) + { + this->nextMap=std::move(existNextMap); + } + bool isEnd() + { + if(nextMap.empty()) + { + return true; + } + return false; + } + NFANode* getEnd() + { + set outedStates; + NFANode* ptr=this; + while(!ptr->isEnd()) + { + outedStates.insert(ptr->getState()); + for(const auto& itr:ptr->getNextMap()) + { + for(auto it:itr.second) + { + if(!outedStates.count(it->getState())) + { + ptr=it; + continue; + } + } + } + } + return ptr; + } + NFANode* next() + { + auto itr=nextMap.begin(); + return *(itr->second.begin()); + } + map> getNextMap() + { + return nextMap; + } + int getState() + { + return state; + } +}; + +class NFA +{ +public: + map> nfaTable[1000]; + vector< set > dfaStates; + set endStates; + set charSet; + stack operatorStack; + 
stack nfaMapStack; + set outedStateSet; + int maxState=0; + NFA() + { + nfaTable[0][EPSILON].insert(1); + nfaTable[1]['a'].insert(1); + nfaTable[1]['b'].insert(1); + nfaTable[1][EPSILON].insert(2); + nfaTable[2]['b'].insert(3); + nfaTable[3]['b'].insert(4); + nfaTable[4]['>'].insert(5); + nfaTable[4]['<'].insert(5); + nfaTable[4]['!'].insert(6); + nfaTable[4]['='].insert(6); + nfaTable[5]['='].insert(7); + nfaTable[5][EPSILON].insert(7); + nfaTable[6]['='].insert(7); + nfaTable[7]['1'].insert(8); + endStates.insert(8); + charSet={'a','b','<','>','=','!','1'}; + } + NFA(const string& regex) + { + for(int i=0;iaddNext(EPSILON,start); + startNode->addNext(EPSILON,endNode); + node=node->getEnd(); + node->addNext(EPSILON,start); + node->addNext(EPSILON,endNode); + nfaMapStack.push(startNode); + if(i+1addNext(c,endNode); + nfaMapStack.push(startNode); + if(i+1getEnd()->getState()); + nfaTable[0][EPSILON].insert(nfaNode->getState()); + } + void output() + { + list outputList; + outputList.push_back(nfaMapStack.top()); + outedStateSet.insert(nfaMapStack.top()->getState()); + while(!outputList.empty()) + { + NFANode* nfaNode=outputList.front(); + outputList.pop_front(); + cout<getState(); + for(const auto& itr:nfaNode->getNextMap()) + { + for(auto it:itr.second) + { + /*输出过程中将链表形式的nfa表转换为状态转换表*/ + nfaTable[nfaNode->getState()][itr.first].insert(it->getState()); + if(itr.first==EPSILON) + { + cout<<"EPSILON"<<"->"<getState()<<" "; + } + else + { + cout<<"'"<"<getState()<<" "; + } + } + } + if(nfaNode->isEnd()) + { + cout<<"(END)"; + } + cout<getNextMap()) + { + for(auto it:itr.second) + { + if(!outedStateSet.count(it->getState())) + { + outputList.push_back(it); + outedStateSet.insert(it->getState()); + } + } + } + } + cout<getEnd(); + prevNode->setNext(secondNode->getNextMap()); + } + if(op=='|'||op=='+') + { + auto* startNode=new NFANode(++maxState); + auto* endNode=new NFANode(++maxState); + NFANode* secondNode=nfaMapStack.top(); + nfaMapStack.pop(); + NFANode* 
prevNode=nfaMapStack.top(); + nfaMapStack.pop(); + startNode->addNext(EPSILON,prevNode); + startNode->addNext(EPSILON,secondNode); + prevNode=prevNode->getEnd(); + secondNode=secondNode->getEnd(); + secondNode->addNext(EPSILON,endNode); + prevNode->addNext(EPSILON,endNode); + nfaMapStack.push(startNode); + } + } + set epsilonExtend(const set& states) + { + set stateSet; + stateSet.insert(states.begin(), states.end()); + for(int i:states) + { + if(!nfaTable[i][EPSILON].empty()) + { + set result=epsilonExtend(nfaTable[i][EPSILON]); + stateSet.insert(result.begin(), result.end()); + } + } + return stateSet; + } + set move(const set& stateSet,char c) + { + set result; + for(int state : stateSet) + { + if(!nfaTable[state][c].empty()) + { + result.insert(nfaTable[state][c].begin(), nfaTable[state][c].end()); + } + } + return result; + } + int getNextState(const set& state,char c) + { + set result=move(state,c); + if(result.empty()) + { + return -1; + } + set newState; + set epsilonExtendedState=epsilonExtend(result); + newState.insert(epsilonExtendedState.begin(), epsilonExtendedState.end()); + int i; + for(i=0;i dfaTable[1000]; + set endStates; + set charSet; + DFA(NFA& nfa) + { + this->charSet=nfa.charSet; + nfa.dfaStates.push_back(nfa.epsilonExtend({0})); + int listLength=1; + int current=0; + int maxState=0; + int list[10000]; + list[0]=0; + while(listLength!=0) + { + map nextState; + for(char c:charSet) + { + nextState[c]=nfa.getNextState(nfa.dfaStates[list[current]],c); + this->dfaTable[list[current]][c]=nextState[c]; + if(nextState[c]==-1) + { + continue; + } + for(int i:nfa.dfaStates[nextState[c]]) + { + if(nfa.endStates.count(i)) + { + endStates.insert(nextState[c]); + break; + } + } + if(nextState[c]>maxState) + { + maxState=nextState[c]; + list[current+listLength]=nextState[c]; + listLength++; + } + } + listLength--; + current++; + } + } + int move(int currentState,char c) + { + if(currentState==-1) + { + return -1; + } + if(!charSet.count(c)) + { + return -1; 
+ } + return dfaTable[currentState][c]; + } + void output() + { + int state=-1; + for(const map& dfaLine:dfaTable) + { + state++; + if(dfaLine.count(EPSILON)) + { + continue; + } + if(dfaLine.empty()) + { + break; + } + cout<"<& dfaLine:dfaTable) + { + for(auto & itr : dfaLine) + { + if(itr.second==j) + { + itr.second=i; + } + } + } + } + } + } + } + void match(const string& temp) + { + int currentState=0; + for(char c:temp) + { + if(c==';') + { + break; + } + if(c==' ') + { + continue; + } + currentState=move(currentState,c); + } + if(endStates.count(currentState)) + { + cout<>regex; + cout.setf(ios::left); + NFA nfa=NFA(regex); + cout<<"NFA:"<>temp; + if(temp=="exit") + { + break; + } + dfa.match(temp); + } + return 0; +}