libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2013-2014 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex_automaton.h 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 namespace std _GLIBCXX_VISIBILITY(default) 00032 { 00033 namespace __detail 00034 { 00035 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00036 00037 /** 00038 * @defgroup regex-detail Base and Implementation Classes 00039 * @ingroup regex 00040 * @{ 00041 */ 00042 00043 typedef long _StateIdT; 00044 static const _StateIdT _S_invalid_state_id = -1; 00045 00046 template<typename _CharT> 00047 using _Matcher = std::function<bool (_CharT)>; 00048 00049 /// Operation codes that define the type of transitions within the base NFA 00050 /// that represents the regular expression. 00051 enum _Opcode : int 00052 { 00053 _S_opcode_unknown, 00054 _S_opcode_alternative, 00055 _S_opcode_backref, 00056 _S_opcode_line_begin_assertion, 00057 _S_opcode_line_end_assertion, 00058 _S_opcode_word_boundary, 00059 _S_opcode_subexpr_lookahead, 00060 _S_opcode_subexpr_begin, 00061 _S_opcode_subexpr_end, 00062 _S_opcode_dummy, 00063 _S_opcode_match, 00064 _S_opcode_accept, 00065 }; 00066 00067 struct _State_base 00068 { 00069 _Opcode _M_opcode; // type of outgoing transition 00070 _StateIdT _M_next; // outgoing transition 00071 union // Since they are mutually exclusive. 00072 { 00073 size_t _M_subexpr; // for _S_opcode_subexpr_* 00074 size_t _M_backref_index; // for _S_opcode_backref 00075 struct 00076 { 00077 // for _S_opcode_alternative. 00078 _StateIdT _M_quant_index; 00079 // for _S_opcode_alternative or _S_opcode_subexpr_lookahead 00080 _StateIdT _M_alt; 00081 // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or 00082 // quantifiers (ungreedy if set true) 00083 bool _M_neg; 00084 }; 00085 }; 00086 00087 explicit _State_base(_Opcode __opcode) 00088 : _M_opcode(__opcode), _M_next(_S_invalid_state_id) 00089 { } 00090 00091 protected: 00092 ~_State_base() = default; 00093 00094 public: 00095 #ifdef _GLIBCXX_DEBUG 00096 std::ostream& 00097 _M_print(std::ostream& ostr) const; 00098 00099 // Prints graphviz dot commands for state. 00100 std::ostream& 00101 _M_dot(std::ostream& __ostr, _StateIdT __id) const; 00102 #endif 00103 }; 00104 00105 template<typename _TraitsT> 00106 struct _State : _State_base 00107 { 00108 typedef _Matcher<typename _TraitsT::char_type> _MatcherT; 00109 00110 _MatcherT _M_matches; // for _S_opcode_match 00111 00112 explicit _State(_Opcode __opcode) : _State_base(__opcode) { } 00113 }; 00114 00115 struct _NFA_base 00116 { 00117 typedef size_t _SizeT; 00118 typedef regex_constants::syntax_option_type _FlagT; 00119 00120 explicit 00121 _NFA_base(_FlagT __f) 00122 : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0), 00123 _M_quant_count(0), _M_has_backref(false) 00124 { } 00125 00126 _NFA_base(_NFA_base&&) = default; 00127 00128 protected: 00129 ~_NFA_base() = default; 00130 00131 public: 00132 _FlagT 00133 _M_options() const 00134 { return _M_flags; } 00135 00136 _StateIdT 00137 _M_start() const 00138 { return _M_start_state; } 00139 00140 _SizeT 00141 _M_sub_count() const 00142 { return _M_subexpr_count; } 00143 00144 std::vector<size_t> _M_paren_stack; 00145 _FlagT _M_flags; 00146 _StateIdT _M_start_state; 00147 _SizeT _M_subexpr_count; 00148 _SizeT _M_quant_count; 00149 bool _M_has_backref; 00150 }; 00151 00152 template<typename _TraitsT> 00153 struct _NFA 00154 : _NFA_base, std::vector<_State<_TraitsT>> 00155 { 00156 typedef _State<_TraitsT> _StateT; 00157 typedef _Matcher<typename _TraitsT::char_type> _MatcherT; 00158 00159 using _NFA_base::_NFA_base; 00160 00161 // for performance reasons _NFA objects should only be moved not copied 00162 _NFA(const _NFA&) = delete; 00163 _NFA(_NFA&&) = default; 00164 00165 _StateIdT 00166 _M_insert_accept() 00167 { 00168 auto __ret = _M_insert_state(_StateT(_S_opcode_accept)); 00169 return __ret; 00170 } 00171 00172 _StateIdT 00173 _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg) 00174 { 00175 _StateT __tmp(_S_opcode_alternative); 00176 // It labels every quantifier to make greedy comparison easier in BFS 00177 // approach. 00178 __tmp._M_quant_index = this->_M_quant_count++; 00179 __tmp._M_next = __next; 00180 __tmp._M_alt = __alt; 00181 __tmp._M_neg = __neg; 00182 return _M_insert_state(std::move(__tmp)); 00183 } 00184 00185 _StateIdT 00186 _M_insert_matcher(_MatcherT __m) 00187 { 00188 _StateT __tmp(_S_opcode_match); 00189 __tmp._M_matches = std::move(__m); 00190 return _M_insert_state(std::move(__tmp)); 00191 } 00192 00193 _StateIdT 00194 _M_insert_subexpr_begin() 00195 { 00196 auto __id = this->_M_subexpr_count++; 00197 this->_M_paren_stack.push_back(__id); 00198 _StateT __tmp(_S_opcode_subexpr_begin); 00199 __tmp._M_subexpr = __id; 00200 return _M_insert_state(std::move(__tmp)); 00201 } 00202 00203 _StateIdT 00204 _M_insert_subexpr_end() 00205 { 00206 _StateT __tmp(_S_opcode_subexpr_end); 00207 __tmp._M_subexpr = this->_M_paren_stack.back(); 00208 this->_M_paren_stack.pop_back(); 00209 return _M_insert_state(std::move(__tmp)); 00210 } 00211 00212 _StateIdT 00213 _M_insert_backref(size_t __index); 00214 00215 _StateIdT 00216 _M_insert_line_begin() 00217 { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); } 00218 00219 _StateIdT 00220 _M_insert_line_end() 00221 { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); } 00222 00223 _StateIdT 00224 _M_insert_word_bound(bool __neg) 00225 { 00226 _StateT __tmp(_S_opcode_word_boundary); 00227 __tmp._M_neg = __neg; 00228 return _M_insert_state(std::move(__tmp)); 00229 } 00230 00231 _StateIdT 00232 _M_insert_lookahead(_StateIdT __alt, bool __neg) 00233 { 00234 _StateT __tmp(_S_opcode_subexpr_lookahead); 00235 __tmp._M_alt = __alt; 00236 __tmp._M_neg = __neg; 00237 return _M_insert_state(std::move(__tmp)); 00238 } 00239 00240 _StateIdT 00241 _M_insert_dummy() 00242 { return _M_insert_state(_StateT(_S_opcode_dummy)); } 00243 00244 _StateIdT 00245 _M_insert_state(_StateT __s) 00246 { 00247 this->push_back(std::move(__s)); 00248 return this->size()-1; 00249 } 00250 00251 // Eliminate dummy node in this NFA to make it compact. 00252 void 00253 _M_eliminate_dummy(); 00254 00255 #ifdef _GLIBCXX_DEBUG 00256 std::ostream& 00257 _M_dot(std::ostream& __ostr) const; 00258 #endif 00259 }; 00260 00261 /// Describes a sequence of one or more %_State, its current start 00262 /// and end(s). This structure contains fragments of an NFA during 00263 /// construction. 00264 template<typename _TraitsT> 00265 class _StateSeq 00266 { 00267 public: 00268 typedef _NFA<_TraitsT> _RegexT; 00269 00270 public: 00271 _StateSeq(_RegexT& __nfa, _StateIdT __s) 00272 : _M_nfa(__nfa), _M_start(__s), _M_end(__s) 00273 { } 00274 00275 _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end) 00276 : _M_nfa(__nfa), _M_start(__s), _M_end(__end) 00277 { } 00278 00279 // Append a state on *this and change *this to the new sequence. 00280 void 00281 _M_append(_StateIdT __id) 00282 { 00283 _M_nfa[_M_end]._M_next = __id; 00284 _M_end = __id; 00285 } 00286 00287 // Append a sequence on *this and change *this to the new sequence. 00288 void 00289 _M_append(const _StateSeq& __s) 00290 { 00291 _M_nfa[_M_end]._M_next = __s._M_start; 00292 _M_end = __s._M_end; 00293 } 00294 00295 // Clones an entire sequence. 00296 _StateSeq 00297 _M_clone(); 00298 00299 public: 00300 _RegexT& _M_nfa; 00301 _StateIdT _M_start; 00302 _StateIdT _M_end; 00303 }; 00304 00305 //@} regex-detail 00306 _GLIBCXX_END_NAMESPACE_VERSION 00307 } // namespace __detail 00308 } // namespace std 00309 00310 #include <bits/regex_automaton.tcc>