bux API Reference 1.9.0
A static library of general-purpose facilities that are commonly needed but not directly supported by Modern C++, plus reusable code that originated from the author's side projects.
Loading...
Searching...
No Matches
ImplScanner.h
Go to the documentation of this file.
1#pragma once
2
3#include "ScannerBase.h" // bux::C_LexTraits<>, bux::I_Scanner<>, bux::C_ActionRet, ...
4#include <vector> // std::vector<>
5
6namespace bux {
7
8//
9// Types
10//
11template<class T_Input, class T_State, class T_Char, class C_Traits = C_LexTraits<T_Char>>
12class C_ScannerImpl: public I_Scanner<T_Char>
13{
14public:
15
16 // Types
17 typedef C_ActionRet F_Action(const T_Char *c, size_t n);
18 typedef bool F_IsFinal(const T_Char *c, size_t n);
19
21 {
22 T_Input m_inputLB;
23 T_State m_nextState;
24 };
25
27 {
30 };
31
32 // Nonvirtuals
33 C_ScannerImpl(I_Parser &parser);
34
35 // Implement I_Scanner<T_Char>
36 void add(unsigned col, T_Char c) override;
37 void setLine(unsigned line) override;
38 void setSource(std::string_view src) override;
39
40protected:
41
42 // Nonvirtuals
43 void firstFits(const T_State *states, F_IsFinal *const *isFinal, size_t stateN)
44 { m_1stFits = states; m_isFinal = isFinal; m_1stFitN = stateN; }
45 void stateTables(const C_StateRec *stateRecs, const T_Input *gotoN)
46 { m_stateRecs = stateRecs; m_gotoN = gotoN; }
47
48private:
49
50 // Types
51 typedef std::vector<T_Char> C_ChStack;
52 typedef std::vector<C_SourcePos> C_PosStack;
53
54 // Data
55 I_Parser &m_Parser;
56 std::string_view m_OldSrc;
57 std::string_view m_CurSrc;
58 unsigned m_CurLine;
59 //---- Read State Begins
60 int m_LastSuccess;
61 F_Action *m_pAction; // valid if m_LastSuccess >= 0
62 T_State m_CurState;
63 C_ChStack m_ReadCh, m_UnreadCh;
64 C_PosStack m_ReadPos, m_UnreadPos;
65 //---- Read State Ends
66 //---- Transit Table Begins
67 const C_StateRec *m_stateRecs {nullptr};
68 const T_Input *m_gotoN {nullptr};
69 const T_State *m_1stFits {nullptr};
70 F_IsFinal *const *m_isFinal {nullptr};
71 size_t m_1stFitN {0};
72 //---- Transit Table Ends
73
74 // Nonvirtuals
75 void addToken(T_LexID token, C_SourcePos pos, I_LexAttr *unownedAttr);
76 void resetReadState();
77 void shrinkReadSize(size_t newSize);
78};
79
80//
81// Implement Class Templates
82//
83template<class T_Input, class T_State, class T_Char, class C_Traits>
85{
86 resetReadState();
87}
88
89template<class T_Input, class T_State, class T_Char, class C_Traits>
91{
92 for (bool consumed = false;;)
93 {
94 // Read the next char
95 if (m_UnreadCh.empty())
96 {
97 if (consumed)
98 // The only stop condition
99 break;
100
101 m_ReadCh.emplace_back(c);
102 m_ReadPos.emplace_back(m_CurSrc, m_CurLine, col);
103 consumed =true;
104 }
105 else
106 {
107 m_ReadPos.emplace_back(m_UnreadPos.back());
108 m_UnreadPos.pop_back();
109 m_ReadCh.emplace_back(m_UnreadCh.back());
110 m_UnreadCh.pop_back();
111 }
112
113 // Match the char against the transit table
114 const T_LexID idTop = C_Traits::id(m_ReadCh.back());
115 if (m_stateRecs && idTop < MIN_TOKEN_ID)
116 {
117 const auto gotos = m_stateRecs[m_CurState].m_goto;
118 T_State nextState;
119 bool found = false;
120 for (int i = m_gotoN[m_CurState]; i > 0;)
121 {
122 const auto pt = gotos[--i];
123 if (pt.m_inputLB <= idTop)
124 {
125 nextState = pt.m_nextState;
126 if (std::numeric_limits<T_State>::max() != nextState)
127 found = true;
128 break;
129 }
130 }
131 if (found)
132 // Transition found
133 {
134 m_CurState = nextState;
135 if (auto pAction = m_stateRecs[nextState].m_action)
136 // Is final
137 {
138 for (size_t i = 0; i < m_1stFitN; ++i)
139 if (m_1stFits[i] == nextState &&
140 (!m_isFinal[i] || (*m_isFinal[i])(m_ReadCh.data(), m_ReadCh.size())))
141 // First fit - action right now
142 {
143 const C_ActionRet ret = (*pAction)(m_ReadCh.data(), m_ReadCh.size());
144 return addToken(ret.m_id, m_ReadPos.front(), ret.m_pAttr);
145 }
146 m_LastSuccess = int(m_ReadCh.size());
147 m_pAction = pAction;
148 }
149 return;
150 }
151 } // if (id < MIN_TOKEN_ID)
152
153 // No transition - Claim the new token
154 const auto pos = m_ReadPos.front();
155 T_LexID token;
156 I_LexAttr *attr{};
157 if (m_LastSuccess < 0)
158 // No final state ever visited -- unread all but the first
159 {
160 shrinkReadSize(1);
161 token = C_Traits::id(m_ReadCh.front());
162 }
163 else if (!m_pAction)
164 // Bug ?
165 {
166 std::string buf;
167 for (auto i: m_ReadCh)
168 switch (auto id = C_Traits::id(i))
169 {
170 case TID_EOF:
171 buf += "EOF";
172 break;
173 default:
174 buf += to_utf8(id);
175 }
176 RUNTIME_ERROR("Run out of scanner at {}({},{},{}) |{}|", pos.m_Source, pos.m_Line, pos.m_Col, m_CurState, buf);
177 }
178 else
179 // Conclude on the latest visited final state
180 {
181 shrinkReadSize(size_t(m_LastSuccess));
182 const C_ActionRet ret = (*m_pAction)(m_ReadCh.data(), m_ReadCh.size());
183 token = ret.m_id;
184 attr = ret.m_pAttr;
185 }
186
187 // Clean up the read state (compared with the unread counterpart)
188 addToken(token, pos, attr);
189 }
190}
191
192template<class T_Input, class T_State, class T_Char, class C_Traits>
194 T_LexID token,
195 C_SourcePos pos,
196 I_LexAttr *attr )
197{
198 resetReadState();
199
200 // Add new token to parser
201 if (m_OldSrc != pos.m_Source)
202 {
203 (void)m_Parser.setSource(pos.m_Source);
204 m_OldSrc = pos.m_Source;
205 }
206 m_Parser.add(token, pos.m_Line, pos.m_Col, attr);
207}
208
209template<class T_Input, class T_State, class T_Char, class C_Traits>
210void C_ScannerImpl<T_Input,T_State,T_Char,C_Traits>::resetReadState()
211{
212 m_LastSuccess = -1; // No success ever being made
213 m_CurState = 0; // presumably the starting state
214 m_ReadCh.clear();
215 m_ReadPos.clear();
216}
217
218template<class T_Input, class T_State, class T_Char, class C_Traits>
220{
221 m_CurLine = line;
222}
223
224template<class T_Input, class T_State, class T_Char, class C_Traits>
226{
227 m_CurSrc = src;
228}
229
230template<class T_Input, class T_State, class T_Char, class C_Traits>
232{
233 while (newSize < m_ReadCh.size())
234 {
235 m_UnreadCh.emplace_back(m_ReadCh.back());
236 m_ReadCh.pop_back();
237 m_UnreadPos.emplace_back(m_ReadPos.back());
238 m_ReadPos.pop_back();
239 }
240}
241
242} //namespace bux
#define RUNTIME_ERROR(fmtStr,...)
Wrap `__FILE__(__DATE__)#__LINE__ __FUNCTION__: msg` into std::runtime_error.
Definition XException.h:32
C_ScannerImpl(I_Parser &parser)
Definition ImplScanner.h:84
C_ActionRet F_Action(const T_Char *c, size_t n)
Definition ImplScanner.h:17
void firstFits(const T_State *states, F_IsFinal *const *isFinal, size_t stateN)
Definition ImplScanner.h:43
void stateTables(const C_StateRec *stateRecs, const T_Input *gotoN)
Definition ImplScanner.h:45
bool F_IsFinal(const T_Char *c, size_t n)
Definition ImplScanner.h:18
void setSource(std::string_view src) override
void add(unsigned col, T_Char c) override
Definition ImplScanner.h:90
void setLine(unsigned line) override
THE common namespace of bux library.
Definition AtomiX.cpp:3
std::string_view to_utf8(T_Utf32 uc)
constinit const uint32_t MIN_TOKEN_ID
Definition LexBase.h:18
@ TID_EOF
Definition LexBase.h:25
uint32_t T_LexID
Definition LexBase.h:35
I_LexAttr * m_pAttr
newed
Definition ScannerBase.h:29
const C_GotoPair * m_goto
Definition ImplScanner.h:28
unsigned m_Col
Definition LexBase.h:42
std::string_view m_Source
Definition LexBase.h:40
unsigned m_Line
Definition LexBase.h:41