bux API Reference 1.12.3
A static library of general-purpose utilities that are commonly needed but not directly supported by modern C++, plus reusable components that originated from my side projects.
Loading...
Searching...
No Matches
ImplScanner.h
Go to the documentation of this file.
1#pragma once
2
3#include "ScannerBase.h" // bux::C_LexTraits<>, bux::I_Scanner<>, bux::C_ActionRet, ...
4#include "XException.h" // RUNTIME_ERROR()
5#include <vector> // std::vector<>
6
7namespace bux {
8
9//
10// Types
11//
12template<class T_Input, class T_State, class T_Char, class C_Traits = C_LexTraits<T_Char>>
13class C_ScannerImpl: public I_Scanner<T_Char>
14{
15public:
16
17 // Types
18 typedef C_ActionRet F_Action(const T_Char *c, size_t n);
19 typedef bool F_IsFinal(const T_Char *c, size_t n);
20
22 {
23 T_Input m_inputLB;
24 T_State m_nextState;
25 };
26
28 {
31 };
32
33 // Nonvirtuals
34 C_ScannerImpl(I_Parser &parser);
35
36 // Implement I_Scanner<T_Char>
37 void add(unsigned col, T_Char c) override;
38 void setLine(unsigned line) override;
39 void setSource(std::string_view src) override;
40
41protected:
42
43 // Nonvirtuals
44 void firstFits(const T_State *states, F_IsFinal *const *isFinal, size_t stateN)
45 { m_1stFits = states; m_isFinal = isFinal; m_1stFitN = stateN; }
46 void stateTables(const C_StateRec *stateRecs, const T_Input *gotoN)
47 { m_stateRecs = stateRecs; m_gotoN = gotoN; }
48
49private:
50
51 // Types
52 typedef std::vector<T_Char> C_ChStack;
53 typedef std::vector<C_SourcePos> C_PosStack;
54
55 // Data
56 I_Parser &m_Parser;
57 std::string_view m_OldSrc;
58 std::string_view m_CurSrc;
59 unsigned m_CurLine;
60 //---- Read State Begins
61 int m_LastSuccess;
62 F_Action *m_pAction; // valid if m_LastSuccess >= 0
63 T_State m_CurState;
64 C_ChStack m_ReadCh, m_UnreadCh;
65 C_PosStack m_ReadPos, m_UnreadPos;
66 //---- Read State Ends
67 //---- Transit Table Begins
68 const C_StateRec *m_stateRecs {nullptr};
69 const T_Input *m_gotoN {nullptr};
70 const T_State *m_1stFits {nullptr};
71 F_IsFinal *const *m_isFinal {nullptr};
72 size_t m_1stFitN {0};
73 //---- Transit Table Ends
74
75 // Nonvirtuals
76 void addToken(T_LexID token, C_SourcePos pos, I_LexAttr *unownedAttr);
77 void resetReadState();
78 void shrinkReadSize(size_t newSize);
79};
80
81//
82// Implement Class Templates
83//
84template<class T_Input, class T_State, class T_Char, class C_Traits>
86{
87 resetReadState();
88}
89
90template<class T_Input, class T_State, class T_Char, class C_Traits>
92{
93 for (bool consumed = false;;)
94 {
95 // Read the next char
96 if (m_UnreadCh.empty())
97 {
98 if (consumed)
99 // The only stop condition
100 break;
101
102 m_ReadCh.emplace_back(c);
103 m_ReadPos.emplace_back(m_CurSrc, m_CurLine, col);
104 consumed =true;
105 }
106 else
107 {
108 m_ReadPos.emplace_back(m_UnreadPos.back());
109 m_UnreadPos.pop_back();
110 m_ReadCh.emplace_back(m_UnreadCh.back());
111 m_UnreadCh.pop_back();
112 }
113
114 // Match the char against the transit table
115 const T_LexID idTop = C_Traits::id(m_ReadCh.back());
116 if (m_stateRecs && idTop < MIN_TOKEN_ID)
117 {
118 const auto gotos = m_stateRecs[m_CurState].m_goto;
119 T_State nextState;
120 bool found = false;
121 for (int i = m_gotoN[m_CurState]; i > 0;)
122 {
123 const auto pt = gotos[--i];
124 if (pt.m_inputLB <= idTop)
125 {
126 nextState = pt.m_nextState;
127 if (std::numeric_limits<T_State>::max() != nextState)
128 found = true;
129 break;
130 }
131 }
132 if (found)
133 // Transition found
134 {
135 m_CurState = nextState;
136 if (auto pAction = m_stateRecs[nextState].m_action)
137 // Is final
138 {
139 for (size_t i = 0; i < m_1stFitN; ++i)
140 if (m_1stFits[i] == nextState &&
141 (!m_isFinal[i] || (*m_isFinal[i])(m_ReadCh.data(), m_ReadCh.size())))
142 // First fit - action right now
143 {
144 const C_ActionRet ret = (*pAction)(m_ReadCh.data(), m_ReadCh.size());
145 return addToken(ret.m_id, m_ReadPos.front(), ret.m_pAttr);
146 }
147 m_LastSuccess = int(m_ReadCh.size());
148 m_pAction = pAction;
149 }
150 return;
151 }
152 } // if (id < MIN_TOKEN_ID)
153
154 // No transition - Claim the new token
155 const auto pos = m_ReadPos.front();
156 T_LexID token;
157 I_LexAttr *attr{};
158 if (m_LastSuccess < 0)
159 // No final state ever visited -- unread all but the first
160 {
161 shrinkReadSize(1);
162 token = C_Traits::id(m_ReadCh.front());
163 }
164 else if (!m_pAction)
165 // Bug ?
166 {
167 std::string buf;
168 for (auto i: m_ReadCh)
169 switch (auto id = C_Traits::id(i))
170 {
171 case TID_EOF:
172 buf += "EOF";
173 break;
174 default:
175 buf += to_utf8(id);
176 }
177 RUNTIME_ERROR("Run out of scanner at {}({},{},{}) |{}|", pos.m_Source, pos.m_Line, pos.m_Col, m_CurState, buf);
178 }
179 else
180 // Conclude on the latest visited final state
181 {
182 shrinkReadSize(size_t(m_LastSuccess));
183 const C_ActionRet ret = (*m_pAction)(m_ReadCh.data(), m_ReadCh.size());
184 token = ret.m_id;
185 attr = ret.m_pAttr;
186 }
187
188 // Clean up the read state (compared with the unread counterpart)
189 addToken(token, pos, attr);
190 }
191}
192
193template<class T_Input, class T_State, class T_Char, class C_Traits>
194void C_ScannerImpl<T_Input,T_State,T_Char,C_Traits>::addToken(
195 T_LexID token,
196 C_SourcePos pos,
197 I_LexAttr *attr )
198{
199 resetReadState();
200
201 // Add new token to parser
202 if (m_OldSrc != pos.m_Source)
203 {
204 (void)m_Parser.setSource(pos.m_Source);
205 m_OldSrc = pos.m_Source;
206 }
207 m_Parser.add(token, pos.m_Line, pos.m_Col, attr);
208}
209
210template<class T_Input, class T_State, class T_Char, class C_Traits>
211void C_ScannerImpl<T_Input,T_State,T_Char,C_Traits>::resetReadState()
212{
213 m_LastSuccess = -1; // No success ever being made
214 m_CurState = 0; // presumably the starting state
215 m_ReadCh.clear();
216 m_ReadPos.clear();
217}
218
219template<class T_Input, class T_State, class T_Char, class C_Traits>
221{
222 m_CurLine = line;
223}
224
225template<class T_Input, class T_State, class T_Char, class C_Traits>
227{
228 m_CurSrc = src;
229}
230
231template<class T_Input, class T_State, class T_Char, class C_Traits>
232void C_ScannerImpl<T_Input,T_State,T_Char,C_Traits>::shrinkReadSize(size_t newSize)
233{
234 while (newSize < m_ReadCh.size())
235 {
236 m_UnreadCh.emplace_back(m_ReadCh.back());
237 m_ReadCh.pop_back();
238 m_UnreadPos.emplace_back(m_ReadPos.back());
239 m_ReadPos.pop_back();
240 }
241}
242
243} //namespace bux
#define RUNTIME_ERROR(fmtStr,...)
Wrap FILE(DATE)#__LINE__ FUNCTION: msg into std::runtime_error.
Definition XException.h:32
C_ScannerImpl(I_Parser &parser)
Definition ImplScanner.h:85
bool F_IsFinal(const T_Char *c, size_t n)
Definition ImplScanner.h:19
void firstFits(const T_State *states, F_IsFinal *const *isFinal, size_t stateN)
Definition ImplScanner.h:44
void stateTables(const C_StateRec *stateRecs, const T_Input *gotoN)
Definition ImplScanner.h:46
C_ActionRet F_Action(const T_Char *c, size_t n)
Definition ImplScanner.h:18
void setSource(std::string_view src) override
void add(unsigned col, T_Char c) override
Definition ImplScanner.h:91
void setLine(unsigned line) override
THE common namespace of bux library.
Definition AtomiX.cpp:3
std::string_view to_utf8(T_Utf32 uc)
@ TID_EOF
Definition LexBase.h:26
constinit const uint32_t MIN_TOKEN_ID
Definition LexBase.h:19
uint32_t T_LexID
Definition LexBase.h:36
I_LexAttr * m_pAttr
newed
Definition ScannerBase.h:30
const C_GotoPair * m_goto
Definition ImplScanner.h:29
unsigned m_Col
Definition LexBase.h:43
std::string_view m_Source
Definition LexBase.h:41
unsigned m_Line
Definition LexBase.h:42