bux API Reference 1.9.0
Static library of whatever are seen required in general purpose but not directly supported from Modern C++. Or whatever reusable originated from my side projects.
Loading...
Searching...
No Matches
LR1.cpp
Go to the documentation of this file.
1#include "LR1.h"
2#include "StrUtil.h" // HRTN()
3#include <format> // std::format()
4#include <limits> // std::numeric_limits<>
5
6namespace bux {
7namespace LR1 {
8
9//
10// Implement Classes
11//
13{
14 return false;
15}
16
17bool I_ParserPolicy::getTokenName(T_LexID, std::string &) const
18{
19 return false;
20}
21
22std::string I_ParserPolicy::printToken(T_LexID token) const
23{
24 std::string name;
25 if (getTokenName(token, name))
26 return name;
27
28 switch (token)
29 {
30 case TID_EOF:
31 return "bux::TID_EOF";
32 case ROOT_NID:
33 return "<@> aka bux::ROOT_NID";
34 default:
35 if (token >= TOKENGEN_LB)
36 return std::format("bux::TOKENGEN_LB+{}", token - TOKENGEN_LB);
37
38 std::string out = std::format("0x{:x}", token);
39 if (isascii(int(token)))
40 out += std::format(" or \'{}\'", asciiLiteral(token));
41
42 return out;
43 }
44}
45
47 m_Policy(policy),
48 m_ErrState(std::numeric_limits<T_StateID>::max()),
49 m_ShiftCountdown(0),
50 m_Accepted(false)
51{
52}
53
54void C_Parser::add(T_LexID token, unsigned line, unsigned col, I_LexAttr *unownedAttr)
55{
56 C_LexInfo info;
57 info.m_attr.assign(unownedAttr, true);
58 info.m_pos.m_Source = m_CurSrc;
59 info.m_pos.m_Line = line;
60 info.m_pos.m_Col = col;
61
62 C_StateStackLR1 unreadStack;
63 add(token, info, unreadStack);
64}
65
66void C_Parser::add(T_LexID token, C_LexInfo &info, C_StateStackLR1 &unreadStack)
67{
68 size_t actionid;
69Again:
70 if (m_Accepted)
71 onError(info, "Already accepted");
72
73 actionid = m_Policy.action(currentState(), token);
74 switch (actionid)
75 {
76 case ACTION_SHIFT:
77 shift(token, info);
78 if (m_ShiftCountdown && !--m_ShiftCountdown)
79 // Call and forget
80 {
81 std::function<void()> t;
82 t.swap(m_OnPostShift);
83 t();
84 }
85 break;
86 case ACTION_ACCEPT:
87 if (m_OnPostShift)
88 // Call and forget
89 {
90 std::function<void()> t;
91 t.swap(m_OnPostShift);
92 m_ShiftCountdown = 0;
93 t();
94 }
95 if (!reduceOn(m_Policy.getAcceptId(), info))
96 onError(info, "Reduction error on acception");
97
98 m_Accepted = true;
99 break;
100 case ACTION_ERROR: // Error
101 if (m_Policy.changeToken(token, info.m_attr))
102 goto Again;
103
104 if (m_ErrState == std::numeric_limits<T_StateID>::max())
105 {
106 m_ErrState = currentState();
107 m_ErrToken = token;
108 m_ErrPos = info.m_pos;
109 }
110
111 if (recover(token, info, unreadStack))
112 // Recoverable
113 {
114 info.m_attr.assign(0, false);
115 add(m_Policy.m_IdError, info, unreadStack);
116 }
117 else
118 // Unrecoverable
119 {
120 auto out = std::format("Syntax error on state={} token={}", m_ErrState, m_Policy.printToken(m_ErrToken));
121 if (auto *attr =info.m_attr.get())
122 out.append(" of attr type ").append(HRTN(*attr));
123 else
124 out += " with null attr";
125
126 if (m_CurStack.empty())
127 out += "\nEmpty stack";
128 else
129 {
130 out += std::format("\nStack[{}] Dump:", m_CurStack.size()-1);
131 bool first = true;
132 for (const auto &i: m_CurStack)
133 {
134 if (first)
135 first = false;
136 else
137 out += std::format("\n({},{})\t{}\ts={}\tt={}",
138 i.m_pos.m_Line, i.m_pos.m_Col, i.m_attr?HRTN(*i):"", i.m_StateID, m_Policy.printToken(i.m_TokenID));
139 }
140 }
141 onError(m_ErrPos, out);
142 if (token == TID_EOF)
143 m_Accepted = true;
144 else
145 {
146 panicRollback(unreadStack); // panic mode the cheapest
147 m_ErrState = std::numeric_limits<T_StateID>::max();
148 }
149 }
150 break;
151 default:
152 if (actionid >= ACTION_REDUCE_MIN)
153 {
154 const size_t prodId = actionid - ACTION_REDUCE_MIN;
155 if (reduceOn(prodId, info.m_pos))
156 {
157 m_ErrState = std::numeric_limits<T_StateID>::max();
158 goto Again;
159 }
160 onError(info, "Reduction error on production "+std::to_string(prodId));
161 }
162 onError(info, "Unknown action id "+std::to_string(actionid));
163 } // switch (actionid)
164
165 // Consume unread
166 if (!unreadStack.empty())
167 {
168 token = unreadStack.top().m_TokenID;
169 info = unreadStack.top();
170 unreadStack.pop();
171 goto Again;
172 }
173}
174
175T_StateID C_Parser::currentState() const
176{
177 if (m_CurStack.empty())
178 return 0; // 0 is reserved as the ID of [ S' -> .S ]
179
180 return m_CurStack.top().m_StateID;
181}
182
183void C_Parser::onError(const C_SourcePos &pos, std::string_view message)
184{
185 m_Policy.onError(*this, pos, message);
186}
187
188void C_Parser::panicRollback(C_StateStackLR1 &unreadStack)
189{
190 size_t finalSize;
191 for (finalSize = 0; finalSize < m_CurStack.size(); ++finalSize)
192 {
193 if (m_CurStack[finalSize].m_TokenID == m_Policy.m_IdError)
194 break;
195 }
196
197 while (!unreadStack.empty())
198 {
199 m_CurStack.push(unreadStack.top());
200 unreadStack.pop();
201 }
202 while (m_CurStack.size() > finalSize)
203 {
204 C_StateLR1 &top = m_CurStack.top();
205 if (top.m_TokenID != m_Policy.m_IdError)
206 unreadStack.push(top);
207
208 m_CurStack.pop();
209 }
210}
211
212bool C_Parser::recover(T_LexID token, C_LexInfo &info, C_StateStackLR1 &unreadStack)
213{
214 bool isLast = true;
215 for (auto i = m_CurStack.size(); i; --i, isLast = false)
216 {
217 if (!isLast && m_CurStack[i].m_TokenID == m_Policy.m_IdError)
218 // No crossing old m_Policy.m_IdError
219 break;
220
221 if (m_Policy.action(m_CurStack[i-1].m_StateID, m_Policy.m_IdError) != ACTION_ERROR)
222 {
223 if (!isLast && info.m_pos == m_CurStack[i].m_pos)
224 // Total length of nonempty ungotten tokens is zero.
225 continue;
226
227 for (size_t j = i; j; --j, isLast = false)
228 {
229 if (m_CurStack[j-1].m_pos < (isLast?info.m_pos:m_CurStack[j].m_pos))
230 break;
231 if (m_CurStack[j-1].m_TokenID == m_Policy.m_IdError)
232 return false;
233 }
234
235 auto &t = unreadStack.push();
236 t.m_TokenID = token;
237 static_cast<C_LexInfo &>(t) = info;
238 while (m_CurStack.size() > i)
239 {
240 unreadStack.push(m_CurStack.top());
241 m_CurStack.pop();
242 }
243 info.m_pos = unreadStack.top().m_pos;
244 return true;
245 }
246 }
247 return false;
248}
249
250bool C_Parser::reduceOn(size_t id, const C_SourcePos &pos)
251{
252 I_ParserPolicy::C_ReduceInfo ri;
253 m_Policy.getReduceInfo(id, ri);
254 if (m_CurStack.size() >= ri.m_PopLength)
255 {
256 const auto base = m_CurStack.end() - ri.m_PopLength;
257 C_LexInfo li;
258 if (ri.m_Reduce)
259 ri.m_Reduce(*this,
260 [=,this,size=ri.m_PopLength](size_t n)->C_LexInfo& {
261 if (n >= size)
262 onError(pos, "Production["+std::to_string(id)+"] out of range "+std::to_string(n)+'/'+std::to_string(size));
263
264 return base[n];
265 }, li.m_attr);
266 if (ri.m_PopLength)
267 {
268 li.m_pos = base->m_pos;
269 m_CurStack.pop(ri.m_PopLength);
270 }
271 else
272 li.m_pos = pos;
273
274 shift(ri.m_ResultID, li);
275 return true;
276 }
277 return false;
278}
279
280void C_Parser::reservePostShift(std::function<void()> calledOnce, unsigned shifts)
281{
282 m_ShiftCountdown = shifts;
283 m_OnPostShift = calledOnce;
284}
285
286std::string_view C_Parser::setSource(std::string_view src)
287{
288 const auto ret = m_CurSrc;
289 m_CurSrc = src;
290 return ret;
291}
292
293void C_Parser::shift(T_LexID token, C_LexInfo &info)
294{
295 const auto state = currentState();
296 auto &t = m_CurStack.push();
297 t.m_StateID = token != ROOT_NID? m_Policy.nextState(state, token): state;
298 t.m_TokenID = token;
299 static_cast<C_LexInfo&>(t) = info;
300}
301
302} // namespace LR1
303} // namespace bux
#define HRTN(t)
Definition StrUtil.h:22
bool empty() const noexcept
Definition Xtack.h:53
auto size() const noexcept
Definition Xtack.h:68
const I_ParserPolicy & m_Policy
Definition LR1.h:75
void onError(const C_SourcePos &pos, std::string_view message)
Definition LR1.cpp:183
void add(T_LexID token, unsigned line, unsigned col, I_LexAttr *unownedAttr) override
Definition LR1.cpp:54
C_Parser(const I_ParserPolicy &policy)
Definition LR1.cpp:46
std::string_view setSource(std::string_view src) override
Definition LR1.cpp:286
void reservePostShift(std::function< void()> calledOnce, unsigned shifts)
Definition LR1.cpp:280
C_LexInfoT< I_LexAttr, C_AutoNode > C_LexInfo
Definition LR1.h:68
@ ACTION_REDUCE_MIN
Definition LR1.h:18
@ ACTION_ACCEPT
Definition LR1.h:16
@ ACTION_SHIFT
Definition LR1.h:15
@ ACTION_ERROR
Definition LR1.h:17
THE common namespace of bux library.
Definition AtomiX.cpp:3
@ TOKENGEN_LB
Definition LexBase.h:29
@ TID_EOF
Definition LexBase.h:25
@ ROOT_NID
Definition LexBase.h:26
std::string asciiLiteral(uint32_t utf32)
Definition LexBase.cpp:98
uint32_t T_LexID
Definition LexBase.h:35
unsigned T_StateID
Definition ParserBase.h:17
C_SourcePos m_pos
Definition LexBase.h:56
C_Ptr< T > m_attr
Definition LexBase.h:55
unsigned m_Col
Definition LexBase.h:42
std::string_view m_Source
Definition LexBase.h:40
unsigned m_Line
Definition LexBase.h:41
virtual size_t action(T_StateID state, T_LexID token) const =0
Return action kind.
const T_LexID m_IdError
Definition LR1.h:42
std::string printToken(T_LexID token) const
Definition LR1.cpp:22
virtual T_StateID nextState(T_StateID state, T_LexID lex) const =0
Goto table.
virtual bool getTokenName(T_LexID token, std::string &name) const
Return true if token has a name.
Definition LR1.cpp:17
virtual size_t getAcceptId() const =0
Return id which will be passed as the 1st arg of getReduceInfo()
virtual void getReduceInfo(size_t id, C_ReduceInfo &info) const =0
Get reduction if available.
virtual bool changeToken(T_LexID &token, C_LexPtr &attr) const
A chance to save action error.
Definition LR1.cpp:12
virtual void onError(C_Parser &parser, const C_SourcePos &pos, std::string_view message) const =0
Report error (to log or to throw)