bux API Reference 1.9.0
A static library of general-purpose facilities that are commonly needed but not directly supported by modern C++, plus reusable code that originated in my side projects.
Loading...
Searching...
No Matches
ScannerBase.h
Go to the documentation of this file.
1#pragma once
2
3#include "LexBase.h" // bux::T_LexID, bux::I_LexAttr, bux::C_IntegerLex, bux::TID_EOF
4#include "UnicodeCvt.h" // bux::C_UnicodeIn
5#ifdef _WIN32
6 #include <ctype.h> // __isascii()
7#else
8 #include <wchar.h> // wcwidth()
9#endif
10
11namespace bux {
12
13//
14// Types
15//
/// Interface of a consumer that is fed a character stream together with
/// position information (see scanFile(), which drives it).
/// \tparam T_Char Character type delivered to the scanner.
template<class T_Char>
struct I_Scanner
{
    // Pure virtuals
    virtual ~I_Scanner() = default;

    /// Feed the next character \p c, located at column \p col.
    virtual void add(unsigned col, T_Char c) = 0;

    /// Announce the number of the line the following characters belong to.
    virtual void setLine(unsigned line) = 0;

    /// Announce the name of the source about to be scanned.
    virtual void setSource(std::string_view src) = 0;
};
25
27{
30
31 constexpr C_ActionRet(T_LexID id, I_LexAttr *unownedAttr = nullptr): m_id(id), m_pAttr(unownedAttr)
32 {}
33 C_ActionRet(): m_pAttr(nullptr)
34 {}
35};
36
37template<class T_LexCh>
39{
40 static void appendUTF8(std::string &u8s, const T_LexCh &ch);
41 static unsigned columnsInDisplay(const T_LexCh &ch);
42 static T_LexID id(const T_LexCh &ch);
43 static bool read(C_UnicodeIn &uin, T_LexCh &ch);
44 static void setId(T_LexCh &ch, T_LexID id);
45};
46
/// Lexer character holding a single 32-bit value.
struct C_LexUTF32
{
    uint32_t m_U32;
};
48
49template<>
51{
52 static void appendUTF8(std::string &u8, C_LexUTF32 src)
53 {
54 u8 += to_utf8(src.m_U32);
55 }
56 static unsigned columnsInDisplay(C_LexUTF32 ch) noexcept
57 {
58#ifdef _WIN32
59 return __isascii(int(ch.m_U32)) ?1U :2U;
60#else
61 return (unsigned)wcwidth(wchar_t(ch.m_U32));
62#endif
63 }
64 static constexpr auto id(C_LexUTF32 ch) noexcept
65 {
66 return ch.m_U32;
67 }
68 static bool read(C_UnicodeIn &uin, C_LexUTF32 &ch)
69 {
70 return uin.get(ch.m_U32) > 0;
71 }
72 static void setId(C_LexUTF32 &ch, T_LexID id) noexcept
73 {
74 ch.m_U32 = id;
75 }
76};
77
78//
79// Externals
80//
/// Convert the string \p s and return the result.
/// NOTE(review): the definition is out of view; the name suggests escape
/// sequences are replaced by the characters they denote - confirm.
[[nodiscard]] std::string escseq2str(std::string s);

/// Tell whether \p s is an identifier (rules are defined with the implementation).
[[nodiscard]] bool isIdentifier(std::string_view s) noexcept;

/// Parse one (possibly escaped) character of \p s starting at \p pos into \p c.
/// createCharLiteral() in this header treats the return value as the number
/// of characters consumed.
[[nodiscard]] size_t parseEscapeChar(std::string_view s, uint32_t &c, size_t pos =0);

/// Skip an identifier in \p s starting at \p pos.
/// NOTE(review): presumably returns the position just past it - confirm.
[[nodiscard]] size_t skipIdentifier(std::string_view s, size_t pos) noexcept;
89
90//
91// Function Templates
92//
93template<class T_LexCh>
94[[nodiscard]] auto toString(const T_LexCh *c, size_t start, size_t end) noexcept(noexcept(
95 C_LexTraits<T_LexCh>::appendUTF8(std::declval<std::string&>(), T_LexCh())))
96{
97 std::string buf;
98 for (size_t i = start; i < end; C_LexTraits<T_LexCh>::appendUTF8(buf, c[i++]));
99 return buf;
100}
101
102template<T_LexID _ID, class T_LexCh>
103[[nodiscard]] auto createCharLiteral(const T_LexCh *c, size_t n)
104{
105 uint32_t key;
106 const auto len = parseEscapeChar(toString(c,1,n-1), key);
107 if (len + 2 != n)
108 RUNTIME_ERROR("parseEscapeChar() returns {} != {}", len, n -2);
109
110 return C_ActionRet{_ID, createLex(key)};
111}
112
113template<T_LexID _ID, class T_LexCh>
114[[nodiscard]] auto createDecNum(const T_LexCh *c, size_t n)
115{
116 return C_ActionRet{_ID, new C_IntegerLex(toString(c,0,n), 10)};
117}
118
119template<T_LexID _ID, class T_LexCh>
120[[nodiscard]] auto createHexNum(const T_LexCh *c, size_t n)
121{
122 return C_ActionRet{_ID, new C_IntegerLex(toString(c,0,n), 16)};
123}
124
125template<T_LexID _ID, class T_LexCh>
126[[nodiscard]] C_ActionRet createNothing(const T_LexCh *, size_t)
127{
128 return _ID;
129}
130
131template<T_LexID _ID, class T_LexCh>
132[[nodiscard]] auto createOctNum(const T_LexCh *c, size_t n)
133{
134 return C_ActionRet{_ID, new C_IntegerLex(toString(c,1,n), 8)};
135}
136
137template<T_LexID _ID, class T_LexCh, size_t TRIMLEFT = 0, size_t TRIMRIGHT = 0>
138[[nodiscard]] auto createPlainString(const T_LexCh *c, size_t n)
139{
140 return C_ActionRet{_ID, createLex(toString(c, TRIMLEFT, n-TRIMRIGHT))};
141}
142
143template<T_LexID _ID, class T_LexCh, size_t TRIMLEFT = 0, size_t TRIMRIGHT = 0>
144[[nodiscard]] auto createEscapeString(const T_LexCh *c, size_t n)
145{
146 return C_ActionRet{_ID, createLex(escseq2str(toString(c, TRIMLEFT, n-TRIMRIGHT)))};
147}
148
149template<class T_Char>
150void scanFile(std::string_view filename, std::istream &in, I_Scanner<T_Char> &scanner, T_LexID endToken = TID_EOF, T_Encoding encoding = 0)
151{
152 C_UnicodeIn src(in, encoding);
153 unsigned line = 1, col = 1;
154 T_Char c;
155
156 scanner.setSource(filename);
157 scanner.setLine(line);
158
159 typedef C_LexTraits<T_Char> C_Traits;
160
161 while (C_Traits::read(src, c))
162 {
163 scanner.add(col, c);
164 switch (C_Traits::id(c))
165 {
166 case '\n': // New line
167 scanner.setLine(++line);
168 col = 1;
169 break;
170 case '\t': // TAB
171 col += 4 - (col - 1) % 4;
172 break;
173 default:
174 col += C_Traits::columnsInDisplay(c);
175 }
176 }
177 C_Traits::setId(c, endToken);
178 scanner.add(col, c);
179}
180
181} //namespace bux
#define RUNTIME_ERROR(fmtStr,...)
Wrap `__FILE__(__DATE__)#__LINE__ __FUNCTION__: msg` into std::runtime_error.
Definition XException.h:32
int get(T_Utf32 &c)
THE common namespace of bux library.
Definition AtomiX.cpp:3
std::string_view to_utf8(T_Utf32 uc)
C_ActionRet createNothing(const T_LexCh *, size_t)
bool isIdentifier(std::string_view s) noexcept
std::string escseq2str(std::string s)
auto createDecNum(const T_LexCh *c, size_t n)
auto createEscapeString(const T_LexCh *c, size_t n)
auto createLex(const T &t)
Definition LexBase.h:205
auto createCharLiteral(const T_LexCh *c, size_t n)
auto toString(const T_LexCh *c, size_t start, size_t end) noexcept(noexcept(C_LexTraits< T_LexCh >::appendUTF8(std::declval< std::string & >(), T_LexCh())))
Definition ScannerBase.h:94
@ TID_EOF
Definition LexBase.h:25
void scanFile(std::string_view filename, std::istream &in, I_Scanner< T_Char > &scanner, T_LexID endToken=TID_EOF, T_Encoding encoding=0)
auto createHexNum(const T_LexCh *c, size_t n)
size_t parseEscapeChar(std::string_view s, uint32_t &c, size_t pos)
size_t skipIdentifier(std::string_view s, size_t pos) noexcept
uint32_t T_LexID
Definition LexBase.h:35
auto createOctNum(const T_LexCh *c, size_t n)
auto createPlainString(const T_LexCh *c, size_t n)
constexpr C_ActionRet(T_LexID id, I_LexAttr *unownedAttr=nullptr)
Definition ScannerBase.h:31
I_LexAttr * m_pAttr
newed
Definition ScannerBase.h:29
static unsigned columnsInDisplay(C_LexUTF32 ch) noexcept
Definition ScannerBase.h:56
static bool read(C_UnicodeIn &uin, C_LexUTF32 &ch)
Definition ScannerBase.h:68
static void setId(C_LexUTF32 &ch, T_LexID id) noexcept
Definition ScannerBase.h:72
static void appendUTF8(std::string &u8, C_LexUTF32 src)
Definition ScannerBase.h:52
static constexpr auto id(C_LexUTF32 ch) noexcept
Definition ScannerBase.h:64
static void setId(T_LexCh &ch, T_LexID id)
static void appendUTF8(std::string &u8s, const T_LexCh &ch)
static unsigned columnsInDisplay(const T_LexCh &ch)
static T_LexID id(const T_LexCh &ch)
static bool read(C_UnicodeIn &uin, T_LexCh &ch)
virtual void setLine(unsigned line)=0
virtual ~I_Scanner()=default
virtual void setSource(std::string_view src)=0
virtual void add(unsigned col, T_Char c)=0