bux API Reference 1.12.3
A static library of general-purpose utilities that Modern C++ does not directly support, plus reusable code that originated in my side projects.
Loading...
Searching...
No Matches
ScannerBase.h
Go to the documentation of this file.
1#pragma once
2
3#include "LexBase.h" // bux::T_LexID, bux::I_LexAttr, bux::C_IntegerLex, bux::TID_EOF
4#include "UnicodeCvt.h" // bux::C_UnicodeIn
5#include <stdexcept> // std::runtime_error
6#ifdef _WIN32
7 #include <ctype.h> // __isascii()
8#else
9 #include <wchar.h> // wcwidth()
10#endif
11
12namespace bux {
13
14//
15// Types
16//
// Receiver interface (its destructor names it I_Scanner) that scanFile() drives
// while reading a source stream. T_Char is the per-character type handed over.
17template<class T_Char>
19{
20 // Pure virtuals
21 virtual ~I_Scanner() = default;
// Receives one character plus the column value tracked by the caller.
// scanFile() calls this for every character read and once more for the end token.
22 virtual void add(unsigned col, T_Char c) = 0;
// Receives the current line number; scanFile() calls it with 1 before reading
// and with the incremented value after each '\n'.
23 virtual void setLine(unsigned line) = 0;
// Receives the source name; scanFile() passes its filename argument here once,
// before any character is delivered.
24 virtual void setSource(std::string_view src) = 0;
25};
26
// Result of a lexer action: a token id (m_id) paired with an attribute pointer (m_pAttr).
// NOTE(review): the declaration line and the data members are not visible in this listing.
28{
31
// Stores the given id and attribute pointer as-is; the attribute defaults to nullptr.
// Not marked explicit, so a bare T_LexID converts implicitly (createNothing() relies on that).
32 constexpr C_ActionRet(T_LexID id, I_LexAttr *unownedAttr = nullptr): m_id(id), m_pAttr(unownedAttr)
33 {}
// Only m_pAttr is set here; m_id is absent from the initializer list.
// NOTE(review): m_id is indeterminate after this ctor unless its (unseen) declaration
// carries a default member initializer - confirm.
34 C_ActionRet(): m_pAttr(nullptr)
35 {}
36};
37
// Primary traits template for a lexer character type T_LexCh. It only declares the
// operations; no definitions are visible here, so each character type presumably
// supplies them through a specialization (one for C_LexUTF32 follows in this file).
38template<class T_LexCh>
40{
// Appends the character to u8s; toString() uses it to build a std::string.
41 static void appendUTF8(std::string &u8s, const T_LexCh &ch);
// Number of columns the character adds; scanFile() adds it to its column counter.
42 static unsigned columnsInDisplay(const T_LexCh &ch);
// Id of the character; scanFile() compares it against '\n' and '\t'.
43 static T_LexID id(const T_LexCh &ch);
// Reads the next character from uin into ch; scanFile() loops while this returns true.
44 static bool read(C_UnicodeIn &uin, T_LexCh &ch);
// Overwrites the id of ch; scanFile() uses it to stamp the end token.
45 static void setId(T_LexCh &ch, T_LexID id);
46};
47
48struct C_LexUTF32 { uint32_t m_U32; };
49
50template<>
52{
53 static void appendUTF8(std::string &u8, C_LexUTF32 src)
54 {
55 u8 += to_utf8(src.m_U32);
56 }
57 static unsigned columnsInDisplay(C_LexUTF32 ch) noexcept
58 {
59#ifdef _WIN32
60 return __isascii(int(ch.m_U32)) ?1U :2U;
61#else
62 return (unsigned)wcwidth(wchar_t(ch.m_U32));
63#endif
64 }
65 static constexpr auto id(C_LexUTF32 ch) noexcept
66 {
67 return ch.m_U32;
68 }
69 static bool read(C_UnicodeIn &uin, C_LexUTF32 &ch)
70 {
71 return uin.get(ch.m_U32) > 0;
72 }
73 static void setId(C_LexUTF32 &ch, T_LexID id) noexcept
74 {
75 ch.m_U32 = id;
76 }
77};
78
79//
80// Externals
81//
82[[nodiscard]]
83std::string escseq2str(std::string);
84[[nodiscard]]
85bool isIdentifier(std::string_view s) noexcept;
86[[nodiscard]]
87size_t parseEscapeChar(std::string_view s, uint32_t &c, size_t pos =0);
88[[nodiscard]]
89size_t skipIdentifier(std::string_view s, size_t pos) noexcept;
90
91//
92// Function Templates
93//
94template<class T_LexCh>
95[[nodiscard]] auto toString(const T_LexCh *c, size_t start, size_t end) noexcept(noexcept(
96 C_LexTraits<T_LexCh>::appendUTF8(std::declval<std::string&>(), T_LexCh())))
97{
98 std::string buf;
99 for (size_t i = start; i < end; C_LexTraits<T_LexCh>::appendUTF8(buf, c[i++]));
100 return buf;
101}
102
103template<T_LexID _ID, class T_LexCh>
104[[nodiscard]] auto createCharLiteral(const T_LexCh *c, size_t n)
105{
106 uint32_t key;
107 const auto len = parseEscapeChar(toString(c,1,n-1), key);
108 if (len + 2 != n)
109 throw std::runtime_error{"parseEscapeChar() returns " + std::to_string(len) + " != " + std::to_string(n-2)};
110
111 return C_ActionRet{_ID, createLex(key)};
112}
113
114template<T_LexID _ID, class T_LexCh>
115[[nodiscard]] auto createDecNum(const T_LexCh *c, size_t n)
116{
117 return C_ActionRet{_ID, new C_IntegerLex(toString(c,0,n), 10)};
118}
119
120template<T_LexID _ID, class T_LexCh>
121[[nodiscard]] auto createHexNum(const T_LexCh *c, size_t n)
122{
123 return C_ActionRet{_ID, new C_IntegerLex(toString(c,0,n), 16)};
124}
125
126template<T_LexID _ID, class T_LexCh>
127[[nodiscard]] C_ActionRet createNothing(const T_LexCh *, size_t)
128{
129 return _ID;
130}
131
132template<T_LexID _ID, class T_LexCh>
133[[nodiscard]] auto createOctNum(const T_LexCh *c, size_t n)
134{
135 return C_ActionRet{_ID, new C_IntegerLex(toString(c,1,n), 8)};
136}
137
138template<T_LexID _ID, class T_LexCh, size_t TRIMLEFT = 0, size_t TRIMRIGHT = 0>
139[[nodiscard]] auto createPlainString(const T_LexCh *c, size_t n)
140{
141 return C_ActionRet{_ID, createLex(toString(c, TRIMLEFT, n-TRIMRIGHT))};
142}
143
144template<T_LexID _ID, class T_LexCh, size_t TRIMLEFT = 0, size_t TRIMRIGHT = 0>
145[[nodiscard]] auto createEscapeString(const T_LexCh *c, size_t n)
146{
147 return C_ActionRet{_ID, createLex(escseq2str(toString(c, TRIMLEFT, n-TRIMRIGHT)))};
148}
149
150template<class T_Char>
151void scanFile(std::string_view filename, std::istream &in, I_Scanner<T_Char> &scanner, T_LexID endToken = TID_EOF, T_Encoding encoding = 0)
152{
153 C_UnicodeIn src(in, encoding);
154 unsigned line = 1, col = 1;
155 T_Char c;
156
157 scanner.setSource(filename);
158 scanner.setLine(line);
159
160 typedef C_LexTraits<T_Char> C_Traits;
161
162 while (C_Traits::read(src, c))
163 {
164 scanner.add(col, c);
165 switch (C_Traits::id(c))
166 {
167 case '\n': // New line
168 scanner.setLine(++line);
169 col = 1;
170 break;
171 case '\t': // TAB
172 col += 4 - (col - 1) % 4;
173 break;
174 default:
175 col += C_Traits::columnsInDisplay(c);
176 }
177 }
178 C_Traits::setId(c, endToken);
179 scanner.add(col, c);
180}
181
182} //namespace bux
int get(T_Utf32 &c)
The common namespace of the bux library.
Definition AtomiX.cpp:3
std::string_view to_utf8(T_Utf32 uc)
const char *const * T_Encoding
Definition UnicodeCvt.h:52
C_ActionRet createNothing(const T_LexCh *, size_t)
bool isIdentifier(std::string_view s) noexcept
@ TID_EOF
Definition LexBase.h:26
std::string escseq2str(std::string s)
auto createDecNum(const T_LexCh *c, size_t n)
auto createEscapeString(const T_LexCh *c, size_t n)
auto createLex(const T &t)
Definition LexBase.h:206
auto createCharLiteral(const T_LexCh *c, size_t n)
auto toString(const T_LexCh *c, size_t start, size_t end) noexcept(noexcept(C_LexTraits< T_LexCh >::appendUTF8(std::declval< std::string & >(), T_LexCh())))
Definition ScannerBase.h:95
void scanFile(std::string_view filename, std::istream &in, I_Scanner< T_Char > &scanner, T_LexID endToken=TID_EOF, T_Encoding encoding=0)
auto createHexNum(const T_LexCh *c, size_t n)
size_t parseEscapeChar(std::string_view s, uint32_t &c, size_t pos)
size_t skipIdentifier(std::string_view s, size_t pos) noexcept
uint32_t T_LexID
Definition LexBase.h:36
auto createOctNum(const T_LexCh *c, size_t n)
auto createPlainString(const T_LexCh *c, size_t n)
constexpr C_ActionRet(T_LexID id, I_LexAttr *unownedAttr=nullptr)
Definition ScannerBase.h:32
I_LexAttr * m_pAttr
newed
Definition ScannerBase.h:30
static unsigned columnsInDisplay(C_LexUTF32 ch) noexcept
Definition ScannerBase.h:57
static bool read(C_UnicodeIn &uin, C_LexUTF32 &ch)
Definition ScannerBase.h:69
static void setId(C_LexUTF32 &ch, T_LexID id) noexcept
Definition ScannerBase.h:73
static void appendUTF8(std::string &u8, C_LexUTF32 src)
Definition ScannerBase.h:53
static constexpr auto id(C_LexUTF32 ch) noexcept
Definition ScannerBase.h:65
static void setId(T_LexCh &ch, T_LexID id)
static void appendUTF8(std::string &u8s, const T_LexCh &ch)
static unsigned columnsInDisplay(const T_LexCh &ch)
static T_LexID id(const T_LexCh &ch)
static bool read(C_UnicodeIn &uin, T_LexCh &ch)
virtual void setLine(unsigned line)=0
virtual ~I_Scanner()=default
virtual void setSource(std::string_view src)=0
virtual void add(unsigned col, T_Char c)=0