Improve regexLexer, especially character classes
This commit is contained in:
parent
97a347a6ee
commit
e425e9ff81
|
@ -6,91 +6,87 @@
|
||||||
namespace { //The automaton data
|
namespace { //The automaton data
|
||||||
//Type used for automaton state numbers (REJECT and the entries of TABLE are of this type).
using State = std::size_t;
|
//Type used for automaton state numbers (REJECT and the entries of TABLE are of this type).
using State = std::size_t;
|
||||||
|
|
||||||
//State number without a row in TABLE (TABLE holds 40 - 1 rows, i.e. indices 0..38).
//Presumably the dead state that signals no further match is possible - confirm in the lexer loop.
State REJECT = 39;
|
//State number without a row in TABLE (TABLE holds 38 - 1 rows, i.e. indices 0..36).
//Presumably the dead state that signals no further match is possible - confirm in the lexer loop.
State REJECT = 37;
|
||||||
|
|
||||||
//Per-byte lookup table: the values span 0..21, matching the 22 columns of TABLE
//(presumably the column to use for that input byte - confirm in the lexer loop).
//Every byte without a column of its own maps to 0.
unsigned char TRANS_IDX[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 0, 0, 0, 0, 0, 0, 0, 2, 3, 4, 5, 0, 6, 7, 0,     //space ( ) * + - .
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,     //?
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 10, 11, 12, 0,  //[ backslash ] ^
    /* 0x60 */ 0, 13, 14, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 16, 0, //a b f n
    /* 0x70 */ 0, 0, 17, 18, 19, 0, 20, 0, 0, 0, 0, 0, 21, 0, 0, 0, //r s t v |
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
//Per-byte lookup table: the values span 0..21, matching the 22 columns of TABLE
//(presumably the column to use for that input byte - confirm in the lexer loop).
//Every byte without a column of its own maps to 0.
unsigned char TRANS_IDX[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 0, 0, 0, 0, 0, 0, 0, 2, 3, 4, 5, 0, 6, 7, 0,     //space ( ) * + - .
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,     //?
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 10, 11, 12, 0,  //[ backslash ] ^
    /* 0x60 */ 0, 13, 14, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 16, 0, //a b f n
    /* 0x70 */ 0, 0, 17, 18, 19, 0, 20, 0, 0, 0, 0, 0, 21, 0, 0, 0, //r s t v |
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||||
|
|
||||||
State TABLE[40 - 1][22] = {
|
State TABLE[38 - 1][22] = {
|
||||||
{ 37, 16, 35, 36, 31, 32, 37, 30, 33, 1, 11, 38, 37, 37, 37, 37, 37, 37, 37, 37, 37, 34, },
|
{ 35, 14, 33, 34, 29, 30, 35, 28, 31, 1, 9, 36, 35, 35, 35, 35, 35, 35, 35, 35, 35, 32, },
|
||||||
|
|
||||||
{ 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 4, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, },
|
{ 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 3, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, },
|
||||||
|
|
||||||
{ 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, },
|
{ 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, },
|
||||||
|
|
||||||
{ 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 10, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, },
|
{ 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, },
|
||||||
|
|
||||||
{ 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 10, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, },
|
{ 5, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, },
|
||||||
|
|
||||||
{ 6, 6, 6, 6, 6, 6, 39, 6, 6, 6, 6, 10, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, },
|
{ 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, },
|
||||||
|
|
||||||
{ 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 10, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, },
|
{ 4, 4, 4, 4, 4, 4, 37, 4, 4, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, },
|
||||||
|
|
||||||
{ 8, 8, 8, 8, 8, 8, 39, 8, 8, 8, 8, 10, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 8, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 10, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 10, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 22, 23, 19, 20, 37, 27, 26, 24, 18, 25, 37, 15, 13, 16, 11, 12, 14, 10, 17, 21, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 24, 25, 21, 22, 39, 29, 28, 26, 20, 27, 39, 17, 15, 18, 13, 14, 16, 12, 19, 23, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
{ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, },
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
|
||||||
|
|
||||||
{ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, },
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//One token type per entry, 40 entries in total (indices 0..39).
//Presumably indexed by automaton state, with the last index being REJECT - confirm in the lexer loop.
RegexLexer::TokenType TOKENS[40] = {
    RegexLexer::nonmatching,          // 0
    RegexLexer::ERROR,                // 1
    RegexLexer::nonmatching,          // 2
    RegexLexer::CHAR_CLASS,           // 3
    RegexLexer::nonmatching,          // 4
    RegexLexer::nonmatching,          // 5
    RegexLexer::nonmatching,          // 6
    RegexLexer::nonmatching,          // 7
    RegexLexer::nonmatching,          // 8
    RegexLexer::nonmatching,          // 9
    RegexLexer::CHAR_CLASS,           //10
    RegexLexer::ERROR,                //11
    RegexLexer::TAB,                  //12
    RegexLexer::NEWLINE,              //13
    RegexLexer::CARRIAGE_RETURN,      //14
    RegexLexer::BACKSPACE,            //15
    RegexLexer::SPACE,                //16
    RegexLexer::BELL,                 //17
    RegexLexer::FORMFEED,             //18
    RegexLexer::VTAB,                 //19
    RegexLexer::BACKSLASH,            //20
    RegexLexer::ESCAPED_STAR,         //21
    RegexLexer::ESCAPED_PLUS,         //22
    RegexLexer::ESCAPED_PIPE,         //23
    RegexLexer::ESCAPED_LPAREN,       //24
    RegexLexer::ESCAPED_RPAREN,       //25
    RegexLexer::ESCAPED_LBRACKET,     //26
    RegexLexer::ESCAPED_RBRACKET,     //27
    RegexLexer::ESCAPED_QUESTIONMARK, //28
    RegexLexer::ESCAPED_DOT,          //29
    RegexLexer::DOT,                  //30
    RegexLexer::STAR,                 //31
    RegexLexer::PLUS,                 //32
    RegexLexer::QUESTIONMARK,         //33
    RegexLexer::PIPE,                 //34
    RegexLexer::LPAREN,               //35
    RegexLexer::RPAREN,               //36
    RegexLexer::CHAR,                 //37
    RegexLexer::ERROR,                //38
    RegexLexer::nonmatching,          //39
};
|
//One token type per entry, 38 entries in total (indices 0..37).
//Presumably indexed by automaton state, with the last index being REJECT - confirm in the lexer loop.
RegexLexer::TokenType TOKENS[38] = {
    RegexLexer::nonmatching,          // 0
    RegexLexer::ERROR,                // 1
    RegexLexer::nonmatching,          // 2
    RegexLexer::nonmatching,          // 3
    RegexLexer::nonmatching,          // 4
    RegexLexer::nonmatching,          // 5
    RegexLexer::nonmatching,          // 6
    RegexLexer::nonmatching,          // 7
    RegexLexer::CHAR_CLASS,           // 8
    RegexLexer::ERROR,                // 9
    RegexLexer::TAB,                  //10
    RegexLexer::NEWLINE,              //11
    RegexLexer::CARRIAGE_RETURN,      //12
    RegexLexer::BACKSPACE,            //13
    RegexLexer::SPACE,                //14
    RegexLexer::BELL,                 //15
    RegexLexer::FORMFEED,             //16
    RegexLexer::VTAB,                 //17
    RegexLexer::BACKSLASH,            //18
    RegexLexer::ESCAPED_STAR,         //19
    RegexLexer::ESCAPED_PLUS,         //20
    RegexLexer::ESCAPED_PIPE,         //21
    RegexLexer::ESCAPED_LPAREN,       //22
    RegexLexer::ESCAPED_RPAREN,       //23
    RegexLexer::ESCAPED_LBRACKET,     //24
    RegexLexer::ESCAPED_RBRACKET,     //25
    RegexLexer::ESCAPED_QUESTIONMARK, //26
    RegexLexer::ESCAPED_DOT,          //27
    RegexLexer::DOT,                  //28
    RegexLexer::STAR,                 //29
    RegexLexer::PLUS,                 //30
    RegexLexer::QUESTIONMARK,         //31
    RegexLexer::PIPE,                 //32
    RegexLexer::LPAREN,               //33
    RegexLexer::RPAREN,               //34
    RegexLexer::CHAR,                 //35
    RegexLexer::ERROR,                //36
    RegexLexer::nonmatching,          //37
};
|
||||||
}
|
}
|
||||||
|
|
||||||
RegexLexer::RegexLexer(std::istream& in) : m_offset(0), m_input(in) {
|
RegexLexer::RegexLexer(std::istream& in) : m_offset(0), m_input(in) {
|
||||||
|
|
|
@ -11,8 +11,6 @@ class RegexLexer {
|
||||||
//Exception type derived from std::exception with no members of its own.
//Presumably thrown when the input holds no further tokens - confirm at the throw site.
class NoMoreTokens : public std::exception {};
|
//Exception type derived from std::exception with no members of its own.
//Presumably thrown when the input holds no further tokens - confirm at the throw site.
class NoMoreTokens : public std::exception {};
|
||||||
//Exception type derived from std::exception with no members of its own.
//Presumably thrown when the input matches no token rule - confirm at the throw site.
class NoMatch : public std::exception {};
|
//Exception type derived from std::exception with no members of its own.
//Presumably thrown when the input matches no token rule - confirm at the throw site.
class NoMatch : public std::exception {};
|
||||||
|
|
||||||
//Copy construction is explicitly disabled.
RegexLexer(const RegexLexer&) = delete;
|
|
||||||
|
|
||||||
enum TokenType {
|
enum TokenType {
|
||||||
nonmatching,
|
nonmatching,
|
||||||
BACKSLASH,
|
BACKSLASH,
|
||||||
|
|
|
@ -169,7 +169,7 @@ namespace lxs {
|
||||||
++start;
|
++start;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (input[end] == '-') {
|
if (input[end - 1] == '-') {
|
||||||
used_chars.insert('-');
|
used_chars.insert('-');
|
||||||
--end;
|
--end;
|
||||||
}
|
}
|
||||||
|
@ -339,7 +339,7 @@ namespace lxs {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case RegexLexer::ERROR:
|
case RegexLexer::ERROR:
|
||||||
throw SyntaxError(("Error on character: " + tok.content).c_str());
|
throw SyntaxError((std::to_string(lex.getByteOffset()) + ": Error on character: " + tok.content).c_str());
|
||||||
|
|
||||||
case RegexLexer::ignore: case RegexLexer::nonmatching:
|
case RegexLexer::ignore: case RegexLexer::nonmatching:
|
||||||
//Just ignore these
|
//Just ignore these
|
||||||
|
|
|
@ -1,4 +1,15 @@
|
||||||
CHAR_CLASS = \[^?\]?-?([^]-]-[^]-]|[^]-])+-?\]|\[^?(-|\]|\]-)\]
|
# \[
|
||||||
|
# ( ^ ( \]-? | - | X' )
|
||||||
|
# | \]-? | - | Y' )
|
||||||
|
# X'*
|
||||||
|
# -? \]
|
||||||
|
#
|
||||||
|
# With X' = (X - X | X)
|
||||||
|
# Y' = (Y - X | Y)
|
||||||
|
# X = [^]-]
|
||||||
|
# Y = [^]-^]
|
||||||
|
#
|
||||||
|
CHAR_CLASS = \[(^(\]-?|-|([^]-]-[^]-]|[^]-]))|\]-?|-|([^]-^]-[^]-]|[^]-^]))([^]-]-[^]-]|[^]-])*-?\]
|
||||||
|
|
||||||
#All the escape sequences
|
#All the escape sequences
|
||||||
TAB = \\t
|
TAB = \\t
|
||||||
|
|
Loading…
Reference in New Issue