From 75d3a75ddf9f76d7a0c4f87cbaf4f0fcb6afa554 Mon Sep 17 00:00:00 2001 From: Thomas Ave Date: Sat, 28 May 2016 00:51:12 +0200 Subject: [PATCH] SyntaxHighlighter 1.0 --- examples/SyntaxHighlighter/CMakeLists.txt | 11 ++ examples/SyntaxHighlighter/attributelexer.lxs | 6 + .../include/AttributeLexer.h | 65 ++++++++ examples/SyntaxHighlighter/include/XMLLexer.h | 61 ++++++++ .../SyntaxHighlighter/include/highlighter.h | 54 +++++++ .../SyntaxHighlighter/src/AttributeLexer.cpp | 114 ++++++++++++++ examples/SyntaxHighlighter/src/CMakeLists.txt | 6 + examples/SyntaxHighlighter/src/XMLLexer.cpp | 96 ++++++++++++ .../SyntaxHighlighter/src/highlighter.cpp | 141 ++++++++++++++++++ examples/SyntaxHighlighter/src/main.cpp | 15 ++ examples/SyntaxHighlighter/xmllexer.xls | 2 + 11 files changed, 571 insertions(+) create mode 100644 examples/SyntaxHighlighter/CMakeLists.txt create mode 100644 examples/SyntaxHighlighter/attributelexer.lxs create mode 100644 examples/SyntaxHighlighter/include/AttributeLexer.h create mode 100644 examples/SyntaxHighlighter/include/XMLLexer.h create mode 100644 examples/SyntaxHighlighter/include/highlighter.h create mode 100644 examples/SyntaxHighlighter/src/AttributeLexer.cpp create mode 100644 examples/SyntaxHighlighter/src/CMakeLists.txt create mode 100644 examples/SyntaxHighlighter/src/XMLLexer.cpp create mode 100644 examples/SyntaxHighlighter/src/highlighter.cpp create mode 100644 examples/SyntaxHighlighter/src/main.cpp create mode 100644 examples/SyntaxHighlighter/xmllexer.xls diff --git a/examples/SyntaxHighlighter/CMakeLists.txt b/examples/SyntaxHighlighter/CMakeLists.txt new file mode 100644 index 0000000..d41a61e --- /dev/null +++ b/examples/SyntaxHighlighter/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.1) # CMAKE_CXX_STANDARD below is only understood by CMake 3.1 and newer + +project(SyntaxHighlighter) + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) # boolean switch: make C++11 mandatory instead of a best-effort request + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g3 -ggdb") # append, so flags passed in by the user or toolchain survive + +include_directories(include) # the project headers live in include/ +add_subdirectory(src) diff --git 
a/examples/SyntaxHighlighter/attributelexer.lxs b/examples/SyntaxHighlighter/attributelexer.lxs new file mode 100644 index 0000000..9f5f46a --- /dev/null +++ b/examples/SyntaxHighlighter/attributelexer.lxs @@ -0,0 +1,6 @@ +ELEMENT = ] diff --git a/examples/SyntaxHighlighter/include/AttributeLexer.h b/examples/SyntaxHighlighter/include/AttributeLexer.h new file mode 100644 index 0000000..01861f6 --- /dev/null +++ b/examples/SyntaxHighlighter/include/AttributeLexer.h @@ -0,0 +1,65 @@ +#pragma once +#ifndef LEXER_AttributeLexer_H +#define LEXER_AttributeLexer_H + +#include +#include +#include + +class AttributeLexer { /* Reads bytes from a std::istream and hands them out one token at a time through nextToken(). */ + public: + class NoMoreTokens : public std::exception {}; /* thrown once the input is exhausted */ + class NoMatch : public std::exception {}; /* thrown when the input at the current position matches no token */ + + enum TokenType { /* kinds of token this lexer can report */ + nonmatching, /* first enumerator, value 0: AttributeLexer.cpp tests token types for truthiness, so this one means that nothing was accepted */ + ATTRIBUTE, + ATTRIBUTE_CONTENT_DOUBLE_QUOTES, + ATTRIBUTE_CONTENT_SINGLE_QUOTES, + BRACKET, + ELEMENT, + WHITESPACE, + ignore, /* nextToken() scans again for as long as the recorded type is ignore */ + }; + + struct Token { /* one lexed token */ + TokenType type; /* which kind of token was matched */ + std::string content; /* the matched text */ + }; + + AttributeLexer(std::istream& in); /* stores a reference to in and starts at byte offset 0; the stream has to outlive the lexer */ + ~AttributeLexer(); + + /** + * Get the next token + * + * @throws NoMoreTokens if no more tokens are available + * @throws NoMatch if no match was found + */ + Token nextToken(); + + /** + * Skip the following `n` bytes. 
+ * + * @param n The number of bytes to skip + */ + void skip(std::size_t n); + + /** + * Peek at the current head of the input stream, useful in error reporting when a character mismatches for example + * + * @throws NoMoreTokens if the input stream is at an end + */ + char peek(); + + /** + * Get the current byte offset + */ + std::size_t getByteOffset(); + + private: + std::size_t m_offset; /* number of bytes consumed from m_input so far */ + std::istream& m_input; /* stream being tokenized; held by reference, not owned */ +}; + +#endif //LEXER_AttributeLexer_H diff --git a/examples/SyntaxHighlighter/include/XMLLexer.h b/examples/SyntaxHighlighter/include/XMLLexer.h new file mode 100644 index 0000000..7c503e6 --- /dev/null +++ b/examples/SyntaxHighlighter/include/XMLLexer.h @@ -0,0 +1,61 @@ +#pragma once +#ifndef LEXER_XMLLexer_H +#define LEXER_XMLLexer_H + +#include +#include +#include + +class XMLLexer { /* Reads bytes from a std::istream and splits them into TAG and CONTENT tokens, one per nextToken() call. */ + public: + class NoMoreTokens : public std::exception {}; /* thrown once the input is exhausted */ + class NoMatch : public std::exception {}; /* thrown when the input at the current position matches no token */ + + enum TokenType { /* kinds of token this lexer can report */ + nonmatching, /* first enumerator, value 0: XMLLexer.cpp tests token types for truthiness, so this one means that nothing was accepted */ + CONTENT, + TAG, + ignore, /* nextToken() scans again for as long as the recorded type is ignore */ + }; + + struct Token { /* one lexed token */ + TokenType type; /* which kind of token was matched */ + std::string content; /* the matched text */ + }; + + XMLLexer(std::istream& in); /* stores a reference to in and starts at byte offset 0; the stream has to outlive the lexer */ + ~XMLLexer(); + + /** + * Get the next token + * + * @throws NoMoreTokens if no more tokens are available + * @throws NoMatch if no match was found + */ + Token nextToken(); + + /** + * Skip the following `n` bytes. 
+ * + * @param n The number of bytes to skip + */ + void skip(std::size_t n); + + /** + * Peek at the current head of the input stream, useful in error reporting when a character mismatches for example + * + * @throws NoMoreTokens if the input stream is at an end + */ + char peek(); + + /** + * Get the current byte offset + */ + std::size_t getByteOffset(); + + private: + std::size_t m_offset; + std::istream& m_input; +}; + +#endif //LEXER_XMLLexer_H diff --git a/examples/SyntaxHighlighter/include/highlighter.h b/examples/SyntaxHighlighter/include/highlighter.h new file mode 100644 index 0000000..2646c1f --- /dev/null +++ b/examples/SyntaxHighlighter/include/highlighter.h @@ -0,0 +1,54 @@ +#pragma once +#ifndef HIGHLIGHTER_H +#define HIGHLIGHTER_H + +#include "XMLLexer.h" + +#include +#include +#include +#include + +class Highlighter { + public: + + Highlighter(std::istream &file); + virtual ~Highlighter(); + + virtual void highlight(std::ostream &os)=0; + + protected: + + void process(); + + enum TokenType { + TAG, + CONTENT, + ELEMENT, + ATTRIBUTE, + ATTRIBURE_CONTENT, + WHITESPACE, + BRACKET, + nonmatching + }; + + enum Color { Red, Green, Blue, Orange, Yellow, Cyan, Grey, Black, White, Magenta, Pink, Brown, Indigo, Violet, Undefined}; // All the colors, not all of them are used, but it's easy to change now + + struct Token { + std::string content = ""; + Color color; + TokenType type; + }; + std::map colormap; + std::vector m_tokens; + XMLLexer *m_lexer; +}; + +class ConsoleHighlighter: public Highlighter { + public: + ConsoleHighlighter(std::istream &file); + void highlight(std::ostream &os); + +}; + +#endif // HIGHLIGHTER_H diff --git a/examples/SyntaxHighlighter/src/AttributeLexer.cpp b/examples/SyntaxHighlighter/src/AttributeLexer.cpp new file mode 100644 index 0000000..633d9af --- /dev/null +++ b/examples/SyntaxHighlighter/src/AttributeLexer.cpp @@ -0,0 +1,114 @@ +#include "AttributeLexer.h" + +#include +#include + +namespace { //The automaton data + 
typedef std::size_t State; + + State REJECT = 13; + + unsigned char TRANS_IDX[256] = { (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)1, (unsigned char)2, (unsigned char)3, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)4, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)0, (unsigned char)2, (unsigned char)0, (unsigned char)2, (unsigned char)5, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)2, (unsigned char)2, (unsigned char)7, (unsigned char)8, (unsigned char)9, (unsigned char)2, (unsigned char)2, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)0, (unsigned char)2, (unsigned char)0, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, 
(unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, 
(unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, }; + + State TABLE[14 - 1][10] = { + { 13, 4, 13, 8, 10, 13, 5, 1, 7, 12, }, + + { 13, 2, 13, 13, 13, 2, 3, 13, 13, 13, }, + + { 13, 2, 13, 13, 13, 13, 3, 13, 13, 13, }, + + { 13, 13, 13, 13, 13, 13, 3, 13, 13, 13, }, + + { 13, 4, 13, 8, 10, 13, 13, 13, 7, 13, }, + + { 13, 6, 13, 13, 13, 13, 5, 13, 7, 13, }, + + { 13, 6, 13, 13, 13, 13, 13, 13, 7, 13, }, + + { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, }, + + { 13, 8, 8, 9, 8, 8, 8, 13, 8, 13, }, + + { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, }, + + { 13, 10, 10, 10, 11, 10, 10, 13, 10, 13, }, + + { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, }, + + { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, }, + }; + + AttributeLexer::TokenType TOKENS[14] = { AttributeLexer::WHITESPACE, AttributeLexer::ELEMENT, AttributeLexer::ELEMENT, AttributeLexer::ELEMENT, AttributeLexer::WHITESPACE, AttributeLexer::nonmatching, AttributeLexer::nonmatching, AttributeLexer::ATTRIBUTE, AttributeLexer::nonmatching, AttributeLexer::ATTRIBUTE_CONTENT_DOUBLE_QUOTES, AttributeLexer::nonmatching, AttributeLexer::ATTRIBUTE_CONTENT_SINGLE_QUOTES, AttributeLexer::BRACKET, AttributeLexer::nonmatching, }; +} + +AttributeLexer::AttributeLexer(std::istream& in) : m_offset(0), m_input(in) { + +} + +AttributeLexer::~AttributeLexer() { +} + +AttributeLexer::Token 
AttributeLexer::nextToken() { /* Runs the automaton over the input, remembers the last position at which a token was accepted, and returns the text up to there; bytes read beyond that position are pushed back onto the stream. Throws NoMoreTokens or NoMatch when nothing was accepted. */ + TokenType type = ignore; + std::string token; + + while (type == ignore) { /* scans again only if the type recorded below is ignore */ + State state = 0; + std::size_t match_length = 0; + token = ""; + + while (!m_input.eof() && state != REJECT) { + char c = m_input.peek(); /* look at the next byte without consuming it */ + if (m_input.eof()) + break; + + token += c; + + state = TABLE[state][TRANS_IDX[(unsigned char)c]]; /* TRANS_IDX maps the byte to a TABLE column */ + if (TOKENS[state]) /* nonmatching is 0, so this holds only in accepting states */ + { + match_length = token.length(); + type = TOKENS[state]; + } + m_input.get(); /* now consume the byte that was peeked */ + ++m_offset; + } + + std::size_t sdiff = token.length() - match_length; /* bytes read past the last accepted position */ + for (std::size_t i = 0; i < sdiff; i++) + { + m_input.putback(token[token.length() - i - 1]); /* un-read them, last byte first */ + } + m_offset -= sdiff; + + if (!type || !match_length) { /* nothing was accepted */ + if (m_input.eof()) + throw NoMoreTokens(); + throw NoMatch(); + } + + token = token.substr(0, match_length); + } + + Token t; + t.type = type; + t.content = token; + return t; +} + +void AttributeLexer::skip(std::size_t n) { /* Consumes n bytes. NOTE(review): m_offset is advanced for every iteration, whether or not get() could still read a byte. */ + for (size_t i = 0; i < n; i++) { + m_input.get(); + ++m_offset; + } +} + +char AttributeLexer::peek() { /* Returns the next byte without consuming it; throws NoMoreTokens if the stream already has eofbit set. */ + if (m_input.eof()) + throw NoMoreTokens(); + return m_input.peek(); +} + +std::size_t AttributeLexer::getByteOffset() { /* current value of m_offset */ + return m_offset; +} diff --git a/examples/SyntaxHighlighter/src/CMakeLists.txt b/examples/SyntaxHighlighter/src/CMakeLists.txt new file mode 100644 index 0000000..199e5a9 --- /dev/null +++ b/examples/SyntaxHighlighter/src/CMakeLists.txt @@ -0,0 +1,6 @@ +add_executable(highlighter + highlighter.cpp + main.cpp + AttributeLexer.cpp + XMLLexer.cpp +) diff --git a/examples/SyntaxHighlighter/src/XMLLexer.cpp b/examples/SyntaxHighlighter/src/XMLLexer.cpp new file mode 100644 index 0000000..5227fc6 --- /dev/null +++ b/examples/SyntaxHighlighter/src/XMLLexer.cpp @@ -0,0 +1,96 @@ +#include "XMLLexer.h" + +#include +#include + +namespace { //The automaton data + typedef std::size_t State; + + State REJECT = 4; /* state in which XMLLexer::nextToken() stops scanning */ + + unsigned char TRANS_IDX[256] = { (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned
char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)2, (unsigned char)1, (unsigned char)3, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)0, (unsigned char)1, (unsigned char)0, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned 
char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned 
char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, }; + + State TABLE[5 - 1][4] = { + { 4, 3, 1, 4, }, + + { 4, 1, 4, 2, }, + + { 4, 4, 4, 4, }, + + { 4, 3, 4, 4, }, + }; + + XMLLexer::TokenType TOKENS[5] = { XMLLexer::CONTENT, XMLLexer::nonmatching, XMLLexer::TAG, XMLLexer::CONTENT, XMLLexer::nonmatching, }; +} + +XMLLexer::XMLLexer(std::istream& in) : m_offset(0), m_input(in) { + +} + +XMLLexer::~XMLLexer() { +} + +XMLLexer::Token XMLLexer::nextToken() { + TokenType type = ignore; + std::string token; + + while (type == ignore) { + State state = 0; + std::size_t match_length = 0; + token = ""; + + while (!m_input.eof() && state != REJECT) { + char c = m_input.peek(); + if (m_input.eof()) + break; + + token += c; + + state = TABLE[state][TRANS_IDX[(unsigned char)c]]; + if (TOKENS[state]) + { + match_length = token.length(); + type = TOKENS[state]; + } + m_input.get(); + ++m_offset; + } + + std::size_t sdiff = token.length() - match_length; + for (std::size_t i = 0; i < sdiff; i++) + { + m_input.putback(token[token.length() - i - 1]); + } + m_offset -= sdiff; + + if (!type || !match_length) { + if (m_input.eof()) + throw NoMoreTokens(); + throw NoMatch(); + } + + token = token.substr(0, match_length); + } + + Token t; + t.type = type; + t.content = token; + return t; +} + +void XMLLexer::skip(std::size_t n) { + for (size_t i = 0; i < n; i++) { + m_input.get(); + ++m_offset; + } +} + +char XMLLexer::peek() { + if (m_input.eof()) + throw NoMoreTokens(); + return m_input.peek(); +} + +std::size_t XMLLexer::getByteOffset() { + return 
m_offset; +} diff --git a/examples/SyntaxHighlighter/src/highlighter.cpp b/examples/SyntaxHighlighter/src/highlighter.cpp new file mode 100644 index 0000000..2003e4d --- /dev/null +++ b/examples/SyntaxHighlighter/src/highlighter.cpp @@ -0,0 +1,141 @@ +#include "highlighter.h" +#include "AttributeLexer.h" +#include +#include + +Highlighter::Highlighter(std::istream &file) { + m_lexer = new XMLLexer(file); + colormap[CONTENT] = Undefined; + colormap[ELEMENT] = Undefined; + colormap[ATTRIBUTE] = Undefined; + colormap[ATTRIBURE_CONTENT] = Undefined; + colormap[BRACKET] = Undefined; + colormap[nonmatching] = Undefined; +} + +Highlighter::~Highlighter() { + delete m_lexer; +} + +void Highlighter::process() { + while (true) { + try { + XMLLexer::Token token = m_lexer->nextToken(); + Token newtoken; + newtoken.content = token.content; + switch(token.type) { + case XMLLexer::TokenType::CONTENT: + newtoken.type = CONTENT; + break; + case XMLLexer::TokenType::TAG: + newtoken.type = TAG; + break; + default: + newtoken.type = nonmatching; + break; + } + newtoken.color = colormap.find(newtoken.type)->second; + m_tokens.push_back(newtoken); + } catch (XMLLexer::NoMoreTokens &err) { + break; + } catch (XMLLexer::NoMatch& err) { + Token newtoken; + newtoken.content = m_lexer->peek(); + m_lexer->skip(1); + newtoken.type = nonmatching; + m_tokens.push_back(newtoken); + } + } + auto tokens = std::move(m_tokens); + m_tokens.clear(); + for(auto &tagtoken: tokens) { + if(tagtoken.type == TAG && !tagtoken.content.empty()) { + std::istringstream content(tagtoken.content); + AttributeLexer attributelexer(content); + while (true) { + try { + AttributeLexer::Token token = attributelexer.nextToken(); + Token newtoken; + newtoken.content = token.content; + switch(token.type) { + case AttributeLexer::TokenType::ELEMENT: + newtoken.type = ELEMENT; + break; + case AttributeLexer::TokenType::BRACKET: + newtoken.type = BRACKET; + break; + case AttributeLexer::TokenType::ATTRIBUTE: + newtoken.type = 
ATTRIBUTE; + break; + case AttributeLexer::TokenType::ATTRIBUTE_CONTENT_DOUBLE_QUOTES: + newtoken.type = ATTRIBURE_CONTENT; + break; + case AttributeLexer::TokenType::ATTRIBUTE_CONTENT_SINGLE_QUOTES: + newtoken.type = ATTRIBURE_CONTENT; + break; + default: + newtoken.type = nonmatching; + break; + } + newtoken.color = colormap.find(newtoken.type)->second; + m_tokens.push_back(newtoken); + } catch (AttributeLexer::NoMoreTokens &err) { + break; + } catch (AttributeLexer::NoMatch& err) { + Token newtoken; + newtoken.content = attributelexer.peek(); + attributelexer.skip(1); + newtoken.type = nonmatching; + m_tokens.push_back(newtoken); + } + } + } else { + m_tokens.push_back(tagtoken); + } + } +} + +ConsoleHighlighter::ConsoleHighlighter(std::istream &file): Highlighter(file) { + colormap[CONTENT] = White; + colormap[ELEMENT] = Blue; + colormap[TAG] = Magenta; + colormap[ATTRIBUTE] = Yellow; + colormap[ATTRIBURE_CONTENT] = Green; + colormap[BRACKET] = Blue; + colormap[nonmatching] = Black; + process(); +} + +void ConsoleHighlighter::highlight(std::ostream &os) { + for(auto &token: m_tokens) { + switch(token.color) { + case Yellow: + os << "\033[1;33m" << token.content << "\033[0m"; + break; + case Black: + os << "\033[1;30m" << token.content << "\033[0m"; + break; + case Red: + os << "\033[1;31m" << token.content << "\033[0m"; + break; + case Green: + os << "\033[1;32m" << token.content << "\033[0m"; + break; + case Blue: + os << "\033[1;34m" << token.content << "\033[0m"; + break; + case Magenta: + os << "\033[1;35m" << token.content << "\033[0m"; + break; + case Cyan: + os << "\033[1;36m" << token.content << "\033[0m"; + break; + case White: + os << "\033[1;37m" << token.content << "\033[0m"; + break; + default: + os << token.content; + } + } + os << std::endl; +} diff --git a/examples/SyntaxHighlighter/src/main.cpp b/examples/SyntaxHighlighter/src/main.cpp new file mode 100644 index 0000000..cce3db5 --- /dev/null +++ b/examples/SyntaxHighlighter/src/main.cpp @@ 
-0,0 +1,19 @@ +#include "highlighter.h" + +#include +#include + +int main(int argc, char** argv) { /* Highlights the XML file named by the single command-line argument and writes the coloured text to standard output; prints a usage line when the argument count is wrong. */ + if(argc == 2) { + std::ifstream in(argv[1]); + if(!in) { /* a stream that failed to open has failbit but not eofbit set, and the lexers only test eof(), so they would never stop */ + std::cerr << "Cannot open " << argv[1] << "\n"; + return 1; + } + ConsoleHighlighter highlighter(in); /* the constructor already tokenizes the whole file */ + highlighter.highlight(std::cout); + } else { + std::cout << "Usage: " << argv[0] << " \n"; + } + return 0; +} diff --git a/examples/SyntaxHighlighter/xmllexer.xls b/examples/SyntaxHighlighter/xmllexer.xls new file mode 100644 index 0000000..8a61b15 --- /dev/null +++ b/examples/SyntaxHighlighter/xmllexer.xls @@ -0,0 +1,2 @@ +TAG = +CONTENT = [-a-zA-Z0-9 `'!"@#$%^&*()_={}|\;:/?.,`~+]*