SyntaxHighlighter 1.0
parent f4e095b462
commit 75d3a75ddf
@@ -0,0 +1,11 @@
cmake_minimum_required(VERSION 3.1)

project(SyntaxHighlighter)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(CMAKE_CXX_FLAGS "-Wall -g3 -ggdb")

include_directories(include)
add_subdirectory(src)
@@ -0,0 +1,6 @@
ELEMENT = </?[ ]*[a-zA-Z0-9]*
WHITESPACE = [ ]*
ATTRIBUTE = [a-zA-Z0-9]*[ ]*=
ATTRIBUTE_CONTENT_DOUBLE_QUOTES = [ ]*"[a-zA-Z0-9 '!@#$%^&*()_-+={}|\;:/?.,`~]*"
ATTRIBUTE_CONTENT_SINGLE_QUOTES = [ ]*'[a-zA-Z0-9 !"@#$%^&*()_-+={}|\;:/?.,`~]*'
BRACKET = [<>]
@@ -0,0 +1,65 @@
#pragma once
#ifndef LEXER_AttributeLexer_H
#define LEXER_AttributeLexer_H

#include <exception>
#include <istream>
#include <string>

class AttributeLexer {
public:
    class NoMoreTokens : public std::exception {};
    class NoMatch : public std::exception {};

    enum TokenType {
        nonmatching,
        ATTRIBUTE,
        ATTRIBUTE_CONTENT_DOUBLE_QUOTES,
        ATTRIBUTE_CONTENT_SINGLE_QUOTES,
        BRACKET,
        ELEMENT,
        WHITESPACE,
        ignore,
    };

    struct Token {
        TokenType type;
        std::string content;
    };

    AttributeLexer(std::istream& in);
    ~AttributeLexer();

    /**
     * Get the next token
     *
     * @throws NoMoreTokens if no more tokens are available
     * @throws NoMatch if no match was found
     */
    Token nextToken();

    /**
     * Skip the following `n` bytes.
     *
     * @param n The number of bytes to skip
     */
    void skip(std::size_t n);

    /**
     * Peek at the current head of the input stream; useful for error reporting when a character fails to match.
     *
     * @throws NoMoreTokens if the input stream is at an end
     */
    char peek();

    /**
     * Get the current byte offset
     */
    std::size_t getByteOffset();

private:
    std::size_t m_offset;
    std::istream& m_input;
};

#endif //LEXER_AttributeLexer_H
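A minimal usage sketch (not part of this commit, assuming only the header above): feed one tag to AttributeLexer through a std::istringstream, print each token, and resynchronize on unmatched characters the same way Highlighter::process() does further down. The sample input string is purely illustrative.

#include "AttributeLexer.h"

#include <iostream>
#include <sstream>

int main() {
    std::istringstream in("<img src=\"logo.png\" alt='demo'>");   // illustrative input, not from the commit
    AttributeLexer lexer(in);
    while (true) {
        try {
            AttributeLexer::Token token = lexer.nextToken();
            const char *name =
                token.type == AttributeLexer::ELEMENT    ? "ELEMENT"    :
                token.type == AttributeLexer::ATTRIBUTE  ? "ATTRIBUTE"  :
                token.type == AttributeLexer::BRACKET    ? "BRACKET"    :
                token.type == AttributeLexer::WHITESPACE ? "WHITESPACE" : "other";
            std::cout << name << ": '" << token.content << "'\n";
        } catch (AttributeLexer::NoMoreTokens&) {
            break;                                  // input exhausted
        } catch (AttributeLexer::NoMatch&) {
            std::cerr << "no rule matches '" << lexer.peek()
                      << "' at byte " << lexer.getByteOffset() << "\n";
            lexer.skip(1);                          // skip one byte and continue, as Highlighter::process() does
        }
    }
}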
@@ -0,0 +1,61 @@
#pragma once
#ifndef LEXER_XMLLexer_H
#define LEXER_XMLLexer_H

#include <exception>
#include <istream>
#include <string>

class XMLLexer {
public:
    class NoMoreTokens : public std::exception {};
    class NoMatch : public std::exception {};

    enum TokenType {
        nonmatching,
        CONTENT,
        TAG,
        ignore,
    };

    struct Token {
        TokenType type;
        std::string content;
    };

    XMLLexer(std::istream& in);
    ~XMLLexer();

    /**
     * Get the next token
     *
     * @throws NoMoreTokens if no more tokens are available
     * @throws NoMatch if no match was found
     */
    Token nextToken();

    /**
     * Skip the following `n` bytes.
     *
     * @param n The number of bytes to skip
     */
    void skip(std::size_t n);

    /**
     * Peek at the current head of the input stream; useful for error reporting when a character fails to match.
     *
     * @throws NoMoreTokens if the input stream is at an end
     */
    char peek();

    /**
     * Get the current byte offset
     */
    std::size_t getByteOffset();

private:
    std::size_t m_offset;
    std::istream& m_input;
};

#endif //LEXER_XMLLexer_H
@@ -0,0 +1,54 @@
#pragma once
#ifndef HIGHLIGHTER_H
#define HIGHLIGHTER_H

#include "XMLLexer.h"

#include <string>
#include <vector>
#include <map>
#include <iostream>

class Highlighter {
public:

    Highlighter(std::istream &file);
    virtual ~Highlighter();

    virtual void highlight(std::ostream &os) = 0;

protected:

    void process();

    enum TokenType {
        TAG,
        CONTENT,
        ELEMENT,
        ATTRIBUTE,
        ATTRIBUTE_CONTENT,
        WHITESPACE,
        BRACKET,
        nonmatching
    };

    enum Color { Red, Green, Blue, Orange, Yellow, Cyan, Grey, Black, White, Magenta, Pink, Brown, Indigo, Violet, Undefined }; // All the colors; not all are used, but having them makes changes easy later

    struct Token {
        std::string content = "";
        Color color;
        TokenType type;
    };
    std::map<TokenType, Color> colormap;
    std::vector<Token> m_tokens;
    XMLLexer *m_lexer;
};

class ConsoleHighlighter : public Highlighter {
public:
    ConsoleHighlighter(std::istream &file);
    void highlight(std::ostream &os);

};

#endif // HIGHLIGHTER_H
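Because highlight() is pure virtual while process(), m_tokens, and colormap are protected, other output formats can reuse the same tokenizing pipeline. A hypothetical HtmlHighlighter sketch, not part of this commit, mirroring the ConsoleHighlighter implementation further down:

#include "highlighter.h"

// Hypothetical subclass: render the same token stream as HTML <span> elements.
class HtmlHighlighter : public Highlighter {
public:
    HtmlHighlighter(std::istream &file) : Highlighter(file) {
        colormap[CONTENT] = White;
        colormap[ELEMENT] = Blue;
        colormap[TAG] = Magenta;
        colormap[ATTRIBUTE] = Yellow;
        colormap[ATTRIBUTE_CONTENT] = Green;
        colormap[BRACKET] = Blue;
        colormap[nonmatching] = Black;
        process();   // tokenize now, exactly like ConsoleHighlighter does
    }

    void highlight(std::ostream &os) {
        // Note: a real implementation would also HTML-escape token.content (<, >, &).
        for (auto &token : m_tokens) {
            switch (token.color) {
                case Yellow:  os << "<span style=\"color:olive\">"  << token.content << "</span>"; break;
                case Green:   os << "<span style=\"color:green\">"  << token.content << "</span>"; break;
                case Blue:    os << "<span style=\"color:blue\">"   << token.content << "</span>"; break;
                case Magenta: os << "<span style=\"color:purple\">" << token.content << "</span>"; break;
                default:      os << token.content; break;
            }
        }
        os << std::endl;
    }
};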
@@ -0,0 +1,114 @@
#include "AttributeLexer.h"

#include <sstream>
#include <iostream>

namespace { // The automaton data
typedef std::size_t State;

State REJECT = 13;

unsigned char TRANS_IDX[256] = { (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)1, (unsigned char)2, (unsigned char)3, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)4, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)0, (unsigned char)2, (unsigned char)0, (unsigned char)2, (unsigned char)5, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)2, (unsigned char)2, (unsigned char)7, (unsigned char)8, (unsigned char)9, (unsigned char)2, (unsigned char)2, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)0, (unsigned char)2, (unsigned char)0, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)6, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)2, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned 
char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, };

State TABLE[14 - 1][10] = {
    { 13, 4, 13, 8, 10, 13, 5, 1, 7, 12, },

    { 13, 2, 13, 13, 13, 2, 3, 13, 13, 13, },

    { 13, 2, 13, 13, 13, 13, 3, 13, 13, 13, },

    { 13, 13, 13, 13, 13, 13, 3, 13, 13, 13, },

    { 13, 4, 13, 8, 10, 13, 13, 13, 7, 13, },

    { 13, 6, 13, 13, 13, 13, 5, 13, 7, 13, },

    { 13, 6, 13, 13, 13, 13, 13, 13, 7, 13, },

    { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, },

    { 13, 8, 8, 9, 8, 8, 8, 13, 8, 13, },

    { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, },

    { 13, 10, 10, 10, 11, 10, 10, 13, 10, 13, },

    { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, },

    { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, },
};

AttributeLexer::TokenType TOKENS[14] = { AttributeLexer::WHITESPACE, AttributeLexer::ELEMENT, AttributeLexer::ELEMENT, AttributeLexer::ELEMENT, AttributeLexer::WHITESPACE, AttributeLexer::nonmatching, AttributeLexer::nonmatching, AttributeLexer::ATTRIBUTE, AttributeLexer::nonmatching, AttributeLexer::ATTRIBUTE_CONTENT_DOUBLE_QUOTES, AttributeLexer::nonmatching, AttributeLexer::ATTRIBUTE_CONTENT_SINGLE_QUOTES, AttributeLexer::BRACKET, AttributeLexer::nonmatching, };
}

AttributeLexer::AttributeLexer(std::istream& in) : m_offset(0), m_input(in) {

}

AttributeLexer::~AttributeLexer() {
}

AttributeLexer::Token AttributeLexer::nextToken() {
    TokenType type = ignore;
    std::string token;

    while (type == ignore) {
        State state = 0;
        std::size_t match_length = 0;
        token = "";

        while (!m_input.eof() && state != REJECT) {
            char c = m_input.peek();
            if (m_input.eof())
                break;

            token += c;

            state = TABLE[state][TRANS_IDX[(unsigned char)c]];
            if (TOKENS[state])
            {
                match_length = token.length();
                type = TOKENS[state];
            }
            m_input.get();
            ++m_offset;
        }

        std::size_t sdiff = token.length() - match_length;
        for (std::size_t i = 0; i < sdiff; i++)
        {
            m_input.putback(token[token.length() - i - 1]);
        }
        m_offset -= sdiff;

        if (!type || !match_length) {
            if (m_input.eof())
                throw NoMoreTokens();
            throw NoMatch();
        }

        token = token.substr(0, match_length);
    }

    Token t;
    t.type = type;
    t.content = token;
    return t;
}

void AttributeLexer::skip(std::size_t n) {
    for (size_t i = 0; i < n; i++) {
        m_input.get();
        ++m_offset;
    }
}

char AttributeLexer::peek() {
    if (m_input.eof())
        throw NoMoreTokens();
    return m_input.peek();
}

std::size_t AttributeLexer::getByteOffset() {
    return m_offset;
}
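Both lexers share the same table-driven, maximal-munch loop: characters are pushed through the automaton until it reaches REJECT, the length of the longest accepting prefix is remembered, and the unmatched tail is put back on the stream. A stripped-down sketch of that idea on a toy digit-run automaton (illustrative only; the names and the automaton are not from this commit):

#include <cstddef>
#include <iostream>
#include <string>

// Toy DFA: state 0 = start, state 1 = accepting (saw at least one digit), state 2 = reject.
std::size_t step(std::size_t state, char c) {
    if (state == 2) return 2;
    return (c >= '0' && c <= '9') ? 1 : 2;
}

// Longest-match scan: returns the matched prefix and leaves `pos` at the first unmatched byte.
std::string scanDigits(const std::string& input, std::size_t& pos) {
    std::size_t state = 0, match_length = 0, i = pos;
    std::string token;
    while (i < input.size() && state != 2) {
        token += input[i];
        state = step(state, input[i]);
        if (state == 1)              // accepting state: remember the longest match so far
            match_length = token.length();
        ++i;
    }
    pos += match_length;             // "put back" everything after the longest match
    return token.substr(0, match_length);
}

int main() {
    std::size_t pos = 0;
    std::cout << scanDigits("123abc", pos) << " (next offset: " << pos << ")\n";  // prints: 123 (next offset: 3)
}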
@@ -0,0 +1,6 @@
add_executable(highlighter
    highlighter.cpp
    main.cpp
    AttributeLexer.cpp
    XMLLexer.cpp
)
@@ -0,0 +1,96 @@
#include "XMLLexer.h"

#include <sstream>
#include <iostream>

namespace { // The automaton data
typedef std::size_t State;

State REJECT = 4;

unsigned char TRANS_IDX[256] = { (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)2, (unsigned char)1, (unsigned char)3, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)0, (unsigned char)1, (unsigned char)0, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned 
char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, };

State TABLE[5 - 1][4] = {
    { 4, 3, 1, 4, },

    { 4, 1, 4, 2, },

    { 4, 4, 4, 4, },

    { 4, 3, 4, 4, },
};

XMLLexer::TokenType TOKENS[5] = { XMLLexer::CONTENT, XMLLexer::nonmatching, XMLLexer::TAG, XMLLexer::CONTENT, XMLLexer::nonmatching, };
}

XMLLexer::XMLLexer(std::istream& in) : m_offset(0), m_input(in) {

}

XMLLexer::~XMLLexer() {
}

XMLLexer::Token XMLLexer::nextToken() {
    TokenType type = ignore;
    std::string token;

    while (type == ignore) {
        State state = 0;
        std::size_t match_length = 0;
        token = "";

        while (!m_input.eof() && state != REJECT) {
            char c = m_input.peek();
            if (m_input.eof())
                break;

            token += c;

            state = TABLE[state][TRANS_IDX[(unsigned char)c]];
            if (TOKENS[state])
            {
                match_length = token.length();
                type = TOKENS[state];
            }
            m_input.get();
            ++m_offset;
        }

        std::size_t sdiff = token.length() - match_length;
        for (std::size_t i = 0; i < sdiff; i++)
        {
            m_input.putback(token[token.length() - i - 1]);
        }
        m_offset -= sdiff;

        if (!type || !match_length) {
            if (m_input.eof())
                throw NoMoreTokens();
            throw NoMatch();
        }

        token = token.substr(0, match_length);
    }

    Token t;
    t.type = type;
    t.content = token;
    return t;
}

void XMLLexer::skip(std::size_t n) {
    for (size_t i = 0; i < n; i++) {
        m_input.get();
        ++m_offset;
    }
}

char XMLLexer::peek() {
    if (m_input.eof())
        throw NoMoreTokens();
    return m_input.peek();
}

std::size_t XMLLexer::getByteOffset() {
    return m_offset;
}
@@ -0,0 +1,141 @@
#include "highlighter.h"
#include "AttributeLexer.h"
#include <iostream>
#include <sstream>

Highlighter::Highlighter(std::istream &file) {
    m_lexer = new XMLLexer(file);
    colormap[CONTENT] = Undefined;
    colormap[ELEMENT] = Undefined;
    colormap[ATTRIBUTE] = Undefined;
    colormap[ATTRIBUTE_CONTENT] = Undefined;
    colormap[BRACKET] = Undefined;
    colormap[nonmatching] = Undefined;
}

Highlighter::~Highlighter() {
    delete m_lexer;
}

void Highlighter::process() {
    while (true) {
        try {
            XMLLexer::Token token = m_lexer->nextToken();
            Token newtoken;
            newtoken.content = token.content;
            switch(token.type) {
                case XMLLexer::TokenType::CONTENT:
                    newtoken.type = CONTENT;
                    break;
                case XMLLexer::TokenType::TAG:
                    newtoken.type = TAG;
                    break;
                default:
                    newtoken.type = nonmatching;
                    break;
            }
            newtoken.color = colormap.find(newtoken.type)->second;
            m_tokens.push_back(newtoken);
        } catch (XMLLexer::NoMoreTokens &err) {
            break;
        } catch (XMLLexer::NoMatch& err) {
            Token newtoken;
            newtoken.content = m_lexer->peek();
            m_lexer->skip(1);
            newtoken.type = nonmatching;
            m_tokens.push_back(newtoken);
        }
    }
    auto tokens = std::move(m_tokens);
    m_tokens.clear();
    for(auto &tagtoken: tokens) {
        if(tagtoken.type == TAG && !tagtoken.content.empty()) {
            std::istringstream content(tagtoken.content);
            AttributeLexer attributelexer(content);
            while (true) {
                try {
                    AttributeLexer::Token token = attributelexer.nextToken();
                    Token newtoken;
                    newtoken.content = token.content;
                    switch(token.type) {
                        case AttributeLexer::TokenType::ELEMENT:
                            newtoken.type = ELEMENT;
                            break;
                        case AttributeLexer::TokenType::BRACKET:
                            newtoken.type = BRACKET;
                            break;
                        case AttributeLexer::TokenType::ATTRIBUTE:
                            newtoken.type = ATTRIBUTE;
                            break;
                        case AttributeLexer::TokenType::ATTRIBUTE_CONTENT_DOUBLE_QUOTES:
                            newtoken.type = ATTRIBUTE_CONTENT;
                            break;
                        case AttributeLexer::TokenType::ATTRIBUTE_CONTENT_SINGLE_QUOTES:
                            newtoken.type = ATTRIBUTE_CONTENT;
                            break;
                        default:
                            newtoken.type = nonmatching;
                            break;
                    }
                    newtoken.color = colormap.find(newtoken.type)->second;
                    m_tokens.push_back(newtoken);
                } catch (AttributeLexer::NoMoreTokens &err) {
                    break;
                } catch (AttributeLexer::NoMatch& err) {
                    Token newtoken;
                    newtoken.content = attributelexer.peek();
                    attributelexer.skip(1);
                    newtoken.type = nonmatching;
                    m_tokens.push_back(newtoken);
                }
            }
        } else {
            m_tokens.push_back(tagtoken);
        }
    }
}

ConsoleHighlighter::ConsoleHighlighter(std::istream &file) : Highlighter(file) {
    colormap[CONTENT] = White;
    colormap[ELEMENT] = Blue;
    colormap[TAG] = Magenta;
    colormap[ATTRIBUTE] = Yellow;
    colormap[ATTRIBUTE_CONTENT] = Green;
    colormap[BRACKET] = Blue;
    colormap[nonmatching] = Black;
    process();
}

void ConsoleHighlighter::highlight(std::ostream &os) {
    for(auto &token: m_tokens) {
        switch(token.color) {
            case Yellow:
                os << "\033[1;33m" << token.content << "\033[0m";
                break;
            case Black:
                os << "\033[1;30m" << token.content << "\033[0m";
                break;
            case Red:
                os << "\033[1;31m" << token.content << "\033[0m";
                break;
            case Green:
                os << "\033[1;32m" << token.content << "\033[0m";
                break;
            case Blue:
                os << "\033[1;34m" << token.content << "\033[0m";
                break;
            case Magenta:
                os << "\033[1;35m" << token.content << "\033[0m";
                break;
            case Cyan:
                os << "\033[1;36m" << token.content << "\033[0m";
                break;
            case White:
                os << "\033[1;37m" << token.content << "\033[0m";
                break;
            default:
                os << token.content;
        }
    }
    os << std::endl;
}
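For reference, the escape sequences written by ConsoleHighlighter::highlight() are standard ANSI SGR codes: "\033[1;Nm" selects bold text in foreground color N (30 through 37) and "\033[0m" resets the terminal to its defaults. A one-line check, independent of this commit:

#include <iostream>

int main() {
    // "\033[1;32m" = bold green, "\033[0m" = reset; the same codes highlight() writes above.
    std::cout << "\033[1;32m" << "green sample" << "\033[0m" << " back to normal\n";
}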
@@ -0,0 +1,15 @@
#include "highlighter.h"

#include <iostream>
#include <fstream>

int main(int argc, char** argv) {
    if(argc == 2) {
        std::ifstream in(argv[1]);
        ConsoleHighlighter highlighter(in);
        highlighter.highlight(std::cout);
    } else {
        std::cout << "Usage: " << argv[0] << " <filename.xml>\n";
    }
    return 0;
}
@@ -0,0 +1,2 @@
TAG = </?[-a-zA-Z0-9 `'!"@#$%^&*()_={}|\;:/?.,`~+]*/?>
CONTENT = [-a-zA-Z0-9 `'!"@#$%^&*()_={}|\;:/?.,`~+]*
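A minimal usage sketch (not part of this commit): run XMLLexer, presumably generated from the two rules above, over a sample line to see the coarse TAG/CONTENT split that Highlighter::process() later refines with AttributeLexer. The input string is purely illustrative.

#include "XMLLexer.h"

#include <iostream>
#include <sstream>

int main() {
    std::istringstream in("<p class=\"intro\">hello</p>");   // illustrative input, not from the commit
    XMLLexer lexer(in);
    try {
        while (true) {
            XMLLexer::Token token = lexer.nextToken();
            const char *name = (token.type == XMLLexer::TAG)     ? "TAG" :
                               (token.type == XMLLexer::CONTENT) ? "CONTENT" : "other";
            std::cout << name << ": '" << token.content << "'\n";
        }
    } catch (XMLLexer::NoMoreTokens&) {
        // done; Highlighter::process() would now re-lex each TAG token with AttributeLexer
    } catch (XMLLexer::NoMatch&) {
        std::cerr << "unmatched input at byte " << lexer.getByteOffset() << "\n";
    }
}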