CMake integration for SyntaxHighlighter

This commit is contained in:
Robin Jadoul 2016-05-28 17:41:20 +02:00
parent 62044a0a1d
commit 7e53c7af48
7 changed files with 16 additions and 338 deletions

View File

@ -1,65 +0,0 @@
#pragma once
#ifndef LEXER_AttributeLexer_H
#define LEXER_AttributeLexer_H
#include <exception>
#include <istream>
#include <string>
/**
 * Lexer that pulls tokens out of a std::istream one at a time.
 *
 * The lexer stores a reference to the stream it is given together with a
 * running byte offset.
 */
class AttributeLexer {
public:
    /** Exception type documented as thrown when no more tokens are available. */
    class NoMoreTokens : public std::exception {};

    /** Exception type documented as thrown when no match was found. */
    class NoMatch : public std::exception {};

    /**
     * Kinds of token this lexer can report.
     *
     * `nonmatching` is the first enumerator and therefore has the value 0.
     */
    enum TokenType {
        nonmatching,
        ATTRIBUTE,
        ATTRIBUTE_CONTENT_DOUBLE_QUOTES,
        ATTRIBUTE_CONTENT_SINGLE_QUOTES,
        BRACKET,
        ELEMENT,
        WHITESPACE,
        ignore,
    };

    /** A single token: its kind plus the text it covers. */
    struct Token {
        TokenType type;
        std::string content;
    };

    /**
     * Build a lexer on top of the given stream.
     *
     * @param in The stream to read from; it is kept by reference
     */
    AttributeLexer(std::istream& in);

    ~AttributeLexer();

    /**
     * Fetch the next token from the input.
     *
     * @throws NoMoreTokens if no more tokens are available
     * @throws NoMatch if no match was found
     */
    Token nextToken();

    /**
     * Move past the next `n` bytes of input.
     *
     * @param n The number of bytes to skip
     */
    void skip(std::size_t n);

    /**
     * Look at the character at the current head of the input stream, which is
     * handy for error reporting, e.g. when a character mismatches.
     *
     * @throws NoMoreTokens if the input stream is at an end
     */
    char peek();

    /**
     * Report the current byte offset.
     */
    std::size_t getByteOffset();

private:
    std::size_t m_offset;
    std::istream& m_input;
};
#endif //LEXER_AttributeLexer_H

View File

@ -1,61 +0,0 @@
#pragma once
#ifndef LEXER_XMLLexer_H
#define LEXER_XMLLexer_H
#include <exception>
#include <istream>
#include <string>
/**
 * Lexer that pulls tokens out of a std::istream one at a time.
 *
 * The lexer stores a reference to the stream it is given together with a
 * running byte offset.
 */
class XMLLexer {
public:
    /** Exception type documented as thrown when no more tokens are available. */
    class NoMoreTokens : public std::exception {};

    /** Exception type documented as thrown when no match was found. */
    class NoMatch : public std::exception {};

    /**
     * Kinds of token this lexer can report.
     *
     * `nonmatching` is the first enumerator and therefore has the value 0.
     */
    enum TokenType {
        nonmatching,
        CONTENT,
        TAG,
        ignore,
    };

    /** A single token: its kind plus the text it covers. */
    struct Token {
        TokenType type;
        std::string content;
    };

    /**
     * Build a lexer on top of the given stream.
     *
     * @param in The stream to read from; it is kept by reference
     */
    XMLLexer(std::istream& in);

    ~XMLLexer();

    /**
     * Fetch the next token from the input.
     *
     * @throws NoMoreTokens if no more tokens are available
     * @throws NoMatch if no match was found
     */
    Token nextToken();

    /**
     * Move past the next `n` bytes of input.
     *
     * @param n The number of bytes to skip
     */
    void skip(std::size_t n);

    /**
     * Look at the character at the current head of the input stream, which is
     * handy for error reporting, e.g. when a character mismatches.
     *
     * @throws NoMoreTokens if the input stream is at an end
     */
    char peek();

    /**
     * Report the current byte offset.
     */
    std::size_t getByteOffset();

private:
    std::size_t m_offset;
    std::istream& m_input;
};
#endif //LEXER_XMLLexer_H

View File

@ -1,114 +0,0 @@
#include "AttributeLexer.h"
#include <sstream>
#include <iostream>
namespace { // The automaton data
    typedef std::size_t State;

    // State in which the automaton stops; TABLE has no row for it.
    State REJECT = 13;

    // Maps every possible input byte to a column of TABLE.
    // Columns: 1 = ' ', 2 = '"', 3 = '\'', 4 = '/', 5 = [0-9A-Za-z],
    //          6 = '<', 7 = '=', 8 = '>', 0 = every other byte.
    unsigned char TRANS_IDX[256] = {
        /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x20 */ 1, 0, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
        /* 0x30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 6, 7, 8, 0,
        /* 0x40 */ 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        /* 0x50 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0,
        /* 0x60 */ 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        /* 0x70 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0,
        /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    // Transition table: TABLE[state][column] is the follow-up state.
    // One row per state except REJECT, one column per TRANS_IDX value.
    State TABLE[13][9] = {
        /*  0 */ { 13,  4,  8, 10, 12,  5,  1,  7, 12 },
        /*  1 */ { 13,  2, 13, 13,  2,  3, 13, 13, 13 },
        /*  2 */ { 13,  2, 13, 13, 13,  3, 13, 13, 13 },
        /*  3 */ { 13, 13, 13, 13, 13,  3, 13, 13, 13 },
        /*  4 */ { 13,  4,  8, 10, 13, 13, 13,  7, 13 },
        /*  5 */ { 13,  6, 13, 13, 13,  5, 13,  7, 13 },
        /*  6 */ { 13,  6, 13, 13, 13, 13, 13,  7, 13 },
        /*  7 */ { 13, 13, 13, 13, 13, 13, 13, 13, 13 },
        /*  8 */ {  8,  8,  9,  8,  8,  8, 13,  8, 13 },
        /*  9 */ { 13, 13, 13, 13, 13, 13, 13, 13, 13 },
        /* 10 */ { 10, 10, 10, 11, 10, 10, 13, 10, 13 },
        /* 11 */ { 13, 13, 13, 13, 13, 13, 13, 13, 13 },
        /* 12 */ { 13, 13, 13, 13, 13, 13, 13, 13, 13 }
    };

    // Token type recognised in each state; nonmatching marks a non-accepting state.
    AttributeLexer::TokenType TOKENS[14] = {
        /*  0 */ AttributeLexer::WHITESPACE,
        /*  1 */ AttributeLexer::ELEMENT,
        /*  2 */ AttributeLexer::ELEMENT,
        /*  3 */ AttributeLexer::ELEMENT,
        /*  4 */ AttributeLexer::WHITESPACE,
        /*  5 */ AttributeLexer::nonmatching,
        /*  6 */ AttributeLexer::nonmatching,
        /*  7 */ AttributeLexer::ATTRIBUTE,
        /*  8 */ AttributeLexer::nonmatching,
        /*  9 */ AttributeLexer::ATTRIBUTE_CONTENT_DOUBLE_QUOTES,
        /* 10 */ AttributeLexer::nonmatching,
        /* 11 */ AttributeLexer::ATTRIBUTE_CONTENT_SINGLE_QUOTES,
        /* 12 */ AttributeLexer::BRACKET,
        /* 13 */ AttributeLexer::nonmatching
    };
}
/**
 * Set up a lexer that reads from `in`, with the byte offset starting at 0.
 */
AttributeLexer::AttributeLexer(std::istream& in)
    : m_offset(0)
    , m_input(in)
{}
/** Empty destructor: performs no cleanup of its own. */
AttributeLexer::~AttributeLexer() {}
/**
 * Read the next token from the input.
 *
 * The automaton is run from state 0 until it reaches REJECT or the stream is
 * exhausted, remembering the longest prefix that ended in a state with a
 * token type. Characters read beyond that prefix are put back on the stream.
 * Tokens of type `ignore` are dropped and scanning starts over.
 *
 * @throws NoMoreTokens if nothing matched and the stream reports end of file
 * @throws NoMatch if nothing matched otherwise
 */
AttributeLexer::Token AttributeLexer::nextToken() {
    TokenType matched = ignore;
    std::string lexeme;

    do {
        State current = 0;
        std::size_t accepted = 0; // length of the longest matching prefix so far
        lexeme.clear();

        while (current != REJECT && !m_input.eof()) {
            const char next = m_input.peek();
            if (m_input.eof())
                break;

            lexeme += next;
            current = TABLE[current][TRANS_IDX[static_cast<unsigned char>(next)]];

            const TokenType here = TOKENS[current];
            if (here != nonmatching) {
                matched = here;
                accepted = lexeme.length();
            }

            m_input.get();
            ++m_offset;
        }

        // Return everything read past the last match to the stream, last character first.
        const std::size_t overshoot = lexeme.length() - accepted;
        for (std::size_t pos = lexeme.length(); pos > accepted; --pos)
            m_input.putback(lexeme[pos - 1]);
        m_offset -= overshoot;

        if (matched == nonmatching || accepted == 0) {
            if (m_input.eof())
                throw NoMoreTokens();
            throw NoMatch();
        }

        lexeme.resize(accepted);
    } while (matched == ignore);

    Token result;
    result.content = lexeme;
    result.type = matched;
    return result;
}
/**
 * Skip the following `n` bytes.
 *
 * Stops as soon as the stream cannot deliver another byte, so the byte offset
 * only ever counts bytes that were really consumed.
 *
 * @param n The number of bytes to skip
 */
void AttributeLexer::skip(std::size_t n) {
    for (std::size_t skipped = 0; skipped < n; ++skipped) {
        // get() yields the EOF sentinel when no byte could be extracted.
        if (m_input.get() == std::istream::traits_type::eof())
            break;
        ++m_offset;
    }
}
/**
 * Peek at the current head of the input stream without consuming it.
 *
 * @throws NoMoreTokens if the stream has no character left to deliver
 */
char AttributeLexer::peek() {
    // eof() only becomes true after a read has already run into the end, so
    // the flag alone misses the "positioned at the end" case. Inspect what
    // peek() returns instead; otherwise the EOF sentinel would be truncated to
    // a char and handed back as if it were data.
    const std::istream::int_type next = m_input.peek();
    if (next == std::istream::traits_type::eof())
        throw NoMoreTokens();
    return std::istream::traits_type::to_char_type(next);
}
/**
 * Report the current byte offset, i.e. the value of the counter that
 * nextToken() and skip() keep up to date.
 */
std::size_t AttributeLexer::getByteOffset()
{
    return m_offset;
}

View File

@ -1,6 +1,20 @@
# Locate the Lexesis executable; besides the default search locations, also
# look in build/bin three directory levels above this one.
# PATHS is the find_program keyword for extra search directories.
find_program(LEXESIS_EXE Lexesis PATHS "${CMAKE_CURRENT_SOURCE_DIR}/../../../build/bin")
# Run Lexesis on xmllexer.lxs to produce XMLLexer.h / XMLLexer.cpp in the
# binary directory. Uses the OUTPUT signature (the SOURCE signature is
# rejected under policy CMP0050); DEPENDS re-runs the command whenever the
# .lxs file changes.
add_custom_command(
    OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/XMLLexer.h" "${CMAKE_CURRENT_BINARY_DIR}/XMLLexer.cpp"
    COMMAND ${LEXESIS_EXE} -d "${CMAKE_CURRENT_BINARY_DIR}" -n XMLLexer "${CMAKE_CURRENT_SOURCE_DIR}/xmllexer.lxs"
    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/xmllexer.lxs"
)
# Run Lexesis on attributelexer.lxs to produce AttributeLexer.h /
# AttributeLexer.cpp in the binary directory. Uses the OUTPUT signature (the
# SOURCE signature is rejected under policy CMP0050); DEPENDS re-runs the
# command whenever the .lxs file changes.
add_custom_command(
    OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/AttributeLexer.h" "${CMAKE_CURRENT_BINARY_DIR}/AttributeLexer.cpp"
    COMMAND ${LEXESIS_EXE} -d "${CMAKE_CURRENT_BINARY_DIR}" -n AttributeLexer "${CMAKE_CURRENT_SOURCE_DIR}/attributelexer.lxs"
    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/attributelexer.lxs"
)
# Add the binary directory to the include path; the custom commands above
# write the generated lexer headers there.
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
# The highlighter executable.
# NOTE(review): AttributeLexer.cpp and XMLLexer.cpp appear both relative to the
# source directory and under CMAKE_CURRENT_BINARY_DIR. This text comes from a
# diff view without +/- markers, so the source-directory entries are probably
# the removed lines - confirm before building from this list.
add_executable(highlighter
highlighter.cpp
main.cpp
AttributeLexer.cpp
XMLLexer.cpp
"${CMAKE_CURRENT_BINARY_DIR}/AttributeLexer.cpp"
"${CMAKE_CURRENT_BINARY_DIR}/XMLLexer.cpp"
)

View File

@ -1,96 +0,0 @@
#include "XMLLexer.h"
#include <sstream>
#include <iostream>
namespace { // The automaton data
    typedef std::size_t State;

    // State in which the automaton stops; TABLE has no row for it.
    State REJECT = 4;

    // Maps every possible input byte to a column of TABLE.
    // Columns: 1 = '<', 2 = '>', 0 = every other byte.
    unsigned char TRANS_IDX[256] = {
        /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0,
        /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    // Transition table: TABLE[state][column] is the follow-up state.
    // One row per state except REJECT, one column per TRANS_IDX value.
    State TABLE[4][3] = {
        /* 0 */ { 3, 1, 4 },
        /* 1 */ { 1, 4, 2 },
        /* 2 */ { 4, 4, 4 },
        /* 3 */ { 3, 4, 4 }
    };

    // Token type recognised in each state; nonmatching marks a non-accepting state.
    XMLLexer::TokenType TOKENS[5] = {
        /* 0 */ XMLLexer::CONTENT,
        /* 1 */ XMLLexer::nonmatching,
        /* 2 */ XMLLexer::TAG,
        /* 3 */ XMLLexer::CONTENT,
        /* 4 */ XMLLexer::nonmatching
    };
}
/**
 * Set up a lexer that reads from `in`, with the byte offset starting at 0.
 */
XMLLexer::XMLLexer(std::istream& in)
    : m_offset(0)
    , m_input(in)
{}
/** Empty destructor: performs no cleanup of its own. */
XMLLexer::~XMLLexer() {}
/**
 * Read the next token from the input.
 *
 * The automaton is run from state 0 until it reaches REJECT or the stream is
 * exhausted, remembering the longest prefix that ended in a state with a
 * token type. Characters read beyond that prefix are put back on the stream.
 * Tokens of type `ignore` are dropped and scanning starts over.
 *
 * @throws NoMoreTokens if nothing matched and the stream reports end of file
 * @throws NoMatch if nothing matched otherwise
 */
XMLLexer::Token XMLLexer::nextToken() {
    TokenType matched = ignore;
    std::string lexeme;

    do {
        State current = 0;
        std::size_t accepted = 0; // length of the longest matching prefix so far
        lexeme.clear();

        while (current != REJECT && !m_input.eof()) {
            const char next = m_input.peek();
            if (m_input.eof())
                break;

            lexeme += next;
            current = TABLE[current][TRANS_IDX[static_cast<unsigned char>(next)]];

            const TokenType here = TOKENS[current];
            if (here != nonmatching) {
                matched = here;
                accepted = lexeme.length();
            }

            m_input.get();
            ++m_offset;
        }

        // Return everything read past the last match to the stream, last character first.
        const std::size_t overshoot = lexeme.length() - accepted;
        for (std::size_t pos = lexeme.length(); pos > accepted; --pos)
            m_input.putback(lexeme[pos - 1]);
        m_offset -= overshoot;

        if (matched == nonmatching || accepted == 0) {
            if (m_input.eof())
                throw NoMoreTokens();
            throw NoMatch();
        }

        lexeme.resize(accepted);
    } while (matched == ignore);

    Token result;
    result.content = lexeme;
    result.type = matched;
    return result;
}
/**
 * Skip the following `n` bytes.
 *
 * Stops as soon as the stream cannot deliver another byte, so the byte offset
 * only ever counts bytes that were really consumed.
 *
 * @param n The number of bytes to skip
 */
void XMLLexer::skip(std::size_t n) {
    for (std::size_t skipped = 0; skipped < n; ++skipped) {
        // get() yields the EOF sentinel when no byte could be extracted.
        if (m_input.get() == std::istream::traits_type::eof())
            break;
        ++m_offset;
    }
}
/**
 * Peek at the current head of the input stream without consuming it.
 *
 * @throws NoMoreTokens if the stream has no character left to deliver
 */
char XMLLexer::peek() {
    // eof() only becomes true after a read has already run into the end, so
    // the flag alone misses the "positioned at the end" case. Inspect what
    // peek() returns instead; otherwise the EOF sentinel would be truncated to
    // a char and handed back as if it were data.
    const std::istream::int_type next = m_input.peek();
    if (next == std::istream::traits_type::eof())
        throw NoMoreTokens();
    return std::istream::traits_type::to_char_type(next);
}
/**
 * Report the current byte offset, i.e. the value of the counter that
 * nextToken() and skip() keep up to date.
 */
std::size_t XMLLexer::getByteOffset()
{
    return m_offset;
}