Finish regex parsing
This commit is contained in:
		
							parent
							
								
									90ff409281
								
							
						
					
					
						commit
						e476591495
					
				
							
								
								
									
										121
									
								
								src/re.cpp
								
								
								
								
							
							
						
						
									
										121
									
								
								src/re.cpp
								
								
								
								
							| 
						 | 
				
			
			@ -119,7 +119,7 @@ namespace lxs {
 | 
			
		|||
            stk.push(tp);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        std::shared_ptr<RE> parseEscapeChar(char c) {
 | 
			
		||||
        char parseEscapeChar(char c) {
 | 
			
		||||
            switch (c)
 | 
			
		||||
            {
 | 
			
		||||
                case '\\':
 | 
			
		||||
| 
						 | 
				
			
			@ -154,7 +154,113 @@ namespace lxs {
 | 
			
		|||
                default:
 | 
			
		||||
                    throw SyntaxError(("Invalid escape sequence: \\" + std::string(1, c)).c_str());
 | 
			
		||||
            }
 | 
			
		||||
            return std::make_shared<SingleRE>(c);
 | 
			
		||||
            return c;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        void sumREs(std::vector<std::shared_ptr<RE> >& res)
 | 
			
		||||
        {
 | 
			
		||||
            for (std::size_t step = 2; step < res.size(); step <<= 1)
 | 
			
		||||
            {
 | 
			
		||||
                for (std::size_t i = 0; i < res.size(); i += step)
 | 
			
		||||
                {
 | 
			
		||||
                    if (i + step / 2 < res.size())
 | 
			
		||||
                        res[i] = std::make_shared<PlusRE>(res[i], res[i + step / 2]);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        /**
         * Parses the body of a bracketed character class such as "[^a-z_]".
         *
         * Grammar handled here:
         *  - a leading '^' inverts the class;
         *  - a ']' directly after the opening (or after '^') is a literal ']';
         *  - a '-' directly after that position is a literal '-';
         *  - "x-y" adds every character from x to y inclusive (compared as
         *    unsigned char); y may be written as an escape sequence;
         *  - a '-' right before the closing ']', or a '-' with no lower bound
         *    (for instance straight after a completed range), is a literal '-';
         *  - '\' introduces an escape that is decoded by parseEscapeChar.
         *
         * @param input the whole pattern being parsed.
         * @param idx   on entry, index of the first character after '[';
         *              on success, index of the character after the closing ']'.
         * @return the expression sumREs leaves in slot 0 after combining one
         *         SingleRE per selected character.
         * @throws SyntaxError if the class is not closed, a range is reversed,
         *         or the class selects no character at all.
         */
        std::shared_ptr<RE> parseCharacterClass(string& input, size_t& idx) {
            if (idx >= input.size())
                throw SyntaxError("Unclosed character class");

            std::set<char> used_chars;
            bool invert = false;

            // Most recent single character read in the main loop; it is the
            // candidate lower bound if a '-' follows.
            char prev = 0;
            bool has_prev = false;

            if (input[idx] == '^')
            {
                invert = true;
                idx++;
            }

            if (idx >= input.size())
                throw SyntaxError("Unclosed character class");

            // A ']' in first position cannot close the class, so it is a literal.
            if (input[idx] == ']')
            {
                used_chars.insert(']');
                idx++;
            }

            if (idx >= input.size())
                throw SyntaxError("Unclosed character class");

            // Likewise a '-' here has no lower bound and is a literal.
            if (input[idx] == '-')
            {
                used_chars.insert('-');
                idx++;
            }

            if (idx >= input.size())
                throw SyntaxError("Unclosed character class");

            while (idx < input.size() && input[idx] != ']')
            {
                const char cur = input[idx];

                if (cur == '-')
                {
                    idx++;

                    if (idx >= input.size())
                        throw SyntaxError("Unclosed character class");

                    if (input[idx] == ']')
                    {
                        // Trailing '-': literal. Stay on the ']' so the code
                        // after the loop can consume it.
                        used_chars.insert('-');
                        break;
                    }

                    if (!has_prev)
                    {
                        // No lower bound available: literal '-'. input[idx] is
                        // handled by the next iteration as an ordinary element.
                        used_chars.insert('-');
                        continue;
                    }

                    char upper = input[idx];
                    if (upper == '\\')
                    {
                        idx++;
                        if (idx >= input.size())
                            throw SyntaxError("Unclosed character class");
                        upper = parseEscapeChar(input[idx]);
                    }

                    // Compare as unsigned so bytes above 0x7f order correctly
                    // regardless of whether plain char is signed.
                    const int lo = static_cast<unsigned char>(prev);
                    const int hi = static_cast<unsigned char>(upper);
                    if (hi < lo)
                        throw SyntaxError("Invalid range in character class");

                    for (int code = lo; code <= hi; code++)
                        used_chars.insert(static_cast<char>(code));

                    // The upper bound of a range cannot start another range.
                    has_prev = false;
                    idx++;
                }
                else if (cur == '\\')
                {
                    idx++;
                    if (idx >= input.size())
                        throw SyntaxError("Unclosed character class");

                    prev = parseEscapeChar(input[idx]);
                    has_prev = true;
                    used_chars.insert(prev);
                    idx++;
                }
                else
                {
                    prev = cur;
                    has_prev = true;
                    used_chars.insert(cur);
                    idx++;
                }
            }

            if (idx >= input.size())
                throw SyntaxError("Unclosed character class");
            idx++; //Eat the ]

            // Materialise the class: one SingleRE per selected byte value.
            std::vector<std::shared_ptr<RE> > chars;
            for (int code = 0; code < 256; code++)
            {
                const char c = static_cast<char>(code);
                const bool listed = used_chars.count(c) > 0;
                if (invert != listed)
                    chars.push_back(std::make_shared<SingleRE>(c));
            }

            // An inverted class that lists every byte selects nothing; there is
            // no chars[0] to return in that case.
            if (chars.empty())
                throw SyntaxError("Character class matches no character");

            sumREs(chars);

            return chars[0];
        }
 | 
			
		||||
 | 
			
		||||
        std::shared_ptr<RE> dotChar() {
 | 
			
		||||
            std::vector<std::shared_ptr<RE> > any;
 | 
			
		||||
            for (int i = 0; i < 256; i++)
 | 
			
		||||
                if ((char) i != '\n') //Dot matches anything except newlines
 | 
			
		||||
                    any.push_back(std::make_shared<SingleRE>((char) i));
 | 
			
		||||
            sumREs(any);
 | 
			
		||||
            return any[0];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        std::shared_ptr<RE> parseRE(string& input, size_t& idx)
 | 
			
		||||
| 
						 | 
				
			
			@ -170,16 +276,19 @@ namespace lxs {
 | 
			
		|||
                        if (idx >= input.length())
 | 
			
		||||
                            throw SyntaxError("Escape sequence at the end of the string");
 | 
			
		||||
                        else
 | 
			
		||||
                            throw SyntaxError(("invalid escape sequence: \\" + string(1, input[idx])).c_str());
 | 
			
		||||
                        stk.push(parseEscapeChar(input[idx++]));
 | 
			
		||||
                            stk.push(std::make_shared<SingleRE>(parseEscapeChar(input[idx])));
 | 
			
		||||
                        break;
 | 
			
		||||
 | 
			
		||||
                    case '[':
 | 
			
		||||
                        //TODO: parse character classes
 | 
			
		||||
                        stk.push(parseCharacterClass(input, ++idx));
 | 
			
		||||
                        break;
 | 
			
		||||
 | 
			
		||||
                    case '.':
 | 
			
		||||
                        //TODO: any character
 | 
			
		||||
                        for (int c = 0; c <= 256; c++)
 | 
			
		||||
                        {
 | 
			
		||||
                            stk.push(dotChar());
 | 
			
		||||
                        }
 | 
			
		||||
                        break;
 | 
			
		||||
 | 
			
		||||
                    case ']':
 | 
			
		||||
                        throw SyntaxError("Unopened ']'");
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue