-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtokenizer.cpp
More file actions
111 lines (95 loc) · 3.28 KB
/
tokenizer.cpp
File metadata and controls
111 lines (95 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#include "tokenizer.h"
#include <cctype>
#include <stdexcept>
// Names that are recognised as built-in functions. isFunction() scans this
// list to decide whether a word matches one of them.
static const std::vector<std::string> FUNCTIONS{
    "sqrt",
    "sin", "cos", "tan",
    "log", "log2", "ln",
    "abs", "floor", "ceil",
};
// Reports whether `word` exactly matches one of the entries in FUNCTIONS.
// The comparison is case-sensitive (plain std::string equality).
static bool isFunction(const std::string& word) {
    bool known = false;
    // Stop scanning as soon as a match has been found.
    for (auto it = FUNCTIONS.begin(); it != FUNCTIONS.end() && !known; ++it) {
        known = (*it == word);
    }
    return known;
}
std::vector<Token> Tokenizer::tokenize(const std::string& expr) {
input = expr;
pos = 0;
std::vector<Token> tokens;
while (!isAtEnd()) {
skipWhitespace();
if (isAtEnd()) break;
char c = peek();
if (std::isdigit(c) || (c == '.' && std::isdigit(input[pos + 1]))) {
tokens.push_back(readNumber());
}
else if (c == '-' && (tokens.empty() ||
tokens.back().type == TokenType::OPERATOR ||
tokens.back().type == TokenType::LEFT_PAREN)) {
advance();
if (!isAtEnd() && (std::isdigit(peek()) || peek() == '.')) {
Token t = readNumber();
t.number = -t.number;
t.value = "-" + t.value;
tokens.push_back(t);
} else {
tokens.push_back(Token(TokenType::OPERATOR, "u-"));
}
}
else if (c == '+' || c == '-' || c == '*' ||
c == '/' || c == '^' || c == '%') {
tokens.push_back(Token(TokenType::OPERATOR, std::string(1, c)));
advance();
}
else if (c == '(') {
tokens.push_back(Token(TokenType::LEFT_PAREN, "("));
advance();
}
else if (c == ')') {
tokens.push_back(Token(TokenType::RIGHT_PAREN, ")"));
advance();
}
else if (c == ',') {
tokens.push_back(Token(TokenType::COMMA, ","));
advance();
}
else if (std::isalpha(c) || c == '_') {
tokens.push_back(readWord());
}
else if (c == '=') {
tokens.push_back(Token(TokenType::OPERATOR, "="));
advance();
}
else {
throw std::runtime_error(std::string("unexpected character: '") + c + "'");
}
}
tokens.push_back(Token(TokenType::END, ""));
return tokens;
}
/// Advances `pos` past any run of whitespace characters (as classified by
/// std::isspace) starting at the current position. Does nothing at end of input.
void Tokenizer::skipWhitespace() {
    // std::isspace is undefined for negative char values, so the character is
    // converted to unsigned char before classification.
    while (!isAtEnd() && std::isspace(static_cast<unsigned char>(input[pos])))
        ++pos;
}
/// Reads an unsigned decimal literal starting at the current position.
///
/// Consumes digits and at most one '.'; scanning stops at the first character
/// that cannot extend the literal (including a second '.').
///
/// @return A NUMBER token carrying the consumed text and its value as parsed
///         by std::stod.
/// @throws std::runtime_error if the consumed text is not a valid number
///         (for example a lone ".") or std::stod reports it as out of range.
Token Tokenizer::readNumber() {
    const size_t start = pos;
    bool hasDot = false;

    while (!isAtEnd()) {
        const char ch = peek();
        if (ch == '.') {
            if (hasDot) break;  // a second '.' ends the literal
            hasDot = true;
        } else if (!std::isdigit(static_cast<unsigned char>(ch))) {
            // Cast first: std::isdigit is undefined for negative char values.
            break;
        }
        advance();
    }

    const std::string raw = input.substr(start, pos - start);

    // std::stod signals failure with std::invalid_argument / std::out_of_range
    // (both std::logic_error). Report them as std::runtime_error, the same
    // exception type tokenize() uses for malformed input.
    double value = 0.0;
    try {
        value = std::stod(raw);
    } catch (const std::logic_error&) {
        throw std::runtime_error("invalid number: '" + raw + "'");
    }
    return Token(TokenType::NUMBER, raw, value);
}
/// Reads an identifier-like word (letters, digits and '_') starting at the
/// current position and classifies it.
///
/// @return A NUMBER token for the constants "pi" and "e", a FUNCTION token if
///         isFunction() accepts the word, otherwise a VARIABLE token.
Token Tokenizer::readWord() {
    const size_t start = pos;

    while (!isAtEnd()) {
        const char ch = peek();
        // Cast first: std::isalnum is undefined for negative char values.
        if (ch != '_' && !std::isalnum(static_cast<unsigned char>(ch))) break;
        advance();
    }

    const std::string word = input.substr(start, pos - start);

    // Constants are written with 16 significant digits so they round to the
    // nearest representable double.
    if (word == "pi") return Token(TokenType::NUMBER, "pi", 3.141592653589793);
    if (word == "e") return Token(TokenType::NUMBER, "e", 2.718281828459045);
    if (isFunction(word)) return Token(TokenType::FUNCTION, word);
    return Token(TokenType::VARIABLE, word);
}
/// Returns the character of `input` at index `pos` without moving `pos`.
char Tokenizer::peek() const {
    const char current = input[pos];
    return current;
}
/// Returns the character of `input` at index `pos`, then moves `pos` forward
/// by one.
char Tokenizer::advance() {
    const char consumed = input[pos];
    ++pos;
    return consumed;
}
/// Reports whether `pos` has reached or passed the length of `input`.
bool Tokenizer::isAtEnd() const {
    return !(pos < input.size());
}