22 #include "Tokenizer.h"
25 const std::string Tokenizer::SPEC_CHARS
26 (
"!\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~");
36 const Uchar u = str.
charAt(i);
40 }
else if (u ==
'#') {
42 while (str.
charAt(i) !=
'\n') {
45 }
else if (u ==
'"') {
47 }
else if (u >=
'0' && u <=
'9') {
49 }
else if (u < 0x80 && SPEC_CHARS.find((
char) u)
50 != std::string::npos) {
51 parseOperator(str, i);
72 void Tokenizer::add(
Token *tok) {
81 void Tokenizer::parseBlockComment(
Ustring &s,
int &i) {
98 void Tokenizer::parseId(
Ustring &s,
int &i) {
105 if (u <=
' ' || Tokenizer::SPEC_CHARS.find((
char) u)
106 != std::string::npos) {
112 t =
new Token(Token::ID, ustr);
121 void Tokenizer::parseNumber(
Ustring &s,
int &i) {
146 }
else if (u0 < '0' || u0 >
'9') {
155 if (u0 ==
'E' || u0 ==
'e') {
160 }
else if (u0 ==
'+') {
165 if (u0 < '0' || u0 >
'9') {
170 exponent -= u0 -
'0';
172 exponent += u0 -
'0';
176 exponent -= decimals;
177 if (expneg ==
false) {
178 for (
int i = 0; i < exponent; ++i) {
182 for (
int i = 0; i > exponent; --i) {
187 t =
new Token(value);
196 void Tokenizer::parseOperator(
Ustring &s,
int &i) {
208 }
else if (u1 >=
'0' && u1 <=
'9') {
216 parseBlockComment(s, i);
240 t =
new Token(Token::OPERATOR, ustr);
249 void Tokenizer::parseString(
Ustring &s,
int& i) {
259 }
else if (u ==
'"') {
262 }
else if (u ==
'\\') {
286 for (
int j = 0;; j++) {
288 if (u >=
'0' && u <=
'9') {
290 }
else if (u >=
'A' && u <=
'F') {
292 }
else if (u >=
'a' && u <=
'f') {
294 }
else if (u ==
';') {
301 if (e > 0x0010FFFFu) {
311 for (
int j = 0; j < 2; j++) {
313 if (u >=
'0' && u <=
'9') {
315 }
else if (u >=
'A' && u <=
'F') {
317 }
else if (u >=
'a' && u <=
'f') {
336 t =
new Token(Token::STRING, ustr);
347 for (TokenList::iterator it = tokenlist.
begin();
348 it != tokenlist.
end(); ++it) {