001package jmri.jmrit.logixng.util.parser; 002 003import java.util.ArrayList; 004import java.util.List; 005import java.util.concurrent.atomic.AtomicInteger; 006 007/** 008 * Parses and calculates an expression, for example "sin(2*pi*x)/3" 009 * 010 * @author Daniel Bergqvist 2019 011 */ 012public class Tokenizer { 013 014 // This class should never be instanciated. 015 private Tokenizer() { 016 } 017 018 private static void addToken(Token currentToken, List<Token> tokens) { 019 if ((currentToken._tokenType == TokenType.FLOATING_NUMBER) && isIntegerNumber(currentToken._string)) { 020 currentToken._tokenType = TokenType.INTEGER_NUMBER; 021 } 022 023 tokens.add(currentToken); 024 } 025 026 public static List<Token> getTokens(String expression) throws InvalidSyntaxException { 027 028 List<Token> tokens = new ArrayList<>(); 029 Token currentToken = new Token(); 030 031// System.out.format("%n%n%n"); 032// System.out.format("getTokens(): %s%n", expression); 033 034 AtomicInteger eatNextChar = new AtomicInteger(0); 035 036 char ch = ' '; 037 char lastChar; 038 039 for (int i=0; i < expression.length(); i++) { 040 lastChar = ch; 041 ch = expression.charAt(i); 042 char nextChar = ' '; // An extra space at the end of the _string doesn't matter 043 if (i+1 < expression.length()) { 044 nextChar = expression.charAt(i+1); 045 } 046 047// System.out.format("index %d: %s, %s, %c, %c%n", i, currentToken._tokenType.name(), currentToken._string, ch, nextChar); 048 049 050 051 // Check for token type STRING 052 if (ch == '\"') { 053 if (Character.isLetterOrDigit(lastChar)) { 054 throw new InvalidSyntaxException(Bundle.getMessage("InvalidSyntaxAtIndex", i)); 055 } 056 057 if (currentToken._tokenType == TokenType.SPACE) { 058 currentToken = new Token(); 059 } else if (currentToken._tokenType != TokenType.NONE) { 060// System.out.format("Add: index %d: %s, %s, %c, %c%n", i, currentToken._tokenType.name(), currentToken._string, ch, nextChar); 061 addToken(currentToken, tokens); 062 currentToken = new Token(); 063 } 064 currentToken._tokenType = TokenType.STRING; 065 066 boolean done = false; 067 while (!done) { 068 i++; 069 if (i >= expression.length()) { 070 throw new InvalidSyntaxException(Bundle.getMessage("UnexpectedEndOfString")); 071 } 072 ch = expression.charAt(i); 073 nextChar = ' '; // An extra space at the end of the _string doesn't matter 074 if (i+1 < expression.length()) { 075 nextChar = expression.charAt(i+1); 076 } 077 // Handle escaped characters 078 if ((ch == '\\') && ((nextChar == '\\') || (nextChar == '"'))) { 079 080 currentToken._string += nextChar; 081 i++; 082 } else if (ch != '\"') { 083 currentToken._string += ch; 084 } 085 086 done = (ch == '\"'); 087 } 088 089 if (Character.isLetterOrDigit(nextChar)) { 090 throw new InvalidSyntaxException(Bundle.getMessage("InvalidSyntaxAtIndex", i)); 091 } 092 093// System.out.format("Add: index %d: %s, %s, %c, %c%n", i, currentToken._tokenType.name(), currentToken._string, ch, nextChar); 094 addToken(currentToken, tokens); 095 currentToken = new Token(); 096 097 // Continue for loop 098 continue; 099 } 100 101 102 char nextNextChar = ' '; // An extra space at the end of the _string doesn't matter 103 char nextNextNextChar = ' '; // An extra space at the end of the _string doesn't matter 104 if (i+2 < expression.length()) { 105 nextNextChar = expression.charAt(i+2); 106 } 107 if (i+3 < expression.length()) { 108 nextNextNextChar = expression.charAt(i+3); 109 } 110 111 TokenType nextToken = getTokenType(currentToken, ch, nextChar, nextNextChar, nextNextNextChar, eatNextChar); 112// System.out.format("index %d: %s, %c%n", i, nextToken.name(), ch); 113 114 if (nextToken == TokenType.SAME_AS_LAST) { 115 currentToken._string += ch; 116 continue; 117 } 118 119 switch (nextToken) { 120 case ERROR: 121 throw new InvalidSyntaxException(Bundle.getMessage("InvalidSyntaxAtIndex", i)); 122 123 case ASSIGN: 124 case ASSIGN_ADD: 125 case ASSIGN_SUBTRACKT: 126 case ASSIGN_MULTIPLY: 127 case ASSIGN_DIVIDE: 128 case ASSIGN_MODULO: 129 case ASSIGN_AND: 130 case ASSIGN_OR: 131 case ASSIGN_XOR: 132 case ASSIGN_SHIFT_LEFT: 133 case ASSIGN_SHIFT_RIGHT: 134 case ASSIGN_UNSIGNED_SHIFT_RIGHT: 135 case TERNARY_QUESTION_MARK: 136 case TERNARY_COLON: 137 case LEFT_PARENTHESIS: 138 case RIGHT_PARENTHESIS: 139 case LEFT_SQUARE_BRACKET: 140 case RIGHT_SQUARE_BRACKET: 141 case LEFT_CURLY_BRACKET: 142 case RIGHT_CURLY_BRACKET: 143 case DOT: 144 case DOT_DOT: 145 case COMMA: 146 case EQUAL: 147 case NOT_EQUAL: 148 case LESS_THAN: 149 case LESS_OR_EQUAL: 150 case GREATER_THAN: 151 case GREATER_OR_EQUAL: 152 case ADD: 153 case SUBTRACKT: 154 case MULTIPLY: 155 case DIVIDE: 156 case MODULO: 157 case SHIFT_LEFT: 158 case SHIFT_RIGHT: 159 case UNSIGNED_SHIFT_RIGHT: 160 case BOOLEAN_AND: 161 case BOOLEAN_OR: 162 case BOOLEAN_XOR: 163 case BOOLEAN_NOT: 164 case BINARY_AND: 165 case BINARY_OR: 166 case BINARY_XOR: 167 case BINARY_NOT: 168 case INCREMENT: 169 case DECREMENT: 170 case IDENTIFIER: 171 case SPACE: 172 case NONE: 173 if ((currentToken._tokenType != TokenType.NONE) && (currentToken._tokenType != TokenType.SPACE)) { 174 addToken(currentToken, tokens); 175 currentToken = new Token(); 176 } 177 currentToken._tokenType = nextToken; 178 break; 179 180 case FLOATING_NUMBER: 181 if ((currentToken._tokenType == TokenType.FLOATING_NUMBER) && !currentToken._string.isEmpty() && !isFloatingNumber(currentToken._string)) { 182// System.out.format("Not a number: '%s'%n", currentToken._string); 183 throw new InvalidSyntaxException(Bundle.getMessage("InvalidSyntaxAtIndex", i)); 184 } 185 if ((currentToken._tokenType != TokenType.NONE) && (currentToken._tokenType != TokenType.SPACE)) { 186 addToken(currentToken, tokens); 187 currentToken = new Token(); 188 } 189 currentToken._tokenType = nextToken; 190 break; 191 192 case STRING: 193 if (!currentToken._string.endsWith("\"")) { 194// System.err.format("String: %s%n", currentToken._string); 195 throw new InvalidSyntaxException(Bundle.getMessage("InvalidSyntaxAtIndex", i)); 196 } 197 if ((currentToken._tokenType != TokenType.NONE) && (currentToken._tokenType != TokenType.SPACE)) { 198 addToken(currentToken, tokens); 199 currentToken = new Token(); 200 } 201 currentToken._tokenType = nextToken; 202 break; 203 204 default: 205 throw new RuntimeException("unknown token type: "+nextToken.name()); 206 } 207 208 if (currentToken._tokenType != TokenType.SPACE) { 209 currentToken._string += ch; 210 } 211 212 i += eatNextChar.get(); 213// System.out.format("New string: '%s'%n", currentToken._string); 214 } 215 216 if (currentToken._tokenType != TokenType.NONE) { 217 addToken(currentToken, tokens); 218 } 219 220 return tokens; 221 } 222 223 private static TokenType getTokenType(Token currentToken, char ch, char nextChar, char nextNextChar, char nextNextNextChar, AtomicInteger eatNextChar) { 224 225 eatNextChar.set(0); 226 227 if (ch == '"') { 228 return TokenType.STRING; 229 } 230 231 if (Character.isWhitespace(ch)) { 232 return TokenType.SPACE; 233 } 234 235 if (currentToken._tokenType == TokenType.STRING) { 236 return TokenType.SAME_AS_LAST; 237 } 238 239 if (ch == '.') { 240 if (nextChar == '.') { 241 if ((currentToken._tokenType != TokenType.DOT_DOT)) { 242 eatNextChar.set(1); 243 return TokenType.DOT_DOT; 244 } else { 245 // Three dots in a row is an error 246 return TokenType.ERROR; 247 } 248 } else if ((currentToken._tokenType == TokenType.IDENTIFIER) 249 || (currentToken._tokenType == TokenType.NONE) 250 || (currentToken._tokenType == TokenType.RIGHT_PARENTHESIS) 251 || (currentToken._tokenType == TokenType.RIGHT_SQUARE_BRACKET) 252 || (currentToken._tokenType == TokenType.RIGHT_CURLY_BRACKET) 253 ) { 254 return TokenType.DOT; 255 } 256 } 257 258 if (ch == '?') { 259 return TokenType.TERNARY_QUESTION_MARK; 260 } 261 262 if (ch == ':') { 263 return TokenType.TERNARY_COLON; 264 } 265 266 if ((ch == '=') && (nextChar != '=')) { 267 return TokenType.ASSIGN; 268 } 269 270 if (nextChar == '=') { 271 switch (ch) { 272 case '+': 273 eatNextChar.set(1); 274 return TokenType.ASSIGN_ADD; 275 case '-': 276 eatNextChar.set(1); 277 return TokenType.ASSIGN_SUBTRACKT; 278 case '*': 279 eatNextChar.set(1); 280 return TokenType.ASSIGN_MULTIPLY; 281 case '/': 282 eatNextChar.set(1); 283 return TokenType.ASSIGN_DIVIDE; 284 case '%': 285 eatNextChar.set(1); 286 return TokenType.ASSIGN_MODULO; 287 default: 288 // Do nothing 289 } 290 } 291 292 if (ch == '<') { 293 switch (nextChar) { 294 case '=': 295 eatNextChar.set(1); 296 return TokenType.LESS_OR_EQUAL; 297 case '<': 298 if (nextNextChar == '=') { 299 eatNextChar.set(2); 300 return TokenType.ASSIGN_SHIFT_LEFT; 301 } else { 302 eatNextChar.set(1); 303 return TokenType.SHIFT_LEFT; 304 } 305 default: 306 return TokenType.LESS_THAN; 307 } 308 } 309 310 if (ch == '>') { 311 switch (nextChar) { 312 case '=': 313 eatNextChar.set(1); 314 return TokenType.GREATER_OR_EQUAL; 315 case '>': 316 if (nextNextChar == '=') { 317 eatNextChar.set(2); 318 return TokenType.ASSIGN_SHIFT_RIGHT; 319 } else if (nextNextChar == '>') { 320 if (nextNextNextChar == '=') { 321 eatNextChar.set(3); 322 return TokenType.ASSIGN_UNSIGNED_SHIFT_RIGHT; 323 } else { 324 eatNextChar.set(2); 325 return TokenType.UNSIGNED_SHIFT_RIGHT; 326 } 327 } else { 328 eatNextChar.set(1); 329 return TokenType.SHIFT_RIGHT; 330 } 331 default: 332 return TokenType.GREATER_THAN; 333 } 334 } 335 336 if (ch == '=') { 337 if (nextChar == '=') { 338 eatNextChar.set(1); 339 return TokenType.EQUAL; 340 } else { 341 return TokenType.ERROR; 342 } 343 } 344 345 if (ch == '!') { 346 if (nextChar == '=') { 347 eatNextChar.set(1); 348 return TokenType.NOT_EQUAL; 349 } else { 350 return TokenType.BOOLEAN_NOT; 351 } 352 } 353 354 if (ch == '|') { 355 if (nextChar == '|') { 356 eatNextChar.set(1); 357 return TokenType.BOOLEAN_OR; 358 } else if (nextChar == '=') { 359 eatNextChar.set(1); 360 return TokenType.ASSIGN_OR; 361 } else { 362 return TokenType.BINARY_OR; 363 } 364 } 365 366 if (ch == '&') { 367 if (nextChar == '&') { 368 eatNextChar.set(1); 369 return TokenType.BOOLEAN_AND; 370 } else if (nextChar == '=') { 371 eatNextChar.set(1); 372 return TokenType.ASSIGN_AND; 373 } else { 374 return TokenType.BINARY_AND; 375 } 376 } 377 378 if (ch == '~') { 379 return TokenType.BINARY_NOT; 380 } 381 382 if (ch == ',') { 383 return TokenType.COMMA; 384 } 385 386 if (ch == '+') { 387 if (nextChar == '+') { 388 eatNextChar.set(1); 389 return TokenType.INCREMENT; 390 } else { 391 return TokenType.ADD; 392 } 393 } 394 395 if (ch == '-') { 396 if (nextChar == '-') { 397 eatNextChar.set(1); 398 return TokenType.DECREMENT; 399 } else { 400 return TokenType.SUBTRACKT; 401 } 402 } 403 404 if (ch == '*') { 405 return TokenType.MULTIPLY; 406 } 407 408 if (ch == '/') { 409 return TokenType.DIVIDE; 410 } 411 412 if (ch == '%') { 413 return TokenType.MODULO; 414 } 415 416 if (ch == '^') { 417 if (nextChar == '^') { 418 eatNextChar.set(1); 419 return TokenType.BOOLEAN_XOR; 420 } else if (nextChar == '=') { 421 eatNextChar.set(1); 422 return TokenType.ASSIGN_XOR; 423 } else { 424 return TokenType.BINARY_XOR; 425 } 426 } 427 428 if (ch == '(') { 429 return TokenType.LEFT_PARENTHESIS; 430 } 431 432 if (ch == ')') { 433 return TokenType.RIGHT_PARENTHESIS; 434 } 435 436 if (ch == '[') { 437 return TokenType.LEFT_SQUARE_BRACKET; 438 } 439 440 if (ch == ']') { 441 return TokenType.RIGHT_SQUARE_BRACKET; 442 } 443 444 if (ch == '{') { 445 return TokenType.LEFT_CURLY_BRACKET; 446 } 447 448 if (ch == '}') { 449 return TokenType.RIGHT_CURLY_BRACKET; 450 } 451 452 if ((currentToken._tokenType == TokenType.FLOATING_NUMBER) && 453 (isFloatingNumber(currentToken._string+ch) || isFloatingNumber(currentToken._string+ch+nextChar))) { 454 return TokenType.SAME_AS_LAST; 455 } 456 457 if ((currentToken._tokenType == TokenType.IDENTIFIER) && (Character.isLetterOrDigit(ch) || (ch == '_'))) { 458 return TokenType.SAME_AS_LAST; 459 } 460 461 if (Character.isDigit(ch)) { 462 return TokenType.FLOATING_NUMBER; 463 } 464 465 if ((currentToken._tokenType == TokenType.FLOATING_NUMBER) && 466 (Character.isLetterOrDigit(ch))) { 467 return TokenType.ERROR; 468 } 469 470 if (Character.isDigit(ch)) { 471 return TokenType.FLOATING_NUMBER; 472 } 473 474 if (Character.isLetter(ch) || (ch == '_')) { 475 return TokenType.IDENTIFIER; 476 } 477 478 return TokenType.ERROR; 479 } 480 481 private static boolean isIntegerNumber(String str) { 482 return str.matches("\\d+"); 483 } 484 485 private static boolean isFloatingNumber(String str) { 486 return str.matches("\\d+") || str.matches("\\d+\\.\\d+"); 487 } 488 489}