/** * SPDX-License-Identifier: MIT * SPDX-FileCopyrightText: 2015-2016 Johan Van den Brande * * @file tokenizer.c */ #include "tokenizer.h" #include #include #include #include #include #include #include "arch.h" #include "array.h" #include "hexdump.h" static array *token_array = NULL; add_token(T_ERROR, NULL); add_token(T_EOF, NULL); add_token(T_NUMBER, NULL); add_token(T_STRING, NULL); add_token(T_VARIABLE_STRING, NULL); add_token(T_VARIABLE_NUMBER, NULL); add_token(T_PLUS, "+"); add_token(T_MINUS, "-"); add_token(T_MULTIPLY, "*"); add_token(T_DIVIDE, "/"); add_token(T_LEFT_BANANA, "("); add_token(T_RIGHT_BANANA, ")"); add_token(T_COLON, ":"); add_token(T_SEMICOLON, ";"); add_token(T_EQUALS, "="); add_token(T_LESS, "<"); add_token(T_GREATER, ">"); add_token(T_COMMA, ","); char *tokenizer_line = NULL; char *tokenizer_p = NULL; char *tokenizer_next_p = NULL; token tokenizer_actual_token; float tokenizer_actual_number; char tokenizer_actual_char; char tokenizer_actual_string[tokenizer_string_length]; char tokenizer_actual_variable[tokenizer_variable_length]; void tokenizer_setup(void) { token_array = array_new(sizeof(token_entry)); tokenizer_register_token(&_T_ERROR); tokenizer_register_token(&_T_EOF); tokenizer_register_token(&_T_NUMBER); tokenizer_register_token(&_T_STRING); tokenizer_register_token(&_T_VARIABLE_STRING); tokenizer_register_token(&_T_VARIABLE_NUMBER); tokenizer_register_token(&_T_PLUS); tokenizer_register_token(&_T_MINUS); tokenizer_register_token(&_T_MULTIPLY); tokenizer_register_token(&_T_DIVIDE); tokenizer_register_token(&_T_LEFT_BANANA); tokenizer_register_token(&_T_RIGHT_BANANA); tokenizer_register_token(&_T_COLON); tokenizer_register_token(&_T_SEMICOLON); tokenizer_register_token(&_T_EQUALS); tokenizer_register_token(&_T_LESS); tokenizer_register_token(&_T_GREATER); tokenizer_register_token(&_T_COMMA); } void tokenizer_init(char *input) { tokenizer_line = input; tokenizer_p = tokenizer_next_p = tokenizer_line; } char *tokenizer_char_pointer(char *set) { if (set != NULL) { tokenizer_p = set; return NULL; } // Skip white space while (*tokenizer_p && isspace(*tokenizer_p)) { tokenizer_p++; } return tokenizer_p; } static bool isvarchar(char c) { if (c >= 'A' && c <= 'Z') { return true; } if (c == '$') { return true; } if (c >= '0' && c <= '9') { return true; } return false; } token _find_registered(void) { for (size_t i = 0; i < array_size(token_array); i++) { token_entry *entry = (token_entry *)array_get(token_array, i); if (entry->name == NULL) continue; if (strncmp(tokenizer_p, entry->name, strlen(entry->name)) == 0) { tokenizer_next_p = tokenizer_p + strlen(entry->name); tokenizer_p = tokenizer_next_p; return entry->token; } } return T_THE_END; } token tokenizer_get_next_token(void) { if (!*tokenizer_p) { return T_EOF; } // Skip white space while (*tokenizer_p && isspace(*tokenizer_p)) { tokenizer_p++; } // Check for number if (isdigit(*tokenizer_p) || *tokenizer_p == '.') { tokenizer_next_p = tokenizer_p; size_t l = 0; while (*tokenizer_next_p && (isdigit(*tokenizer_next_p) || *tokenizer_next_p == '.')) { l++; tokenizer_next_p++; } char number[l + 1]; memset(number, 0, l + 1); strncpy(number, tokenizer_p, l); number[l] = '\0'; tokenizer_p = tokenizer_next_p; float f; sscanf(number, "%f", &f); tokenizer_actual_number = f; return T_NUMBER; } // Check for string if ('"' == *tokenizer_p) { tokenizer_p++; // skip " tokenizer_next_p = tokenizer_p; size_t l = 0; while (*tokenizer_next_p && '"' != *tokenizer_next_p) { l++; tokenizer_next_p++; } if (*tokenizer_next_p) { tokenizer_next_p++; // skip trailing " } if (l > 80) { return T_ERROR; } memcpy(tokenizer_actual_string, tokenizer_p, l); tokenizer_actual_string[l] = '\0'; tokenizer_p = tokenizer_next_p; return T_STRING; } token t = _find_registered(); if (t != T_THE_END) { return t; } // Check for variable tokenizer_next_p = tokenizer_p; size_t len = 0; while (*tokenizer_next_p && isvarchar(*tokenizer_next_p)) { len++; tokenizer_next_p++; } if (len > tokenizer_variable_length) { return T_ERROR; } if (len > 0) { memcpy(tokenizer_actual_variable, tokenizer_p, len); tokenizer_actual_variable[len] = '\0'; tokenizer_p = tokenizer_next_p; if (tokenizer_actual_variable[len - 1] == '$') { return T_VARIABLE_STRING; } return T_VARIABLE_NUMBER; } return T_ERROR; } float tokenizer_get_number(void) { return tokenizer_actual_number; } char *tokenizer_get_string(void) { return tokenizer_actual_string; } void tokenizer_get_variable_name(char *name) { strncpy(name, tokenizer_actual_variable, sizeof(tokenizer_actual_variable)); } void tokenizer_register_token(token_entry *entry) { array_push(token_array, entry); } void tokenizer_free_registered_tokens(void) { array_destroy(token_array); }