/*
 * Mirror of https://github.com/avinal/xeus-basic.git
 * Synced 2026-01-11 07:38:36 +05:30
 * 223 lines, 5.1 KiB, C
 */
/**
 * SPDX-License-Identifier: MIT
 * SPDX-FileCopyrightText: 2015-2016 Johan Van den Brande
 *
 * @file tokenizer.c
 */
|
|
|
|
#include "tokenizer.h"
|
|
|
|
#include <ctype.h>
|
|
#include <math.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "arch.h"
|
|
#include "array.h"
|
|
#include "hexdump.h"
|
|
|
|
static array *token_array = NULL;
|
|
|
|
add_token(T_ERROR, NULL);
|
|
add_token(T_EOF, NULL);
|
|
add_token(T_NUMBER, NULL);
|
|
add_token(T_STRING, NULL);
|
|
add_token(T_VARIABLE_STRING, NULL);
|
|
add_token(T_VARIABLE_NUMBER, NULL);
|
|
add_token(T_PLUS, "+");
|
|
add_token(T_MINUS, "-");
|
|
add_token(T_MULTIPLY, "*");
|
|
add_token(T_DIVIDE, "/");
|
|
add_token(T_LEFT_BANANA, "(");
|
|
add_token(T_RIGHT_BANANA, ")");
|
|
add_token(T_COLON, ":");
|
|
add_token(T_SEMICOLON, ";");
|
|
add_token(T_EQUALS, "=");
|
|
add_token(T_LESS, "<");
|
|
add_token(T_GREATER, ">");
|
|
add_token(T_COMMA, ",");
|
|
|
|
char *tokenizer_line = NULL;
|
|
char *tokenizer_p = NULL;
|
|
char *tokenizer_next_p = NULL;
|
|
|
|
token tokenizer_actual_token;
|
|
float tokenizer_actual_number;
|
|
char tokenizer_actual_char;
|
|
char tokenizer_actual_string[tokenizer_string_length];
|
|
char tokenizer_actual_variable[tokenizer_variable_length];
|
|
|
|
void tokenizer_setup(void) {
|
|
token_array = array_new(sizeof(token_entry));
|
|
tokenizer_register_token(&_T_ERROR);
|
|
tokenizer_register_token(&_T_EOF);
|
|
tokenizer_register_token(&_T_NUMBER);
|
|
tokenizer_register_token(&_T_STRING);
|
|
tokenizer_register_token(&_T_VARIABLE_STRING);
|
|
tokenizer_register_token(&_T_VARIABLE_NUMBER);
|
|
tokenizer_register_token(&_T_PLUS);
|
|
tokenizer_register_token(&_T_MINUS);
|
|
tokenizer_register_token(&_T_MULTIPLY);
|
|
tokenizer_register_token(&_T_DIVIDE);
|
|
tokenizer_register_token(&_T_LEFT_BANANA);
|
|
tokenizer_register_token(&_T_RIGHT_BANANA);
|
|
tokenizer_register_token(&_T_COLON);
|
|
tokenizer_register_token(&_T_SEMICOLON);
|
|
tokenizer_register_token(&_T_EQUALS);
|
|
tokenizer_register_token(&_T_LESS);
|
|
tokenizer_register_token(&_T_GREATER);
|
|
tokenizer_register_token(&_T_COMMA);
|
|
}
|
|
|
|
void tokenizer_init(char *input) {
|
|
tokenizer_line = input;
|
|
tokenizer_p = tokenizer_next_p = tokenizer_line;
|
|
}
|
|
|
|
char *tokenizer_char_pointer(char *set) {
|
|
if (set != NULL) {
|
|
tokenizer_p = set;
|
|
return NULL;
|
|
}
|
|
|
|
// Skip white space
|
|
while (*tokenizer_p && isspace(*tokenizer_p)) {
|
|
tokenizer_p++;
|
|
}
|
|
return tokenizer_p;
|
|
}
|
|
|
|
/**
 * Tell whether a character may appear in a variable name.
 *
 * Accepted characters are the upper-case letters 'A'..'Z', the decimal digits
 * '0'..'9' and the dollar sign.
 *
 * @param c Character to classify.
 * @return true when c is a variable-name character, false otherwise.
 */
static bool isvarchar(char c) {
  const bool upper_letter = ('A' <= c) && (c <= 'Z');
  const bool decimal_digit = ('0' <= c) && (c <= '9');

  return upper_letter || decimal_digit || (c == '$');
}
|
|
|
|
token _find_registered(void) {
|
|
for (size_t i = 0; i < array_size(token_array); i++) {
|
|
token_entry *entry = (token_entry *)array_get(token_array, i);
|
|
if (entry->name == NULL) continue;
|
|
|
|
if (strncmp(tokenizer_p, entry->name, strlen(entry->name)) == 0) {
|
|
tokenizer_next_p = tokenizer_p + strlen(entry->name);
|
|
tokenizer_p = tokenizer_next_p;
|
|
return entry->token;
|
|
}
|
|
}
|
|
return T_THE_END;
|
|
}
|
|
|
|
token tokenizer_get_next_token(void) {
|
|
if (!*tokenizer_p) {
|
|
return T_EOF;
|
|
}
|
|
|
|
// Skip white space
|
|
while (*tokenizer_p && isspace(*tokenizer_p)) {
|
|
tokenizer_p++;
|
|
}
|
|
|
|
// Check for number
|
|
if (isdigit(*tokenizer_p) || *tokenizer_p == '.') {
|
|
tokenizer_next_p = tokenizer_p;
|
|
size_t l = 0;
|
|
while (*tokenizer_next_p &&
|
|
(isdigit(*tokenizer_next_p) || *tokenizer_next_p == '.')) {
|
|
l++;
|
|
tokenizer_next_p++;
|
|
}
|
|
char number[l + 1];
|
|
memset(number, 0, l + 1);
|
|
strncpy(number, tokenizer_p, l);
|
|
number[l] = '\0';
|
|
tokenizer_p = tokenizer_next_p;
|
|
float f;
|
|
sscanf(number, "%f", &f);
|
|
tokenizer_actual_number = f;
|
|
return T_NUMBER;
|
|
}
|
|
|
|
// Check for string
|
|
if ('"' == *tokenizer_p) {
|
|
tokenizer_p++; // skip "
|
|
tokenizer_next_p = tokenizer_p;
|
|
size_t l = 0;
|
|
while (*tokenizer_next_p && '"' != *tokenizer_next_p) {
|
|
l++;
|
|
tokenizer_next_p++;
|
|
}
|
|
|
|
if (*tokenizer_next_p) {
|
|
tokenizer_next_p++; // skip trailing "
|
|
}
|
|
|
|
if (l > 80) {
|
|
return T_ERROR;
|
|
}
|
|
|
|
memcpy(tokenizer_actual_string, tokenizer_p, l);
|
|
tokenizer_actual_string[l] = '\0';
|
|
|
|
tokenizer_p = tokenizer_next_p;
|
|
|
|
return T_STRING;
|
|
}
|
|
|
|
token t = _find_registered();
|
|
if (t != T_THE_END) {
|
|
return t;
|
|
}
|
|
|
|
// Check for variable
|
|
tokenizer_next_p = tokenizer_p;
|
|
size_t len = 0;
|
|
while (*tokenizer_next_p && isvarchar(*tokenizer_next_p)) {
|
|
len++;
|
|
tokenizer_next_p++;
|
|
}
|
|
|
|
if (len > tokenizer_variable_length) {
|
|
return T_ERROR;
|
|
}
|
|
|
|
if (len > 0) {
|
|
memcpy(tokenizer_actual_variable, tokenizer_p, len);
|
|
tokenizer_actual_variable[len] = '\0';
|
|
tokenizer_p = tokenizer_next_p;
|
|
if (tokenizer_actual_variable[len - 1] == '$') {
|
|
return T_VARIABLE_STRING;
|
|
}
|
|
return T_VARIABLE_NUMBER;
|
|
}
|
|
|
|
return T_ERROR;
|
|
}
|
|
|
|
/**
 * Value of the most recently scanned number token.
 *
 * @return The current content of tokenizer_actual_number.
 */
float tokenizer_get_number(void) {
  return tokenizer_actual_number;
}
|
|
|
|
/**
 * Text of the most recently scanned string token.
 *
 * @return Pointer to the tokenizer's own string buffer. The buffer is
 *         overwritten when the next string token is scanned, so copy the text
 *         if it has to outlive that.
 */
char *tokenizer_get_string(void) {
  return tokenizer_actual_string;
}
|
|
|
|
void tokenizer_get_variable_name(char *name) {
|
|
strncpy(name, tokenizer_actual_variable, sizeof(tokenizer_actual_variable));
|
|
}
|
|
|
|
/**
 * Add a token to the registry searched by _find_registered().
 *
 * @param entry Token description to append to token_array.
 *
 * tokenizer_setup() must have created the registry before this is called.
 */
void tokenizer_register_token(token_entry *entry)
{
  /* New entries go to the end, so earlier registrations are matched first. */
  array_push(token_array, entry);
}
|
|
|
|
void tokenizer_free_registered_tokens(void) {
|
|
array_destroy(token_array);
|
|
}
|