From 679cbe58c4e53f0163588a7731154f3afe2d25aa Mon Sep 17 00:00:00 2001 From: Adam Branes Date: Sun, 2 May 2021 17:46:29 +0300 Subject: lexer formed --- lexer.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 113 insertions(+), 1 deletion(-) (limited to 'lexer.c') diff --git a/lexer.c b/lexer.c index 4ade447..546c727 100644 --- a/lexer.c +++ b/lexer.c @@ -2,18 +2,102 @@ #define LEXER_C #include +#define IS_ID_CHAR(x) ( (x <= 'z' && x>='a') || ( x <= 'Z' && x >= 'A' ) || x=='_') +#define IS_DIGIT(x) ( x <= '9' && x >= '0' ) +#define IS_ID_THING(x) ( IS_ID_CHAR(x) || IS_DIGIT(x)) +#define LEX_ERROR(x) {push_lexing_error(x,src,translation_data); return get_token(src->src+src->where_in_src,0,KW_NOP,src->current_row,src->current_column);} + /* * placeholder very slow lexer that I will probabbly not replace */ void lex(struct Queue *token_destination,struct Source *src,struct Translation_Data *translation_data) { + skip_white_space(src); + while(src->where_in_srcsrc_size) + { + Queue_Push(token_destination,lex_step(src,translation_data)); + if(has_new_errors(translation_data)) + return; + else + skip_white_space(src); + } + Queue_Push(token_destination,get_token(NULL,0,KW_EOF,src->current_row,src->current_column)); } -struct token* get_token(char *data,size_t size) + + +struct token* lex_step(struct Source *src,struct Translation_Data *translation_data) +{ + if(check_and_move_if_on_word("machine",sizeof("machine")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("machine")+1,sizeof("machine")-1,KW_MACHINE,src->current_row,src->current_column); + if(check_and_move_if_on_word("state",sizeof("state")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("state")+1,sizeof("state")-1,KW_STATE,src->current_row,src->current_column); + if(check_and_move_if_on_word("from",sizeof("from")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("from")+1,sizeof("from")-1,KW_FROM,src->current_row,src->current_column); + if(check_and_move_if_on_word("to",sizeof("to")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("to")+1,sizeof("to")-1,KW_TO,src->current_row,src->current_column); + if(check_and_move_if_on_word("on",sizeof("on")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("on")+1,sizeof("on")-1,KW_ON,src->current_row,src->current_column); + if(check_and_move_if_on_word("[",sizeof("[")-1,src,0)) + return get_token(src->src+src->where_in_src-sizeof("[")+1,sizeof("[")-1,KW_OPEN_SQUARE,src->current_row,src->current_column); + if(check_and_move_if_on_word("]",sizeof("]")-1,src,0)) + return get_token(src->src+src->where_in_src-sizeof("]")+1,sizeof("]")-1,KW_CLOSE_SQUARE,src->current_row,src->current_column); + if(check_and_move_if_on_word(";",sizeof(";")-1,src,0)) + return get_token(src->src+src->where_in_src-sizeof(";")+1,sizeof(";")-1,KW_SEMI_COLUMN,src->current_row,src->current_column); + if(check_and_move_if_on_word("|",sizeof("|")-1,src,0)) + return get_token(src->src+src->where_in_src-sizeof("|")+1,sizeof("|")-1,KW_PIPE,src->current_row,src->current_column); + if(check_and_move_if_on_word("starting",sizeof("starting")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("starting")+1,sizeof("starting")-1,KW_STARTING,src->current_row,src->current_column); + + + + + + + if(IS_ID_CHAR(src->src[src->where_in_src])) /*check for id*/ + { + size_t i; + + ++src->where_in_src; + for( i=src->where_in_src ; + i < src->src_size && IS_ID_THING(src->src[i]); + ++i); + + + i-=src->where_in_src; + src->where_in_src+=i; + return get_token(src->src + src->where_in_src - i - 1, i + 1, KW_ID,src->current_row,src->current_column); + }else if(src->src[src->where_in_src]=='"') /*check for string literal*/ + { + size_t i; + ++src->where_in_src; + for( i=src->where_in_src ; + src->src[i]!='"' && i< src->src_size; + ++i); + + if(i==src->src_size) + { + LEX_ERROR("Unexpected end of file"); + }else + { + i-=src->where_in_src; + src->where_in_src+=i+1; + return get_token(src->src + src->where_in_src-i-1, i, KW_STRING,src->current_row,src->current_column); + } + + }else + { + LEX_ERROR("Unexpected symbol"); + } +} +struct token* get_token(char *data,size_t size,enum Keyword type,size_t row,size_t column) { struct token *ret; ret=malloc(sizeof(struct token)); ret->data=data; ret->size=size; + ret->type=type; + ret->row=row; + ret->column=column; return ret; } @@ -22,4 +106,32 @@ void delete_token(struct token *token) free(token); } +/*word_size without the ending '\0' */ +static char check_and_move_if_on_word(char *word,size_t word_size,struct Source *src,char needs_space_after) +{ + size_t i; + if(src->where_in_src + word_size > src->src_size) + return 0; + + for(i=0;isrc[src->where_in_src+i];++i); + + if(isrc[src->where_in_src+i])) || !needs_space_after ) + { + src->where_in_src+=i; + return 1; + } + else + { + return 0; + } +} +void skip_white_space(struct Source *src) +{ + while(src->where_in_srcsrc_size && isspace(src->src[src->where_in_src])) + ++src->where_in_src; +} #endif -- cgit v1.2.3