lexer formed

author: Adam Branes <adam@adam> 2021-05-02 17:46:29 +0300
committer: Galin Simeonov <gts@volconst.com> 2021-07-15 18:00:07 +0300
commit: 679cbe58c4e53f0163588a7731154f3afe2d25aa (patch)
tree: a2dbc2317a3f107899d60f5e68c8d6cf8d27e146 /lexer.c
parent: a3e36c1918e63761dfc4d2221cca3636b98e93aa (diff)
download: MEGATRON-679cbe58c4e53f0163588a7731154f3afe2d25aa.tar.gz
1 files changed, 113 insertions, 1 deletions
diff --git a/lexer.c b/lexer.c
index 4ade447..546c727 100644
--- a/lexer.c
+++ b/lexer.c
@@ -2,18 +2,102 @@
 #define LEXER_C
 #include <lexer.h>
 
+/*
+ * Character classification helpers used by the lexer. They compare against the
+ * literal ranges 'a'..'z', 'A'..'Z' and '0'..'9' (plus '_' for identifiers).
+ * These are macros: the argument is evaluated several times, so never pass an
+ * expression with side effects (e.g. IS_DIGIT(*p++)).
+ */
+#define IS_ID_CHAR(x) ( ((x) <= 'z' && (x) >= 'a') || ((x) <= 'Z' && (x) >= 'A') || (x) == '_' )
+#define IS_DIGIT(x) ( (x) <= '9' && (x) >= '0' )
+#define IS_ID_THING(x) ( IS_ID_CHAR(x) || IS_DIGIT(x) )
+/*
+ * Records the lexing error message x and returns a zero-length KW_NOP token
+ * from the enclosing function. Relies on `src` and `translation_data` being in
+ * scope where it is used. Wrapped in do { } while(0) so that it behaves as a
+ * single statement, including inside an unbraced if/else.
+ */
+#define LEX_ERROR(x) do { push_lexing_error((x),src,translation_data); return get_token(src->src+src->where_in_src,0,KW_NOP,src->current_row,src->current_column); } while(0)
+
 /*
  *	placeholder very slow lexer that I will probabbly not replace 
  */
 void lex(struct Queue *token_destination,struct Source *src,struct Translation_Data *translation_data)
 {
+	/*
+	 * Tokenise the whole of src into token_destination.
+	 * White space is skipped before the first token and after every token.
+	 * If a step reports a new error, lexing stops right away; the token
+	 * produced by that step has already been queued and no KW_EOF token is
+	 * appended. Otherwise a KW_EOF token terminates the queue.
+	 */
+	for(skip_white_space(src); src->where_in_src < src->src_size; skip_white_space(src))
+	{
+		Queue_Push(token_destination, lex_step(src, translation_data));
+
+		if(has_new_errors(translation_data))
+		{
+			return;
+		}
+	}
+
+	Queue_Push(token_destination, get_token(NULL, 0, KW_EOF, src->current_row, src->current_column));
 }
-struct token* get_token(char *data,size_t size)
+
+
+/*
+ * Lexes exactly one token starting at src->where_in_src and advances
+ * src->where_in_src past it.
+ *
+ * The caller must make sure src->where_in_src < src->src_size and that no
+ * white space is pending (lex() does both).
+ *
+ * Recognised, in this order: the fixed keywords/punctuators listed in
+ * fixed_tokens, identifiers ([A-Za-z_][A-Za-z_0-9]*) and string literals
+ * delimited by '"' (the token data excludes the quotes; no escape sequences
+ * are handled). Token data points into src->src, nothing is copied.
+ *
+ * On an unterminated string or an unexpected character a lexing error is
+ * pushed and a zero-length KW_NOP token is returned.
+ */
+struct token* lex_step(struct Source *src,struct Translation_Data *translation_data)
+{
+	/* tried top to bottom; the first match wins */
+	static const struct
+	{
+		char *word;
+		size_t size;              /* length without the terminating '\0' */
+		enum Keyword type;
+		char needs_space_after;   /* 1 for words, 0 for punctuation */
+	} fixed_tokens[] = {
+		{ "machine",  sizeof("machine")-1,  KW_MACHINE,      1 },
+		{ "state",    sizeof("state")-1,    KW_STATE,        1 },
+		{ "from",     sizeof("from")-1,     KW_FROM,         1 },
+		{ "to",       sizeof("to")-1,       KW_TO,           1 },
+		{ "on",       sizeof("on")-1,       KW_ON,           1 },
+		{ "[",        sizeof("[")-1,        KW_OPEN_SQUARE,  0 },
+		{ "]",        sizeof("]")-1,        KW_CLOSE_SQUARE, 0 },
+		{ ";",        sizeof(";")-1,        KW_SEMI_COLUMN,  0 },
+		{ "|",        sizeof("|")-1,        KW_PIPE,         0 },
+		{ "starting", sizeof("starting")-1, KW_STARTING,     1 },
+	};
+	size_t i;
+	size_t start;
+
+	for(i=0; i < sizeof(fixed_tokens)/sizeof(fixed_tokens[0]); ++i)
+	{
+		if(check_and_move_if_on_word(fixed_tokens[i].word,fixed_tokens[i].size,src,fixed_tokens[i].needs_space_after))
+		{
+			/* where_in_src is already past the word, so step back to its start */
+			return get_token(src->src+src->where_in_src-fixed_tokens[i].size,fixed_tokens[i].size,fixed_tokens[i].type,src->current_row,src->current_column);
+		}
+	}
+
+	if(IS_ID_CHAR(src->src[src->where_in_src])) /*check for id*/
+	{
+		start=src->where_in_src;
+		for(i=start+1; i < src->src_size && IS_ID_THING(src->src[i]); ++i);
+
+		src->where_in_src=i;
+		return get_token(src->src+start, i-start, KW_ID,src->current_row,src->current_column);
+	}
+
+	if(src->src[src->where_in_src]=='"') /*check for string literal*/
+	{
+		++src->where_in_src; /* skip the opening quote */
+		start=src->where_in_src;
+
+		/* the bounds check must come first, otherwise src->src[src_size] is read */
+		for(i=start; i < src->src_size && src->src[i]!='"'; ++i);
+
+		if(i==src->src_size)
+		{
+			LEX_ERROR("Unexpected end of file");
+		}
+
+		src->where_in_src=i+1; /* skip the closing quote */
+		return get_token(src->src+start, i-start, KW_STRING,src->current_row,src->current_column);
+	}
+
+	LEX_ERROR("Unexpected symbol");
+}
+/*
+ * Allocates a token describing `size` bytes starting at `data`.
+ * `data` is stored as-is (not copied), so it must stay valid for as long as
+ * the token is used. `type`, `row` and `column` are stored unchanged.
+ * Returns the new token, or NULL if the allocation fails.
+ */
+struct token* get_token(char *data,size_t size,enum Keyword type,size_t row,size_t column)
 {
 	struct token *ret;
 	ret=malloc(sizeof(struct token));
+	if(ret==NULL) /* out of memory: do not write through a null pointer */
+		return NULL;
 	ret->data=data;
 	ret->size=size;
+	ret->type=type;
+	ret->row=row;
+	ret->column=column;
 
 	return ret;
 }
@@ -22,4 +106,32 @@ void delete_token(struct token *token)
 	free(token);
 }
 
+/*
+ * Checks whether the input at src->where_in_src starts with `word` and, if so,
+ * advances src->where_in_src past it.
+ *
+ * word              - the text to look for
+ * word_size         - length of word without the ending '\0'
+ * needs_space_after - when non-zero the match only counts if the word is
+ *                     followed by white space or by the end of the input
+ *
+ * Returns 1 on a match (position advanced), 0 otherwise (position untouched).
+ */
+static char check_and_move_if_on_word(char *word,size_t word_size,struct Source *src,char needs_space_after)
+{
+	size_t i;
+	size_t end;
+
+	if(src->where_in_src + word_size > src->src_size)
+		return 0;
+
+	for(i=0;i<word_size;++i)
+	{
+		if(word[i]!=src->src[src->where_in_src+i])
+			return 0;
+	}
+
+	end=src->where_in_src+word_size;
+
+	/*
+	 * Only look at the following character when there is one: a word that
+	 * ends exactly at the end of the input must not read src->src[src_size].
+	 * The cast keeps isspace() defined for bytes outside the ASCII range.
+	 */
+	if(needs_space_after && end < src->src_size && !isspace((unsigned char)src->src[end]))
+		return 0;
+
+	src->where_in_src=end;
+	return 1;
+}
+/*
+ * Advances src->where_in_src to the next character that is not white space
+ * (as defined by isspace), or to src->src_size if there is none.
+ * Only where_in_src is modified here.
+ */
+void skip_white_space(struct Source *src)
+{
+	/* isspace() requires a value representable as unsigned char (or EOF) */
+	while(src->where_in_src<src->src_size && isspace((unsigned char)src->src[src->where_in_src]))
+		++src->where_in_src;
+}
 #endif
author	Adam Branes <adam@adam>	2021-05-02 17:46:29 +0300
committer	Galin Simeonov <gts@volconst.com>	2021-07-15 18:00:07 +0300
commit	679cbe58c4e53f0163588a7731154f3afe2d25aa (patch)
tree	a2dbc2317a3f107899d60f5e68c8d6cf8d27e146 /lexer.c
parent	a3e36c1918e63761dfc4d2221cca3636b98e93aa (diff)
download	MEGATRON-679cbe58c4e53f0163588a7731154f3afe2d25aa.tar.gz