lexer formed

author: Adam Branes <adam@adam> 2021-05-02 17:46:29 +0300
committer: Galin Simeonov <gts@volconst.com> 2021-07-15 18:00:07 +0300
commit: 679cbe58c4e53f0163588a7731154f3afe2d25aa (patch)
tree: a2dbc2317a3f107899d60f5e68c8d6cf8d27e146
parent: a3e36c1918e63761dfc4d2221cca3636b98e93aa (diff)
download: MEGATRON-679cbe58c4e53f0163588a7731154f3afe2d25aa.tar.gz
13 files changed, 564 insertions, 17 deletions
diff --git a/.gdb_history b/.gdb_history
new file mode 100644
index 0000000..6b826e1
--- /dev/null
+++ b/.gdb_history
@@ -0,0 +1,133 @@
+r < test 
+where
+b main
+r < test 
+n
+n
+n
+n
+r
+record
+n
+rs
+record off
+n
+record stop 
+n
+n
+rs
+r
+n
+r
+s
+n
+n
+n
+r test 
+n
+n
+n
+n
+s
+n
+print src
+print *src
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+s
+n
+print it
+print *it
+print *(struct token *)it->data
+n
+n
+q
+r test
+n
+b main
+r test
+n
+s
+print tokens
+print *tokens
+n
+print i
+print it
+n
+print it
+n
+print it
+print *tokens
+n
+print it
+n
+print it
+b lex
+r test
+s
+n
+print src->src
+n
+n
+s
+n
+print src
+print *src
+n
+print *src
+rs
+r
+n
+s
+s
+n
+print word[0]
+print src->src[src->where_in_src
+print src->src[src->where_in_src]
+n
+print i
+print sizes
+print size
+print word_size
+n
+b lex
+d
+b main
+r test
+n
+n
+n
+r
+n
+s
+n
+s
+n
+r
+n
+s
+record
+n
+r
+record stop
+n
+s
+n
+s
+n
+n
+q
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4ddaac8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+tags
+*.exe
+*.o
diff --git a/lexer.c b/lexer.c
index 4ade447..546c727 100644
--- a/lexer.c
+++ b/lexer.c
@@ -2,18 +2,102 @@
 #define LEXER_C
 #include <lexer.h>
 
+#define IS_ID_CHAR(x) ( ((x) <= 'z' && (x)>='a') || ( (x) <= 'Z' && (x) >= 'A' ) || (x)=='_') /*letter or '_'; x is evaluated more than once*/
+#define IS_DIGIT(x) ( (x) <= '9' && (x) >= '0' ) /*x is evaluated twice*/
+#define IS_ID_THING(x) ( IS_ID_CHAR(x) || IS_DIGIT(x)) /*any char allowed after the first char of an id*/
+#define LEX_ERROR(x) do{ push_lexing_error(x,src,translation_data); return get_token(src->src+src->where_in_src,0,KW_NOP,src->current_row,src->current_column); }while(0) /*needs src and translation_data in scope; returns an empty KW_NOP token from the calling function*/
+
 /*
  *	placeholder very slow lexer that I will probabbly not replace 
  */
 void lex(struct Queue *token_destination,struct Source *src,struct Translation_Data *translation_data)
 {
+	/*tokens are queued until the source runs out or lex_step reports an error;*/
+	/*only a run without errors gets the closing KW_EOF token*/
+	for(skip_white_space(src);src->where_in_src<src->src_size;skip_white_space(src))
+	{
+		struct token *next=lex_step(src,translation_data);
+		Queue_Push(token_destination,next);
+		if(has_new_errors(translation_data))
+			return;
+	}
+	Queue_Push(token_destination,get_token(NULL,0,KW_EOF,src->current_row,src->current_column));
 }
-struct token* get_token(char *data,size_t size)
+/*cuts one token off src at where_in_src, which has to be inside the source and not on white space;*/
+/*after an error has been pushed to translation_data the token returned is an empty KW_NOP*/
+struct token* lex_step(struct Source *src,struct Translation_Data *translation_data)
+{
+	if(check_and_move_if_on_word("machine",sizeof("machine")-1,src,1))
+		return get_token(src->src+src->where_in_src-sizeof("machine")+1,sizeof("machine")-1,KW_MACHINE,src->current_row,src->current_column);
+	if(check_and_move_if_on_word("state",sizeof("state")-1,src,1))
+		return get_token(src->src+src->where_in_src-sizeof("state")+1,sizeof("state")-1,KW_STATE,src->current_row,src->current_column);
+	if(check_and_move_if_on_word("from",sizeof("from")-1,src,1))
+		return get_token(src->src+src->where_in_src-sizeof("from")+1,sizeof("from")-1,KW_FROM,src->current_row,src->current_column);
+	if(check_and_move_if_on_word("to",sizeof("to")-1,src,1))
+		return get_token(src->src+src->where_in_src-sizeof("to")+1,sizeof("to")-1,KW_TO,src->current_row,src->current_column);
+	if(check_and_move_if_on_word("on",sizeof("on")-1,src,1))
+		return get_token(src->src+src->where_in_src-sizeof("on")+1,sizeof("on")-1,KW_ON,src->current_row,src->current_column);
+	if(check_and_move_if_on_word("[",sizeof("[")-1,src,0))
+		return get_token(src->src+src->where_in_src-sizeof("[")+1,sizeof("[")-1,KW_OPEN_SQUARE,src->current_row,src->current_column);
+	if(check_and_move_if_on_word("]",sizeof("]")-1,src,0))
+		return get_token(src->src+src->where_in_src-sizeof("]")+1,sizeof("]")-1,KW_CLOSE_SQUARE,src->current_row,src->current_column);
+	if(check_and_move_if_on_word(";",sizeof(";")-1,src,0))
+		return get_token(src->src+src->where_in_src-sizeof(";")+1,sizeof(";")-1,KW_SEMI_COLUMN,src->current_row,src->current_column);
+	if(check_and_move_if_on_word("|",sizeof("|")-1,src,0))
+		return get_token(src->src+src->where_in_src-sizeof("|")+1,sizeof("|")-1,KW_PIPE,src->current_row,src->current_column);
+	if(check_and_move_if_on_word("starting",sizeof("starting")-1,src,1))
+		return get_token(src->src+src->where_in_src-sizeof("starting")+1,sizeof("starting")-1,KW_STARTING,src->current_row,src->current_column);
+
+	/*
+	 * Every token points into src->src, nothing is copied. The row and column
+	 * handed to get_token are simply the ones src holds at the moment.
+	 */
+
+	if(IS_ID_CHAR(src->src[src->where_in_src])) /*check for id*/
+	{
+		size_t i;
+
+		++src->where_in_src;
+		for( i=src->where_in_src ;
+			i < src->src_size && IS_ID_THING(src->src[i]);
+			++i);
+
+		/*i becomes the count of chars after the first one*/
+		i-=src->where_in_src;
+		src->where_in_src+=i;
+		return get_token(src->src + src->where_in_src - i - 1, i + 1, KW_ID,src->current_row,src->current_column);
+	}else if(src->src[src->where_in_src]=='"') /*check for string literal*/
+	{
+		size_t i;
+		++src->where_in_src;
+		for( i=src->where_in_src ;
+			i< src->src_size && src->src[i]!='"'; /*bound first, so src->src[src_size] is never read*/
+			++i);
+
+		if(i==src->src_size)
+		{
+			LEX_ERROR("Unexpected end of file");
+		}else
+		{
+			i-=src->where_in_src;
+			src->where_in_src+=i+1;
+			return get_token(src->src + src->where_in_src-i-1, i, KW_STRING,src->current_row,src->current_column);
+		}
+
+	}else
+	{
+		LEX_ERROR("Unexpected symbol");
+	}
+}
+struct token* get_token(char *data,size_t size,enum Keyword type,size_t row,size_t column)
 {
 	struct token *ret;
 	ret=malloc(sizeof(struct token));
 	ret->data=data;
 	ret->size=size;
+	ret->type=type;	/*data above is stored as a pointer, the characters are not copied*/
+	ret->row=row;	/*row and column are stored as given, they are not computed here*/
+	ret->column=column;
 
 	return ret;
 }
@@ -22,4 +106,32 @@ void delete_token(struct token *token)
 	free(token);
 }
 
+/*
+ * Tries to match word (word_size chars, not counting the ending '\0') at the
+ * current position of src. On a match where_in_src is moved past the word and
+ * 1 is returned; otherwise nothing is changed and 0 is returned.
+ * If needs_space_after is set the word must be followed by a white space
+ * character; a word ending exactly at the end of the source has no such
+ * character and is not matched (the old code read one byte past the buffer).
+ */
+static char check_and_move_if_on_word(char *word,size_t word_size,struct Source *src,char needs_space_after)
+{
+	size_t i;
+	size_t after;
+	if(src->where_in_src + word_size > src->src_size)
+		return 0;
+	for(i=0;i<word_size && word[i]==src->src[src->where_in_src+i];++i);
+	if(i<word_size)
+		return 0;
+	after=src->where_in_src+word_size;
+	if(needs_space_after && (after>=src->src_size || !isspace((unsigned char)src->src[after])))
+		return 0;
+	src->where_in_src=after;
+	return 1;
+}
+/*moves where_in_src past the white space at the current position, stopping at the end of the source*/
+void skip_white_space(struct Source *src)
+{
+	for(;src->where_in_src<src->src_size && isspace((unsigned char)src->src[src->where_in_src]);++src->where_in_src);
+}
 #endif
diff --git a/lexer.h b/lexer.h
index 27b69d5..e69e23b 100644
--- a/lexer.h
+++ b/lexer.h
@@ -1,8 +1,12 @@
 #ifndef LEXER_H
 #define LEXER_H
+#include <ctype.h> //isspace
 #include <program.h>
 #include <queue.h>
 
+struct Translation_Data;
+struct Source;
+
 enum Keyword
 {
 	KW_MACHINE,
@@ -11,16 +15,33 @@ enum Keyword
 	KW_TO,
 	KW_ON,
 	KW_ID,
+	KW_STRING,
+	KW_NOP,
+	KW_EOF,
+	KW_OPEN_SQUARE,
+	KW_CLOSE_SQUARE,
+	KW_PIPE,
+	KW_SEMI_COLUMN,
+	KW_STARTING,
 };
 struct token
 {
 	size_t size;
 	enum Keyword type;
 	char *data;
+	size_t row;
+	size_t column;
 };
+
 void lex(struct Queue *token_destination,struct Source *src,struct Translation_Data *translation_data);
-struct token* get_token(char *data,size_t size);
-void delete_token(struct token *token);
+struct token* lex_step(struct Source *src,struct Translation_Data *translation_data);
+struct token* get_token(char *data,size_t size,enum Keyword type,size_t row,size_t column);
+void skip_white_space(struct Source *src);
 
 
+
+void delete_token(struct token *token);
+/*:X*/
+static char check_and_move_if_on_word(char *word,size_t word_size,struct Source *src,char needs_space_after);
+
 #endif
diff --git a/main.c b/main.c
index 79018d6..bfc09c0 100644
--- a/main.c
+++ b/main.c
@@ -1,9 +1,44 @@
 #include<stdio.h>
+#include<program.h>
 #include<lexer.h>
+#include<string.h>
+#include<print.h>
 
 
 
-int main()
+int main(int argc,char **argv)
 {
+	struct Options *options;
+	struct Source *source;
+	struct Translation_Data *translation_data;
+	/*lexes the file named by argv[1] and prints its tokens; 1 is returned on a failure*/
+	/*argv[argc] is NULL, so a missing file name shows up as a NULL source*/
+	options=parse_command_line(argv);
+	if(options->source==NULL)
+	{
+		printf("No source file specified\n");
+		return 0;
+	}
+	/*delete_source frees the name it is given, hence the copy*/
+	source=extract_source(strdup(options->source));
+	if(source==NULL)
+	{
+		printf("Could not read %s\n",options->source);
+		return 1;
+	}
+	translation_data=get_translation_data();
+	if(options->print_tokens)
+	{
+		lex(translation_data->tokens,source,translation_data);
+		if(translation_data->errors->size>0)
+		{
+			printf("There was an error!\n");
+			print_tokens(translation_data->tokens);
+			return 1;
+		}
+		print_tokens(translation_data->tokens);
+	}
+	delete_source(source);
+	delete_options(options);
 	return 0;
 }
diff --git a/makefile b/makefile
index 23fd10a..457b589 100644
--- a/makefile
+++ b/makefile
@@ -1,9 +1,15 @@
 INCLUDE_DIRS=-I .
-main.exe : main.c lexer.o queue.o
-	gcc main.c lexer.o queue.o -o main.exe ${INCLUDE_DIRS}
+CFLAGS="-g" 
+main.exe : main.c lexer.o queue.o print.o program.o
+	gcc ${CFLAGS} main.c lexer.o queue.o print.o program.o -o main.exe ${INCLUDE_DIRS}
 lexer.o : lexer.c lexer.h
-	gcc -c lexer.c -o lexer.o ${INCLUDE_DIRS}
+	gcc ${CFLAGS} -c lexer.c -o lexer.o ${INCLUDE_DIRS}
 queue.o : queue.c queue.h
-	gcc -c queue.c -o queue.o ${INCLUDE_DIRS}
+	gcc ${CFLAGS} -c queue.c -o queue.o ${INCLUDE_DIRS}
+print.o : print.c print.h
+	gcc ${CFLAGS} -c print.c -o print.o ${INCLUDE_DIRS}
+program.o : program.c program.h
+	gcc ${CFLAGS} -c program.c -o program.o ${INCLUDE_DIRS}
+	
 clear: 
-	rm -rf lexer.o main.exe queue.o
+	rm -rf lexer.o main.exe queue.o print.o program.o
diff --git a/print.c b/print.c
new file mode 100644
index 0000000..a9ec939
--- /dev/null
+++ b/print.c
@@ -0,0 +1,77 @@
+#ifndef PRINT_C
+#define PRINT_C PRINT_C
+#include<print.h>
+
+void print_keyword_enum(enum Keyword code)
+{	/*prints the enumerator's own spelling to stdout, without a new line*/
+	switch(code)
+	{
+		case KW_MACHINE:
+			printf("KW_MACHINE");
+			break;
+		case KW_STATE:
+			printf("KW_STATE");
+			break;
+		case KW_FROM:
+			printf("KW_FROM");
+			break;
+		case KW_TO:
+			printf("KW_TO");
+			break;
+		case KW_ON:
+			printf("KW_ON");
+			break;
+		case KW_ID:
+			printf("KW_ID");
+			break;
+		case KW_STRING:
+			printf("KW_STRING");
+			break;
+		case KW_NOP:
+			printf("KW_NOP");
+			break;
+		case KW_EOF:
+			printf("KW_EOF");
+			break;
+		case KW_OPEN_SQUARE:
+			printf("KW_OPEN_SQUARE");
+			break;
+		case KW_CLOSE_SQUARE:
+			printf("KW_CLOSE_SQUARE");
+			break;
+		case KW_PIPE:
+			printf("KW_PIPE");
+			break;
+		case KW_SEMI_COLUMN:
+			printf("KW_SEMI_COLUMN");
+			break;
+		case KW_STARTING:
+			printf("KW_STARTING");
+			break;
+		default:	/*any value that has no case above*/
+			printf("LEXERROR");
+	}
+}
+/*prints token to stdout as "[ TYPE text ] ", text being the size chars at data*/
+void print_token(struct token *token)
+{
+	size_t printed;
+
+	fputs("[ ",stdout);
+	print_keyword_enum(token->type);
+	putchar(' ');
+	for(printed=0;printed<token->size;++printed)
+		putchar(token->data[printed]);
+	fputs(" ] ",stdout);
+}
+void print_tokens(struct Queue *tokens)
+{	/*prints every token from tokens->first on, following the prev links, a space after each*/
+	struct Queue_Node *node=tokens->first;
+	for(;node!=NULL;node=node->prev)
+	{
+		print_token( (struct token*)(node->data));
+		putchar(' ');
+	}
+}
+
+#endif
diff --git a/print.h b/print.h
new file mode 100644
index 0000000..c4e3bec
--- /dev/null
+++ b/print.h
@@ -0,0 +1,11 @@
+#ifndef PRINT_H
+#define PRINT_H PRINT_H
+#include<stdio.h>
+#include<lexer.h>
+#include<queue.h>
+
+
+void print_keyword_enum(enum Keyword code);
+void print_token(struct token *token);
+void print_tokens(struct Queue *tokens);
+#endif
diff --git a/program.c b/program.c
index 6fe3a2f..c2140c1 100644
--- a/program.c
+++ b/program.c
@@ -1,5 +1,114 @@
 #ifndef PROGRAM_C
 #define PROGRAM_C
+#include<program.h>
 
+struct Source* extract_source(char *src_name)
+{	/*loads the whole file src_name into a new Source positioned at its start*/
+	FILE *file;
 
+	struct Source *ret=NULL;
+	long size=-1;
+	/*NULL is returned, with the file closed and nothing left allocated, when a step fails*/
+	file=fopen(src_name,"r");
+	if(file==NULL)
+		return NULL;
+	if(fseek(file,0L,SEEK_END)==0)
+		size=ftell(file);
+	if(size>=0 && fseek(file,0L,SEEK_SET)==0)
+		ret=malloc(sizeof(struct Source));
+	if(ret!=NULL && (ret->src=malloc((size_t)size+1))==NULL) /*+1: an empty file never asks for malloc(0)*/
+	{
+		free(ret);
+		ret=NULL;
+	}
+	if(ret!=NULL)
+	{
+		ret->src_size=fread(ret->src,sizeof(char),(size_t)size,file); /*the chars really read, which may be fewer than ftell counted*/
+		ret->where_in_src=ret->current_column=ret->current_row=0;
+		ret->src_name=src_name; /*kept as passed, no copy is made*/
+	}
+	fclose(file);
+	return ret;
+}
+/*takes argv[1] as the source name (the pointer is stored, not copied); token*/
+/*printing is always turned on as no switches are parsed*/
+struct Options* parse_command_line(char **argv)
+{
+	struct Options *ret=malloc(sizeof(struct Options));
+
+	ret->print_tokens=1;
+	ret->source=argv[1];
+	return ret;
+}
+/*
+ * Makes a Translation_Data whose error and token queues have been set up
+ * with Queue_Init and which has no errors counted as seen yet.
+ */
+struct Translation_Data* get_translation_data()
+{
+	struct Translation_Data *data=malloc(sizeof(struct Translation_Data));
+	data->hold_number_of_errors=0;
+	data->tokens=malloc(sizeof(struct Queue));
+	data->errors=malloc(sizeof(struct Queue));
+	Queue_Init(data->tokens);
+	Queue_Init(data->errors);
+	return data;
+}
+/*makes an Error out of message (stored as passed, not copied), row and column*/
+struct Error* get_error(char *message,size_t row,size_t column)
+{
+	struct Error *ret=malloc(sizeof(struct Error));
+
+	*ret=(struct Error){ .message=message, .row=row, .column=column };
+	return ret; /*this return was missing, callers got an indeterminate pointer*/
+}
+void push_lexing_error(char *error_message,struct Source *src,struct Translation_Data *translation_data)
+{	/*queues a new Error carrying the row and column src holds right now*/
+	Queue_Push(translation_data->errors,get_error(error_message,src->current_row,src->current_column));
+}
+void push_parsing_error(char *error_message,struct token *token ,struct Translation_Data *translation_data)
+{	/*queues a new Error carrying the row and column stored in token*/
+	Queue_Push(translation_data->errors,get_error(error_message,token->row,token->column));
+}
+/*tells (1 or 0) whether the amount of queued errors differs from the one seen*/
+/*by the previous call and remembers the current amount for the next call*/
+char has_new_errors(struct Translation_Data *translation_data)
+{
+	size_t queued=translation_data->errors->size;
+
+	if(translation_data->hold_number_of_errors==queued)
+		return 0;
+	translation_data->hold_number_of_errors=queued;
+	return 1;
+}
+
+/*
+ * Destroys data together with both of its queues: every token and error
+ * still queued is popped and deleted before the queue itself is freed.
+ */
+void delete_translation_data(struct Translation_Data *data)
+{
+	while(data->tokens->size>0)
+		delete_token(Queue_Pop(data->tokens));
+	free(data->tokens);
+	while(data->errors->size>0)
+		delete_error(Queue_Pop(data->errors));
+	free(data->errors);
+	free(data);
+}
+void delete_source(struct Source *src)
+{	/*frees src, its text and also src_name, so the name has to be a heap string*/
+	free(src->src_name);
+	free(src->src);
+	free(src);
+}
+void delete_options(struct Options *options)
+{	/*options->source is left alone: parse_command_line stored argv[1] there*/
+	free(options);
+}
+/*frees error only: message is not owned, the lexer passes string literals for it*/
+void delete_error(struct Error *error)
+{
+	free(error);
+}
 #endif
diff --git a/program.h b/program.h
index dfb5098..8445fa3 100644
--- a/program.h
+++ b/program.h
@@ -1,35 +1,61 @@
 #ifndef PROGRAM_H
 #define PROGRAM_H
-#include "queue.h"
+#include <queue.h>
+#include <lexer.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+struct token;
 
 struct Source
 {
+	size_t src_size;
+	size_t where_in_src;
+	size_t current_column;
+	size_t current_row;
 	char *src_name;
 	char *src;
-	size_t src_size;
 
 };
 
 struct Options
 {
 	int print_tokens:1;
+	char *source;
 };
 	
+struct Error
+{	/*one reported problem, made by get_error and queued on Translation_Data.errors*/
+	char *message;
+	size_t row;	/*row and column come from the Source or the token the error was pushed for*/
+	size_t column;
+};
+
 struct Translation_Data
 {
 	struct Queue *errors;
 	struct Queue *tokens;
+	size_t hold_number_of_errors;
+};
+struct Program
+{
+	struct Source *source;
+	
 };
 
 struct Source* extract_source(char *src_name);
+struct Options* parse_command_line(char **argv);
 struct Translation_Data* get_translation_data();
-struct Source* get_source();
-struct Options* get_options();
+struct Error* get_error(char *message,size_t row,size_t column);
 
+void push_lexing_error(char *error_message,struct Source *src,struct Translation_Data *translation_data);
+void push_parsing_error(char *error_message,struct token *token ,struct Translation_Data *translation_data);
+char has_new_errors(struct Translation_Data *translation_data);
 
-void destroy_translation_data(struct Translation_Data *data);
-void destroy_source(struct Source *src);
-void destroy_options(struct Options *options);
+void delete_translation_data(struct Translation_Data *data);
+void delete_source(struct Source *src);
+void delete_options(struct Options *options);
+void delete_error(struct Error *error);
 
 
 #endif
diff --git a/queue.c b/queue.c
index 187519a..b395acf 100644
--- a/queue.c
+++ b/queue.c
@@ -24,7 +24,7 @@ void Queue_Push(struct Queue *q,void *data)
 		struct Queue_Node *temp=malloc(sizeof(struct Queue_Node));
 		q->last->prev=temp;
 		temp->data=data;
-
+		temp->prev=NULL;
 		q->last=temp;
 		++q->size;
 	}
diff --git a/test b/test
new file mode 100644
index 0000000..3549fc5
--- /dev/null
+++ b/test
@@ -0,0 +1,12 @@
+machine temp_switch
+[ 
+	state above_treshold;
+	starting state below_treshold;
+	
+	from below_treshold to above_treshold
+		on event above_temperature "10";
+	from above_treshold to below_treshold
+		on event below_temperature "9";
+	on state above_treshold
+		on event get_temp | html_encase | http_out ;
+];
diff --git a/test2 b/test2
new file mode 100644
index 0000000..a5342f0
--- /dev/null
+++ b/test2
@@ -0,0 +1,2 @@
+"1"
+"12"
author	Adam Branes <adam@adam>	2021-05-02 17:46:29 +0300
committer	Galin Simeonov <gts@volconst.com>	2021-07-15 18:00:07 +0300
commit	679cbe58c4e53f0163588a7731154f3afe2d25aa (patch)
tree	a2dbc2317a3f107899d60f5e68c8d6cf8d27e146
parent	a3e36c1918e63761dfc4d2221cca3636b98e93aa (diff)
download	MEGATRON-679cbe58c4e53f0163588a7731154f3afe2d25aa.tar.gz