diff options
author | Adam Branes <adam@adam> | 2021-05-02 17:46:29 +0300 |
---|---|---|
committer | Galin Simeonov <gts@volconst.com> | 2021-07-15 18:00:07 +0300 |
commit | 679cbe58c4e53f0163588a7731154f3afe2d25aa (patch) | |
tree | a2dbc2317a3f107899d60f5e68c8d6cf8d27e146 | |
parent | a3e36c1918e63761dfc4d2221cca3636b98e93aa (diff) | |
download | MEGATRON-679cbe58c4e53f0163588a7731154f3afe2d25aa.tar.gz |
lexer formed
-rw-r--r-- | .gdb_history | 133 | ||||
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | lexer.c | 114 | ||||
-rw-r--r-- | lexer.h | 25 | ||||
-rw-r--r-- | main.c | 37 | ||||
-rw-r--r-- | makefile | 16 | ||||
-rw-r--r-- | print.c | 77 | ||||
-rw-r--r-- | print.h | 11 | ||||
-rw-r--r-- | program.c | 109 | ||||
-rw-r--r-- | program.h | 40 | ||||
-rw-r--r-- | queue.c | 2 | ||||
-rw-r--r-- | test | 12 | ||||
-rw-r--r-- | test2 | 2 |
13 files changed, 564 insertions, 17 deletions
diff --git a/.gdb_history b/.gdb_history new file mode 100644 index 0000000..6b826e1 --- /dev/null +++ b/.gdb_history @@ -0,0 +1,133 @@ +r < test +where +b main +r < test +n +n +n +n +r +record +n +rs +record off +n +record stop +n +n +rs +r +n +r +s +n +n +n +r test +n +n +n +n +s +n +print src +print *src +n +n +n +n +n +n +n +n +n +n +n +n +n +n +n +n +s +n +print it +print *it +print *(struct token *)it->data +n +n +q +r test +n +b main +r test +n +s +print tokens +print *tokens +n +print i +print it +n +print it +n +print it +print *tokens +n +print it +n +print it +b lex +r test +s +n +print src->src +n +n +s +n +print src +print *src +n +print *src +rs +r +n +s +s +n +print word[0] +print src->src[src->where_in_src +print src->src[src->where_in_src] +n +print i +print sizes +print size +print word_size +n +b lex +d +b main +r test +n +n +n +r +n +s +n +s +n +r +n +s +record +n +r +record stop +n +s +n +s +n +n +q diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4ddaac8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +tags +*.exe +*.o @@ -2,18 +2,102 @@ #define LEXER_C #include <lexer.h> +#define IS_ID_CHAR(x) ( (x <= 'z' && x>='a') || ( x <= 'Z' && x >= 'A' ) || x=='_') +#define IS_DIGIT(x) ( x <= '9' && x >= '0' ) +#define IS_ID_THING(x) ( IS_ID_CHAR(x) || IS_DIGIT(x)) +#define LEX_ERROR(x) {push_lexing_error(x,src,translation_data); return get_token(src->src+src->where_in_src,0,KW_NOP,src->current_row,src->current_column);} + /* * placeholder very slow lexer that I will probabbly not replace */ void lex(struct Queue *token_destination,struct Source *src,struct Translation_Data *translation_data) { + skip_white_space(src); + while(src->where_in_src<src->src_size) + { + Queue_Push(token_destination,lex_step(src,translation_data)); + if(has_new_errors(translation_data)) + return; + else + skip_white_space(src); + } + Queue_Push(token_destination,get_token(NULL,0,KW_EOF,src->current_row,src->current_column)); } -struct token* get_token(char *data,size_t size) + + +struct token* lex_step(struct Source *src,struct Translation_Data *translation_data) +{ + if(check_and_move_if_on_word("machine",sizeof("machine")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("machine")+1,sizeof("machine")-1,KW_MACHINE,src->current_row,src->current_column); + if(check_and_move_if_on_word("state",sizeof("state")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("state")+1,sizeof("state")-1,KW_STATE,src->current_row,src->current_column); + if(check_and_move_if_on_word("from",sizeof("from")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("from")+1,sizeof("from")-1,KW_FROM,src->current_row,src->current_column); + if(check_and_move_if_on_word("to",sizeof("to")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("to")+1,sizeof("to")-1,KW_TO,src->current_row,src->current_column); + if(check_and_move_if_on_word("on",sizeof("on")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("on")+1,sizeof("on")-1,KW_ON,src->current_row,src->current_column); + if(check_and_move_if_on_word("[",sizeof("[")-1,src,0)) + return get_token(src->src+src->where_in_src-sizeof("[")+1,sizeof("[")-1,KW_OPEN_SQUARE,src->current_row,src->current_column); + if(check_and_move_if_on_word("]",sizeof("]")-1,src,0)) + return get_token(src->src+src->where_in_src-sizeof("]")+1,sizeof("]")-1,KW_CLOSE_SQUARE,src->current_row,src->current_column); + if(check_and_move_if_on_word(";",sizeof(";")-1,src,0)) + return get_token(src->src+src->where_in_src-sizeof(";")+1,sizeof(";")-1,KW_SEMI_COLUMN,src->current_row,src->current_column); + if(check_and_move_if_on_word("|",sizeof("|")-1,src,0)) + return get_token(src->src+src->where_in_src-sizeof("|")+1,sizeof("|")-1,KW_PIPE,src->current_row,src->current_column); + if(check_and_move_if_on_word("starting",sizeof("starting")-1,src,1)) + return get_token(src->src+src->where_in_src-sizeof("starting")+1,sizeof("starting")-1,KW_STARTING,src->current_row,src->current_column); + + + + + + + if(IS_ID_CHAR(src->src[src->where_in_src])) /*check for id*/ + { + size_t i; + + ++src->where_in_src; + for( i=src->where_in_src ; + i < src->src_size && IS_ID_THING(src->src[i]); + ++i); + + + i-=src->where_in_src; + src->where_in_src+=i; + return get_token(src->src + src->where_in_src - i - 1, i + 1, KW_ID,src->current_row,src->current_column); + }else if(src->src[src->where_in_src]=='"') /*check for string literal*/ + { + size_t i; + ++src->where_in_src; + for( i=src->where_in_src ; + src->src[i]!='"' && i< src->src_size; + ++i); + + if(i==src->src_size) + { + LEX_ERROR("Unexpected end of file"); + }else + { + i-=src->where_in_src; + src->where_in_src+=i+1; + return get_token(src->src + src->where_in_src-i-1, i, KW_STRING,src->current_row,src->current_column); + } + + }else + { + LEX_ERROR("Unexpected symbol"); + } +} +struct token* get_token(char *data,size_t size,enum Keyword type,size_t row,size_t column) { struct token *ret; ret=malloc(sizeof(struct token)); ret->data=data; ret->size=size; + ret->type=type; + ret->row=row; + ret->column=column; return ret; } @@ -22,4 +106,32 @@ void delete_token(struct token *token) free(token); } +/*word_size without the ending '\0' */ +static char check_and_move_if_on_word(char *word,size_t word_size,struct Source *src,char needs_space_after) +{ + size_t i; + if(src->where_in_src + word_size > src->src_size) + return 0; + + for(i=0;i<word_size && word[i]==src->src[src->where_in_src+i];++i); + + if(i<word_size) + { + return 0; + } + else if( (needs_space_after && isspace(src->src[src->where_in_src+i])) || !needs_space_after ) + { + src->where_in_src+=i; + return 1; + } + else + { + return 0; + } +} +void skip_white_space(struct Source *src) +{ + while(src->where_in_src<src->src_size && isspace(src->src[src->where_in_src])) + ++src->where_in_src; +} #endif @@ -1,8 +1,12 @@ #ifndef LEXER_H #define LEXER_H +#include <ctype.h> //isspace #include <program.h> #include <queue.h> +struct Translation_Data; +struct Source; + enum Keyword { KW_MACHINE, @@ -11,16 +15,33 @@ enum Keyword KW_TO, KW_ON, KW_ID, + KW_STRING, + KW_NOP, + KW_EOF, + KW_OPEN_SQUARE, + KW_CLOSE_SQUARE, + KW_PIPE, + KW_SEMI_COLUMN, + KW_STARTING, }; struct token { size_t size; enum Keyword type; char *data; + size_t row; + size_t column; }; + void lex(struct Queue *token_destination,struct Source *src,struct Translation_Data *translation_data); -struct token* get_token(char *data,size_t size); -void delete_token(struct token *token); +struct token* lex_step(struct Source *src,struct Translation_Data *translation_data); +struct token* get_token(char *data,size_t size,enum Keyword type,size_t row,size_t column); +void skip_white_space(struct Source *src); + +void delete_token(struct token *token); +/*:X*/ +static char check_and_move_if_on_word(char *word,size_t word_size,struct Source *src,char needs_space_after); + #endif @@ -1,9 +1,44 @@ #include<stdio.h> +#include<program.h> #include<lexer.h> +#include<string.h> +#include<print.h> -int main() +int main(int argc,char **argv) { + struct Options *options; + struct Source *source; + struct Program *program; + struct Translation_Data *translation_data; + + options=parse_command_line(argv); + if(options->source==NULL) + { + printf("No source file specified\n"); + return 0; + } + source=extract_source(strdup(options->source)); + translation_data=get_translation_data(); + + + if(options->print_tokens) + { + lex(translation_data->tokens,source,translation_data); + if(translation_data->errors->size>0) + { + printf("There was an error!\n"); + print_tokens(translation_data->tokens); + return 1; + }else + { + print_tokens(translation_data->tokens); + } + + } + + delete_source(source); + delete_options(options); return 0; } @@ -1,9 +1,15 @@ INCLUDE_DIRS=-I . -main.exe : main.c lexer.o queue.o - gcc main.c lexer.o queue.o -o main.exe ${INCLUDE_DIRS} +CFLAGS="-g" +main.exe : main.c lexer.o queue.o print.o program.o + gcc ${CFLAGS} main.c lexer.o queue.o print.o program.o -o main.exe ${INCLUDE_DIRS} lexer.o : lexer.c lexer.h - gcc -c lexer.c -o lexer.o ${INCLUDE_DIRS} + gcc ${CFLAGS} -c lexer.c -o lexer.o ${INCLUDE_DIRS} queue.o : queue.c queue.h - gcc -c queue.c -o queue.o ${INCLUDE_DIRS} + gcc ${CFLAGS} -c queue.c -o queue.o ${INCLUDE_DIRS} +print.o : print.c print.h + gcc ${CFLAGS} -c print.c -o print.o ${INCLUDE_DIRS} +program.o : program.c program.h + gcc ${CFLAGS} -c program.c -o program.o ${INCLUDE_DIRS} + clear: - rm -rf lexer.o main.exe queue.o + rm -rf lexer.o main.exe queue.o print.o program.o @@ -0,0 +1,77 @@ +#ifndef PRINT_C +#define PRINT_C PRINT_C +#include<print.h> + +void print_keyword_enum(enum Keyword code) +{ + switch(code) + { + case KW_MACHINE: + printf("KW_MACHINE"); + break; + case KW_STATE: + printf("KW_STATE"); + break; + case KW_FROM: + printf("KW_FROM"); + break; + case KW_TO: + printf("KW_TO"); + break; + case KW_ON: + printf("KW_ON"); + break; + case KW_ID: + printf("KW_ID"); + break; + case KW_STRING: + printf("KW_STRING"); + break; + case KW_NOP: + printf("KW_NOP"); + break; + case KW_EOF: + printf("KW_EOF"); + break; + case KW_OPEN_SQUARE: + printf("KW_OPEN_SQUARE"); + break; + case KW_CLOSE_SQUARE: + printf("KW_CLOSE_SQUARE"); + break; + case KW_PIPE: + printf("KW_PIPE"); + break; + case KW_SEMI_COLUMN: + printf("KW_SEMI_COLUMN"); + break; + case KW_STARTING: + printf("KW_STARTING"); + break; + default: + printf("LEXERROR"); + } +} +void print_token(struct token *token) +{ + size_t i; + + printf("[ "); + print_keyword_enum(token->type); + printf(" "); + for(i=0;i<token->size;++i) + printf("%c",token->data[i]); + printf(" ] "); + +} +void print_tokens(struct Queue *tokens) +{ + struct Queue_Node *it; + for(it=tokens->first;it!=NULL;it=it->prev) + { + print_token( (struct token*)(it->data)); + printf(" "); + } +} + +#endif @@ -0,0 +1,11 @@ +#ifndef PRINT_H +#define PRINT_H PRINT_H +#include<stdio.h> +#include<lexer.h> +#include<queue.h> + + +void print_keyword_enum(enum Keyword code); +void print_token(struct token *token); +void print_tokens(struct Queue *tokens); +#endif @@ -1,5 +1,114 @@ #ifndef PROGRAM_C #define PROGRAM_C +#include<program.h> +struct Source* extract_source(char *src_name) +{ + FILE *file; + struct Source *ret; + + file=fopen(src_name,"r"); + if(file==NULL) + return NULL; + if(fseek(file,0L,SEEK_END)!=0) + return NULL; + + ret=malloc(sizeof(struct Source)); + ret->src_size=ftell(file); + ret->where_in_src=0; + ret->src_name=src_name; + ret->src=malloc(ret->src_size); + ret->current_column=0; + ret->current_row=0; + + fseek(file,0L,SEEK_SET); + + + fread(ret->src,sizeof(char),ret->src_size,file); + + fclose(file); + return ret; +} +struct Options* parse_command_line(char **argv) +{ + struct Options *ret; + size_t i; + + ret=malloc(sizeof(struct Options)); + ret->print_tokens=1; + ret->source=argv[1]; + return ret; +} +struct Translation_Data* get_translation_data() +{ + struct Translation_Data *ret; + ret=malloc(sizeof(struct Translation_Data)); + ret->errors=malloc(sizeof(struct Queue)); + ret->tokens=malloc(sizeof(struct Queue)); + + Queue_Init(ret->errors); + Queue_Init(ret->tokens); + + ret->hold_number_of_errors=0; + + return ret; +} +struct Error* get_error(char *message,size_t row,size_t column) +{ + struct Error *ret; + ret=malloc(sizeof(struct Error)); + ret->message=message; + ret->row=row; + ret->column=column; +} +void push_lexing_error(char *error_message,struct Source *src,struct Translation_Data *translation_data) +{ + Queue_Push(translation_data->errors,get_error(error_message,src->current_row,src->current_column)); +} +void push_parsing_error(char *error_message,struct token *token ,struct Translation_Data *translation_data) +{ + Queue_Push(translation_data->errors,get_error(error_message,token->row,token->column)); +} +char has_new_errors(struct Translation_Data *translation_data) +{ + if(translation_data->hold_number_of_errors!=translation_data->errors->size) + { + translation_data->hold_number_of_errors=translation_data->errors->size; + return 1; + }else + { + return 0; + } +} + +void delete_translation_data(struct Translation_Data *data) +{ + struct Error *hold_error; + struct token *hold_token; + + while(data->tokens->size>0) + delete_token(Queue_Pop(data->tokens)); + free(data->tokens); + while(data->errors->size>0) + delete_error(Queue_Pop(data->errors)); + free(data->errors); + + free(data); +} +void delete_source(struct Source *src) +{ + free(src->src_name); + free(src->src); + free(src); +} +void delete_options(struct Options *options) +{ + free(options); +} +void delete_error(struct Error *error) +{ + free(error->message); + free(error); +} #endif @@ -1,35 +1,61 @@ #ifndef PROGRAM_H #define PROGRAM_H -#include "queue.h" +#include <queue.h> +#include <lexer.h> +#include <stdlib.h> +#include <stdio.h> + +struct token; struct Source { + size_t src_size; + size_t where_in_src; + size_t current_column; + size_t current_row; char *src_name; char *src; - size_t src_size; }; struct Options { int print_tokens:1; + char *source; }; +struct Error +{ + char *message; + size_t row; + size_t column; +}; + struct Translation_Data { struct Queue *errors; struct Queue *tokens; + size_t hold_number_of_errors; +}; +struct Program +{ + struct Source *source; + }; struct Source* extract_source(char *src_name); +struct Options* parse_command_line(char **argv); struct Translation_Data* get_translation_data(); -struct Source* get_source(); -struct Options* get_options(); +struct Error* get_error(char *message,size_t row,size_t column); +void push_lexing_error(char *error_message,struct Source *src,struct Translation_Data *translation_data); +void push_parsing_error(char *error_message,struct token *token ,struct Translation_Data *translation_data); +char has_new_errors(struct Translation_Data *translation_data); -void destroy_translation_data(struct Translation_Data *data); -void destroy_source(struct Source *src); -void destroy_options(struct Options *options); +void delete_translation_data(struct Translation_Data *data); +void delete_source(struct Source *src); +void delete_options(struct Options *options); +void delete_error(struct Error *error); #endif @@ -24,7 +24,7 @@ void Queue_Push(struct Queue *q,void *data) struct Queue_Node *temp=malloc(sizeof(struct Queue_Node)); q->last->prev=temp; temp->data=data; - + temp->prev=NULL; q->last=temp; ++q->size; } @@ -0,0 +1,12 @@ +machine temp_switch +[ + state above_treshold; + starting state below_treshold; + + from below_treshold to above_treshold + on event above_temperature "10"; + from above_treshold to below_treshold + on event below_temperature "9"; + on state above_treshold + on event get_temp | html_encase | http_out ; +]; @@ -0,0 +1,2 @@ +"1" +"12" |