aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdam Branes <adam@adam>2021-05-02 17:46:29 +0300
committerGalin Simeonov <gts@volconst.com>2021-07-15 18:00:07 +0300
commit679cbe58c4e53f0163588a7731154f3afe2d25aa (patch)
treea2dbc2317a3f107899d60f5e68c8d6cf8d27e146
parenta3e36c1918e63761dfc4d2221cca3636b98e93aa (diff)
downloadMEGATRON-679cbe58c4e53f0163588a7731154f3afe2d25aa.tar.gz
lexer formed
-rw-r--r--.gdb_history133
-rw-r--r--.gitignore3
-rw-r--r--lexer.c114
-rw-r--r--lexer.h25
-rw-r--r--main.c37
-rw-r--r--makefile16
-rw-r--r--print.c77
-rw-r--r--print.h11
-rw-r--r--program.c109
-rw-r--r--program.h40
-rw-r--r--queue.c2
-rw-r--r--test12
-rw-r--r--test22
13 files changed, 564 insertions, 17 deletions
diff --git a/.gdb_history b/.gdb_history
new file mode 100644
index 0000000..6b826e1
--- /dev/null
+++ b/.gdb_history
@@ -0,0 +1,133 @@
+r < test
+where
+b main
+r < test
+n
+n
+n
+n
+r
+record
+n
+rs
+record off
+n
+record stop
+n
+n
+rs
+r
+n
+r
+s
+n
+n
+n
+r test
+n
+n
+n
+n
+s
+n
+print src
+print *src
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+n
+s
+n
+print it
+print *it
+print *(struct token *)it->data
+n
+n
+q
+r test
+n
+b main
+r test
+n
+s
+print tokens
+print *tokens
+n
+print i
+print it
+n
+print it
+n
+print it
+print *tokens
+n
+print it
+n
+print it
+b lex
+r test
+s
+n
+print src->src
+n
+n
+s
+n
+print src
+print *src
+n
+print *src
+rs
+r
+n
+s
+s
+n
+print word[0]
+print src->src[src->where_in_src
+print src->src[src->where_in_src]
+n
+print i
+print sizes
+print size
+print word_size
+n
+b lex
+d
+b main
+r test
+n
+n
+n
+r
+n
+s
+n
+s
+n
+r
+n
+s
+record
+n
+r
+record stop
+n
+s
+n
+s
+n
+n
+q
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4ddaac8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+tags
+*.exe
+*.o
diff --git a/lexer.c b/lexer.c
index 4ade447..546c727 100644
--- a/lexer.c
+++ b/lexer.c
@@ -2,18 +2,102 @@
#define LEXER_C
#include <lexer.h>
+/* ASCII character classes used by the lexer. Arguments are evaluated more than once, so pass side-effect free expressions. */
+#define IS_ID_CHAR(x) ( ((x) <= 'z' && (x) >= 'a') || ((x) <= 'Z' && (x) >= 'A') || (x) == '_' )
+#define IS_DIGIT(x) ( (x) <= '9' && (x) >= '0' )
+#define IS_ID_THING(x) ( IS_ID_CHAR(x) || IS_DIGIT(x) )
+/* Pushes a lexing error and returns a zero-length KW_NOP token from the enclosing function; needs `src` and `translation_data` in scope. */
+#define LEX_ERROR(x) do { push_lexing_error((x),src,translation_data); return get_token(src->src+src->where_in_src,0,KW_NOP,src->current_row,src->current_column); } while(0)
+
/*
* placeholder very slow lexer that I will probably not replace
*/
void lex(struct Queue *token_destination,struct Source *src,struct Translation_Data *translation_data)
{
+	/*
+	 * Pushes one token per lex_step() call onto token_destination until the
+	 * input is exhausted, then appends a KW_EOF token. White space is skipped
+	 * before every token. If a step records a new error, lexing stops right
+	 * after pushing that step's token and no KW_EOF token is appended.
+	 */
+	for(skip_white_space(src); src->where_in_src<src->src_size; skip_white_space(src))
+	{
+		struct token *next_token;
+
+		next_token=lex_step(src,translation_data);
+		Queue_Push(token_destination,next_token);
+		if(has_new_errors(translation_data))
+			return;
+	}
+	Queue_Push(token_destination,get_token(NULL,0,KW_EOF,src->current_row,src->current_column));
}
-struct token* get_token(char *data,size_t size)
+
+
+/*
+ * Lexes a single token starting at src->where_in_src and advances past it.
+ *
+ * Fixed spellings are tried first, in table order; then identifiers
+ * ([A-Za-z_][A-Za-z_0-9]*); then double-quoted string literals, whose token
+ * covers the text between the quotes (there is no escape syntax).
+ * Token data points into src->src, nothing is copied.
+ *
+ * On an unterminated string or an unexpected character a lexing error is
+ * pushed to translation_data and a zero-length KW_NOP token is returned.
+ * When no input is left a KW_EOF token is returned.
+ */
+struct token* lex_step(struct Source *src,struct Translation_Data *translation_data)
+{
+	/* needs_space_after is set for words and clear for punctuation */
+	static const struct
+	{
+		char *text;
+		size_t size;
+		enum Keyword type;
+		char needs_space_after;
+	} fixed_tokens[]=
+	{
+		{"machine",  sizeof("machine")-1,  KW_MACHINE,      1},
+		{"state",    sizeof("state")-1,    KW_STATE,        1},
+		{"from",     sizeof("from")-1,     KW_FROM,         1},
+		{"to",       sizeof("to")-1,       KW_TO,           1},
+		{"on",       sizeof("on")-1,       KW_ON,           1},
+		{"[",        sizeof("[")-1,        KW_OPEN_SQUARE,  0},
+		{"]",        sizeof("]")-1,        KW_CLOSE_SQUARE, 0},
+		{";",        sizeof(";")-1,        KW_SEMI_COLUMN,  0},
+		{"|",        sizeof("|")-1,        KW_PIPE,         0},
+		{"starting", sizeof("starting")-1, KW_STARTING,     1},
+	};
+	size_t start;
+	size_t i;
+
+	/* nothing left to read: do not touch src->src[src->where_in_src] */
+	if(src->where_in_src>=src->src_size)
+		return get_token(NULL,0,KW_EOF,src->current_row,src->current_column);
+
+	for(i=0;i<sizeof(fixed_tokens)/sizeof(fixed_tokens[0]);++i)
+	{
+		if(check_and_move_if_on_word(fixed_tokens[i].text,fixed_tokens[i].size,src,fixed_tokens[i].needs_space_after))
+		{
+			/* the cursor is already past the word, so step back to its first character */
+			return get_token(src->src+src->where_in_src-fixed_tokens[i].size,fixed_tokens[i].size,fixed_tokens[i].type,src->current_row,src->current_column);
+		}
+	}
+
+	start=src->where_in_src;
+
+	if(IS_ID_CHAR(src->src[start])) /*check for id*/
+	{
+		for(i=start+1; i<src->src_size && IS_ID_THING(src->src[i]); ++i);
+
+		src->where_in_src=i;
+		return get_token(src->src+start,i-start,KW_ID,src->current_row,src->current_column);
+	}
+
+	if(src->src[start]=='"') /*check for string literal*/
+	{
+		/* step over the opening quote; the error token below is reported from here */
+		src->where_in_src=start+1;
+
+		/* test the bound before the character so the scan never reads past the buffer */
+		for(i=start+1; i<src->src_size && src->src[i]!='"'; ++i);
+
+		if(i==src->src_size)
+		{
+			LEX_ERROR("Unexpected end of file");
+		}
+
+		/* continue after the closing quote; the token excludes both quotes */
+		src->where_in_src=i+1;
+		return get_token(src->src+start+1,i-start-1,KW_STRING,src->current_row,src->current_column);
+	}
+
+	LEX_ERROR("Unexpected symbol");
+}
+/*
+ * Allocates a token and fills in every field from the arguments.
+ * data is stored as given, not copied. Release the token with delete_token().
+ * NOTE(review): the malloc() result is used without a NULL check.
+ */
+struct token* get_token(char *data,size_t size,enum Keyword type,size_t row,size_t column)
{
struct token *ret;
ret=malloc(sizeof(struct token));
ret->data=data;
ret->size=size;
+ ret->type=type;
+ ret->row=row;
+ ret->column=column;
return ret;
}
@@ -22,4 +106,32 @@ void delete_token(struct token *token)
free(token);
}
+/*
+ * Tests whether the input at src->where_in_src starts with word and, if so,
+ * moves src->where_in_src past it.
+ *
+ * word_size is the length of word without the ending '\0'.
+ * When needs_space_after is non-zero the word must be followed by white
+ * space or by the end of the input; otherwise whatever follows is ignored.
+ *
+ * Returns 1 on a match and 0 otherwise; src is left untouched on 0.
+ */
+static char check_and_move_if_on_word(char *word,size_t word_size,struct Source *src,char needs_space_after)
+{
+	size_t i;
+	size_t end;
+
+	if(src->where_in_src + word_size > src->src_size)
+		return 0;
+
+	for(i=0;i<word_size;++i)
+	{
+		if(word[i]!=src->src[src->where_in_src+i])
+			return 0;
+	}
+
+	end=src->where_in_src+word_size;
+
+	/*
+	 * Only look at the following character when there is one: a word that
+	 * ends exactly at the end of the input is delimited by the end itself.
+	 * The cast keeps isspace() defined for bytes above 0x7f.
+	 */
+	if(needs_space_after && end<src->src_size && !isspace((unsigned char)src->src[end]))
+		return 0;
+
+	src->where_in_src=end;
+	return 1;
+}
+/*
+ * Advances src->where_in_src to the next character that is not white space,
+ * or to src->src_size if only white space is left.
+ */
+void skip_white_space(struct Source *src)
+{
+	/* isspace() is only defined for unsigned char values and EOF, hence the cast */
+	while(src->where_in_src<src->src_size && isspace((unsigned char)src->src[src->where_in_src]))
+		++src->where_in_src;
+}
#endif
diff --git a/lexer.h b/lexer.h
index 27b69d5..e69e23b 100644
--- a/lexer.h
+++ b/lexer.h
@@ -1,8 +1,12 @@
#ifndef LEXER_H
#define LEXER_H
+#include <ctype.h> //isspace
#include <program.h>
#include <queue.h>
+struct Translation_Data;
+struct Source;
+
enum Keyword
{
KW_MACHINE,
@@ -11,16 +15,33 @@ enum Keyword
KW_TO,
KW_ON,
KW_ID,
+ KW_STRING,
+ KW_NOP,
+ KW_EOF,
+ KW_OPEN_SQUARE,
+ KW_CLOSE_SQUARE,
+ KW_PIPE,
+ KW_SEMI_COLUMN,
+ KW_STARTING,
};
struct token
{
size_t size;
enum Keyword type;
char *data;
+ size_t row;
+ size_t column;
};
+
void lex(struct Queue *token_destination,struct Source *src,struct Translation_Data *translation_data);
-struct token* get_token(char *data,size_t size);
-void delete_token(struct token *token);
+struct token* lex_step(struct Source *src,struct Translation_Data *translation_data);
+struct token* get_token(char *data,size_t size,enum Keyword type,size_t row,size_t column);
+void skip_white_space(struct Source *src);
+
+void delete_token(struct token *token);
+/*:X*/
+static char check_and_move_if_on_word(char *word,size_t word_size,struct Source *src,char needs_space_after);
+
#endif
diff --git a/main.c b/main.c
index 79018d6..bfc09c0 100644
--- a/main.c
+++ b/main.c
@@ -1,9 +1,44 @@
#include<stdio.h>
+#include<program.h>
#include<lexer.h>
+#include<string.h>
+#include<print.h>
-int main()
+/*
+ * Entry point: lexes the file named by the first command line argument and
+ * prints the resulting tokens.
+ *
+ * Returns 0 on success and when no source file was given; 1 when memory runs
+ * out, the source file cannot be loaded or lexing reports an error.
+ */
+int main(int argc,char **argv)
{
+	struct Options *options;
+	struct Source *source;
+	struct Translation_Data *translation_data;
+	char *source_name;
+
+	/* parse_command_line() reads argv[1], which is only safe to look at when argc>=1 */
+	if(argc<2)
+	{
+		printf("No source file specified\n");
+		return 0;
+	}
+
+	options=parse_command_line(argv);
+	if(options==NULL)
+	{
+		printf("Out of memory\n");
+		return 1;
+	}
+	if(options->source==NULL)
+	{
+		printf("No source file specified\n");
+		delete_options(options);
+		return 0;
+	}
+
+	/* the copy is handed to the Source, which frees it in delete_source() */
+	source_name=strdup(options->source);
+	source=(source_name==NULL ? NULL : extract_source(source_name));
+	if(source==NULL)
+	{
+		printf("Could not read source file %s\n",options->source);
+		free(source_name);
+		delete_options(options);
+		return 1;
+	}
+
+	/*
+	 * NOTE(review): translation_data is not released on any path below.
+	 * Confirm how delete_error() treats Error.message (lexing errors pass
+	 * string literals) before calling delete_translation_data() here.
+	 */
+	translation_data=get_translation_data();
+	if(translation_data==NULL)
+	{
+		printf("Out of memory\n");
+		delete_source(source);
+		delete_options(options);
+		return 1;
+	}
+
+	if(options->print_tokens)
+	{
+		lex(translation_data->tokens,source,translation_data);
+		if(translation_data->errors->size>0)
+		{
+			printf("There was an error!\n");
+			/* tokens point into source->src, so print before the source is deleted */
+			print_tokens(translation_data->tokens);
+			delete_source(source);
+			delete_options(options);
+			return 1;
+		}
+		print_tokens(translation_data->tokens);
+	}
+
+	delete_source(source);
+	delete_options(options);
return 0;
}
diff --git a/makefile b/makefile
index 23fd10a..457b589 100644
--- a/makefile
+++ b/makefile
@@ -1,9 +1,15 @@
INCLUDE_DIRS=-I .
-main.exe : main.c lexer.o queue.o
- gcc main.c lexer.o queue.o -o main.exe ${INCLUDE_DIRS}
+CFLAGS="-g"
+main.exe : main.c lexer.o queue.o print.o program.o
+ gcc ${CFLAGS} main.c lexer.o queue.o print.o program.o -o main.exe ${INCLUDE_DIRS}
lexer.o : lexer.c lexer.h
- gcc -c lexer.c -o lexer.o ${INCLUDE_DIRS}
+ gcc ${CFLAGS} -c lexer.c -o lexer.o ${INCLUDE_DIRS}
queue.o : queue.c queue.h
- gcc -c queue.c -o queue.o ${INCLUDE_DIRS}
+ gcc ${CFLAGS} -c queue.c -o queue.o ${INCLUDE_DIRS}
+print.o : print.c print.h
+ gcc ${CFLAGS} -c print.c -o print.o ${INCLUDE_DIRS}
+program.o : program.c program.h
+ gcc ${CFLAGS} -c program.c -o program.o ${INCLUDE_DIRS}
+
clear:
- rm -rf lexer.o main.exe queue.o
+ rm -rf lexer.o main.exe queue.o print.o program.o
diff --git a/print.c b/print.c
new file mode 100644
index 0000000..a9ec939
--- /dev/null
+++ b/print.c
@@ -0,0 +1,77 @@
+#ifndef PRINT_C
+#define PRINT_C PRINT_C
+#include<print.h>
+
+/*
+ * Prints the enumerator name of code to stdout, without a trailing newline.
+ * Values that are not listed below print as LEXERROR.
+ */
+void print_keyword_enum(enum Keyword code)
+{
+	static const char *const names[]=
+	{
+		[KW_MACHINE]="KW_MACHINE",
+		[KW_STATE]="KW_STATE",
+		[KW_FROM]="KW_FROM",
+		[KW_TO]="KW_TO",
+		[KW_ON]="KW_ON",
+		[KW_ID]="KW_ID",
+		[KW_STRING]="KW_STRING",
+		[KW_NOP]="KW_NOP",
+		[KW_EOF]="KW_EOF",
+		[KW_OPEN_SQUARE]="KW_OPEN_SQUARE",
+		[KW_CLOSE_SQUARE]="KW_CLOSE_SQUARE",
+		[KW_PIPE]="KW_PIPE",
+		[KW_SEMI_COLUMN]="KW_SEMI_COLUMN",
+		[KW_STARTING]="KW_STARTING",
+	};
+	const char *name="LEXERROR";
+
+	/* slots without an initializer are NULL and fall back to LEXERROR */
+	if((size_t)code<sizeof(names)/sizeof(names[0]) && names[code]!=NULL)
+		name=names[code];
+
+	printf("%s",name);
+}
+/*
+ * Prints one token to stdout as "[ <keyword name> <token text> ] ".
+ * The text is written as exactly token->size characters from token->data.
+ */
+void print_token(struct token *token)
+{
+	const char *cursor=token->data;
+	size_t remaining=token->size;
+
+	fputs("[ ",stdout);
+	print_keyword_enum(token->type);
+	putchar(' ');
+	for(;remaining>0;--remaining)
+	{
+		putchar(*cursor);
+		++cursor;
+	}
+	fputs(" ] ",stdout);
+
+}
+/*
+ * Prints every token in the queue, starting at tokens->first and following
+ * the prev links, each followed by a single space.
+ */
+void print_tokens(struct Queue *tokens)
+{
+	struct Queue_Node *node=tokens->first;
+
+	while(node!=NULL)
+	{
+		struct token *current=(struct token*)(node->data);
+
+		print_token(current);
+		printf(" ");
+		node=node->prev;
+	}
+}
+
+#endif
diff --git a/print.h b/print.h
new file mode 100644
index 0000000..c4e3bec
--- /dev/null
+++ b/print.h
@@ -0,0 +1,11 @@
+#ifndef PRINT_H
+#define PRINT_H PRINT_H
+#include<stdio.h>
+#include<lexer.h>
+#include<queue.h>
+
+
+void print_keyword_enum(enum Keyword code);
+void print_token(struct token *token);
+void print_tokens(struct Queue *tokens);
+#endif
diff --git a/program.c b/program.c
index 6fe3a2f..c2140c1 100644
--- a/program.c
+++ b/program.c
@@ -1,5 +1,114 @@
#ifndef PROGRAM_C
#define PROGRAM_C
+#include<program.h>
+/*
+ * Loads the whole file named src_name into a newly allocated Source.
+ *
+ * On success the Source keeps src_name (delete_source() frees it) and
+ * src_size is the number of characters actually read. One extra '\0' is
+ * stored after the contents; it is not counted in src_size.
+ *
+ * Returns NULL if the file cannot be opened, measured or read, or if memory
+ * runs out. src_name is not freed in that case.
+ */
+struct Source* extract_source(char *src_name)
+{
+	FILE *file;
+	struct Source *ret=NULL;
+	long size;
+
+	file=fopen(src_name,"r");
+	if(file==NULL)
+		return NULL;
+
+	if(fseek(file,0L,SEEK_END)!=0)
+		goto fail;
+	size=ftell(file);
+	if(size<0 || fseek(file,0L,SEEK_SET)!=0)
+		goto fail;
+
+	ret=malloc(sizeof *ret);
+	if(ret==NULL)
+		goto fail;
+	/* +1 keeps the request non-zero for empty files and leaves room for the '\0' */
+	ret->src=malloc((size_t)size+1);
+	if(ret->src==NULL)
+		goto fail;
+
+	/* text mode may deliver fewer characters than ftell() reported, so trust fread() */
+	ret->src_size=fread(ret->src,sizeof(char),(size_t)size,file);
+	if(ferror(file))
+		goto fail;
+	ret->src[ret->src_size]='\0';
+
+	ret->where_in_src=0;
+	ret->src_name=src_name;
+	ret->current_column=0;
+	ret->current_row=0;
+
+	fclose(file);
+	return ret;
+
+fail:
+	if(ret!=NULL)
+		free(ret->src);
+	free(ret);
+	fclose(file);
+	return NULL;
+}
+/*
+ * Builds the program options from the command line.
+ *
+ * Token printing is always switched on. source is argv[1] as given (not
+ * copied), or NULL when there is no such argument.
+ * Returns NULL if memory runs out; release the result with delete_options().
+ */
+struct Options* parse_command_line(char **argv)
+{
+	struct Options *ret;
+
+	ret=malloc(sizeof *ret);
+	if(ret==NULL)
+		return NULL;
+
+	ret->print_tokens=1;
+	/* argv[1] may only be read when argv[0] exists; the array ends at the first NULL */
+	ret->source=(argv!=NULL && argv[0]!=NULL ? argv[1] : NULL);
+	return ret;
+}
+/*
+ * Allocates a Translation_Data with two initialised, separately allocated
+ * queues (errors and tokens) and an error counter of zero.
+ * Returns NULL if memory runs out.
+ */
+struct Translation_Data* get_translation_data()
+{
+	struct Translation_Data *ret;
+
+	ret=malloc(sizeof *ret);
+	if(ret==NULL)
+		return NULL;
+
+	ret->errors=malloc(sizeof *ret->errors);
+	ret->tokens=malloc(sizeof *ret->tokens);
+	if(ret->errors==NULL || ret->tokens==NULL)
+	{
+		/* free(NULL) is a no-op, so whichever allocation succeeded is released */
+		free(ret->errors);
+		free(ret->tokens);
+		free(ret);
+		return NULL;
+	}
+
+	Queue_Init(ret->errors);
+	Queue_Init(ret->tokens);
+
+	ret->hold_number_of_errors=0;
+
+	return ret;
+}
+/*
+ * Allocates an Error holding message and the given source position.
+ * message is stored as passed in, it is not copied.
+ * NOTE(review): allocation failure is not handled; callers push the result
+ * straight onto a queue.
+ */
+struct Error* get_error(char *message,size_t row,size_t column)
+{
+	struct Error *ret;
+
+	ret=malloc(sizeof *ret);
+	ret->message=message;
+	ret->row=row;
+	ret->column=column;
+	/* the function used to fall off its end without returning the new Error */
+	return ret;
+}
+/* Queues an error located at the lexer's current row and column in src. */
+void push_lexing_error(char *error_message,struct Source *src,struct Translation_Data *translation_data)
+{
+	struct Error *error;
+
+	error=get_error(error_message,src->current_row,src->current_column);
+	Queue_Push(translation_data->errors,error);
+}
+/* Queues an error located at the row and column recorded in token. */
+void push_parsing_error(char *error_message,struct token *token ,struct Translation_Data *translation_data)
+{
+	struct Error *error;
+
+	error=get_error(error_message,token->row,token->column);
+	Queue_Push(translation_data->errors,error);
+}
+/*
+ * Reports whether the error queue size differs from the size seen on the
+ * previous call. Returns 1 and remembers the new size when it does, 0 when
+ * nothing changed.
+ */
+char has_new_errors(struct Translation_Data *translation_data)
+{
+	if(translation_data->hold_number_of_errors==translation_data->errors->size)
+		return 0;
+
+	translation_data->hold_number_of_errors=translation_data->errors->size;
+	return 1;
+}
+
+/*
+ * Deletes every queued token and error, then both queues and data itself.
+ * Passing NULL does nothing.
+ */
+void delete_translation_data(struct Translation_Data *data)
+{
+	if(data==NULL)
+		return;
+
+	while(data->tokens->size>0)
+		delete_token(Queue_Pop(data->tokens));
+	free(data->tokens);
+
+	while(data->errors->size>0)
+		delete_error(Queue_Pop(data->errors));
+	free(data->errors);
+
+	free(data);
+}
+/*
+ * Frees a Source together with its name and its contents.
+ * Passing NULL does nothing; extract_source() returns NULL on failure.
+ */
+void delete_source(struct Source *src)
+{
+	if(src==NULL)
+		return;
+
+	free(src->src_name);
+	free(src->src);
+	free(src);
+}
+/* Frees the Options object itself; the source string it points to is not freed. */
+void delete_options(struct Options *options)
+{
+ free(options);
+}
+/*
+ * Frees an Error. Passing NULL does nothing.
+ *
+ * The message is left alone: get_error() stores the caller's pointer without
+ * copying it, and the lexer passes string literals, which must never reach
+ * free().
+ */
+void delete_error(struct Error *error)
+{
+	free(error);
+}
#endif
diff --git a/program.h b/program.h
index dfb5098..8445fa3 100644
--- a/program.h
+++ b/program.h
@@ -1,35 +1,61 @@
#ifndef PROGRAM_H
#define PROGRAM_H
-#include "queue.h"
+#include <queue.h>
+#include <lexer.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+struct token;
struct Source
{
+ size_t src_size;
+ size_t where_in_src;
+ size_t current_column;
+ size_t current_row;
char *src_name;
char *src;
- size_t src_size;
};
struct Options
{
int print_tokens:1;
+ char *source;
};
+/*
+ * One diagnostic: a message and the source position it refers to.
+ * get_error() stores the message pointer as given, without copying it.
+ */
+struct Error
+{
+ char *message;
+ size_t row;
+ size_t column;
+};
+
struct Translation_Data
{
struct Queue *errors;
struct Queue *tokens;
+ size_t hold_number_of_errors;
+};
+/*
+ * Holds a pointer to a Source.
+ * NOTE(review): nothing in this change creates or reads a Program yet; main()
+ * only declares an unused pointer to one.
+ */
+struct Program
+{
+ struct Source *source;
+
+};
struct Source* extract_source(char *src_name);
+struct Options* parse_command_line(char **argv);
struct Translation_Data* get_translation_data();
-struct Source* get_source();
-struct Options* get_options();
+struct Error* get_error(char *message,size_t row,size_t column);
+void push_lexing_error(char *error_message,struct Source *src,struct Translation_Data *translation_data);
+void push_parsing_error(char *error_message,struct token *token ,struct Translation_Data *translation_data);
+char has_new_errors(struct Translation_Data *translation_data);
-void destroy_translation_data(struct Translation_Data *data);
-void destroy_source(struct Source *src);
-void destroy_options(struct Options *options);
+void delete_translation_data(struct Translation_Data *data);
+void delete_source(struct Source *src);
+void delete_options(struct Options *options);
+void delete_error(struct Error *error);
#endif
diff --git a/queue.c b/queue.c
index 187519a..b395acf 100644
--- a/queue.c
+++ b/queue.c
@@ -24,7 +24,7 @@ void Queue_Push(struct Queue *q,void *data)
struct Queue_Node *temp=malloc(sizeof(struct Queue_Node));
q->last->prev=temp;
temp->data=data;
-
+ temp->prev=NULL;
q->last=temp;
++q->size;
}
diff --git a/test b/test
new file mode 100644
index 0000000..3549fc5
--- /dev/null
+++ b/test
@@ -0,0 +1,12 @@
+machine temp_switch
+[
+ state above_treshold;
+ starting state below_treshold;
+
+ from below_treshold to above_treshold
+ on event above_temperature "10";
+ from above_treshold to below_treshold
+ on event below_temperature "9";
+ on state above_treshold
+ on event get_temp | html_encase | http_out ;
+];
diff --git a/test2 b/test2
new file mode 100644
index 0000000..a5342f0
--- /dev/null
+++ b/test2
@@ -0,0 +1,2 @@
+"1"
+"12"