Vinicius Teshima 1 год назад
Родитель
Commit
8f8a49b492
2 изменённых файла: 187 добавлений и 0 удалений
  1. 95 0
      src/lexer.h
  2. 92 0
      src/token.h

+ 95 - 0
src/lexer.h

@@ -0,0 +1,95 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "./token.h"
+#include "./str.h"
+
+/*
+ * Lexer state: a cursor over the input string `in`.
+ *
+ * lexer_read_char() is the only function in this header that moves the
+ * cursor; it keeps `pos`, `rpos` and `c` in step with each other.
+ */
+struct lexer {
+	struct str in;	/* input being scanned; read through in.data / in.size */
+	size_t pos;	/* index the current character `c` was read from */
+	size_t rpos;	/* index the next lexer_read_char() will read from */
+	char c;		/* current character; '\0' once rpos has reached in.size */
+};
+
+/*
+ * Create a lexer over `in` with its first character already loaded.
+ */
+struct lexer lexer_create(struct str in);
+
+/*
+ * Advance the lexer by one character: load the character at `rpos` into
+ * `c` ('\0' when `rpos` is at or past the end of the input), set `pos` to
+ * that index and increment `rpos`.
+ */
+void lexer_read_char(struct lexer *l);
+
+/*
+ * Return the token that starts at the current character and advance past
+ * it. A run of letters/underscores becomes one TT_IDENT token; '\0' yields
+ * TT_EOF; any character that is not recognised yields TOKEN_ILLEGAL.
+ */
+struct token lexer_next_token(struct lexer *l);
+
+/*
+ * Consume characters while _lexer_is_letter() accepts them and return the
+ * slice of the input they cover.
+ */
+struct str lexer_read_ident(struct lexer *l);
+
+/*
+ * True when `c` is 'A'-'Z', 'a'-'z' or '_'.
+ */
+bool _lexer_is_letter(char c);
+
+#if defined(IMP) || defined(LEXER_IMP)
+
+/*
+ * Build a lexer over `in`.
+ *
+ * All cursor fields start at zero and the first character is read
+ * immediately, so the returned lexer can be passed straight to
+ * lexer_next_token().
+ */
+struct lexer
+lexer_create(struct str in)
+{
+	/* Members not named in the initializer are zero-initialized. */
+	struct lexer lx = { .in = in };
+
+	lexer_read_char(&lx);
+
+	return lx;
+}
+
+/*
+ * Step the lexer forward by one character.
+ *
+ * Loads the character at the read position into l->c, or '\0' when the
+ * read position is at or beyond the end of the input. Afterwards l->pos is
+ * the index that was just read and l->rpos is one past it. The read
+ * position is incremented even at the end of the input.
+ */
+void
+lexer_read_char(struct lexer *l)
+{
+	const size_t at = l->rpos;
+
+	l->c = ( at < l->in.size ) ? l->in.data[at] : '\0';
+	l->pos = at;
+	l->rpos = at + 1;
+}
+
+/*
+ * Produce the token that begins at the lexer's current character.
+ *
+ * Identifiers are read in full by lexer_read_ident(), which already leaves
+ * the lexer on the first character after the word, so that path returns
+ * without the extra advance. Every other token is one character wide (or
+ * is TT_EOF / TOKEN_ILLEGAL) and the lexer is advanced once before
+ * returning.
+ */
+struct token
+lexer_next_token(struct lexer *l)
+{
+	struct token tok;
+
+	/* None of the characters handled by the switch below is a letter. */
+	if ( _lexer_is_letter(l->c) ) {
+		return token_create(TT_IDENT, lexer_read_ident(l));
+	}
+
+	switch ( l->c ) {
+	case '=':
+		tok = token_create(TT_ASSIGN, str_from_cstr("=", 1));
+		break;
+	case ';':
+		tok = token_create(TT_SEMICOLON, str_from_cstr(";", 1));
+		break;
+	case '(':
+		tok = token_create(TT_LPAREN, str_from_cstr("(", 1));
+		break;
+	case ')':
+		tok = token_create(TT_RPAREN, str_from_cstr(")", 1));
+		break;
+	case ',':
+		tok = token_create(TT_COMMA, str_from_cstr(",", 1));
+		break;
+	case '+':
+		tok = token_create(TT_PLUS, str_from_cstr("+", 1));
+		break;
+	case '{':
+		tok = token_create(TT_LBRACE, str_from_cstr("{", 1));
+		break;
+	case '}':
+		tok = token_create(TT_RBRACE, str_from_cstr("}", 1));
+		break;
+	case '\0':
+		tok = token_create(TT_EOF, STR_EMPTY);
+		break;
+	default:
+		/* Unrecognised character. */
+		tok = TOKEN_ILLEGAL;
+		break;
+	}
+
+	lexer_read_char(l);
+
+	return tok;
+}
+
+/*
+ * Read an identifier starting at the current character.
+ *
+ * Advances the lexer until the current character is no longer accepted by
+ * _lexer_is_letter(), then returns the slice of the input from the
+ * starting index up to the index the lexer stopped on.
+ */
+struct str
+lexer_read_ident(struct lexer *l)
+{
+	const size_t first = l->pos;
+
+	for ( ;; ) {
+		if ( !_lexer_is_letter(l->c) ) {
+			break;
+		}
+		lexer_read_char(l);
+	}
+
+	return str_slice(l->in, first, l->pos);
+}
+
+/*
+ * Report whether `c` may appear in an identifier.
+ *
+ * Accepts the letters 'A'-'Z' and 'a'-'z' and the underscore; digits and
+ * every other character are rejected. The range checks rely on the letters
+ * being contiguous, as they are in ASCII.
+ */
+bool
+_lexer_is_letter(char c)
+{
+	const bool upper = ( c >= 'A' && c <= 'Z' );
+	const bool lower = ( c >= 'a' && c <= 'z' );
+
+	return upper || lower || c == '_';
+}
+
+#endif /* defined(IMP) || defined(LEXER_IMP) */
+
+#endif /* LEXER_H */

+ 92 - 0
src/token.h

@@ -0,0 +1,92 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include "./str.h"
+
+/*
+ * Kinds of token.
+ *
+ * TT_EOF is -1 and the remaining enumerators count up from TT_ILLEGAL = 0,
+ * so TT_TOTAL (13) is the number of non-negative kinds; TT_EOF is not
+ * included in that count.
+ */
+enum token_type_enum {
+	TT_EOF = -1,	/* end of input */
+	TT_ILLEGAL = 0,	/* kind carried by TOKEN_ILLEGAL */
+
+	TT_IDENT,	/* identifier */
+	TT_INT_LIT,	/* NOTE(review): presumably an integer literal - confirm */
+
+	TT_ASSIGN,	/* "=" */
+	TT_PLUS,	/* "+" */
+
+	TT_COMMA,	/* "," */
+	TT_SEMICOLON,	/* ";" */
+
+	TT_LPAREN,	/* "(" */
+	TT_RPAREN,	/* ")" */
+	TT_LBRACE,	/* "{" */
+	TT_RBRACE,	/* "}" */
+
+	/*
+	 * NOTE(review): spelling looks like a typo for TT_FUNCTION; renaming
+	 * would also have to change the name string in token_type_create()
+	 * and every user of the enumerator.
+	 */
+	TT_FUNCION,
+	TT_TYPE,	/* NOTE(review): presumably a type keyword - confirm */
+
+	TT_TOTAL	/* sentinel: one past the last real kind */
+};
+
+/*
+ * A token kind together with a printable name for it.
+ * Built by token_type_create().
+ */
+struct token_type {
+	enum token_type_enum code;	/* the kind itself */
+	const char *name;		/* enumerator name as a string, e.g. "TT_IDENT" */
+};
+
+/*
+ * One token: its kind and the literal text attached to it.
+ * Built by token_create().
+ */
+struct token {
+	struct token_type typ;	/* kind and printable name */
+	struct str lit;		/* literal text of the token; STR_EMPTY in TOKEN_ILLEGAL */
+};
+
+/*
+ * Ready-made TT_ILLEGAL token with an empty literal, for initialising a
+ * `struct token` by copy.
+ *
+ * Declared `static const` because this header is meant to be included from
+ * several files: a plain object definition here would be emitted by every
+ * translation unit that includes the header and collide at link time.
+ * Each translation unit gets its own read-only copy instead.
+ */
+static const struct token TOKEN_ILLEGAL = {{TT_ILLEGAL, "TT_ILLEGAL"}, STR_EMPTY};
+
+/*
+ * Build a token of kind `tte` carrying the literal `lit`. The kind is
+ * expanded to a token_type with token_type_create().
+ */
+struct token token_create(enum token_type_enum tte, struct str lit);
+
+/*
+ * Pair the kind `tte` with its enumerator name as a string.
+ */
+struct token_type token_type_create(enum token_type_enum tte);
+
+#if defined(IMP) || defined(TOKEN_IMP)
+
+/*
+ * Build a token.
+ *
+ * tte: kind of the token; expanded to a kind/name pair with
+ *      token_type_create().
+ * lit: literal text to attach; stored by value.
+ *
+ * Returns the assembled token.
+ */
+struct token
+token_create(enum token_type_enum tte, struct str lit)
+{
+	struct token t = {0};
+	t.typ = token_type_create(tte);
+	t.lit = lit;
+	return t;
+}
+
+/*
+ * Pair a token kind with its printable name.
+ *
+ * tte: kind to describe.
+ *
+ * Returns a token_type whose `code` is `tte` and whose `name` is the
+ * enumerator spelled as a string literal, or "TT_UNKNOWN" when `tte` is
+ * not one of the declared enumerators, so `name` is never NULL.
+ */
+struct token_type
+token_type_create(enum token_type_enum tte)
+{
+	struct token_type tt = {0};
+
+	tt.code = tte;
+
+	/*
+	 * The fallback is set up front instead of in a `default:` label so
+	 * the compiler can still warn about an enumerator that is added
+	 * later without a matching case below.
+	 */
+	tt.name = "TT_UNKNOWN";
+
+	switch ( tte ) {
+	case TT_EOF:		tt.name = "TT_EOF"; break;
+	case TT_ILLEGAL:	tt.name = "TT_ILLEGAL"; break;
+
+	case TT_IDENT:		tt.name = "TT_IDENT"; break;
+	case TT_INT_LIT:	tt.name = "TT_INT_LIT"; break;
+
+	case TT_ASSIGN:		tt.name = "TT_ASSIGN"; break;
+	case TT_PLUS:		tt.name = "TT_PLUS"; break;
+
+	case TT_COMMA:		tt.name = "TT_COMMA"; break;
+	case TT_SEMICOLON:	tt.name = "TT_SEMICOLON"; break;
+
+	case TT_LPAREN:		tt.name = "TT_LPAREN"; break;
+	case TT_RPAREN:		tt.name = "TT_RPAREN"; break;
+	case TT_LBRACE:		tt.name = "TT_LBRACE"; break;
+	case TT_RBRACE:		tt.name = "TT_RBRACE"; break;
+
+	case TT_FUNCION:	tt.name = "TT_FUNCION"; break;
+	case TT_TYPE:		tt.name = "TT_TYPE"; break;
+
+	case TT_TOTAL:		tt.name = "TT_TOTAL"; break;
+	}
+
+	return tt;
+}
+
+#endif /* defined(IMP) | defined(TOKEN_IMP) */
+
+#endif /* TOKEN_H */