|
|
@@ -0,0 +1,95 @@
|
|
|
+#ifndef LEXER_H
|
|
|
+#define LEXER_H
|
|
|
+
|
|
|
+#include <stdlib.h>
|
|
|
+
|
|
|
+#include "./token.h"
|
|
|
+#include "./str.h"
|
|
|
+
|
|
|
+struct lexer {
|
|
|
+ struct str in;
|
|
|
+ size_t pos;
|
|
|
+ size_t rpos;
|
|
|
+ char c;
|
|
|
+};
|
|
|
+
|
|
|
/*
 * Build a lexer over `in` and load its first character, so the returned
 * lexer is ready for lexer_next_token().
 */
struct lexer lexer_create(struct str in);

/*
 * Load the next input character into l->c, or '\0' once the input is
 * exhausted, updating l->pos and l->rpos.
 */
void lexer_read_char(struct lexer *l);

/*
 * Produce the token that starts at the current character and move the
 * lexer past it.
 */
struct token lexer_next_token(struct lexer *l);

/*
 * Consume characters for as long as _lexer_is_letter() accepts them and
 * return the consumed span as a slice of the lexer's input.
 */
struct str lexer_read_ident(struct lexer *l);

/*
 * True for 'A'-'Z', 'a'-'z' and '_'.
 */
bool _lexer_is_letter(char c);
|
|
|
+
|
|
|
+#if defined(IMP) || defined(LEXER_IMP)
|
|
|
+
|
|
|
+struct lexer
|
|
|
+lexer_create(struct str in)
|
|
|
+{
|
|
|
+ struct lexer l = {0};
|
|
|
+ l.in = in;
|
|
|
+ lexer_read_char(&l);
|
|
|
+ return l;
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+lexer_read_char(struct lexer *l)
|
|
|
+{
|
|
|
+ if ( l->rpos >= l->in.size ) {
|
|
|
+ l->c = '\0';
|
|
|
+ } else {
|
|
|
+ l->c = l->in.data[l->rpos];
|
|
|
+ }
|
|
|
+ l->pos = l->rpos;
|
|
|
+ ++l->rpos;
|
|
|
+}
|
|
|
+
|
|
|
+struct token
|
|
|
+lexer_next_token(struct lexer *l)
|
|
|
+{
|
|
|
+ struct token t = TOKEN_ILLEGAL;
|
|
|
+
|
|
|
+ switch ( l->c ) {
|
|
|
+ case '=': t = token_create(TT_ASSIGN, str_from_cstr("=", 1)); break;
|
|
|
+ case ';': t = token_create(TT_SEMICOLON, str_from_cstr(";", 1)); break;
|
|
|
+ case '(': t = token_create(TT_LPAREN, str_from_cstr("(", 1)); break;
|
|
|
+ case ')': t = token_create(TT_RPAREN, str_from_cstr(")", 1)); break;
|
|
|
+ case ',': t = token_create(TT_COMMA, str_from_cstr(",", 1)); break;
|
|
|
+ case '+': t = token_create(TT_PLUS, str_from_cstr("+", 1)); break;
|
|
|
+ case '{': t = token_create(TT_LBRACE, str_from_cstr("{", 1)); break;
|
|
|
+ case '}': t = token_create(TT_RBRACE, str_from_cstr("}", 1)); break;
|
|
|
+ case '\0': t = token_create(TT_EOF, STR_EMPTY); break;
|
|
|
+ default:
|
|
|
+ if ( _lexer_is_letter(l->c) ) {
|
|
|
+ t = token_create(TT_IDENT, lexer_read_ident(l));
|
|
|
+ return t;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ lexer_read_char(l);
|
|
|
+
|
|
|
+ return t;
|
|
|
+}
|
|
|
+
|
|
|
+struct str
|
|
|
+lexer_read_ident(struct lexer *l)
|
|
|
+{
|
|
|
+ size_t pos = l->pos;
|
|
|
+ while ( _lexer_is_letter(l->c) ) {
|
|
|
+ lexer_read_char(l);
|
|
|
+ }
|
|
|
+ return str_slice(l->in, pos, l->pos);
|
|
|
+}
|
|
|
+
|
|
|
/*
 * Report whether `c` may appear in an identifier.
 *
 * Accepts the upper-case letters 'A'..'Z', the lower-case letters
 * 'a'..'z' and the underscore; everything else (digits, punctuation,
 * '\0', bytes outside ASCII) is rejected.
 *
 * The range comparisons rely on the letters being contiguous in the
 * execution character set, which holds for ASCII.
 */
bool
_lexer_is_letter(char c)
{
	if ( c >= 'A' && c <= 'Z' ) {
		return true;
	}
	if ( c >= 'a' && c <= 'z' ) {
		return true;
	}
	return c == '_';
}
|
|
|
+
|
|
|
+#endif /* defined(IMP) || defined(LEXER_IMP) */
|
|
|
+
|
|
|
+#endif /* LEXER_H */
|