Ver Fonte

[lexer/token.h] Adding and implementing token TT_COMMENT

Vinicius Teshima há 1 ano atrás
pai
commit
bc30dd7220
2 ficheiros alterados com 72 adições e 1 exclusões
  1. 68 1
      src/lexer.h
  2. 4 0
      src/token.h

+ 68 - 1
src/lexer.h

@@ -15,6 +15,8 @@ struct lexer {
 
 enum lexer_err_code {
 	LEXER_ERR_OK = 0, /* no error; lexer_read_comment reports this once a comment is closed */
+	LEXER_ERR_UNEXPECTED_EOF, /* a '\0' was reached before a block comment was closed */
+	LEXER_ERR_INVALID_COMMENT, /* '/' not followed by '*', or a second comment opener found inside a comment */
 	LEXER_ERR_INVALID_PP_IDENT,
 	LEXER_ERR_INVALID_PP_INCLUDE
 };
@@ -33,6 +35,7 @@ struct token lexer_next_token(struct lexer *l, struct lexer_err *err);
 struct str lexer_read_ident(struct lexer *l);
 struct str lexer_read_str_lit(struct lexer *l);
 struct str lexer_read_int_lit(struct lexer *l);
+struct str lexer_read_comment(struct lexer *l, struct lexer_err *err); /* returns the comment text without its delimiters; STR_EMPTY with an error reported through err on failure */
 struct str lexer_read_until_or(struct lexer *l, char tgt, char limit,
 			       bool *reached_limit);
 
@@ -85,9 +88,14 @@ lexer_next_token(struct lexer *l, struct lexer_err *err)
 {
 #define _LEXER_CUR_CHAR str_slice(l->in, l->pos, l->pos+1)
 #define _LEXER_2_CHAR str_slice(l->in, l->pos, l->pos+2)
+	struct lexer_err intern_err = {0};
 
 	struct token t = TOKEN_ILLEGAL;
 
+	if ( err == NULL ) {
+		err = &intern_err;
+	}
+
 	lexer_skip_whitespace(l);
 
 	switch ( l->c ) {
@@ -110,7 +118,17 @@ lexer_next_token(struct lexer *l, struct lexer_err *err)
 		}
 		t = token_create(TT_BANG,      _LEXER_CUR_CHAR);
 		break;
-	case '/':  t = token_create(TT_SLASH,     _LEXER_CUR_CHAR); break;
+	case '/':
+		if ( lexer_peek_char(l) == '*' ) {
+			struct str comment = lexer_read_comment(l, err);
+			if ( err->code != LEXER_ERR_OK ) {
+				goto ret_invalid;
+			}
+			t = token_create(TT_COMMENT, comment);
+			goto ret_ok;
+		}
+		t = token_create(TT_SLASH, _LEXER_CUR_CHAR);
+		break;
 	case ',':  t = token_create(TT_COMMA,     _LEXER_CUR_CHAR); break;
 	case '*':  t = token_create(TT_ASTERISK,  _LEXER_CUR_CHAR); break;
 	case '-':  t = token_create(TT_DASH,      _LEXER_CUR_CHAR); break;
@@ -205,6 +223,53 @@ lexer_read_int_lit(struct lexer *l)
 	
 }
 
+/*
+ * Consume a block comment and return the text between its delimiters.
+ *
+ * If the current character is '/', the opener (slash followed by star) is
+ * consumed first; a '/' that is not followed by '*' is reported as
+ * LEXER_ERR_INVALID_COMMENT. When the current character is not '/', the
+ * lexer is taken to be positioned inside the comment body already.
+ *
+ * The body is then scanned one character at a time until the closing
+ * star-slash pair has been consumed. A second comment opener inside the
+ * body is rejected with LEXER_ERR_INVALID_COMMENT, and a '\0' before the
+ * closing pair is reported as LEXER_ERR_UNEXPECTED_EOF.
+ *
+ * Returns a slice of l->in covering the body only and reports LEXER_ERR_OK
+ * through err; on any failure returns STR_EMPTY after reporting the error.
+ */
+struct str
+lexer_read_comment(struct lexer *l, struct lexer_err *err)
+{
+	size_t body_start;
+
+	/* Step over the opener when the lexer is still sitting on it. */
+	if ( l->c == '/' ) {
+		lexer_read_char(l);
+		if ( l->c != '*' ) {
+			_lexer_set_err(err, LEXER_ERR_INVALID_COMMENT);
+			return STR_EMPTY;
+		}
+		lexer_read_char(l);
+	}
+	body_start = l->pos;
+
+	for ( ;; ) {
+		/* Input ran out before the comment was closed. */
+		if ( l->c == '\0' ) {
+			_lexer_set_err(err, LEXER_ERR_UNEXPECTED_EOF);
+			return STR_EMPTY;
+		}
+
+		if ( l->c == '*' ) {
+			lexer_read_char(l);
+			if ( l->c == '/' ) {
+				/* Closing pair: consume the '/' and stop. */
+				lexer_read_char(l);
+				break;
+			}
+			/*
+			 * Not a closer; the character now under the cursor
+			 * is examined again, so a run of stars is handled.
+			 */
+			continue;
+		}
+
+		if ( l->c == '/' ) {
+			lexer_read_char(l);
+			if ( l->c == '*' ) {
+				/* A comment opener inside a comment is rejected. */
+				_lexer_set_err(err, LEXER_ERR_INVALID_COMMENT);
+				return STR_EMPTY;
+			}
+			continue;
+		}
+
+		lexer_read_char(l);
+	}
+
+	_lexer_set_err(err, LEXER_ERR_OK);
+	/*
+	 * Both closing characters have been consumed, so the slice end is
+	 * moved back by two to leave them out (assumes l->pos is the index
+	 * of the current character).
+	 */
+	return str_slice(l->in, body_start, l->pos - 2);
+}
+
 struct str
 lexer_read_until_or(struct lexer *l, char tgt, char limit, bool *reached_limit)
 {
@@ -439,6 +504,8 @@ lexer_err_create(enum lexer_err_code code)
 
 	switch ( code ) {
 	_LEXER_ERR_CASE(LEXER_ERR_OK);
+	_LEXER_ERR_CASE(LEXER_ERR_UNEXPECTED_EOF);
+	_LEXER_ERR_CASE(LEXER_ERR_INVALID_COMMENT);
 	_LEXER_ERR_CASE(LEXER_ERR_INVALID_PP_IDENT);
 	_LEXER_ERR_CASE(LEXER_ERR_INVALID_PP_INCLUDE);
 	}

+ 4 - 0
src/token.h

@@ -48,6 +48,8 @@ enum token_type_enum {
 	TT_CASE,
 	TT_BREAK,
 
+	TT_COMMENT,
+
 	TT_PP_INCLUDE,
 	TT_PP_DEFINE,
 	TT_PP_IFNDEF,
@@ -147,6 +149,8 @@ token_type_enum_2_cstr(enum token_type_enum tte)
 	_TOKEN_CASE(TT_SWITCH);
 	_TOKEN_CASE(TT_CASE);
 	_TOKEN_CASE(TT_BREAK);
+
+	_TOKEN_CASE(TT_COMMENT);
 	
 	_TOKEN_CASE(TT_PP_INCLUDE);
 	_TOKEN_CASE(TT_PP_DEFINE);