Explorar o código

[lexer.h] Adding special handling for pp

Vinicius Teshima hai 1 ano
pai
achega
19e16724b1
Modificáronse 1 ficheiros con 125 adicións e 28 borrados
  1. 125 28
      src/lexer.h

+ 125 - 28
src/lexer.h

@@ -15,7 +15,8 @@ struct lexer {
 
 enum lexer_err_code {
 	LEXER_ERR_OK = 0,
-	LEXER_ERR_INVALID_PP_IDENT
+	LEXER_ERR_INVALID_PP_IDENT,
+	LEXER_ERR_INVALID_PP_INCLUDE
 };
 
 struct lexer_err {
@@ -31,11 +32,15 @@ struct token lexer_next_token(struct lexer *l, struct lexer_err *err);
 struct str lexer_read_ident(struct lexer *l);
 struct str lexer_read_str_lit(struct lexer *l);
 struct str lexer_read_int_lit(struct lexer *l);
+struct str lexer_read_until_or(struct lexer *l, char tgt, char limit,
+			       bool *reached_limit);
 
 enum token_type_enum lexer_lookup_ident(struct str ident);
-enum token_type_enum lexer_lookup_pp(struct str ident);
 void lexer_skip_whitespace(struct lexer *l);
 
+struct token lexer_handle_pp(struct lexer *l, struct lexer_err *err);
+enum token_type_enum _lexer_lookup_pp(struct str ident);
+
 struct lexer_err lexer_err_create(enum lexer_err_code code);
 
 bool _lexer_is_letter(char c);
@@ -92,37 +97,40 @@ lexer_next_token(struct lexer *l, struct lexer_err *err)
 	case '#':
 		lexer_read_char(l);
 		if ( _lexer_is_letter(l->c) ) {
-			struct str ident = lexer_read_ident(l);
-			t = token_create(lexer_lookup_pp(ident), ident);
+			t = lexer_handle_pp(l, err);
 			if ( t.typ.code == TT_ILLEGAL ) {
-				goto invalid_pp;
+				goto ret_invalid;
 			}
-			return t;
+			goto ret_ok;
 		}
-invalid_pp:
 		_lexer_set_err(err, LEXER_ERR_INVALID_PP_IDENT);
-		return TOKEN_ILLEGAL;
+		goto ret_invalid;
 		break;
 	case '"':
 		t = token_create(TT_STR_LIT, lexer_read_str_lit(l));
-		return t;
+		goto ret_ok;
 		break;
 	case '\0': t = token_create(TT_EOF, STR_EMPTY); break;
 	default: 
 		if ( _lexer_is_letter(l->c) ) {
 			struct str ident = lexer_read_ident(l);
 			t = token_create(lexer_lookup_ident(ident), ident);
-			return t;
+			goto ret_ok;
 		}
 		if ( _lexer_is_number(l->c) ) {
-			return token_create(TT_INT_LIT, lexer_read_int_lit(l));
+			t = token_create(TT_INT_LIT, lexer_read_int_lit(l));
+			goto ret_ok;
 		}
 		break;
 	}
 
 	lexer_read_char(l);
 
+ret_ok:
+	_lexer_set_err(err, LEXER_ERR_OK);
 	return t;
+ret_invalid:
+	return TOKEN_ILLEGAL;
 
 #undef _LEXER_CUR_CHAR
 }
@@ -169,6 +177,33 @@ lexer_read_int_lit(struct lexer *l)
 	
 }
 
+/*
+ * Advance the lexer until the current character equals `tgt`, giving up
+ * early if `limit` or the terminating '\0' is seen first.
+ *
+ * l:             lexer to advance; l->c is the character being examined.
+ * tgt:           character that ends the scan successfully.
+ * limit:         character that aborts the scan.
+ * reached_limit: optional (may be NULL); receives true when the scan was
+ *                aborted and false when `tgt` was found.
+ *
+ * `tgt` is tested before `limit`, so `tgt` wins if both are the same.
+ *
+ * On success `tgt` is consumed and the result is
+ * str_slice(l->in, start, l->pos - 1), where `start` is l->pos on entry.
+ * On abort STR_EMPTY is returned and the aborting character is left as
+ * the current one.
+ */
+struct str
+lexer_read_until_or(struct lexer *l, char tgt, char limit, bool *reached_limit)
+{
+	const size_t start = l->pos;
+	bool hit_limit = false;
+
+	while ( l->c != tgt ) {
+		if ( l->c == limit || l->c == '\0' ) {
+			hit_limit = true;
+			break;
+		}
+		lexer_read_char(l);
+	}
+
+	if ( reached_limit != NULL ) {
+		*reached_limit = hit_limit;
+	}
+	if ( hit_limit ) {
+		return STR_EMPTY;
+	}
+
+	/* Step over the terminator before slicing. */
+	lexer_read_char(l);
+	return str_slice(l->in, start, l->pos - 1);
+}
+
 enum token_type_enum
 lexer_lookup_ident(struct str ident)
 {
@@ -201,8 +236,85 @@ lexer_lookup_ident(struct str ident)
 	return TT_IDENT;
 }
 
+
+/*
+ * Consume characters while the current one (l->c) is a space, tab,
+ * carriage return or newline. Returns with l->c on the first character
+ * that is none of those.
+ */
+void
+lexer_skip_whitespace(struct lexer *l)
+{
+	while ( l->c == ' ' || l->c == '\t' || l->c == '\r' || l->c == '\n' ) {
+		lexer_read_char(l);
+	}
+}
+
+
+/*
+ * Lex a preprocessor directive. Called with the lexer positioned on the
+ * first letter after '#'.
+ *
+ * l:   lexer to read from.
+ * err: receives the error code for this directive.
+ *
+ * Reads the directive name and looks it up with _lexer_lookup_pp():
+ *   - unknown name: err = LEXER_ERR_INVALID_PP_IDENT, returns TOKEN_ILLEGAL.
+ *   - include: expects <...> or "..." after optional whitespace. On
+ *     success err = LEXER_ERR_OK and the token carries the text returned
+ *     by lexer_read_until_or(). If the opening delimiter is missing, or
+ *     the closing one is not found before '\n' or end of input,
+ *     err = LEXER_ERR_INVALID_PP_INCLUDE and TOKEN_ILLEGAL is returned.
+ *   - any other recognised directive: not handled yet, returns
+ *     TOKEN_ILLEGAL.
+ */
+struct token
+lexer_handle_pp(struct lexer *l, struct lexer_err *err)
+{
+	struct str ident = lexer_read_ident(l);
+	enum token_type_enum tt = _lexer_lookup_pp(ident);
+	bool reached_limit = false;
+	char closing = '\0';
+
+	if ( tt == TT_ILLEGAL ) {
+		_lexer_set_err(err, LEXER_ERR_INVALID_PP_IDENT);
+		return TOKEN_ILLEGAL;
+	}
+
+	/*
+	 * NOTE(review): lexer_skip_whitespace() also skips '\n', so the
+	 * operand may start on a following line -- confirm this is intended.
+	 */
+	lexer_skip_whitespace(l);
+
+	switch ( tt ) {
+	case TT_PP_INCLUDE:
+		/* Pick the closing delimiter that matches the opening one. */
+		switch ( l->c ) {
+		case '<': closing = '>'; break;
+		case '"': closing = '"'; break;
+		default:
+			/* Report the malformed include rather than leaving a stale code in err. */
+			_lexer_set_err(err, LEXER_ERR_INVALID_PP_INCLUDE);
+			return TOKEN_ILLEGAL;
+		}
+
+		/* Step over the opening delimiter, then read up to the closing one. */
+		lexer_read_char(l);
+		ident = lexer_read_until_or(l, closing, '\n', &reached_limit);
+		if ( reached_limit ) {
+			_lexer_set_err(err, LEXER_ERR_INVALID_PP_INCLUDE);
+			return TOKEN_ILLEGAL;
+		}
+
+		_lexer_set_err(err, LEXER_ERR_OK);
+		return token_create(tt, ident);
+
+	/*
+	 * NOTE(review): these directives are recognised but not handled yet;
+	 * err is left untouched here, as before -- consider a dedicated code.
+	 */
+	case TT_PP_DEFINE:
+	case TT_PP_IFNDEF:
+	case TT_PP_IFDEF:
+	case TT_PP_IF:
+	default:
+		return TOKEN_ILLEGAL;
+	}
+}
+
 enum token_type_enum
-lexer_lookup_pp(struct str ident)
+_lexer_lookup_pp(struct str ident)
 {
 	if ( ident.size < 2 ) {
 		return TT_ILLEGAL;
@@ -237,22 +349,6 @@ lexer_lookup_pp(struct str ident)
 	return TT_ILLEGAL;
 }
 
-void 
-lexer_skip_whitespace(struct lexer *l)
-{
-loop:
-	switch ( l->c ) {
-	case ' ':
-	case '\t':
-	case '\r':
-	case '\n':
-		lexer_read_char(l);
-		goto loop;
-	}
-
-	return;
-}
-
 struct lexer_err
 lexer_err_create(enum lexer_err_code code)
 {
@@ -263,6 +359,7 @@ lexer_err_create(enum lexer_err_code code)
 	switch ( code ) {
 	_LEXER_ERR_CASE(LEXER_ERR_OK);
 	_LEXER_ERR_CASE(LEXER_ERR_INVALID_PP_IDENT);
+	_LEXER_ERR_CASE(LEXER_ERR_INVALID_PP_INCLUDE);
 	}
 
 	return le;