|
@@ -15,7 +15,8 @@ struct lexer {
|
|
|
|
|
|
|
|
enum lexer_err_code {
|
|
enum lexer_err_code {
|
|
|
LEXER_ERR_OK = 0,
|
|
LEXER_ERR_OK = 0,
|
|
|
- LEXER_ERR_INVALID_PP_IDENT
|
|
|
|
|
|
|
+ LEXER_ERR_INVALID_PP_IDENT,
|
|
|
|
|
+ LEXER_ERR_INVALID_PP_INCLUDE
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
struct lexer_err {
|
|
struct lexer_err {
|
|
@@ -31,11 +32,15 @@ struct token lexer_next_token(struct lexer *l, struct lexer_err *err);
|
|
|
struct str lexer_read_ident(struct lexer *l);
|
|
struct str lexer_read_ident(struct lexer *l);
|
|
|
struct str lexer_read_str_lit(struct lexer *l);
|
|
struct str lexer_read_str_lit(struct lexer *l);
|
|
|
struct str lexer_read_int_lit(struct lexer *l);
|
|
struct str lexer_read_int_lit(struct lexer *l);
|
|
|
|
|
+struct str lexer_read_until_or(struct lexer *l, char tgt, char limit,
|
|
|
|
|
+ bool *reached_limit);
|
|
|
|
|
|
|
|
enum token_type_enum lexer_lookup_ident(struct str ident);
|
|
enum token_type_enum lexer_lookup_ident(struct str ident);
|
|
|
-enum token_type_enum lexer_lookup_pp(struct str ident);
|
|
|
|
|
void lexer_skip_whitespace(struct lexer *l);
|
|
void lexer_skip_whitespace(struct lexer *l);
|
|
|
|
|
|
|
|
|
|
+struct token lexer_handle_pp(struct lexer *l, struct lexer_err *err);
|
|
|
|
|
+enum token_type_enum _lexer_lookup_pp(struct str ident);
|
|
|
|
|
+
|
|
|
struct lexer_err lexer_err_create(enum lexer_err_code code);
|
|
struct lexer_err lexer_err_create(enum lexer_err_code code);
|
|
|
|
|
|
|
|
bool _lexer_is_letter(char c);
|
|
bool _lexer_is_letter(char c);
|
|
@@ -92,37 +97,40 @@ lexer_next_token(struct lexer *l, struct lexer_err *err)
|
|
|
case '#':
|
|
case '#':
|
|
|
lexer_read_char(l);
|
|
lexer_read_char(l);
|
|
|
if ( _lexer_is_letter(l->c) ) {
|
|
if ( _lexer_is_letter(l->c) ) {
|
|
|
- struct str ident = lexer_read_ident(l);
|
|
|
|
|
- t = token_create(lexer_lookup_pp(ident), ident);
|
|
|
|
|
|
|
+ t = lexer_handle_pp(l, err);
|
|
|
if ( t.typ.code == TT_ILLEGAL ) {
|
|
if ( t.typ.code == TT_ILLEGAL ) {
|
|
|
- goto invalid_pp;
|
|
|
|
|
|
|
+ goto ret_invalid;
|
|
|
}
|
|
}
|
|
|
- return t;
|
|
|
|
|
|
|
+ goto ret_ok;
|
|
|
}
|
|
}
|
|
|
-invalid_pp:
|
|
|
|
|
_lexer_set_err(err, LEXER_ERR_INVALID_PP_IDENT);
|
|
_lexer_set_err(err, LEXER_ERR_INVALID_PP_IDENT);
|
|
|
- return TOKEN_ILLEGAL;
|
|
|
|
|
|
|
+ goto ret_invalid;
|
|
|
break;
|
|
break;
|
|
|
case '"':
|
|
case '"':
|
|
|
t = token_create(TT_STR_LIT, lexer_read_str_lit(l));
|
|
t = token_create(TT_STR_LIT, lexer_read_str_lit(l));
|
|
|
- return t;
|
|
|
|
|
|
|
+ goto ret_ok;
|
|
|
break;
|
|
break;
|
|
|
case '\0': t = token_create(TT_EOF, STR_EMPTY); break;
|
|
case '\0': t = token_create(TT_EOF, STR_EMPTY); break;
|
|
|
default:
|
|
default:
|
|
|
if ( _lexer_is_letter(l->c) ) {
|
|
if ( _lexer_is_letter(l->c) ) {
|
|
|
struct str ident = lexer_read_ident(l);
|
|
struct str ident = lexer_read_ident(l);
|
|
|
t = token_create(lexer_lookup_ident(ident), ident);
|
|
t = token_create(lexer_lookup_ident(ident), ident);
|
|
|
- return t;
|
|
|
|
|
|
|
+ goto ret_ok;
|
|
|
}
|
|
}
|
|
|
if ( _lexer_is_number(l->c) ) {
|
|
if ( _lexer_is_number(l->c) ) {
|
|
|
- return token_create(TT_INT_LIT, lexer_read_int_lit(l));
|
|
|
|
|
|
|
+ t = token_create(TT_INT_LIT, lexer_read_int_lit(l));
|
|
|
|
|
+ goto ret_ok;
|
|
|
}
|
|
}
|
|
|
break;
|
|
break;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
lexer_read_char(l);
|
|
lexer_read_char(l);
|
|
|
|
|
|
|
|
|
|
+ret_ok:
|
|
|
|
|
+ _lexer_set_err(err, LEXER_ERR_OK);
|
|
|
return t;
|
|
return t;
|
|
|
|
|
+ret_invalid:
|
|
|
|
|
+ return TOKEN_ILLEGAL;
|
|
|
|
|
|
|
|
#undef _LEXER_CUR_CHAR
|
|
#undef _LEXER_CUR_CHAR
|
|
|
}
|
|
}
|
|
@@ -169,6 +177,33 @@ lexer_read_int_lit(struct lexer *l)
|
|
|
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+struct str
|
|
|
|
|
+lexer_read_until_or(struct lexer *l, char tgt, char limit, bool *reached_limit)
|
|
|
|
|
+{
|
|
|
|
|
+ size_t pos = l->pos;
|
|
|
|
|
+loop:
|
|
|
|
|
+ if ( l->c == tgt ) {
|
|
|
|
|
+ goto ret_ok;
|
|
|
|
|
+ }
|
|
|
|
|
+ if ( l->c == limit || l->c == '\0' ) {
|
|
|
|
|
+ goto ret_err;
|
|
|
|
|
+ }
|
|
|
|
|
+ lexer_read_char(l);
|
|
|
|
|
+ goto loop;
|
|
|
|
|
+
|
|
|
|
|
+ret_ok:
|
|
|
|
|
+ if ( reached_limit != NULL ) {
|
|
|
|
|
+ *reached_limit = false;
|
|
|
|
|
+ }
|
|
|
|
|
+ lexer_read_char(l);
|
|
|
|
|
+ return str_slice(l->in, pos, l->pos-1);
|
|
|
|
|
+ret_err:
|
|
|
|
|
+ if ( reached_limit != NULL ) {
|
|
|
|
|
+ *reached_limit = true;
|
|
|
|
|
+ }
|
|
|
|
|
+ return STR_EMPTY;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
enum token_type_enum
|
|
enum token_type_enum
|
|
|
lexer_lookup_ident(struct str ident)
|
|
lexer_lookup_ident(struct str ident)
|
|
|
{
|
|
{
|
|
@@ -201,8 +236,85 @@ lexer_lookup_ident(struct str ident)
|
|
|
return TT_IDENT;
|
|
return TT_IDENT;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+
|
|
|
|
|
+void
|
|
|
|
|
+lexer_skip_whitespace(struct lexer *l)
|
|
|
|
|
+{
|
|
|
|
|
+loop:
|
|
|
|
|
+ switch ( l->c ) {
|
|
|
|
|
+ case ' ':
|
|
|
|
|
+ case '\t':
|
|
|
|
|
+ case '\r':
|
|
|
|
|
+ case '\n':
|
|
|
|
|
+ lexer_read_char(l);
|
|
|
|
|
+ goto loop;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+struct token
|
|
|
|
|
+lexer_handle_pp(struct lexer *l, struct lexer_err *err)
|
|
|
|
|
+{
|
|
|
|
|
+ struct token t = TOKEN_ILLEGAL;
|
|
|
|
|
+ struct str ident = lexer_read_ident(l);
|
|
|
|
|
+ enum token_type_enum tt = _lexer_lookup_pp(ident);
|
|
|
|
|
+ bool reached_limit = false;
|
|
|
|
|
+
|
|
|
|
|
+ if ( tt == TT_ILLEGAL ) {
|
|
|
|
|
+ _lexer_set_err(err, LEXER_ERR_INVALID_PP_IDENT);
|
|
|
|
|
+ goto ret_illegal;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ lexer_skip_whitespace(l);
|
|
|
|
|
+
|
|
|
|
|
+ switch ( tt ) {
|
|
|
|
|
+ case TT_PP_INCLUDE:
|
|
|
|
|
+ switch ( l->c ) {
|
|
|
|
|
+ case '<':
|
|
|
|
|
+ lexer_read_char(l);
|
|
|
|
|
+ ident = lexer_read_until_or(l, '>', '\n', &reached_limit);
|
|
|
|
|
+ if ( reached_limit == true ) {
|
|
|
|
|
+ _lexer_set_err(err, LEXER_ERR_INVALID_PP_INCLUDE);
|
|
|
|
|
+ goto ret_illegal;
|
|
|
|
|
+ }
|
|
|
|
|
+ t = token_create(tt, ident);
|
|
|
|
|
+ goto ret_ok;
|
|
|
|
|
+ break;
|
|
|
|
|
+ case '"':
|
|
|
|
|
+ lexer_read_char(l);
|
|
|
|
|
+ ident = lexer_read_until_or(l, '"', '\n', &reached_limit);
|
|
|
|
|
+ if ( reached_limit == true ) {
|
|
|
|
|
+ _lexer_set_err(err, LEXER_ERR_INVALID_PP_INCLUDE);
|
|
|
|
|
+ goto ret_illegal;
|
|
|
|
|
+ }
|
|
|
|
|
+ t = token_create(tt, ident);
|
|
|
|
|
+ goto ret_ok;
|
|
|
|
|
+ break;
|
|
|
|
|
+ default: goto ret_illegal; break;
|
|
|
|
|
+ }
|
|
|
|
|
+ goto ret_ok;
|
|
|
|
|
+ break;
|
|
|
|
|
+
|
|
|
|
|
+ case TT_PP_DEFINE: goto ret_illegal; break;
|
|
|
|
|
+ case TT_PP_IFNDEF: goto ret_illegal; break;
|
|
|
|
|
+ case TT_PP_IFDEF: goto ret_illegal; break;
|
|
|
|
|
+ case TT_PP_IF: goto ret_illegal; break;
|
|
|
|
|
+
|
|
|
|
|
+ default: goto ret_illegal; break;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ret_ok:
|
|
|
|
|
+ _lexer_set_err(err, LEXER_ERR_OK);
|
|
|
|
|
+ return t;
|
|
|
|
|
+
|
|
|
|
|
+ret_illegal:
|
|
|
|
|
+ return TOKEN_ILLEGAL;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
enum token_type_enum
|
|
enum token_type_enum
|
|
|
-lexer_lookup_pp(struct str ident)
|
|
|
|
|
|
|
+_lexer_lookup_pp(struct str ident)
|
|
|
{
|
|
{
|
|
|
if ( ident.size < 2 ) {
|
|
if ( ident.size < 2 ) {
|
|
|
return TT_ILLEGAL;
|
|
return TT_ILLEGAL;
|
|
@@ -237,22 +349,6 @@ lexer_lookup_pp(struct str ident)
|
|
|
return TT_ILLEGAL;
|
|
return TT_ILLEGAL;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-void
|
|
|
|
|
-lexer_skip_whitespace(struct lexer *l)
|
|
|
|
|
-{
|
|
|
|
|
-loop:
|
|
|
|
|
- switch ( l->c ) {
|
|
|
|
|
- case ' ':
|
|
|
|
|
- case '\t':
|
|
|
|
|
- case '\r':
|
|
|
|
|
- case '\n':
|
|
|
|
|
- lexer_read_char(l);
|
|
|
|
|
- goto loop;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- return;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
struct lexer_err
|
|
struct lexer_err
|
|
|
lexer_err_create(enum lexer_err_code code)
|
|
lexer_err_create(enum lexer_err_code code)
|
|
|
{
|
|
{
|
|
@@ -263,6 +359,7 @@ lexer_err_create(enum lexer_err_code code)
|
|
|
switch ( code ) {
|
|
switch ( code ) {
|
|
|
_LEXER_ERR_CASE(LEXER_ERR_OK);
|
|
_LEXER_ERR_CASE(LEXER_ERR_OK);
|
|
|
_LEXER_ERR_CASE(LEXER_ERR_INVALID_PP_IDENT);
|
|
_LEXER_ERR_CASE(LEXER_ERR_INVALID_PP_IDENT);
|
|
|
|
|
+ _LEXER_ERR_CASE(LEXER_ERR_INVALID_PP_INCLUDE);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
return le;
|
|
return le;
|