|
|
@@ -43,11 +43,14 @@
|
|
|
|
|
|
/* Maps a truth value to the C string "True" (non-zero) or "False" (zero). */
# define BOOL2CSTR(flag) ((flag) ? "True" : "False")
|
|
|
|
|
|
/* Marks a parameter or variable as intentionally unused.  The argument and
 * the whole expansion are parenthesised so that any expression is accepted. */
# define UNUSED(arg) ((void) (arg))
|
|
|
+
|
|
|
enum err {
|
|
|
ERR_OK = 0,
|
|
|
ERR_NOT_INT,
|
|
|
ERR_TOO_BIG,
|
|
|
ERR_NULL_ARG,
|
|
|
+ ERR_INVALID_ARG,
|
|
|
ERR_NOT_FOUND,
|
|
|
ERR_INDEX_TOO_LARGE,
|
|
|
ERR_FAILED_OPEN,
|
|
|
@@ -57,6 +60,7 @@ enum err {
|
|
|
ERR_WROTE_WRONG_AMOUNT,
|
|
|
ERR_FAILED_ALLOC,
|
|
|
ERR_MKDIR_FAILED,
|
|
|
+ ERR_STR_EMPTY,
|
|
|
ERR_PATH_EMPTY,
|
|
|
ERR_PATH_INVALID,
|
|
|
ERR_PATH_FILE_EMPTY,
|
|
|
@@ -69,6 +73,7 @@ const char *err_to_name[] = {
|
|
|
"ERR_NOT_INT",
|
|
|
"ERR_TOO_BIG",
|
|
|
"ERR_NULL_ARG",
|
|
|
+ "ERR_INVALID_ARG",
|
|
|
"ERR_NOT_FOUND",
|
|
|
"ERR_INDEX_TOO_LARGE",
|
|
|
"ERR_FAILED_OPEN",
|
|
|
@@ -112,6 +117,13 @@ char *getenv(const char *name);
|
|
|
|
|
|
/* ----------------------------- START LIB DEF ----------------------------- */
|
|
|
|
|
|
/* Evaluates to arg1 when it is not NULL, otherwise to arg2.
 * arg1 is expanded twice, so it must not have side effects. */
# define LIB_COALESCE2(arg1, arg2) (((arg1) != NULL) ? (arg1) : (arg2))
|
|
|
+
|
|
|
+# define LIB_SET_IF_NULL(var, val) /* Assigns val to var only when var currently compares equal to NULL. */ \
|
|
|
+ if ( (var) == NULL ) { /* expands to a bare if statement; var is expanded twice */ \
|
|
|
+ (var) = (val); \
|
|
|
+ }
|
|
|
+
|
|
|
# define LIB_SET_IF_NOT_NULL(var, err) \
|
|
|
if ( var != NULL ) { \
|
|
|
*var = err; \
|
|
|
@@ -148,6 +160,12 @@ char *getenv(const char *name);
|
|
|
return ERR_NULL_ARG; \
|
|
|
}
|
|
|
|
|
|
+# define LIB_STR_MUST_NOT_BE_EMPTY(arg, err_var, ret_val) /* Guard: returns ret_val from the enclosing function when arg has no data or zero size. */ \
|
|
|
+ if ( (arg).data == NULL || (arg).size == 0 ) { /* arg is a struct value, not a pointer */ \
|
|
|
+ LIB_SET_IF_NOT_NULL(err_var, ERR_STR_EMPTY); /* err_var may be NULL; then no code is reported */ \
|
|
|
+ return ret_val; \
|
|
|
+ }
|
|
|
+
|
|
|
|
|
|
/* ------------------------------ END LIB DEF ------------------------------ */
|
|
|
|
|
|
@@ -218,9 +236,11 @@ struct str str_from_cstr(const char *cstr, u64 cstr_size,
|
|
|
enum err *out_err);
|
|
|
struct str str_from_cstr_ns(const char *cstr, enum err *out_err);
|
|
|
|
|
|
-struct str str_from_i64(i64 num, ALLOC_FUNC, enum err *out_err);
|
|
|
+struct str str_from_i64(i64 num, enum err *out_err);
|
|
|
+struct str str_from_i64_temp(i64 num);
|
|
|
|
|
|
-struct str str_dup(struct str str, ALLOC_FUNC);
|
|
|
+struct str str_dup(struct str str);
|
|
|
+struct str str_replace_escape_chars(const struct str *str, enum err *out_err);
|
|
|
|
|
|
intmax_t str_to_int(struct str str, enum err *err);
|
|
|
|
|
|
@@ -237,6 +257,7 @@ struct str str_tokenizer_next(struct str_tokenizer *st);
|
|
|
struct str str_slice(struct str str, u64 from, u64 to);
|
|
|
|
|
|
bool str_eq_cstr(struct str str, const char *cstr, u64 cstr_size);
|
|
|
+bool str_eq_str(struct str str1, struct str str2);
|
|
|
bool str_startswith_cstr(struct str str, const char *cstr, u64 cstr_size);
|
|
|
|
|
|
bool str_is_int(struct str str);
|
|
|
@@ -249,6 +270,52 @@ struct str_builder {
|
|
|
|
|
|
struct str str_builder_to_str(const struct str_builder *str_bldr);
|
|
|
|
|
|
+# if defined(IMP) || defined(IMP_STR)
|
|
|
+
|
|
|
+struct str
|
|
|
+str_replace_escape_chars(const struct str *str, enum err *out_err)
|
|
|
+{
|
|
|
+ struct str empty = {0};
|
|
|
+ struct str_builder ret = {0};
|
|
|
+ u64 i = 0;
|
|
|
+
|
|
|
+ LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, empty);
|
|
|
+ LIB_ARG_MUST_NOT_BE_NULL(str, out_err, empty);
|
|
|
+ LIB_STR_MUST_NOT_BE_EMPTY(*str, out_err, empty);
|
|
|
+
|
|
|
+ ret.data = malloc(str->size);
|
|
|
+ ret.size = 0;
|
|
|
+
|
|
|
+ if ( ret.data == NULL ) {
|
|
|
+ LIB_SET_IF_NOT_NULL(out_err, ERR_FAILED_ALLOC);
|
|
|
+ return empty;
|
|
|
+ }
|
|
|
+ memset((void*)ret.data, 0, str->size);
|
|
|
+
|
|
|
+ for ( i = 0; i < str->size; ++i ) {
|
|
|
+ if ( str->data[i] == '\\' ) {
|
|
|
+ if ( (i+1) < str->size ) {
|
|
|
+ ++i;
|
|
|
+ switch ( str->data[i] ) {
|
|
|
+ case 'n': ret.data[ret.size++] = '\n'; break;
|
|
|
+ case 'r': ret.data[ret.size++] = '\r'; break;
|
|
|
+ case 't': ret.data[ret.size++] = '\t'; break;
|
|
|
+ case 'e': ret.data[ret.size++] = '\033'; break;
|
|
|
+ default: ret.data[ret.size++] = '\\';
|
|
|
+ ret.data[ret.size++] = str->data[i];
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ret.data[ret.size++] = str->data[i];
|
|
|
+ }
|
|
|
+
|
|
|
+ return str_builder_to_str(&ret);
|
|
|
+}
|
|
|
+
|
|
|
+# endif /* defined(IMP) || defined(IMP_STR) */
|
|
|
+
|
|
|
# endif /* defined(WANT_STR) || defined(WANT_ALL) */
|
|
|
|
|
|
/* ----------------------------- END STR DEF ------------------------------- */
|
|
|
@@ -501,6 +568,675 @@ enum err path_file_save(const struct path *path, const struct file *file,
|
|
|
|
|
|
/* ----------------------------- END PATH DEF ------------------------------ */
|
|
|
|
|
|
+/* ------------------------- START TOKENIZER DEF --------------------------- */
|
|
|
+
|
|
|
+# if defined(WANT_TOKENIZER) || defined(WANT_ALL)
|
|
|
+
|
|
|
+struct tokenizer;
|
|
|
+
|
|
|
+struct tokenizer_options { /* Callback table steering tokenizer_next_token; NULL entries are filled in by tokenizer_options_defaultlyzer. */
|
|
|
+ bool (*skip_token)(struct tokenizer *tkn, char c); /* true: c is discarded before a token starts */
|
|
|
+
|
|
|
+ bool (*is_id)(struct tokenizer *tkn, char c); /* true: c continues an identifier */
|
|
|
+ bool (*is_id_start)(struct tokenizer *tkn, char c); /* true: c begins an identifier (TK_ID) */
|
|
|
+
|
|
|
+ bool (*is_digit)(struct tokenizer *tkn, char c); /* true: c begins or continues a TK_NUM_LIT */
|
|
|
+ bool (*is_num_lit)(struct tokenizer *tkn, struct str str); /* called on a finished identifier text; true re-tags it as TK_NUM_LIT */
|
|
|
+
|
|
|
+ bool (*is_str_lit_start)(struct tokenizer *tkn, char c); /* true: c opens a TK_STR_LIT */
|
|
|
+ bool (*is_str_lit_end)(struct tokenizer *tkn, char c); /* true: c closes the string literal being read */
|
|
|
+ bool (*is_str_lit)(struct tokenizer *tkn, struct str str); /* NOTE(review): not called by the tokenizer code visible here */
|
|
|
+};
|
|
|
+
|
|
|
+struct tokenizer { /* Tokenizer state: a cursor over a piece of source text plus the callbacks used to classify it. */
|
|
|
+ u64 i; /* index into code of the next character to read */
|
|
|
+
|
|
|
+ struct tokenizer_options opts; /* classification callbacks, defaults filled in by tokenizer_create */
|
|
|
+
|
|
|
+ struct path src; /* copied into location strings by tokenizer_token_loc_temp */
|
|
|
+ struct str code; /* the text being tokenized; stored by value, the bytes are not copied */
|
|
|
+
|
|
|
+ void *edata; /* This is not used by the lib */
|
|
|
+};
|
|
|
+
|
|
|
+enum tokenizer_loc_format { /* Output styles understood by tokenizer_token_loc_temp. */
|
|
|
+ TLF_VIM = 0 /* "src:line:col:" */
|
|
|
+};
|
|
|
+
|
|
|
+enum token_type { /* Token kinds: single-character tokens use the character code as their value. */
|
|
|
+ TK_INVALID = -2,
|
|
|
+ TK_EOF = -1, /* returned once the cursor has reached the end of the code */
|
|
|
+ TK_NUL = '\0', /* also the type of a zero-initialized token */
|
|
|
+ TK_NL = '\n',
|
|
|
+ TK_TAB = '\t',
|
|
|
+ TK_SPACE = ' ',
|
|
|
+ TK_ASTERISK = '*',
|
|
|
+ TK_AMPERSAND = '&',
|
|
|
+ TK_PLUS = '+',
|
|
|
+ TK_MINUS = '-',
|
|
|
+ TK_EQUAL = '=',
|
|
|
+ TK_SLASH = '/',
|
|
|
+ TK_BACKSLASH = '\\',
|
|
|
+ TK_POUND = '#',
|
|
|
+ TK_SEMICOLON = ';',
|
|
|
+ TK_COLON = ':',
|
|
|
+ TK_COMMA = ',',
|
|
|
+ TK_DOT = '.',
|
|
|
+ TK_UNDERSCORE = '_',
|
|
|
+ TK_L_BRACES = '(',
|
|
|
+ TK_R_BRACES = ')',
|
|
|
+ TK_L_BRACKET = '[',
|
|
|
+ TK_R_BRACKET = ']',
|
|
|
+ TK_L_CUR_BRACES = '{',
|
|
|
+ TK_R_CUR_BRACES = '}',
|
|
|
+ TK_L_ANG_BRACKET = '<',
|
|
|
+ TK_R_ANG_BRACKET = '>',
|
|
|
+ TK_SINGLE_QUOTE = '\'',
|
|
|
+ TK_DOUBLE_QUOTE = '"',
|
|
|
+ TK_BACKTICK = '`',
|
|
|
+ TK_TILDE = '~',
|
|
|
+
|
|
|
+ TK_a = 'a',
|
|
|
+ TK_b = 'b',
|
|
|
+ TK_c = 'c',
|
|
|
+ TK_d = 'd',
|
|
|
+ TK_e = 'e',
|
|
|
+ TK_f = 'f',
|
|
|
+ TK_g = 'g',
|
|
|
+ TK_h = 'h',
|
|
|
+ TK_i = 'i',
|
|
|
+ TK_j = 'j',
|
|
|
+ TK_k = 'k',
|
|
|
+ TK_l = 'l',
|
|
|
+ TK_m = 'm',
|
|
|
+ TK_n = 'n',
|
|
|
+ TK_o = 'o',
|
|
|
+ TK_p = 'p',
|
|
|
+ TK_q = 'q',
|
|
|
+ TK_r = 'r',
|
|
|
+ TK_s = 's',
|
|
|
+ TK_t = 't',
|
|
|
+ TK_u = 'u',
|
|
|
+ TK_v = 'v',
|
|
|
+ TK_w = 'w',
|
|
|
+ TK_x = 'x',
|
|
|
+ TK_y = 'y',
|
|
|
+ TK_z = 'z',
|
|
|
+
|
|
|
+ TK_A = 'A',
|
|
|
+ TK_B = 'B',
|
|
|
+ TK_C = 'C',
|
|
|
+ TK_D = 'D',
|
|
|
+ TK_E = 'E',
|
|
|
+ TK_F = 'F',
|
|
|
+ TK_G = 'G',
|
|
|
+ TK_H = 'H',
|
|
|
+ TK_I = 'I',
|
|
|
+ TK_J = 'J',
|
|
|
+ TK_K = 'K',
|
|
|
+ TK_L = 'L',
|
|
|
+ TK_M = 'M',
|
|
|
+ TK_N = 'N',
|
|
|
+ TK_O = 'O',
|
|
|
+ TK_P = 'P',
|
|
|
+ TK_Q = 'Q',
|
|
|
+ TK_R = 'R',
|
|
|
+ TK_S = 'S',
|
|
|
+ TK_T = 'T',
|
|
|
+ TK_U = 'U',
|
|
|
+ TK_V = 'V',
|
|
|
+ TK_W = 'W',
|
|
|
+ TK_X = 'X',
|
|
|
+ TK_Y = 'Y',
|
|
|
+ TK_Z = 'Z',
|
|
|
+
|
|
|
+ TK_0 = '0',
|
|
|
+ TK_1 = '1',
|
|
|
+ TK_2 = '2',
|
|
|
+ TK_3 = '3',
|
|
|
+ TK_4 = '4',
|
|
|
+ TK_5 = '5',
|
|
|
+ TK_6 = '6',
|
|
|
+ TK_7 = '7',
|
|
|
+ TK_8 = '8',
|
|
|
+ TK_9 = '9',
|
|
|
+
|
|
|
+ TK_ID = 257, /* multi-character kinds start at 257 */
|
|
|
+ TK_NUM_LIT,
|
|
|
+ TK_STR_LIT,
|
|
|
+
|
|
|
+ TK_LAST = 512
|
|
|
+};
|
|
|
+
|
|
|
+struct token { /* Generic token container returned by the tokenizer. */
|
|
|
+ enum token_type type;
|
|
|
+ u64 loc_start; /* cursor value recorded when the token began */
|
|
|
+ u64 loc_end; /* cursor value recorded when the token ended */
|
|
|
+
|
|
|
+ u8 _[64]; /* opaque payload area; the tokenizer stores a struct token_wstr over this struct for ID/NUM_LIT/STR_LIT tokens */
|
|
|
+};
|
|
|
+
|
|
|
+struct token_wstr { /* Token carrying text; its first three members mirror struct token. */
|
|
|
+ enum token_type type;
|
|
|
+ u64 loc_start;
|
|
|
+ u64 loc_end;
|
|
|
+
|
|
|
+ struct str string; /* token text; tokenizer_next_token points this at its static buffer */
|
|
|
+};
|
|
|
+
|
|
|
+
|
|
|
+struct tokenizer tokenizer_create(struct str code, struct path src,
|
|
|
+ struct tokenizer_options *opts,
|
|
|
+ enum err *out_err);
|
|
|
+
|
|
|
+struct token tokenizer_next_token(struct tokenizer *tkn, enum err *out_err);
|
|
|
+bool tokenizer_is_next(struct tokenizer *tkn, enum token_type type,
|
|
|
+ struct token *out_tk, enum err *out_err);
|
|
|
+bool tokenizer_is_next_id(struct tokenizer *tkn, struct str string,
|
|
|
+ struct token *out_tk, enum err *out_err);
|
|
|
+
|
|
|
+enum err tokenizer_destroy(struct tokenizer *tkn, enum err *out_err);
|
|
|
+
|
|
|
+const char *tokenizer_token_loc_temp(const struct tokenizer *tkn,
|
|
|
+ const struct token *tk,
|
|
|
+ enum tokenizer_loc_format format,
|
|
|
+ enum err *out_err);
|
|
|
+
|
|
|
+const char *token_to_cstr(enum token_type type);
|
|
|
+
|
|
|
+struct tokenizer_options tokenizer_options_defaultlyzer(
|
|
|
+ struct tokenizer_options *tkn_opts);
|
|
|
+
|
|
|
+bool tokenizer_skip_token(struct tokenizer *tkn, char c);
|
|
|
+
|
|
|
+bool tokenizer_is_id(struct tokenizer *tkn, char c);
|
|
|
+bool tokenizer_is_id_start(struct tokenizer *tkn, char c);
|
|
|
+
|
|
|
+bool tokenizer_is_digit(struct tokenizer *tkn, char c);
|
|
|
+bool tokenizer_is_num_lit(struct tokenizer *tkn, struct str str);
|
|
|
+
|
|
|
+bool tokenizer_is_str_lit_start(struct tokenizer *tkn, char c);
|
|
|
+bool tokenizer_is_str_lit_end(struct tokenizer *tkn, char c);
|
|
|
+bool tokenizer_is_str_lit(struct tokenizer *tkn, struct str str);
|
|
|
+
|
|
|
+# if defined(IMP) || defined(IMP_TOKENIZER)
|
|
|
+
|
|
|
+struct tokenizer
|
|
|
+tokenizer_create(struct str code, struct path src,
|
|
|
+ struct tokenizer_options *opts, enum err *out_err)
|
|
|
+{
|
|
|
+ struct tokenizer empty = {0};
|
|
|
+ struct tokenizer tkn = {0};
|
|
|
+ struct tokenizer_options defs = {0};
|
|
|
+
|
|
|
+ LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, empty);
|
|
|
+
|
|
|
+ if ( opts == NULL ) {
|
|
|
+ tkn.opts = defs;
|
|
|
+ } else {
|
|
|
+ tkn.opts = *opts;
|
|
|
+ }
|
|
|
+
|
|
|
+ tkn.opts = tokenizer_options_defaultlyzer(&tkn.opts);
|
|
|
+
|
|
|
+ tkn.i = 0;
|
|
|
+ tkn.code = code;
|
|
|
+ tkn.src = src;
|
|
|
+
|
|
|
+ return tkn;
|
|
|
+}
|
|
|
+
|
|
|
+struct token
|
|
|
+tokenizer_next_token(struct tokenizer *tkn, enum err *out_err)
|
|
|
+{
|
|
|
+ static struct {
|
|
|
+ char data[256];
|
|
|
+ u16 size;
|
|
|
+ } buf = {0};
|
|
|
+
|
|
|
+ struct token empty = {0};
|
|
|
+ struct token tk = {0};
|
|
|
+ char c = 0;
|
|
|
+
|
|
|
+ LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, empty);
|
|
|
+
|
|
|
+ LIB_ARG_MUST_NOT_BE_NULL(tkn, out_err, empty);
|
|
|
+
|
|
|
+ buf.size = 0;
|
|
|
+
|
|
|
+ if ( tkn->i >= tkn->code.size ) {
|
|
|
+ tk.type = TK_EOF;
|
|
|
+ tk.loc_start = tkn->i;
|
|
|
+ tk.loc_end = tkn->i;
|
|
|
+ return tk;
|
|
|
+ }
|
|
|
+
|
|
|
+ c = tkn->code.data[tkn->i++];
|
|
|
+
|
|
|
+ while ( tkn->opts.skip_token(tkn, c) == true ) {
|
|
|
+ c = tkn->code.data[tkn->i++];
|
|
|
+ }
|
|
|
+
|
|
|
+ if ( tkn->opts.is_id_start(tkn, c) == true ) {
|
|
|
+ struct token_wstr tk_ws = {0};
|
|
|
+
|
|
|
+ tk_ws.type = TK_ID;
|
|
|
+ tk_ws.loc_start = tkn->i;
|
|
|
+
|
|
|
+ do {
|
|
|
+ buf.data[buf.size++] = c;
|
|
|
+ c = tkn->code.data[tkn->i++];
|
|
|
+ } while ( tkn->opts.is_id(tkn, c) == true );
|
|
|
+ --tkn->i;
|
|
|
+
|
|
|
+ tk_ws.string.data = buf.data;
|
|
|
+ tk_ws.string.size = buf.size;
|
|
|
+
|
|
|
+ if ( tkn->opts.is_num_lit(tkn, tk_ws.string) == true ) {
|
|
|
+ tk_ws.type = TK_NUM_LIT;
|
|
|
+ }
|
|
|
+ tk_ws.loc_end = tkn->i;
|
|
|
+
|
|
|
+ return *(struct token *)&tk_ws;
|
|
|
+ }
|
|
|
+
|
|
|
+ if ( tkn->opts.is_digit(tkn, c) == true ) {
|
|
|
+ struct token_wstr tk_ws = {0};
|
|
|
+
|
|
|
+ tk_ws.type = TK_NUM_LIT;
|
|
|
+ tk_ws.loc_start = tkn->i;
|
|
|
+
|
|
|
+ do {
|
|
|
+ buf.data[buf.size++] = c;
|
|
|
+ c = tkn->code.data[tkn->i++];
|
|
|
+ } while ( tkn->opts.is_digit(tkn, c) == true );
|
|
|
+ --tkn->i;
|
|
|
+
|
|
|
+ tk_ws.string.data = buf.data;
|
|
|
+ tk_ws.string.size = buf.size;
|
|
|
+ tk_ws.loc_end = tkn->i;
|
|
|
+
|
|
|
+ return *(struct token *)&tk_ws;
|
|
|
+ }
|
|
|
+
|
|
|
+ if ( tkn->opts.is_str_lit_start(tkn, c) == true ) {
|
|
|
+ struct token_wstr tk_ws = {0};
|
|
|
+
|
|
|
+ tk_ws.type = TK_STR_LIT;
|
|
|
+ tk_ws.loc_start = tkn->i;
|
|
|
+
|
|
|
+ do {
|
|
|
+ buf.data[buf.size++] = c;
|
|
|
+ c = tkn->code.data[tkn->i++];
|
|
|
+ } while ( tkn->opts.is_str_lit_end(tkn, c) == false );
|
|
|
+ buf.data[buf.size++] = c;
|
|
|
+
|
|
|
+ tk_ws.string.data = buf.data;
|
|
|
+ tk_ws.string.size = buf.size - 1;
|
|
|
+ tk_ws.loc_end = tkn->i;
|
|
|
+
|
|
|
+ return *(struct token *)&tk_ws;
|
|
|
+ }
|
|
|
+
|
|
|
+ if ( tkn->i >= tkn->code.size ) {
|
|
|
+ tk.type = TK_EOF;
|
|
|
+ tk.loc_start = tkn->i;
|
|
|
+ tk.loc_end = tkn->i;
|
|
|
+ return tk;
|
|
|
+ }
|
|
|
+
|
|
|
+ tk.type = c;
|
|
|
+ tk.loc_start = tkn->i;
|
|
|
+ tk.loc_end = tkn->i;
|
|
|
+
|
|
|
+ return tk;
|
|
|
+}
|
|
|
+
|
|
|
+bool
|
|
|
+tokenizer_is_next(struct tokenizer *tkn, enum token_type type,
|
|
|
+ struct token *out_tk, enum err *out_err)
|
|
|
+{
|
|
|
+ enum err err = ERR_OK;
|
|
|
+ struct token tk = {0};
|
|
|
+ u64 pi = 0;
|
|
|
+
|
|
|
+ LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
|
|
|
+
|
|
|
+ LIB_ARG_MUST_NOT_BE_NULL(tkn, out_err, false);
|
|
|
+
|
|
|
+ pi = tkn->i;
|
|
|
+
|
|
|
+ tk = tokenizer_next_token(tkn, &err);
|
|
|
+ if ( err != ERR_OK ) {
|
|
|
+ tkn->i = pi;
|
|
|
+ LIB_SET_IF_NOT_NULL(out_err, err);
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ if ( tk.type != type ) {
|
|
|
+ tkn->i = pi;
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ LIB_SET_IF_NOT_NULL(out_tk, tk);
|
|
|
+ LIB_SET_IF_NOT_NULL(out_err, ERR_OK);
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
+bool
|
|
|
+tokenizer_is_next_id(struct tokenizer *tkn, struct str string,
|
|
|
+ struct token *out_tk, enum err *out_err)
|
|
|
+{
|
|
|
+ enum err err = ERR_OK;
|
|
|
+ struct token tk = {0};
|
|
|
+ struct token_wstr *tk_ws = NULL;
|
|
|
+ u64 pi = 0;
|
|
|
+
|
|
|
+ LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
|
|
|
+
|
|
|
+ LIB_ARG_MUST_NOT_BE_NULL(tkn, out_err, false);
|
|
|
+
|
|
|
+ LIB_STR_MUST_NOT_BE_EMPTY(string, out_err, false);
|
|
|
+
|
|
|
+ pi = tkn->i;
|
|
|
+
|
|
|
+ tk = tokenizer_next_token(tkn, &err);
|
|
|
+ if ( err != ERR_OK ) {
|
|
|
+ LIB_SET_IF_NOT_NULL(out_err, err);
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ if ( tk.type != TK_ID ) {
|
|
|
+ tkn->i = pi;
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ tk_ws = (struct token_wstr *)&tk;
|
|
|
+
|
|
|
+ if ( str_eq_str(tk_ws->string, string) == false ) {
|
|
|
+ tkn->i = pi;
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ LIB_SET_IF_NOT_NULL(out_tk, tk);
|
|
|
+ LIB_SET_IF_NOT_NULL(out_err, ERR_OK);
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
+enum err tokenizer_destroy(struct tokenizer *tkn, enum err *out_err);
|
|
|
+
|
|
|
+const char *
|
|
|
+tokenizer_token_loc_temp(const struct tokenizer *tkn, const struct token *tk,
|
|
|
+ enum tokenizer_loc_format format, enum err *out_err)
|
|
|
+{
|
|
|
+ static struct {
|
|
|
+ char data[1024];
|
|
|
+ u64 size;
|
|
|
+ } buf = {0};
|
|
|
+
|
|
|
+ LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, NULL);
|
|
|
+
|
|
|
+ LIB_ARG_MUST_NOT_BE_NULL(tkn, out_err, NULL);
|
|
|
+ LIB_ARG_MUST_NOT_BE_NULL(tk, out_err, NULL);
|
|
|
+
|
|
|
+ buf.size = 0;
|
|
|
+
|
|
|
+ switch ( format ) {
|
|
|
+ case TLF_VIM: {
|
|
|
+ u64 col = 0;
|
|
|
+ /* Format src.c:line:col: msg */
|
|
|
+
|
|
|
+ memcpy(buf.data, tkn->src.data, tkn->src.size);
|
|
|
+ buf.size += tkn->src.size;
|
|
|
+
|
|
|
+ buf.data[buf.size++] = ':';
|
|
|
+
|
|
|
+ {
|
|
|
+ u64 i = 0;
|
|
|
+ u64 line = 0;
|
|
|
+ struct str line_str = {0};
|
|
|
+ for ( i = 0; i < tk->loc_start; ++i ) {
|
|
|
+ line += ( tkn->code.data[i] == '\n' );
|
|
|
+ }
|
|
|
+ line_str = str_from_i64_temp((i64)line);
|
|
|
+ memcpy(buf.data + buf.size, line_str.data, line_str.size);
|
|
|
+ buf.size += line_str.size;
|
|
|
+
|
|
|
+ /* Col values wrong */
|
|
|
+ while ( tkn->code.data[i] != '\n' ) {
|
|
|
+ --i;
|
|
|
+ }
|
|
|
+
|
|
|
+ col = tk->loc_start - i;
|
|
|
+ }
|
|
|
+
|
|
|
+ buf.data[buf.size++] = ':';
|
|
|
+
|
|
|
+ {
|
|
|
+ struct str col_str = {0};
|
|
|
+ col_str = str_from_i64_temp((i64)col);
|
|
|
+ memcpy(buf.data + buf.size, col_str.data, col_str.size);
|
|
|
+ buf.size += col_str.size;
|
|
|
+ }
|
|
|
+
|
|
|
+ buf.data[buf.size++] = ':';
|
|
|
+ buf.data[buf.size++] = 0;
|
|
|
+
|
|
|
+ } break;
|
|
|
+ default:
|
|
|
+ LIB_SET_IF_NOT_NULL(out_err, ERR_INVALID_ARG);
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ LIB_SET_IF_NOT_NULL(out_err, ERR_OK);
|
|
|
+ return buf.data;
|
|
|
+}
|
|
|
+
|
|
|
+const char *
|
|
|
+token_to_cstr(enum token_type type)
|
|
|
+{
|
|
|
+ switch ( type ) {
|
|
|
+ case TK_INVALID: return "TK_INVALID";
|
|
|
+ case TK_EOF: return "TK_EOF";
|
|
|
+ case TK_NUL: return "TK_NUL";
|
|
|
+ case TK_NL: return "TK_NL";
|
|
|
+ case TK_TAB: return "TK_TAB";
|
|
|
+ case TK_SPACE: return "TK_SPACE";
|
|
|
+ case TK_ASTERISK: return "TK_ASTERISK";
|
|
|
+ case TK_AMPERSAND: return "TK_AMPERSAND";
|
|
|
+ case TK_PLUS: return "TK_PLUS";
|
|
|
+ case TK_MINUS: return "TK_MINUS";
|
|
|
+ case TK_EQUAL: return "TK_EQUAL";
|
|
|
+ case TK_SLASH: return "TK_SLASH";
|
|
|
+ case TK_BACKSLASH: return "TK_BACKSLASH";
|
|
|
+ case TK_POUND: return "TK_POUND";
|
|
|
+ case TK_SEMICOLON: return "TK_SEMICOLON";
|
|
|
+ case TK_COLON: return "TK_COLON";
|
|
|
+ case TK_COMMA: return "TK_COMMA";
|
|
|
+ case TK_DOT: return "TK_DOT";
|
|
|
+ case TK_UNDERSCORE: return "TK_UNDERSCORE";
|
|
|
+ case TK_L_BRACES: return "TK_L_BRACES";
|
|
|
+ case TK_R_BRACES: return "TK_R_BRACES";
|
|
|
+ case TK_L_BRACKET: return "TK_L_BRACKET";
|
|
|
+ case TK_R_BRACKET: return "TK_R_BRACKET";
|
|
|
+ case TK_L_CUR_BRACES: return "TK_L_CUR_BRACES";
|
|
|
+ case TK_R_CUR_BRACES: return "TK_R_CUR_BRACES";
|
|
|
+ case TK_L_ANG_BRACKET: return "TK_L_ANG_BRACKET";
|
|
|
+ case TK_R_ANG_BRACKET: return "TK_R_ANG_BRACKET";
|
|
|
+ case TK_SINGLE_QUOTE: return "TK_SINGLE_QUOTE";
|
|
|
+ case TK_DOUBLE_QUOTE: return "TK_DOUBLE_QUOTE";
|
|
|
+ case TK_BACKTICK: return "TK_BACKTICK";
|
|
|
+ case TK_TILDE: return "TK_TILDE";
|
|
|
+
|
|
|
+ case TK_a: return "TK_a";
|
|
|
+ case TK_b: return "TK_b";
|
|
|
+ case TK_c: return "TK_c";
|
|
|
+ case TK_d: return "TK_d";
|
|
|
+ case TK_e: return "TK_e";
|
|
|
+ case TK_f: return "TK_f";
|
|
|
+ case TK_g: return "TK_g";
|
|
|
+ case TK_h: return "TK_h";
|
|
|
+ case TK_i: return "TK_i";
|
|
|
+ case TK_j: return "TK_j";
|
|
|
+ case TK_k: return "TK_k";
|
|
|
+ case TK_l: return "TK_l";
|
|
|
+ case TK_m: return "TK_m";
|
|
|
+ case TK_n: return "TK_n";
|
|
|
+ case TK_o: return "TK_o";
|
|
|
+ case TK_p: return "TK_p";
|
|
|
+ case TK_q: return "TK_q";
|
|
|
+ case TK_r: return "TK_r";
|
|
|
+ case TK_s: return "TK_s";
|
|
|
+ case TK_t: return "TK_t";
|
|
|
+ case TK_u: return "TK_u";
|
|
|
+ case TK_v: return "TK_v";
|
|
|
+ case TK_w: return "TK_w";
|
|
|
+ case TK_x: return "TK_x";
|
|
|
+ case TK_y: return "TK_y";
|
|
|
+ case TK_z: return "TK_z";
|
|
|
+
|
|
|
+ case TK_A: return "TK_A";
|
|
|
+ case TK_B: return "TK_B";
|
|
|
+ case TK_C: return "TK_C";
|
|
|
+ case TK_D: return "TK_D";
|
|
|
+ case TK_E: return "TK_E";
|
|
|
+ case TK_F: return "TK_F";
|
|
|
+ case TK_G: return "TK_G";
|
|
|
+ case TK_H: return "TK_H";
|
|
|
+ case TK_I: return "TK_I";
|
|
|
+ case TK_J: return "TK_J";
|
|
|
+ case TK_K: return "TK_K";
|
|
|
+ case TK_L: return "TK_L";
|
|
|
+ case TK_M: return "TK_M";
|
|
|
+ case TK_N: return "TK_N";
|
|
|
+ case TK_O: return "TK_O";
|
|
|
+ case TK_P: return "TK_P";
|
|
|
+ case TK_Q: return "TK_Q";
|
|
|
+ case TK_R: return "TK_R";
|
|
|
+ case TK_S: return "TK_S";
|
|
|
+ case TK_T: return "TK_T";
|
|
|
+ case TK_U: return "TK_U";
|
|
|
+ case TK_V: return "TK_V";
|
|
|
+ case TK_W: return "TK_W";
|
|
|
+ case TK_X: return "TK_X";
|
|
|
+ case TK_Y: return "TK_Y";
|
|
|
+ case TK_Z: return "TK_Z";
|
|
|
+
|
|
|
+ case TK_0: return "TK_0";
|
|
|
+ case TK_1: return "TK_1";
|
|
|
+ case TK_2: return "TK_2";
|
|
|
+ case TK_3: return "TK_3";
|
|
|
+ case TK_4: return "TK_4";
|
|
|
+ case TK_5: return "TK_5";
|
|
|
+ case TK_6: return "TK_6";
|
|
|
+ case TK_7: return "TK_7";
|
|
|
+ case TK_8: return "TK_8";
|
|
|
+ case TK_9: return "TK_9";
|
|
|
+
|
|
|
+ case TK_ID: return "TK_ID";
|
|
|
+ case TK_NUM_LIT: return "TK_NUM_LIT";
|
|
|
+ case TK_STR_LIT: return "TK_STR_LIT";
|
|
|
+ default: return "TK_UNKNOWN";
|
|
|
+ }
|
|
|
+ return "TK_UNKNOWN";
|
|
|
+}
|
|
|
+
|
|
|
+struct tokenizer_options
|
|
|
+tokenizer_options_defaultlyzer(struct tokenizer_options *tkn_opts)
|
|
|
+{
|
|
|
+ struct tokenizer_options defs = {0};
|
|
|
+
|
|
|
+ if ( tkn_opts != NULL ) {
|
|
|
+ defs = *tkn_opts;
|
|
|
+ }
|
|
|
+
|
|
|
+ LIB_SET_IF_NULL(defs.skip_token, tokenizer_skip_token);
|
|
|
+
|
|
|
+ LIB_SET_IF_NULL(defs.is_id, tokenizer_is_id);
|
|
|
+ LIB_SET_IF_NULL(defs.is_id_start, tokenizer_is_id_start);
|
|
|
+
|
|
|
+ LIB_SET_IF_NULL(defs.is_digit, tokenizer_is_digit);
|
|
|
+ LIB_SET_IF_NULL(defs.is_num_lit, tokenizer_is_num_lit);
|
|
|
+
|
|
|
+ LIB_SET_IF_NULL(defs.is_str_lit_start, tokenizer_is_str_lit_start);
|
|
|
+ LIB_SET_IF_NULL(defs.is_str_lit_end, tokenizer_is_str_lit_end);
|
|
|
+ LIB_SET_IF_NULL(defs.is_str_lit, tokenizer_is_str_lit);
|
|
|
+
|
|
|
+ LIB_SET_IF_NULL(defs.skip_token, tokenizer_skip_token);
|
|
|
+
|
|
|
+ return defs;
|
|
|
+}
|
|
|
+
|
|
|
/* Default skip rule: space, newline, carriage return and tab are skipped. */
bool
tokenizer_skip_token(struct tokenizer *tkn, char c)
{
    UNUSED(tkn);

    switch ( c ) {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
        return true;
    default:
        return false;
    }
}
|
|
|
+
|
|
|
/* Default identifier rule: ASCII letters and the underscore. */
bool
tokenizer_is_id(struct tokenizer *tkn, char c)
{
    const bool is_upper = (c >= 'A') && (c <= 'Z');
    const bool is_lower = (c >= 'a') && (c <= 'z');

    UNUSED(tkn);

    return (c == '_') || is_upper || is_lower;
}
|
|
|
+
|
|
|
/* Default rule for the first identifier character: the same set that
 * tokenizer_is_id accepts. */
bool
tokenizer_is_id_start(struct tokenizer *tkn, char c)
{
    bool starts_id = false;

    UNUSED(tkn);

    starts_id = tokenizer_is_id(tkn, c);
    return starts_id;
}
|
|
|
+
|
|
|
/* Default digit rule: the ASCII characters '0' through '9'. */
bool
tokenizer_is_digit(struct tokenizer *tkn, char c)
{
    UNUSED(tkn);

    if ( c < '0' || c > '9' ) {
        return false;
    }
    return true;
}
|
|
|
+
|
|
|
+bool
|
|
|
+tokenizer_is_num_lit(struct tokenizer *tkn, struct str str)
|
|
|
+{
|
|
|
+ u64 i = 0;
|
|
|
+
|
|
|
+ UNUSED(tkn);
|
|
|
+
|
|
|
+ for ( i = 0; i < str.size; ++i ) {
|
|
|
+ if ( ! tokenizer_is_digit(tkn, str.data[i]) ) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
/* Default string-literal opener: a single or a double quote. */
bool
tokenizer_is_str_lit_start(struct tokenizer *tkn, char c)
{
    UNUSED(tkn);

    switch ( c ) {
    case '\'':
    case '"':
        return true;
    default:
        return false;
    }
}
|
|
|
+
|
|
|
/* Default string-literal closer: the same characters that open one, so
 * either quote character ends a literal regardless of which opened it. */
bool
tokenizer_is_str_lit_end(struct tokenizer *tkn, char c)
{
    const bool ends_literal = tokenizer_is_str_lit_start(tkn, c);

    return ends_literal;
}
|
|
|
+
|
|
|
+bool
|
|
|
+tokenizer_is_str_lit(struct tokenizer *tkn, struct str str)
|
|
|
+{
|
|
|
+ UNUSED(tkn);
|
|
|
+ UNUSED(str);
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
+# endif /* defined(IMP) || defined(IMP_TOKENIZER) */
|
|
|
+
|
|
|
+# endif /* defined(WANT_TOKENIZER) || defined(WANT_ALL) */
|
|
|
+
|
|
|
+/* -------------------------- END TOKENIZER DEF ---------------------------- */
|
|
|
+
|
|
|
/* ---------------------------- START SGFX DEF ----------------------------- */
|
|
|
|
|
|
# if defined(WANT_SGFX) || defined(WANT_ALL)
|
|
|
@@ -891,23 +1627,47 @@ str_from_cstr_ns(const char *cstr, enum err *out_err)
|
|
|
}
|
|
|
|
|
|
struct str
|
|
|
-str_from_i64(i64 num, ALLOC_FUNC, enum err *out_err)
|
|
|
+str_from_i64(i64 num, enum err *out_err)
|
|
|
{
|
|
|
- #define BUF_CAP ((u64) 256)
|
|
|
- char *data = NULL;
|
|
|
- char buf[256] = {0};
|
|
|
- u64 buf_size = 0;
|
|
|
- u64 _num = 0;
|
|
|
+ struct str empty = {0};
|
|
|
+ struct str str_temp = {0};
|
|
|
struct str ret = {0};
|
|
|
|
|
|
- _ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, STR_EMPTY);
|
|
|
+ LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, empty);
|
|
|
|
|
|
- data = alloc(STR_SIZE_LIMIT);
|
|
|
+ str_temp = str_from_i64_temp(num);
|
|
|
+
|
|
|
+ ret.size = str_temp.size;
|
|
|
ret.should_be_freed = true;
|
|
|
|
|
|
+ ret.data = malloc(str_temp.size);
|
|
|
+ if ( ret.data == NULL ) {
|
|
|
+ LIB_SET_IF_NOT_NULL(out_err, ERR_FAILED_ALLOC);
|
|
|
+ return empty;
|
|
|
+ }
|
|
|
+
|
|
|
+ memcpy((void *)ret.data, str_temp.data, str_temp.size);
|
|
|
+
|
|
|
+ LIB_SET_IF_NOT_NULL(out_err, ERR_OK);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+struct str
|
|
|
+str_from_i64_temp(i64 num)
|
|
|
+{
|
|
|
+ static char buffer[32];
|
|
|
+
|
|
|
+ struct {
|
|
|
+ char data[24];
|
|
|
+ u8 size;
|
|
|
+ } buf = {0};
|
|
|
+ u64 _num = 0;
|
|
|
+ struct str ret = {0};
|
|
|
+
|
|
|
+ ret.data = buffer;
|
|
|
+
|
|
|
if ( num < 0 ) {
|
|
|
- /* TODO: Check for cap */
|
|
|
- buf[buf_size++] = '-';
|
|
|
+ buf.data[buf.size++] = '-';
|
|
|
num *= -1;
|
|
|
}
|
|
|
|
|
|
@@ -916,16 +1676,14 @@ str_from_i64(i64 num, ALLOC_FUNC, enum err *out_err)
|
|
|
while ( 1 ) {
|
|
|
u8 mod = (u8) (_num % 10);
|
|
|
|
|
|
- /* TODO: Check for cap */
|
|
|
- buf[buf_size++] = (char) (mod + 48);
|
|
|
+ buf.data[buf.size++] = (char) (mod + 48);
|
|
|
|
|
|
_num /= 10;
|
|
|
|
|
|
- if ( buf_size >= BUF_CAP ) {
|
|
|
+ if ( buf.size >= 24 ) {
|
|
|
_loop:
|
|
|
- /* TODO: Check for cap */
|
|
|
- data[ret.size++] = buf[--buf_size];
|
|
|
- if ( buf_size > 0 ) {
|
|
|
+ buffer[ret.size++] = buf.data[--buf.size];
|
|
|
+ if ( buf.size > 0 ) {
|
|
|
goto _loop;
|
|
|
}
|
|
|
}
|
|
|
@@ -934,31 +1692,28 @@ str_from_i64(i64 num, ALLOC_FUNC, enum err *out_err)
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
- if ( buf_size == 0 ) {
|
|
|
+ if ( buf.size == 0 ) {
|
|
|
goto exit;
|
|
|
}
|
|
|
|
|
|
_out_loop:
|
|
|
- data[ret.size++] = buf[--buf_size];
|
|
|
- if ( buf_size > 0 ) {
|
|
|
+ buffer[ret.size++] = buf.data[--buf.size];
|
|
|
+ if ( buf.size > 0 ) {
|
|
|
goto _out_loop;
|
|
|
}
|
|
|
|
|
|
exit:
|
|
|
- data[ret.size] = 0;
|
|
|
- ret.data = data;
|
|
|
- _SET_IF_NOT_NULL(out_err, ERR_OK)
|
|
|
+ buffer[ret.size] = 0;
|
|
|
return ret;
|
|
|
- #undef BUF_CAP
|
|
|
}
|
|
|
|
|
|
|
|
|
struct str
|
|
|
-str_dup(struct str str, ALLOC_FUNC)
|
|
|
+str_dup(struct str str)
|
|
|
{
|
|
|
struct str ret = str;
|
|
|
|
|
|
- ret.data = alloc(ret.size * sizeof(*ret.data));
|
|
|
+ ret.data = malloc(ret.size * sizeof(*ret.data));
|
|
|
|
|
|
memcpy((char *)ret.data, str.data, str.size);
|
|
|
|
|
|
@@ -1157,6 +1912,24 @@ str_eq_cstr(struct str str, const char *cstr, u64 cstr_size)
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
+bool
|
|
|
+str_eq_str(struct str str1, struct str str2)
|
|
|
+{
|
|
|
+ u64 i = 0;
|
|
|
+
|
|
|
+ if ( str1.size != str2.size ) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ for ( i = 0; i < str1.size; ++i ) {
|
|
|
+ if ( str1.data[i] != str2.data[i] ) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
bool
|
|
|
str_startswith_cstr(struct str str, const char *cstr, u64 cstr_size)
|
|
|
{
|