/* NOTE: the original `#include <...>` targets were lost; the standard headers
 * below are inferred from what this file uses (printf/fprintf, free, memcpy,
 * bool). */
#include <stdio.h>
#include <stdlib.h>

/* #define STR_SIZE_LIMIT 65536 */
#define STR_SIZE_LIMIT 5458264

#define IMP
#define WANT_BUILD
#define WANT_CSTR
#define WANT_STR
#define WANT_MAP
#define WANT_TOKENIZER
#define WANT_DYN_ARR
#define WANT_PATH
#define WANT_ENV
#define WANT_SLOP
#include "./src/lib.h"

#include <string.h>  /* inferred, see note above */
#include <stdbool.h> /* inferred, see note above */

bool tkn_expect(struct tokenizer *tkn, enum token_type type, struct token *out_tk, enum err *out_err);
bool tkn_expect_id(struct tokenizer *tkn, const char *cstr, struct token *out_tk, enum err *out_err);
bool tkn_parse_function(struct tokenizer *tkn);
bool run_function(struct str str);
bool tkn_parse_pp_directive(struct tokenizer *tkn, struct token *out_tk, enum err *out_err);
bool tkn_parse_decl(struct tokenizer *tkn, struct token *out_tk, enum err *out_err);
bool is_space(char c);

bool is_space(char c)
{
	return (c == ' ') || (c == '\r') || (c == '\n')
		|| (c == '\t') || (c == '\v');
}

enum c_token_type {
	_TT = TK_LAST,
	TK_PP_DEF,
	TK_PP_INC
};

struct token_pp_inc {
	enum c_token_type type;
	u64 loc_start;
	u64 loc_end;
	struct str string;
};

struct token_pp_def {
	enum token_type type;
	u64 loc_start;
	u64 loc_end;
	struct str name;
	struct str val;
};

struct token_var_decl {
	enum token_type type;
	u64 loc_start;
	u64 loc_end;
	struct str _type;
	struct str name;
	struct str init;
};

struct token_func_decl {
	enum token_type type;
	u64 loc_start;
	u64 loc_end;
	struct str _type;
	struct str name;
};

struct ctx {
	void *t;
};

bool skip_token(struct tokenizer *tkn, char c);

#define DIE(f, e) \
	(f); \
	if ( (e) != ERR_OK ) { \
		fprintf(stderr, \
			"Error while running `" #f "`: %s\n", \
			err_to_name[(e)]); \
		goto exit_err; \
	}

#define PPP(f) \
	printf("PRE: `" #f "`\n"); \
	f; \
	printf("POST: `" #f "`\n");

int main(int argc, char *argv[])
{
	enum err err = ERR_OK;
	u8 *file = NULL;
	u64 file_size = 0;
	struct str s = {0};
	struct str ss = {0};
	struct str_tokenizer stkn = {0};
	u64 *index = NULL;
	u64 i = 0;
	u64 counter = 0;
	void *tmp = NULL;
	struct key_bytes *kb = NULL;
	struct map m = {0};

	m = DIE(map_create(sizeof(u64), 64, &err), err);
	file = DIE(slop_file_slurp("./t8.shakespeare.txt", &file_size, &err), err);
	s = DIE(str_from_cstr((char*)file, file_size, &err), err);

	/* Count word frequencies: split on whitespace and tally each word in the map. */
	stkn = str_tokenize_func(s, is_space);
	ss = str_tokenizer_next(&stkn);
	for ( ; ss.size != ((u64) -1); (ss = str_tokenizer_next(&stkn), ++i) ) {
		/* if ( i > 10 ) { break; } */
		/* printf("i -> %ld\n", i); */
		tmp = map_get(&m, (void*)ss.data, ss.size, &err);
		if ( err == ERR_NOT_FOUND ) {
			err = ERR_OK;
			counter = 1;
			/* printf("-> `%.*s` | `%ld`\n", (int)ss.size, ss.data, counter); */
			DIE(map_add(&m, (void*)ss.data, ss.size, &counter, &err), err);
			continue;
		}

		counter = *(u64*) tmp;
		++counter;
		DIE(map_set(&m, (void*)ss.data, ss.size, &counter, &err), err);
		/* printf("-> `%.*s` | `%ld`\n", (int)ss.size, ss.data, counter); */
	}

	/* Print the first few map entries. */
	i = 0;
	MAP_FOR_EACH(&m, index, kb) {
		struct map_item *mi = MAP_GET_INDEX(&m, *index);
		if ( i > 10 )
			break;
		printf("%.*s -> %ld\n", (int) kb->size, kb->key, *(u64*)mi->item);
		++i;
	}

	/*
	MAP_FOR_EACH_INDEXS(&m, index) {
		struct map_item *mi = MAP_GET_INDEX(&m, *index);
		printf("%ld -> Value: %ld\n", *index, *(u64*)mi->item);
	}

	kb = m.keys.data;
	while ( kb->size > 0 ) {
		printf("-->> %.*s\n", (int) kb->size, kb->key);
		kb = CAST(struct key_bytes *, CAST(u8*, kb->key) + kb->size);
	}
	*/

	DIE(map_destroy(&m, &err), err);
	free(file);

	UNUSED(argc);
	UNUSED(argv);
	return 0;

exit_err:
	if ( file != NULL )
		free(file);
	map_destroy(&m, NULL);
	return 1;
}
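/*
 * main2: driver for a small C tokenizer/parser experiment. It reads
 * ./first.c, walks the token stream, and dispatches on '#' (preprocessor
 * directives) and identifiers (declarations). It is kept around but is not
 * called; main() above runs the word-count demo instead.
 */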
int main2(int argc, char *argv[]);

int main2(int argc, char *argv[])
{
	enum err err = ERR_OK;
	struct path src_path = {0};
	struct file f = {0};
	struct str code = {0};
	struct tokenizer tkn = {0};
	struct tokenizer_options tkn_opts = {0};
	struct token tk = {0};
	struct dyn_arr tk_da = {0};

	tk_da = dyn_arr_create(sizeof(struct token), &err);
	src_path = path_from_cstr_ns("./first.c", &err);
	f = path_file_read_all(&src_path, &err);
	code = str_from_cstr((char*)f.data, f.size, &err);

	tkn_opts.skip_token = skip_token;
	tkn = tokenizer_create(code, src_path, &tkn_opts, &err);
	tkn.edata = &tk_da;

	tk = tokenizer_next_token(&tkn, &err);
	do {
		if ( err != ERR_OK ) {
			fprintf(stderr, "ERROR: Failed tokenizing `%.*s`: %s\n",
				(int) src_path.size, src_path.data,
				err_to_name[err]);
			goto error_exit;
		}

		switch ( tk.type ) {
		case TK_POUND: {
			struct token pp_tk = {0};
			if ( ! tkn_parse_pp_directive(&tkn, &pp_tk, &err) )
				goto error_exit;
		} break;
		case TK_ID:
			if ( ! tkn_parse_decl(&tkn, NULL, &err) )
				goto error_exit;
			/* fall through */
		case TK_NL:
			break;
		default:
			fprintf(stderr, "%s ERROR: Invalid Token `%s`\n",
				tokenizer_token_loc_temp(&tkn, &tk, TLF_VIM, NULL),
				token_to_cstr(tk.type));
			goto error_exit;
		}

		tk = tokenizer_next_token(&tkn, &err);
	} while ( tk.type != TK_EOF );

	printf("%s\n", err_to_name[err]);

error_exit:
	if ( f.data != NULL )
		free(f.data);
	if ( tk_da.data != NULL )
		dyn_arr_destroy(&tk_da, NULL);

	(void) argc;
	(void) argv;
	return 0;
}

bool skip_token(struct tokenizer *tkn, char c)
{
	UNUSED(tkn);
	return (c == ' ') || (c == '\r') || (c == '\t');
}

bool tkn_expect(struct tokenizer *tkn, enum token_type type,
		struct token *out_tk, enum err *out_err)
{
	enum err err = ERR_OK;
	enum err *perr = &err;

	LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);

	if ( tokenizer_is_next(tkn, type, out_tk, perr) == false ) {
		struct token tk = {0};

		tk = tokenizer_next_token(tkn, perr);
		if ( err != ERR_OK ) {
			fprintf(stderr, "Failed to get next token: %s\n",
				err_to_name[err]);
			return false;
		}
		fprintf(stderr, "%s ERROR: Got wrong token, expected: %s, got: %s\n",
			tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
			token_to_cstr(type), token_to_cstr(tk.type));
		return false;
	}

	return true;
}

bool tkn_expect_id(struct tokenizer *tkn, const char *cstr,
		struct token *out_tk, enum err *out_err)
{
	enum err err = ERR_OK;
	struct str str = {0};

	LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
	LIB_ARG_MUST_NOT_BE_NULL(cstr, out_err, false);

	str = str_from_cstr_ns(cstr, &err);
	if ( tokenizer_is_next_id(tkn, str, out_tk, &err) == false ) {
		struct token tk = {0};
		struct token_wstr *tk_ws = NULL;

		tk = tokenizer_next_token(tkn, &err);
		if ( err != ERR_OK ) {
			fprintf(stderr, "Failed to get next token: %s\n",
				err_to_name[err]);
			return false;
		}
		if ( tk.type != TK_ID ) {
			fprintf(stderr, "%s ERROR: Got wrong token, expected: TK_ID, got: %s\n",
				tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
				token_to_cstr(tk.type));
			return false;
		}

		tk_ws = (struct token_wstr *)&tk;
		fprintf(stderr, "%s ERROR: Got wrong id, expected: %s, got: %.*s\n",
			tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
			cstr, (int) tk_ws->string.size, tk_ws->string.data);
		return false;
	}

	return true;
}
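/*
 * tkn_parse_pp_directive: consumes the tokens that follow a '#'. It handles
 * `define` (with or without a value) and `include` ("file" or <name.h>),
 * accepts if/ifdef/ifndef as no-ops for now, and reports anything else as an
 * invalid directive.
 */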
bool tkn_parse_pp_directive(struct tokenizer *tkn, struct token *out_tk,
		enum err *out_err)
{
	/* Scratch buffers for the directive name/value; assumes tokens stay under 1024 bytes. */
	static char buf[1024] = {0};
	static char buf2[1024] = {0};
	struct token tk = {0};
	struct token_wstr *tk_ws = NULL;
	enum err err = ERR_OK;
	enum err *perr = &err;

	LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
	LIB_ARG_MUST_NOT_BE_NULL(tkn, out_err, false);

	if ( out_err != NULL ) {
		perr = out_err;
	}

	tk = tokenizer_next_token(tkn, perr);
	if ( *perr != ERR_OK ) {
		fprintf(stderr, "Failed to get next token: %s\n",
			err_to_name[*perr]);
		return false;
	}
	if ( tk.type != TK_ID ) {
		fprintf(stderr, "%s Got wrong token, expected: TK_ID, got: %s\n",
			tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
			token_to_cstr(tk.type));
		return false;
	}

	tk_ws = (struct token_wstr *) &tk;

	if ( str_eq_cstr(tk_ws->string, "define", 6) == true ) {
		struct token_pp_def tk_def = {0};

		tk_def.type = TK_PP_DEF;

		if ( ! tkn_expect(tkn, TK_ID, &tk, perr) )
			return false;
		tk_ws = (struct token_wstr *) &tk;
		tk_def.loc_start = tk_ws->loc_start;
		memcpy(buf, tk_ws->string.data, tk_ws->string.size);
		tk_def.name.data = buf;
		tk_def.name.size = tk_ws->string.size;

		tk = tokenizer_next_token(tkn, &err);
		if ( err != ERR_OK ) {
			fprintf(stderr, "Failed to get next token: %s\n",
				err_to_name[err]);
			return false;
		}

		switch ( tk.type ) {
		case TK_ID:
		case TK_STR_LIT:
		case TK_NUM_LIT:
			tk_ws = (struct token_wstr *) &tk;
			break;
		case TK_NL:
			goto define_wout_value;
		default:
			fprintf(stderr, "%s Got wrong token, expected:"
				" TK_ID/TK_STR_LIT/TK_NUM_LIT, got: %s\n",
				tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
				token_to_cstr(tk.type));
			return false;
		}

		memcpy(buf2, tk_ws->string.data, tk_ws->string.size);
		tk_def.val.data = buf2;
		tk_def.val.size = tk_ws->string.size;
		tk_def.loc_end = tk_ws->loc_end;

		if ( ! tkn_expect(tkn, TK_NL, NULL, perr) )
			return false;

define_wout_value:
		LIB_SET_IF_NOT_NULL(out_tk, *(struct token *) &tk_def);
		return true;
	}
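	/*
	 * #include: accept either a string literal ("path") or an angle-bracket
	 * form reassembled from the ID '.' "h" '>' token sequence (<name.h>).
	 */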
	if ( str_eq_cstr(tk_ws->string, "include", 7) == true ) {
		struct token_pp_inc tk_inc = {0};

		tk_inc.type = TK_PP_INC;

		tk = tokenizer_next_token(tkn, &err);
		if ( err != ERR_OK ) {
			fprintf(stderr, "Failed to get next token: %s\n",
				err_to_name[err]);
			return false;
		}

		if ( tk.type == TK_STR_LIT ) {
			tk_ws = (struct token_wstr *) &tk;
			tk_inc.loc_start = tk_ws->loc_start;
			tk_inc.loc_end = tk_ws->loc_end;
			memcpy(buf, tk_ws->string.data, tk_ws->string.size);
			tk_inc.string.data = buf;
			tk_inc.string.size = tk_ws->string.size;
			goto include_str_lit;
		} else if ( tk.type != TK_L_ANG_BRACKET ) {
			fprintf(stderr, "%s Got wrong token, expected:"
				" TK_STR_LIT/TK_L_ANG_BRACKET, got: %s\n",
				tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
				token_to_cstr(tk.type));
			return false;
		}

		if ( ! tkn_expect(tkn, TK_ID, &tk, perr) )
			return false;
		tk_ws = (struct token_wstr *) &tk;
		tk_inc.loc_start = tk_ws->loc_start;
		tk_inc.loc_end = tk_ws->loc_end;
		memcpy(buf, tk_ws->string.data, tk_ws->string.size);
		tk_inc.string.data = buf;
		tk_inc.string.size = tk_ws->string.size;

		if ( ! tkn_expect(tkn, TK_DOT, NULL, perr) )
			return false;
		if ( ! tkn_expect_id(tkn, "h", NULL, perr) )
			return false;
		if ( ! tkn_expect(tkn, TK_R_ANG_BRACKET, NULL, perr) )
			return false;
		if ( ! tkn_expect(tkn, TK_NL, NULL, perr) )
			return false;

include_str_lit:
		LIB_SET_IF_NOT_NULL(out_tk, *(struct token *) &tk_inc);
		return true;
	}

	/* Conditional-compilation directives are accepted but ignored for now. */
	if ( str_eq_cstr(tk_ws->string, "if", 2) == true ) {
		return true;
	}
	if ( str_eq_cstr(tk_ws->string, "ifdef", 5) == true ) {
		return true;
	}
	if ( str_eq_cstr(tk_ws->string, "ifndef", 6) == true ) {
		return true;
	}

	*perr = -1;
	fprintf(stderr, "%s ERROR: Invalid Pre-Compiler directive `%.*s`\n",
		tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
		(int) tk_ws->string.size, tk_ws->string.data);
	return false;
}

bool tkn_parse_decl(struct tokenizer *tkn, struct token *out_tk, enum err *out_err)
{
	/* static char buf[1024] = {0}; */
	/* static char buf2[1024] = {0}; */
	struct token tk = {0};
	struct token_wstr *tk_ws = NULL;
	struct token_wstr tk_type = {0};
	enum err err = ERR_OK;
	enum err *perr = &err;

	LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
	LIB_ARG_MUST_NOT_BE_NULL(tkn, out_err, false);

	if ( out_err != NULL ) {
		perr = out_err;
	}

	tk_type = *(struct token_wstr *) &tkn->last;

	if ( ! tkn_expect(tkn, TK_ID, &tk, perr) )
		return false;

	switch ( tokenizer_next_token_type(tkn, perr) ) {
	case TK_L_BRACES: {
		TODO("Implement function declaration");
	} break;
	case TK_INVALID:
		fprintf(stderr, "Failed to get next token: %s\n",
			err_to_name[*perr]);
		return false;
	default:
		fprintf(stderr, "%s Got wrong token, expected: TK_L_BRACES, got: %s\n",
			tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
			token_to_cstr(tk.type));
	}

	/*
	tk = tokenizer_next_token(tkn, perr);
	if ( *perr != ERR_OK ) {
		fprintf(stderr, "Failed to get next token: %s\n",
			err_to_name[*perr]);
		return false;
	}
	if ( tk.type != TK_ID ) {
		fprintf(stderr, "%s Got wrong token, expected: TK_ID, got: %s\n",
			tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
			token_to_cstr(tk.type));
		return false;
	}
	*/

	UNUSED(tk_type);
	UNUSED(out_tk);

	*perr = ERR_OK;
	tk_ws = (struct token_wstr *) &tkn->last;
	fprintf(stderr, "%s ERROR: Invalid declaration id `%.*s`\n",
		tokenizer_token_loc_temp(tkn, &tkn->last, TLF_VIM, NULL),
		(int) tk_ws->string.size, tk_ws->string.data);
	return false;
}

/*
int main(void)
{
	enum err err = ERR_OK;
	bool was_rebuild = false;
	struct dyn_arr dirs = {0};
	size_t i = 0;

	was_rebuild = build_go_rebuild_yourself(__FILE__, &err);
	if ( was_rebuild == true ) {
		return 0;
	}

	dirs = dir_list_with_ext("./src", ".c", &err);

	free(dirs);
	return 0;
}
*/