first.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. #include <stdio.h>
  2. #include <stdbool.h>
  3. /* #define STR_SIZE_LIMIT 65536 */
  4. #define STR_SIZE_LIMIT 5458264
  5. #define IMP
  6. #define WANT_BUILD
  7. #define WANT_CSTR
  8. #define WANT_STR
  9. #define WANT_MAP
  10. #define WANT_TOKENIZER
  11. #define WANT_DYN_ARR
  12. #define WANT_PATH
  13. #define WANT_ENV
  14. #define WANT_SLOP
  15. #include "./src/lib.h"
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. bool tkn_expect(struct tokenizer *tkn, enum token_type type,
  19. struct token *out_tk, enum err *out_err);
  20. bool tkn_expect_id(struct tokenizer *tkn, const char *cstr,
  21. struct token *out_tk, enum err *out_err);
  22. bool tkn_parse_function(struct tokenizer *tkn);
  23. bool run_function(struct str str);
  24. bool tkn_parse_pp_directive(struct tokenizer *tkn, struct token *out_tk,
  25. enum err *out_err);
  26. bool tkn_parse_decl(struct tokenizer *tkn, struct token *out_tk,
  27. enum err *out_err);
  28. bool is_space(char c);
  29. bool
  30. is_space(char c)
  31. {
  32. return (c == ' ') || (c == '\r') || (c == '\n')
  33. || (c == '\t') || (c == '\v');
  34. }
  35. enum c_token_type {
  36. _TT = TK_LAST,
  37. TK_PP_DEF,
  38. TK_PP_INC
  39. };
  40. struct token_pp_inc {
  41. enum c_token_type type;
  42. u64 loc_start;
  43. u64 loc_end;
  44. struct str string;
  45. };
  46. struct token_pp_def {
  47. enum token_type type;
  48. u64 loc_start;
  49. u64 loc_end;
  50. struct str name;
  51. struct str val;
  52. };
  53. struct token_var_decl {
  54. enum token_type type;
  55. u64 loc_start;
  56. u64 loc_end;
  57. struct str _type;
  58. struct str name;
  59. struct str init;
  60. };
  61. struct token_func_decl {
  62. enum token_type type;
  63. u64 loc_start;
  64. u64 loc_end;
  65. struct str _type;
  66. struct str name;
  67. };
  68. struct ctx {
  69. void *t;
  70. };
  71. bool skip_token(struct tokenizer *tkn, char c);
  72. # define DIE(f, e) \
  73. (f); \
  74. if ( (e) != ERR_OK ) {\
  75. fprintf(stderr, \
  76. "Error while running `" #f "`: %s\n", \
  77. err_to_name[(e)]); \
  78. goto exit_err; \
  79. }
  80. # define PPP(f) \
  81. printf("PRE: `" #f "`\n"); \
  82. f; \
  83. printf("POST: `" #f "`\n");
  84. int
  85. main(int argc, char *argv[])
  86. {
  87. enum err err = ERR_OK;
  88. u8 *file = NULL;
  89. u64 file_size = 0;
  90. struct str s = {0};
  91. struct str ss = {0};
  92. struct str_tokenizer stkn = {0};
  93. u64 *index = NULL;
  94. u64 i = 0;
  95. u64 counter = 0;
  96. void *tmp = NULL;
  97. struct key_bytes *kb = NULL;
  98. struct map m = {0};
  99. m = DIE(map_create(sizeof(u64), 64, &err), err);
  100. file = DIE(slop_file_slurp("./t8.shakespeare.txt", &file_size, &err), err);
  101. s = DIE(str_from_cstr((char*)file, file_size, &err), err);
  102. stkn = str_tokenize_func(s, is_space);
  103. ss = str_tokenizer_next(&stkn);
  104. for ( ; ss.size != ((u64) -1); (ss = str_tokenizer_next(&stkn), ++i)) {
  105. /*
  106. if ( i > 10 ) {
  107. break;
  108. }
  109. */
  110. /* printf("i -> %ld\n", i); */
  111. tmp = map_get(&m, (void*)ss.data, ss.size, &err);
  112. if ( err == ERR_NOT_FOUND ) {
  113. err = ERR_OK;
  114. counter = 1;
  115. /* printf("-> `%.*s` | `%ld`\n", (int)ss.size, ss.data, counter); */
  116. DIE(map_add(&m, (void*)ss.data, ss.size, &counter, &err), err);
  117. continue;
  118. }
  119. counter = *(u64*) tmp;
  120. ++counter;
  121. DIE(map_set(&m, (void*)ss.data, ss.size, &counter, &err), err);
  122. /* printf("-> `%.*s` | `%ld`\n", (int)ss.size, ss.data, counter); */
  123. }
  124. i = 0;
  125. MAP_FOR_EACH(&m, index, kb) {
  126. struct map_item *mi = MAP_GET_INDEX(&m, *index);
  127. if ( i > 10 ) break;
  128. printf("%.*s -> %ld\n", (int) kb->size, kb->key , *(u64*)mi->item);
  129. ++i;
  130. }
  131. /*
  132. MAP_FOR_EACH_INDEXS(&m, index) {
  133. struct map_item *mi = MAP_GET_INDEX(&m, *index);
  134. printf("%ld -> Value: %ld\n", *index, *(u64*)mi->item);
  135. }
  136. kb = m.keys.data;
  137. while ( kb->size > 0 ) {
  138. printf("-->> %.*s\n", (int) kb->size, kb->key);
  139. kb = CAST(struct key_bytes *, CAST(u8*, kb->key) + kb->size);
  140. }
  141. */
  142. DIE(map_destroy(&m, &err), err);
  143. free(file);
  144. UNUSED(argc);
  145. UNUSED(argv);
  146. return 0;
  147. exit_err:
  148. if ( file != NULL ) free(file);
  149. map_destroy(&m, NULL);
  150. return 1;
  151. }
  152. int main2(int argc, char *argv[]);
  153. int
  154. main2(int argc, char *argv[])
  155. {
  156. enum err err = ERR_OK;
  157. struct path src_path = {0};
  158. struct file f = {0};
  159. struct str code = {0};
  160. struct tokenizer tkn = {0};
  161. struct tokenizer_options tkn_opts = {0};
  162. struct token tk = {0};
  163. struct dyn_arr tk_da = {0};
  164. tk_da = dyn_arr_create(sizeof(struct token), &err);
  165. src_path = path_from_cstr_ns("./first.c", &err);
  166. f = path_file_read_all(&src_path, &err);
  167. code = str_from_cstr((char*)f.data, f.size, &err);
  168. tkn_opts.skip_token = skip_token;
  169. tkn = tokenizer_create(code, src_path, &tkn_opts, &err);
  170. tkn.edata = &tk_da;
  171. tk = tokenizer_next_token(&tkn, &err);
  172. do {
  173. if ( err != ERR_OK ) {
  174. fprintf(stderr, "ERROR: Failed tokenizing `%.*s`: %s\n",
  175. (int) src_path.size, src_path.data, err_to_name[err]);
  176. goto error_exit;
  177. }
  178. switch ( tk.type ) {
  179. case TK_POUND: {
  180. struct token pp_tk = {0};
  181. if ( ! tkn_parse_pp_directive(&tkn, &pp_tk, &err) ) goto error_exit;
  182. } break;
  183. case TK_ID: if ( ! tkn_parse_decl(&tkn, NULL, &err) ) goto error_exit;
  184. case TK_NL: break;
  185. default: fprintf(stderr, "%s ERROR: Invalid Token `%s`\n",
  186. tokenizer_token_loc_temp(&tkn, &tk, TLF_VIM, NULL),
  187. token_to_cstr(tk.type));
  188. goto error_exit;
  189. }
  190. tk = tokenizer_next_token(&tkn, &err);
  191. } while ( tk.type != TK_EOF );
  192. printf("%s\n", err_to_name[err]);
  193. error_exit:
  194. if ( f.data != NULL ) free(f.data);
  195. if ( tk_da.data != NULL ) dyn_arr_destroy(&tk_da, NULL);
  196. (void) argc; (void) argv;
  197. return 0;
  198. }
  199. bool
  200. skip_token(struct tokenizer *tkn, char c)
  201. {
  202. UNUSED(tkn);
  203. return (c == ' ') || (c == '\r') || (c == '\t');
  204. }
  205. bool
  206. tkn_expect(struct tokenizer *tkn, enum token_type type, struct token *out_tk,
  207. enum err *out_err)
  208. {
  209. enum err err = ERR_OK;
  210. enum err *perr = &err;
  211. LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
  212. if ( tokenizer_is_next(tkn, type, out_tk, perr) == false ) {
  213. struct token tk = {0};
  214. tk = tokenizer_next_token(tkn, perr);
  215. if ( err != ERR_OK ) {
  216. fprintf(stderr, "Failed to get next token: %s\n",
  217. err_to_name[err]);
  218. return false;
  219. }
  220. fprintf(stderr, "%s ERRRO: Got wrong token, expected: %s, got: %s\n",
  221. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  222. token_to_cstr(type), token_to_cstr(tk.type));
  223. return false;
  224. }
  225. return true;
  226. }
  227. bool
  228. tkn_expect_id(struct tokenizer *tkn, const char *cstr, struct token *out_tk,
  229. enum err *out_err)
  230. {
  231. enum err err = ERR_OK;
  232. struct str str = {0};
  233. LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
  234. LIB_ARG_MUST_NOT_BE_NULL(cstr, out_err, false);
  235. str = str_from_cstr_ns(cstr, &err);
  236. if ( tokenizer_is_next_id(tkn, str, out_tk, &err) == false ) {
  237. struct token tk = {0};
  238. struct token_wstr *tk_ws = NULL;
  239. tk = tokenizer_next_token(tkn, &err);
  240. if ( err != ERR_OK ) {
  241. fprintf(stderr, "Failed to get next token: %s\n",
  242. err_to_name[err]);
  243. return false;
  244. }
  245. if ( tk.type != TK_ID ) {
  246. fprintf(stderr,
  247. "%s ERROR: Got wrong token, expected: TK_ID, got: %s\n",
  248. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  249. token_to_cstr(tk.type));
  250. return false;
  251. }
  252. tk_ws = (struct token_wstr *)&tk;
  253. fprintf(stderr, "%s ERROR: Got wrong id, expected: %s, got: %.*s\n",
  254. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  255. cstr, (int) tk_ws->string.size, tk_ws->string.data);
  256. return false;
  257. }
  258. return true;
  259. }
  260. bool
  261. tkn_parse_pp_directive(struct tokenizer *tkn, struct token *out_tk,
  262. enum err *out_err)
  263. {
  264. static char buf[1024] = {0};
  265. static char buf2[1024] = {0};
  266. struct token tk = {0};
  267. struct token_wstr *tk_ws = NULL;
  268. enum err err = ERR_OK;
  269. enum err *perr = &err;
  270. LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
  271. LIB_ARG_MUST_NOT_BE_NULL(tkn, out_err, false);
  272. if ( out_err != NULL ) {
  273. perr = out_err;
  274. }
  275. tk = tokenizer_next_token(tkn, perr);
  276. if ( *perr != ERR_OK ) {
  277. fprintf(stderr, "Failed to get next token: %s\n", err_to_name[*perr]);
  278. return false;
  279. }
  280. if ( tk.type != TK_ID ) {
  281. fprintf(stderr, "%s Got wrong token, expected: TK_ID, got: %s\n",
  282. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  283. token_to_cstr(tk.type));
  284. return false;
  285. }
  286. tk_ws = (struct token_wstr *) &tk;
  287. if ( str_eq_cstr(tk_ws->string, "define", 6) == true ) {
  288. struct token_pp_def tk_def = {0};
  289. if ( ! tkn_expect(tkn, TK_ID, &tk, perr) ) return false;
  290. tk_ws = (struct token_wstr *) &tk;
  291. tk_def.loc_start = tk_ws->loc_start;
  292. memcpy(buf, tk_ws->string.data, tk_ws->string.size);
  293. tk_def.name.data = buf;
  294. tk_def.name.size = tk_ws->string.size;
  295. tk = tokenizer_next_token(tkn, &err);
  296. if ( err != ERR_OK ) {
  297. fprintf(stderr, "Failed to get next token: %s\n",
  298. err_to_name[err]);
  299. return false;
  300. }
  301. switch ( tk.type ) {
  302. case TK_ID:
  303. case TK_STR_LIT:
  304. case TK_NUM_LIT:
  305. tk_ws = (struct token_wstr *) &tk;
  306. break;
  307. case TK_NL: goto define_wout_value;
  308. default:
  309. fprintf(stderr,
  310. "%s Got wrong token, expected:"
  311. " TK_ID/TK_STR_LIT/TK_NUM_LIT, got: %s\n",
  312. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  313. token_to_cstr(tk.type));
  314. return false;
  315. }
  316. memcpy(buf2, tk_ws->string.data, tk_ws->string.size);
  317. tk_def.val.data = buf2;
  318. tk_def.val.size = tk_ws->string.size;
  319. tk_def.loc_end = tk_ws->loc_end;
  320. if ( ! tkn_expect(tkn, TK_NL, NULL, perr) ) return false;
  321. define_wout_value:
  322. LIB_SET_IF_NOT_NULL(out_tk, *(struct token *) &tk_def);
  323. return true;
  324. }
  325. if ( str_eq_cstr(tk_ws->string, "include", 7) == true ) {
  326. struct token_pp_inc tk_inc = {0};
  327. tk_inc.type = TK_PP_INC;
  328. tk = tokenizer_next_token(tkn, &err);
  329. if ( err != ERR_OK ) {
  330. fprintf(stderr, "Failed to get next token: %s\n",
  331. err_to_name[err]);
  332. return false;
  333. }
  334. if ( tk.type == TK_STR_LIT ) {
  335. tk_ws = (struct token_wstr *) &tk;
  336. tk_inc.loc_start = tk_ws->loc_start;
  337. tk_inc.loc_end = tk_ws->loc_end;
  338. memcpy(buf, tk_ws->string.data, tk_ws->string.size);
  339. tk_inc.string.data = buf;
  340. tk_inc.string.size = tk_ws->string.size;
  341. goto include_str_lit;
  342. } else if ( tk.type != TK_L_ANG_BRACKET) {
  343. fprintf(stderr,
  344. "%s Got wrong token, expected:"
  345. " TK_ID/TK_STR_LIT/TK_NUM_LIT, got: %s\n",
  346. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  347. token_to_cstr(tk.type));
  348. return false;
  349. }
  350. if ( ! tkn_expect(tkn, TK_ID, &tk, perr) ) return false;
  351. tk_ws = (struct token_wstr *) &tk;
  352. tk_inc.loc_start = tk_ws->loc_start;
  353. tk_inc.loc_end = tk_ws->loc_end;
  354. memcpy(buf, tk_ws->string.data, tk_ws->string.size);
  355. tk_inc.string.data = buf;
  356. tk_inc.string.size = tk_ws->string.size;
  357. if ( ! tkn_expect(tkn, TK_DOT, NULL, perr) ) return false;
  358. if ( ! tkn_expect_id(tkn, "h", NULL, perr) ) return false;
  359. if ( ! tkn_expect(tkn, TK_R_ANG_BRACKET, NULL, perr) ) return false;
  360. if ( ! tkn_expect(tkn, TK_NL, NULL, perr) ) return false;
  361. include_str_lit:
  362. LIB_SET_IF_NOT_NULL(out_tk, *(struct token *) &tk_inc);
  363. return true;
  364. }
  365. if ( str_eq_cstr(tk_ws->string, "if", 2) == true ) {
  366. return true;
  367. }
  368. if ( str_eq_cstr(tk_ws->string, "ifdef", 2) == true ) {
  369. return true;
  370. }
  371. if ( str_eq_cstr(tk_ws->string, "ifndef", 2) == true ) {
  372. return true;
  373. }
  374. *perr = -1;
  375. fprintf(stderr, "%s ERROR: Invalid Pre-Compiler directive `%.*s`\n",
  376. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  377. (int) tk_ws->string.size, tk_ws->string.data);
  378. return false;
  379. }
  380. bool
  381. tkn_parse_decl(struct tokenizer *tkn, struct token *out_tk, enum err *out_err)
  382. {
  383. /* static char buf[1024] = {0}; */
  384. /* static char buf2[1024] = {0}; */
  385. struct token tk = {0};
  386. struct token_wstr *tk_ws = NULL;
  387. struct token_wstr tk_type = {0};
  388. enum err err = ERR_OK;
  389. enum err *perr = &err;
  390. LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
  391. LIB_ARG_MUST_NOT_BE_NULL(tkn, out_err, false);
  392. if ( out_err != NULL ) {
  393. perr = out_err;
  394. }
  395. tk_type = *(struct token_wstr *) &tkn->last;
  396. if ( ! tkn_expect(tkn, TK_ID, &tk, perr) ) return false;
  397. switch ( tokenizer_next_token_type(tkn, perr) ) {
  398. case TK_L_BRACES: {
  399. TODO("Implement function declaration");
  400. } break;
  401. case TK_INVALID:
  402. fprintf(stderr, "Failed to get next token: %s\n", err_to_name[*perr]);
  403. return false;
  404. default:
  405. fprintf(stderr,
  406. "%s Got wrong token, expected: TK_L_BRACES, got: %s\n",
  407. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  408. token_to_cstr(tk.type));
  409. }
  410. /*
  411. tk = tokenizer_next_token(tkn, perr);
  412. if ( *perr != ERR_OK ) {
  413. fprintf(stderr, "Failed to get next token: %s\n", err_to_name[*perr]);
  414. return false;
  415. }
  416. if ( tk.type != TK_ID ) {
  417. fprintf(stderr, "%s Got wrong token, expected: TK_ID, got: %s\n",
  418. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  419. token_to_cstr(tk.type));
  420. return false;
  421. }
  422. */
  423. UNUSED(tk_type);
  424. UNUSED(out_tk);
  425. *perr = ERR_OK;
  426. tk_ws = (struct token_wstr *) &tkn->last;
  427. fprintf(stderr, "%s ERROR: Invalid declaration id `%.*s`\n",
  428. tokenizer_token_loc_temp(tkn, &tkn->last, TLF_VIM, NULL),
  429. (int) tk_ws->string.size, tk_ws->string.data);
  430. return false;
  431. }
  432. /*
  433. int
  434. main(void)
  435. {
  436. enum err err = ERR_OK;
  437. bool was_rebuild = false;
  438. struct dyn_arr dirs = {0};
  439. size_t i = 0;
  440. was_rebuild = build_go_rebuild_yourself(__FILE__, &err);
  441. if ( was_rebuild == true ) {
  442. return 0;
  443. }
  444. dirs = dir_list_with_ext("./src", ".c", &err);
  445. free(dirs);
  446. return 0;
  447. }
  448. */