first.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. #include <stdio.h>
  2. #include <stdbool.h>
  3. /* #define STR_SIZE_LIMIT 65536 */
  4. #define STR_SIZE_LIMIT 5458264
  5. #define IMP
  6. #define WANT_BUILD
  7. #define WANT_CSTR
  8. #define WANT_STR
  9. #define WANT_MAP
  10. #define WANT_TOKENIZER
  11. #define WANT_DYN_ARR
  12. #define WANT_PATH
  13. #define WANT_ENV
  14. #define WANT_SLOP
  15. #include "./src/lib.h"
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. bool tkn_expect(struct tokenizer *tkn, enum token_type type,
  19. struct token *out_tk, enum err *out_err);
  20. bool tkn_expect_id(struct tokenizer *tkn, const char *cstr,
  21. struct token *out_tk, enum err *out_err);
  22. bool tkn_parse_function(struct tokenizer *tkn);
  23. bool run_function(struct str str);
  24. bool tkn_parse_pp_directive(struct tokenizer *tkn, struct token *out_tk,
  25. enum err *out_err);
  26. bool tkn_parse_decl(struct tokenizer *tkn, struct token *out_tk,
  27. enum err *out_err);
  28. bool is_space(char c);
  29. bool
  30. is_space(char c)
  31. {
  32. return (c == ' ') || (c == '\r') || (c == '\n')
  33. || (c == '\t') || (c == '\v');
  34. }
  35. enum c_token_type {
  36. _TT = TK_LAST,
  37. TK_PP_DEF,
  38. TK_PP_INC
  39. };
  40. struct token_pp_inc {
  41. enum c_token_type type;
  42. u64 loc_start;
  43. u64 loc_end;
  44. struct str string;
  45. };
  46. struct token_pp_def {
  47. enum token_type type;
  48. u64 loc_start;
  49. u64 loc_end;
  50. struct str name;
  51. struct str val;
  52. };
  53. struct token_var_decl {
  54. enum token_type type;
  55. u64 loc_start;
  56. u64 loc_end;
  57. struct str _type;
  58. struct str name;
  59. struct str init;
  60. };
  61. struct token_func_decl {
  62. enum token_type type;
  63. u64 loc_start;
  64. u64 loc_end;
  65. struct str _type;
  66. struct str name;
  67. };
  68. struct ctx {
  69. void *t;
  70. };
  71. bool skip_token(struct tokenizer *tkn, char c);
  72. # define DIE(f, e) \
  73. (f); \
  74. if ( (e) != ERR_OK ) {\
  75. fprintf(stderr, \
  76. "Error while running `" #f "`: %s\n", \
  77. err_to_name[(e)]); \
  78. goto exit_err; \
  79. }
  80. int
  81. main(int argc, char *argv[])
  82. {
  83. enum err err = ERR_OK;
  84. u8 *file = NULL;
  85. u64 file_size = 0;
  86. struct str s = {0};
  87. struct str ss = {0};
  88. struct str_tokenizer stkn = {0};
  89. u64 *index = NULL;
  90. u64 i = 0;
  91. u64 counter = 0;
  92. void *tmp = NULL;
  93. struct map m = {0};
  94. m = DIE(map_create(sizeof(u64), 32, &err), err);
  95. file = DIE(slop_file_slurp("./t8.shakespeare.txt", &file_size, &err), err);
  96. s = DIE(str_from_cstr((char*)file, file_size, &err), err);
  97. stkn = str_tokenize_func(s, is_space);
  98. ss = str_tokenizer_next(&stkn);
  99. while ( ss.size != ((u64) -1) ) {
  100. if ( i > 10 ) {
  101. break;
  102. }
  103. tmp = map_get(&m, (void*)ss.data, ss.size, &err);
  104. if ( err == ERR_NOT_FOUND ) {
  105. err = ERR_OK;
  106. DIE(map_add(&m, (void*)ss.data, ss.size, (void*)1, &err), err);
  107. continue;
  108. }
  109. counter = (u64) tmp;
  110. DIE(map_set(&m, (void*)ss.data, ss.size, (void*)(counter+1), &err), err);
  111. printf("-> `%.*s` | `%ld`\n", (int)ss.size, ss.data, counter);
  112. ss = str_tokenizer_next(&stkn);
  113. ++i;
  114. }
  115. MAP_FOR_EACH_USED(&m, index) {
  116. struct map_item mi = m.data[*index];
  117. printf("%ld -> Value: %ld\n", *index, (u64)mi.item);
  118. }
  119. DIE(map_destroy(&m, &err), err);
  120. free(file);
  121. UNUSED(argc);
  122. UNUSED(argv);
  123. return 0;
  124. exit_err:
  125. if ( file != NULL ) free(file);
  126. map_destroy(&m, NULL);
  127. return 1;
  128. }
  129. int main2(int argc, char *argv[]);
  130. int
  131. main2(int argc, char *argv[])
  132. {
  133. enum err err = ERR_OK;
  134. struct path src_path = {0};
  135. struct file f = {0};
  136. struct str code = {0};
  137. struct tokenizer tkn = {0};
  138. struct tokenizer_options tkn_opts = {0};
  139. struct token tk = {0};
  140. struct dyn_arr tk_da = {0};
  141. tk_da = dyn_arr_create(sizeof(struct token), &err);
  142. src_path = path_from_cstr_ns("./first.c", &err);
  143. f = path_file_read_all(&src_path, &err);
  144. code = str_from_cstr((char*)f.data, f.size, &err);
  145. tkn_opts.skip_token = skip_token;
  146. tkn = tokenizer_create(code, src_path, &tkn_opts, &err);
  147. tkn.edata = &tk_da;
  148. tk = tokenizer_next_token(&tkn, &err);
  149. do {
  150. if ( err != ERR_OK ) {
  151. fprintf(stderr, "ERROR: Failed tokenizing `%.*s`: %s\n",
  152. (int) src_path.size, src_path.data, err_to_name[err]);
  153. goto error_exit;
  154. }
  155. switch ( tk.type ) {
  156. case TK_POUND: {
  157. struct token pp_tk = {0};
  158. if ( ! tkn_parse_pp_directive(&tkn, &pp_tk, &err) ) goto error_exit;
  159. } break;
  160. case TK_ID: if ( ! tkn_parse_decl(&tkn, NULL, &err) ) goto error_exit;
  161. case TK_NL: break;
  162. default: fprintf(stderr, "%s ERROR: Invalid Token `%s`\n",
  163. tokenizer_token_loc_temp(&tkn, &tk, TLF_VIM, NULL),
  164. token_to_cstr(tk.type));
  165. goto error_exit;
  166. }
  167. tk = tokenizer_next_token(&tkn, &err);
  168. } while ( tk.type != TK_EOF );
  169. printf("%s\n", err_to_name[err]);
  170. error_exit:
  171. if ( f.data != NULL ) free(f.data);
  172. if ( tk_da.data != NULL ) dyn_arr_destroy(&tk_da, NULL);
  173. (void) argc; (void) argv;
  174. return 0;
  175. }
  176. bool
  177. skip_token(struct tokenizer *tkn, char c)
  178. {
  179. UNUSED(tkn);
  180. return (c == ' ') || (c == '\r') || (c == '\t');
  181. }
  182. bool
  183. tkn_expect(struct tokenizer *tkn, enum token_type type, struct token *out_tk,
  184. enum err *out_err)
  185. {
  186. enum err err = ERR_OK;
  187. enum err *perr = &err;
  188. LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
  189. if ( tokenizer_is_next(tkn, type, out_tk, perr) == false ) {
  190. struct token tk = {0};
  191. tk = tokenizer_next_token(tkn, perr);
  192. if ( err != ERR_OK ) {
  193. fprintf(stderr, "Failed to get next token: %s\n",
  194. err_to_name[err]);
  195. return false;
  196. }
  197. fprintf(stderr, "%s ERRRO: Got wrong token, expected: %s, got: %s\n",
  198. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  199. token_to_cstr(type), token_to_cstr(tk.type));
  200. return false;
  201. }
  202. return true;
  203. }
  204. bool
  205. tkn_expect_id(struct tokenizer *tkn, const char *cstr, struct token *out_tk,
  206. enum err *out_err)
  207. {
  208. enum err err = ERR_OK;
  209. struct str str = {0};
  210. LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
  211. LIB_ARG_MUST_NOT_BE_NULL(cstr, out_err, false);
  212. str = str_from_cstr_ns(cstr, &err);
  213. if ( tokenizer_is_next_id(tkn, str, out_tk, &err) == false ) {
  214. struct token tk = {0};
  215. struct token_wstr *tk_ws = NULL;
  216. tk = tokenizer_next_token(tkn, &err);
  217. if ( err != ERR_OK ) {
  218. fprintf(stderr, "Failed to get next token: %s\n",
  219. err_to_name[err]);
  220. return false;
  221. }
  222. if ( tk.type != TK_ID ) {
  223. fprintf(stderr,
  224. "%s ERROR: Got wrong token, expected: TK_ID, got: %s\n",
  225. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  226. token_to_cstr(tk.type));
  227. return false;
  228. }
  229. tk_ws = (struct token_wstr *)&tk;
  230. fprintf(stderr, "%s ERROR: Got wrong id, expected: %s, got: %.*s\n",
  231. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  232. cstr, (int) tk_ws->string.size, tk_ws->string.data);
  233. return false;
  234. }
  235. return true;
  236. }
  237. bool
  238. tkn_parse_pp_directive(struct tokenizer *tkn, struct token *out_tk,
  239. enum err *out_err)
  240. {
  241. static char buf[1024] = {0};
  242. static char buf2[1024] = {0};
  243. struct token tk = {0};
  244. struct token_wstr *tk_ws = NULL;
  245. enum err err = ERR_OK;
  246. enum err *perr = &err;
  247. LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
  248. LIB_ARG_MUST_NOT_BE_NULL(tkn, out_err, false);
  249. if ( out_err != NULL ) {
  250. perr = out_err;
  251. }
  252. tk = tokenizer_next_token(tkn, perr);
  253. if ( *perr != ERR_OK ) {
  254. fprintf(stderr, "Failed to get next token: %s\n", err_to_name[*perr]);
  255. return false;
  256. }
  257. if ( tk.type != TK_ID ) {
  258. fprintf(stderr, "%s Got wrong token, expected: TK_ID, got: %s\n",
  259. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  260. token_to_cstr(tk.type));
  261. return false;
  262. }
  263. tk_ws = (struct token_wstr *) &tk;
  264. if ( str_eq_cstr(tk_ws->string, "define", 6) == true ) {
  265. struct token_pp_def tk_def = {0};
  266. if ( ! tkn_expect(tkn, TK_ID, &tk, perr) ) return false;
  267. tk_ws = (struct token_wstr *) &tk;
  268. tk_def.loc_start = tk_ws->loc_start;
  269. memcpy(buf, tk_ws->string.data, tk_ws->string.size);
  270. tk_def.name.data = buf;
  271. tk_def.name.size = tk_ws->string.size;
  272. tk = tokenizer_next_token(tkn, &err);
  273. if ( err != ERR_OK ) {
  274. fprintf(stderr, "Failed to get next token: %s\n",
  275. err_to_name[err]);
  276. return false;
  277. }
  278. switch ( tk.type ) {
  279. case TK_ID:
  280. case TK_STR_LIT:
  281. case TK_NUM_LIT:
  282. tk_ws = (struct token_wstr *) &tk;
  283. break;
  284. case TK_NL: goto define_wout_value;
  285. default:
  286. fprintf(stderr,
  287. "%s Got wrong token, expected:"
  288. " TK_ID/TK_STR_LIT/TK_NUM_LIT, got: %s\n",
  289. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  290. token_to_cstr(tk.type));
  291. return false;
  292. }
  293. memcpy(buf2, tk_ws->string.data, tk_ws->string.size);
  294. tk_def.val.data = buf2;
  295. tk_def.val.size = tk_ws->string.size;
  296. tk_def.loc_end = tk_ws->loc_end;
  297. if ( ! tkn_expect(tkn, TK_NL, NULL, perr) ) return false;
  298. define_wout_value:
  299. LIB_SET_IF_NOT_NULL(out_tk, *(struct token *) &tk_def);
  300. return true;
  301. }
  302. if ( str_eq_cstr(tk_ws->string, "include", 7) == true ) {
  303. struct token_pp_inc tk_inc = {0};
  304. tk_inc.type = TK_PP_INC;
  305. tk = tokenizer_next_token(tkn, &err);
  306. if ( err != ERR_OK ) {
  307. fprintf(stderr, "Failed to get next token: %s\n",
  308. err_to_name[err]);
  309. return false;
  310. }
  311. if ( tk.type == TK_STR_LIT ) {
  312. tk_ws = (struct token_wstr *) &tk;
  313. tk_inc.loc_start = tk_ws->loc_start;
  314. tk_inc.loc_end = tk_ws->loc_end;
  315. memcpy(buf, tk_ws->string.data, tk_ws->string.size);
  316. tk_inc.string.data = buf;
  317. tk_inc.string.size = tk_ws->string.size;
  318. goto include_str_lit;
  319. } else if ( tk.type != TK_L_ANG_BRACKET) {
  320. fprintf(stderr,
  321. "%s Got wrong token, expected:"
  322. " TK_ID/TK_STR_LIT/TK_NUM_LIT, got: %s\n",
  323. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  324. token_to_cstr(tk.type));
  325. return false;
  326. }
  327. if ( ! tkn_expect(tkn, TK_ID, &tk, perr) ) return false;
  328. tk_ws = (struct token_wstr *) &tk;
  329. tk_inc.loc_start = tk_ws->loc_start;
  330. tk_inc.loc_end = tk_ws->loc_end;
  331. memcpy(buf, tk_ws->string.data, tk_ws->string.size);
  332. tk_inc.string.data = buf;
  333. tk_inc.string.size = tk_ws->string.size;
  334. if ( ! tkn_expect(tkn, TK_DOT, NULL, perr) ) return false;
  335. if ( ! tkn_expect_id(tkn, "h", NULL, perr) ) return false;
  336. if ( ! tkn_expect(tkn, TK_R_ANG_BRACKET, NULL, perr) ) return false;
  337. if ( ! tkn_expect(tkn, TK_NL, NULL, perr) ) return false;
  338. include_str_lit:
  339. LIB_SET_IF_NOT_NULL(out_tk, *(struct token *) &tk_inc);
  340. return true;
  341. }
  342. if ( str_eq_cstr(tk_ws->string, "if", 2) == true ) {
  343. return true;
  344. }
  345. if ( str_eq_cstr(tk_ws->string, "ifdef", 2) == true ) {
  346. return true;
  347. }
  348. if ( str_eq_cstr(tk_ws->string, "ifndef", 2) == true ) {
  349. return true;
  350. }
  351. *perr = -1;
  352. fprintf(stderr, "%s ERROR: Invalid Pre-Compiler directive `%.*s`\n",
  353. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  354. (int) tk_ws->string.size, tk_ws->string.data);
  355. return false;
  356. }
  357. bool
  358. tkn_parse_decl(struct tokenizer *tkn, struct token *out_tk, enum err *out_err)
  359. {
  360. /* static char buf[1024] = {0}; */
  361. /* static char buf2[1024] = {0}; */
  362. struct token tk = {0};
  363. struct token_wstr *tk_ws = NULL;
  364. struct token_wstr tk_type = {0};
  365. enum err err = ERR_OK;
  366. enum err *perr = &err;
  367. LIB_ARG_IF_NOT_NULL_MUST_BE(out_err, ERR_OK, false);
  368. LIB_ARG_MUST_NOT_BE_NULL(tkn, out_err, false);
  369. if ( out_err != NULL ) {
  370. perr = out_err;
  371. }
  372. tk_type = *(struct token_wstr *) &tkn->last;
  373. if ( ! tkn_expect(tkn, TK_ID, &tk, perr) ) return false;
  374. switch ( tokenizer_next_token_type(tkn, perr) ) {
  375. case TK_L_BRACES: {
  376. TODO("Implement function declaration");
  377. } break;
  378. case TK_INVALID:
  379. fprintf(stderr, "Failed to get next token: %s\n", err_to_name[*perr]);
  380. return false;
  381. default:
  382. fprintf(stderr,
  383. "%s Got wrong token, expected: TK_L_BRACES, got: %s\n",
  384. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  385. token_to_cstr(tk.type));
  386. }
  387. /*
  388. tk = tokenizer_next_token(tkn, perr);
  389. if ( *perr != ERR_OK ) {
  390. fprintf(stderr, "Failed to get next token: %s\n", err_to_name[*perr]);
  391. return false;
  392. }
  393. if ( tk.type != TK_ID ) {
  394. fprintf(stderr, "%s Got wrong token, expected: TK_ID, got: %s\n",
  395. tokenizer_token_loc_temp(tkn, &tk, TLF_VIM, NULL),
  396. token_to_cstr(tk.type));
  397. return false;
  398. }
  399. */
  400. UNUSED(tk_type);
  401. UNUSED(out_tk);
  402. *perr = ERR_OK;
  403. tk_ws = (struct token_wstr *) &tkn->last;
  404. fprintf(stderr, "%s ERROR: Invalid declaration id `%.*s`\n",
  405. tokenizer_token_loc_temp(tkn, &tkn->last, TLF_VIM, NULL),
  406. (int) tk_ws->string.size, tk_ws->string.data);
  407. return false;
  408. }
  409. /*
  410. int
  411. main(void)
  412. {
  413. enum err err = ERR_OK;
  414. bool was_rebuild = false;
  415. struct dyn_arr dirs = {0};
  416. size_t i = 0;
  417. was_rebuild = build_go_rebuild_yourself(__FILE__, &err);
  418. if ( was_rebuild == true ) {
  419. return 0;
  420. }
  421. dirs = dir_list_with_ext("./src", ".c", &err);
  422. free(dirs);
  423. return 0;
  424. }
  425. */