34 FILE *f = fopen(path,
"rb");
36 if (fseek(f, 0, SEEK_END) != 0) {
46 char *buf = (
char *)malloc((
size_t)sz + 1);
51 size_t n = fread(buf, 1, (
size_t)sz, f);
54 if (out_len) *out_len = n;
64static void skip_ws(
const char *src,
size_t len,
size_t *pos) {
67 if (c ==
' ' || c ==
'\t' || c ==
'\r' || c ==
'\n') {
78static void skip_line(
const char *src,
size_t len,
size_t *pos) {
79 while (*pos < len && src[*pos] !=
'\n')
81 if (*pos < len && src[*pos] ==
'\n') (*pos)++;
91 if (*pos + 1 < len && src[*pos] ==
'/' && src[*pos + 1] ==
'/') {
96 if (*pos + 1 < len && src[*pos] ==
'/' && src[*pos + 1] ==
'*') {
98 while (*pos + 1 < len && !(src[*pos] ==
'*' && src[*pos + 1] ==
'/'))
100 if (*pos + 1 < len) *pos += 2;
111static int starts_with(
const char *src,
size_t len,
size_t pos,
const char *kw) {
112 size_t klen = strlen(kw);
113 if (pos + klen > len)
return 0;
114 return strncmp(src + pos, kw, klen) == 0;
121 if (*pos == 0 &&
starts_with(src, len, *pos,
"#!")) {
132 if (p < len && (isalpha((
unsigned char)src[p]) || src[p] ==
'_')) {
134 while (p < len && (isalnum((
unsigned char)src[p]) || src[p] ==
'_'))
144static int consume_char(
const char *src,
size_t len,
size_t *pos,
char expected) {
146 if (*pos < len && src[*pos] == expected) {
162 if (*pos >= len)
return NULL;
163 char quote = src[*pos];
164 if (quote !=
'"' && quote !=
'\'')
return NULL;
166 size_t cap = 64, out_len = 0;
167 char *out = (
char *)malloc(cap);
168 if (!out)
return NULL;
177 if (*pos >= len)
break;
203 if (out_len + 1 >= cap) {
205 char *tmp = (
char *)realloc(out, cap);
215 if (out_len + 1 >= cap) {
216 char *tmp = (
char *)realloc(out, cap + 1);
232static void skip_spaces(
const char *src,
size_t len,
size_t *pos) {
235 if (c ==
' ' || c ==
'\t' || c ==
'\r') {
250 if (p < len && (isalpha((
unsigned char)src[p]) || src[p] ==
'_')) {
253 while (p < len && (isalnum((
unsigned char)src[p]) || src[p] ==
'_'))
255 size_t n = p - start;
256 char *name = (
char *)malloc(n + 1);
258 memcpy(name, src + start, n);
276 if (p < len && (src[p] ==
'+' || src[p] ==
'-')) {
277 if (src[p] ==
'-') sign = -1;
286 if ((p + 1) < len && src[p] ==
'0' && (src[p + 1] ==
'x' || src[p + 1] ==
'X')) {
288 if (p >= len || !isxdigit((
unsigned char)src[p])) {
293 while (p < len && isxdigit((
unsigned char)src[p])) {
295 int d = (c >=
'0' && c <=
'9') ? (c -
'0')
296 : (c >=
'a' && c <=
'f') ? (c -
'a' + 10)
297 : (c >=
'A' && c <=
'F') ? (c -
'A' + 10)
299 val = (val << 4) + (uint64_t)d;
304 return (uint64_t)((int64_t)sign * (int64_t)val);
308 if (!isdigit((
unsigned char)src[p])) {
313 while (p < len && isdigit((
unsigned char)src[p])) {
314 val = val * 10 + (uint64_t)(src[p] -
'0');
319 return (uint64_t)((int64_t)sign * (int64_t)val);
334 void *np = realloc(ptr, newcap);
351 sb->
buf = (
char *)malloc(256);
352 sb->
cap = sb->
buf ? 256 : 0;
354 if (sb->
buf) sb->
buf[0] =
'\0';
361 if (need <= sb->cap)
return;
362 size_t nc = sb->
cap ? sb->
cap : 256;
377 if (!sb->
buf)
return;
378 memcpy(sb->
buf + sb->
len, s, n);
395 if (!sb->
buf)
return;
423 if (!name || !name[0])
return;
425 int ncap = nl->
cap ? nl->
cap * 2 : 8;
426 char **nn = (
char **)realloc(nl->
names, (
size_t)ncap *
sizeof(
char *));
439 for (
int i = 0; i < nl->
count; ++i)
453 if (!text || !out)
return;
454 size_t len = strlen(text);
455 int in_line = 0, in_block = 0, in_sq = 0, in_dq = 0, esc = 0;
457 for (
size_t i = 0; i < len;) {
471 if (c ==
'*' && (i + 1) < len && text[i + 1] ==
'/') {
482 if (!esc && c ==
'\\') {
488 if (!esc && c ==
'\'') {
497 if (!esc && c ==
'\\') {
503 if (!esc && c ==
'"') {
512 if (c ==
'/' && (i + 1) < len && text[i + 1] ==
'/') {
518 if (c ==
'/' && (i + 1) < len && text[i + 1] ==
'*') {
541 while (j < len && text[j] ==
' ') {
545 if (j < len && text[j] ==
'\t') {
554 const char *kw1 =
"fun";
555 const char *kw2 =
"class";
556 if (j + 3 <= len && strncmp(text + j, kw1, 3) == 0 && (j + 3 == len || isspace((
unsigned char)text[j + 3]))) {
558 while (p < len && (text[p] ==
' ' || text[p] ==
'\t'))
562 if (p < len && (isalpha((
unsigned char)text[p]) || text[p] ==
'_')) {
564 while (p < len && (isalnum((
unsigned char)text[p]) || text[p] ==
'_'))
566 size_t n = p - start;
569 size_t copy = (n <
sizeof(tmp) - 1) ? n : (
sizeof(tmp) - 1);
570 memcpy(tmp, text + start, copy);
575 }
else if (j + 5 <= len && strncmp(text + j, kw2, 5) == 0 && (j + 5 == len || isspace((
unsigned char)text[j + 5]))) {
577 while (p < len && (text[p] ==
' ' || text[p] ==
'\t'))
581 if (p < len && (isalpha((
unsigned char)text[p]) || text[p] ==
'_')) {
583 while (p < len && (isalnum((
unsigned char)text[p]) || text[p] ==
'_'))
585 size_t n = p - start;
588 size_t copy = (n <
sizeof(tmp) - 1) ? n : (
sizeof(tmp) - 1);
589 memcpy(tmp, text + start, copy);
619 if (!src)
return NULL;
621 fprintf(stderr,
"Include error: include nesting too deep\n");
626#ifndef DEFAULT_LIB_DIR
627#define DEFAULT_LIB_DIR "/usr/share/fun/lib/"
630 const char *env_lib = getenv(
"FUN_LIB_DIR");
631 size_t len = strlen(src);
636 size_t shebang_end = 0;
637 int shebang_lines = 0;
638 if (src[0] ==
'#' && src[1] ==
'!') {
641 while (src[j] && src[j] !=
'\n' && src[j] !=
'\r') j++;
643 if (src[j] ==
'\r') {
645 if (src[j] ==
'\n') j++;
646 }
else if (src[j] ==
'\n') {
650 for (
size_t t = 0; t < j; ++t)
if (src[t] ==
'\n') shebang_lines++;
651 if (shebang_lines == 0) shebang_lines = 1;
658 if (current_path && current_path[0]) {
659 sb_append(&out,
"// __include_begin__: ");
663 int base_line = 1 + (shebang_end ? shebang_lines : 0);
664 snprintf(lb,
sizeof(lb),
" @line %d", base_line);
668 int in_line = 0, in_block = 0, in_sq = 0, in_dq = 0, esc = 0;
671 for (
size_t i = shebang_end; i < len;) {
675 if (bol && !in_block && !in_sq && !in_dq) {
678 while (j < len && (src[j] ==
' ' || src[j] ==
'\t'))
681 if (k < len && src[k] ==
'#') k++;
682 const char *kw =
"include";
684 if (k + kwlen <= len && strncmp(src + k, kw, kwlen) == 0) {
687 while (k < len && (src[k] ==
' ' || src[k] ==
'\t'))
689 if (k < len && (src[k] ==
'"' || src[k] ==
'<')) {
690 char opener = src[k];
691 char closer = (opener ==
'"') ?
'"' :
'>';
693 size_t path_start = k;
694 while (k < len && src[k] != closer)
696 if (k < len && src[k] == closer) {
697 size_t path_len = k - path_start;
698 char *path = (
char *)malloc(path_len + 1);
700 memcpy(path, src + path_start, path_len);
701 path[path_len] =
'\0';
709 while (ap < len && (src[ap] ==
' ' || src[ap] ==
'\t'))
712 const char *askw =
"as";
713 if (ap + 2 <= len && strncmp(src + ap, askw, 2) == 0 && (ap + 2 == len || isspace((
unsigned char)src[ap + 2]))) {
715 while (ap < len && (src[ap] ==
' ' || src[ap] ==
'\t'))
719 if (ap < len && (isalpha((
unsigned char)src[ap]) || src[ap] ==
'_')) {
721 while (ap < len && (isalnum((
unsigned char)src[ap]) || src[ap] ==
'_'))
723 size_t n = ap - start;
724 size_t copy = (n <
sizeof(ns) - 1) ? n : (
sizeof(ns) - 1);
725 memcpy(ns, src + start, copy);
732 while (k < len && src[k] !=
'\n')
734 if (k < len && src[k] ==
'\n') k++;
738 char resolved2[1024];
753 if (env_lib && env_lib[0]) {
754 size_t elen = strlen(env_lib);
755 char last = env_lib[elen ? (elen - 1) : 0];
756 int needs_sep = !(last ==
'/' || last ==
'\\');
757 char sep = (last ==
'\\') ?
'\\' :
'/';
759 snprintf(resolved,
sizeof(resolved),
"%s%c%s", env_lib, sep, path);
761 snprintf(resolved,
sizeof(resolved),
"%s%s", env_lib, path);
773 snprintf(resolved,
sizeof(resolved),
"lib/%s", path);
778 snprintf(resolved,
sizeof(resolved),
"%s", path);
784 fprintf(stderr,
"Include error: cannot read '%s'\n", resolved[0] ? resolved :
"(unresolved)");
785 sb_append(&out,
"// include error: cannot read ");
786 sb_append(&out, resolved[0] ? resolved :
"(unresolved)");
790 const char *startp = inc;
792 if ((
unsigned char)inc[0] == 0xEF && (
unsigned char)inc[1] == 0xBB && (
unsigned char)inc[2] == 0xBF) {
796 if (startp[0] ==
'#' && startp[1] ==
'!') {
798 const char *q = startp;
799 while (*q && *q !=
'\n' && *q !=
'\r')
804 }
else if (*q ==
'\n') {
809 char *inc_clean = strdup(startp);
815 sb_append(&out,
"// __include_begin__: ");
831 if (current_path && current_path[0]) {
832 sb_append(&out,
"// __include_begin__: ");
837 snprintf(lb2,
sizeof(lb2),
" @line %d", 0);
839 int parent_line = 1 + (shebang_end ? shebang_lines : 0);
840 for (
size_t tt = shebang_end; tt < k; ++tt)
if (src[tt] ==
'\n') parent_line++;
841 snprintf(lb2,
sizeof(lb2),
" @line %d", parent_line);
874 if (c ==
'*' && (i + 1) < len && src[i + 1] ==
'/') {
881 bol = (c ==
'\n') ? 1 : 0;
887 if (!esc && c ==
'\\') {
893 if (!esc && c ==
'\'') {
897 bol = (c ==
'\n') ? 1 : 0;
903 if (!esc && c ==
'\\') {
909 if (!esc && c ==
'"') {
913 bol = (c ==
'\n') ? 1 : 0;
919 if (c ==
'/' && (i + 1) < len && src[i + 1] ==
'/') {
927 if (c ==
'/' && (i + 1) < len && src[i + 1] ==
'*') {
951 bol = (c ==
'\n') ? 1 : 0;
955 if (!out.
buf)
return strdup(
"");
997 char *out_path,
size_t out_path_cap,
999 if (!path || line <= 0 || !out_path || out_path_cap == 0 || !out_line)
return 0;
1006 if (!orig)
return 0;
1010 if (!prep)
return 0;
1013 size_t len = strlen(prep);
1016 while (pos < len && cur < line) {
1017 if (prep[pos] ==
'\n') cur++;
1020 if (cur != line) { free(prep);
return 0; }
1023 const char *marker =
"// __include_begin__: ";
1024 size_t mlen = strlen(marker);
1029 while (ls > 0 && prep[ls - 1] !=
'\n') ls--;
1031 if (ls + mlen <= len && strncmp(prep + ls, marker, mlen) == 0) {
1033 size_t p = ls + mlen;
1035 while (eol < len && prep[eol] !=
'\n') eol++;
1037 size_t pos_as = eol, pos_line = eol;
1038 for (
size_t t = p; t + 3 < eol; ++t) {
1039 if (prep[t] ==
' ' && strncmp(prep + t,
" as ", 4) == 0) { pos_as = t;
break; }
1041 for (
size_t t = p; t + 6 < eol; ++t) {
1042 if (prep[t] ==
' ' && strncmp(prep + t,
" @line ", 7) == 0) { pos_line = t;
break; }
1044 size_t path_end = pos_as < pos_line ? pos_as : pos_line;
1045 if (path_end < p) path_end = eol;
1046 size_t copy = (path_end - p) < (out_path_cap - 1) ? (path_end - p) : (out_path_cap - 1);
1047 memcpy(out_path, prep + p, copy);
1048 out_path[copy] =
'\0';
1052 if (pos_line < eol) {
1053 size_t num_start = pos_line + 7;
1055 while (num_start < eol && prep[num_start] ==
' ') num_start++;
1056 while (num_start < eol && prep[num_start] >=
'0' && prep[num_start] <=
'9') {
1057 v = v * 10 + (prep[num_start] -
'0');
1060 if (v > 0) base_line = v;
1065 if (q < len && prep[q] ==
'\n') q++;
1066 size_t span_start = q;
1067 size_t span_end = len;
1072 while (ls2 > 0 && prep[ls2 - 1] !=
'\n') ls2--;
1073 if (ls2 + mlen <= len && strncmp(prep + ls2, marker, mlen) == 0) {
1077 while (fwd < len && prep[fwd] !=
'\n') fwd++;
1078 if (fwd < len && prep[fwd] ==
'\n') fwd++;
1083 if (!(pos >= span_start && pos < span_end)) {
1085 goto next_scan_back;
1090 size_t cnt = span_start;
1091 while (cnt < pos) {
if (prep[cnt] ==
'\n') inner++; cnt++; }
1092 *out_line = base_line + inner - 1;
1098 scan = (ls > 0) ? (ls - 1) : 0;
1120 if (p < len && (src[p] ==
'+' || src[p] ==
'-')) p++;
1123 while (p < len && isdigit((
unsigned char)src[p])) {
1129 if (p < len && src[p] ==
'.') {
1132 while (p < len && isdigit((
unsigned char)src[p])) {
1139 if (p < len && (src[p] ==
'e' || src[p] ==
'E')) {
1141 size_t epos = p + 1;
1142 if (epos < len && (src[epos] ==
'+' || src[epos] ==
'-')) epos++;
1143 size_t digits_start = epos;
1144 while (epos < len && isdigit((
unsigned char)src[epos])) {
1147 if (epos == digits_start) {
1155 if (!saw_digit || (!saw_dot && !saw_exp)) {
1161 size_t n = p - start;
1162 char *tmp = (
char *)malloc(n + 1);
1167 memcpy(tmp, src + start, n);
1171 double dv = strtod(tmp, &endp);
1172 if (!endp || *endp !=
'\0') {
Public API for parsing Fun source into bytecode.
static void collect_exports_top_level(const char *text, NameList *out)
Collect top-level exported symbols (fun/class) from source text. Ignores strings and comments....
static double parse_float_literal_value(const char *src, size_t len, size_t *pos, int *ok)
Parse a floating-point literal (supports . and scientific notation).
static int read_identifier_into(const char *src, size_t len, size_t *pos, char **out_name)
Read an identifier starting at pos and allocate its name.
static void skip_ws(const char *src, size_t len, size_t *pos)
Skip spaces, tabs, carriage returns and newlines.
static uint64_t parse_int_literal_value(const char *src, size_t len, size_t *pos, int *ok)
Parse an integer literal (decimal or 0x-hex) with optional sign.
static void * xrealloc(void *ptr, size_t newcap)
Thin wrapper over realloc used by local buffers.
static int consume_char(const char *src, size_t len, size_t *pos, char expected)
Consume expected character after skipping whitespace.
static void sb_append(StrBuf *sb, const char *s)
Append a NUL-terminated string to the buffer.
static void sb_append_n(StrBuf *sb, const char *s, size_t n)
Append n bytes from s to the buffer.
static void skip_identifier(const char *src, size_t len, size_t *pos)
If an identifier starts at pos, advance pos to its end. Recognizes [A-Za-z_][A-Za-z0-9_]*.
static void skip_comments(const char *src, size_t len, size_t *pos)
Skip whitespace, then line and block comments. Continues until the next non-comment,...
static void skip_spaces(const char *src, size_t len, size_t *pos)
Skip only spaces, tabs and carriage returns (not newlines).
static void skip_shebang_if_present(const char *src, size_t len, size_t *pos)
Skip a top-of-file shebang line that starts with "#!" if present.
static char * read_file_all(const char *path, size_t *out_len)
Read entire file into a newly allocated buffer.
static void sb_init(StrBuf *sb)
Initialize a StrBuf with a small starting capacity.
static void skip_line(const char *src, size_t len, size_t *pos)
Advance pos to the next line, consuming the trailing '\n' if present.
static void sb_reserve(StrBuf *sb, size_t need)
Ensure buffer capacity for at least need bytes (including terminator).
static int starts_with(const char *src, size_t len, size_t pos, const char *kw)
Check if src starting at pos begins with kw and fits in len.
static char * parse_string_literal_any_quote(const char *src, size_t len, size_t *pos)
Parse a single-quoted or double-quoted string literal.
static void nl_free(NameList *nl)
Free all strings and internal storage in the list.
static void nl_add(NameList *nl, const char *name)
Add a copy of name to the list (ignores NULL/empty).
int map_expanded_line_to_include_path(const char *path, int line, char *out_path, size_t out_path_cap, int *out_line)
Map a line number in expanded source back to original include path/line.
char * preprocess_includes_with_path(const char *src, const char *current_path)
Preprocess includes with a known file path to improve span markers.
static void nl_init(NameList *nl)
Initialize an empty NameList.
static char * preprocess_includes_internal(const char *src, const char *current_path, int depth)
Expand include directives in Fun source.
char * preprocess_includes(const char *src)
Public wrapper to preprocess includes without a current path.
static void sb_append_ch(StrBuf *sb, char c)
Append a single character to the buffer.
List of exported symbol names discovered at top level.
Simple growable string buffer.