Fun 0.41.5
The programming language that makes you have fun!
Loading...
Searching...
No Matches
regex_replace.c
Go to the documentation of this file.
1/*
2 * This file is part of the Fun programming language.
3 * https://fun-lang.xyz/
4 *
5 * Copyright 2025 Johannes Findeisen <you@hanez.org>
6 * Licensed under the terms of the Apache-2.0 license.
7 * https://opensource.org/license/apache-2-0
8 */
9
10/**
11 * @file regex_replace.c
12 * @brief VM opcode snippet for OP_REGEX_REPLACE (POSIX global replace).
13 *
14 * Performs a global search-and-replace on the input string using a POSIX
15 * extended regular expression pattern (compiled with REG_EXTENDED) and a
16 * replacement string. Backreferences in the replacement are not expanded
16 * (simple literal replacement).
17 *
18 * Behavior (stack effects):
19 * - Pops: replacement (string), pattern (string), input (string)
20 * - Pushes: output (string) — the transformed string after replacing all
21 * matches; on invalid regex, the original input is returned unchanged.
22 *
23 * Platform notes:
24 * - On platforms where the `__unix__` macro is not defined, POSIX regex is not
25 * used and the original input string is returned unchanged.
26 *
27 * Errors:
28 * - If operands have invalid types, the VM prints a runtime type error and
29 * exits.
30 *
31 * Example:
32 * - pattern = "[0-9]+", repl = "#", input = "a1b22c" -> "a#b#c"
33 */
34
35/* Regex global replace opcode using POSIX regex */
36#ifdef __unix__
37#include <regex.h>
38#include <stdlib.h>
39#include <string.h>
40#endif
41
/*
 * OP_REGEX_REPLACE handler body.
 *
 * NOTE(review): this listing is incomplete. The embedded line numbers jump
 * (41 -> 43 -> 46, 52 -> 57, 64 -> 67, 136 -> 139), so the opening of the
 * block closed by the final `}`, the pops that produce `pattern` and `str`,
 * and some cleanup/push calls are not visible here. The comments below
 * describe only what the visible lines do.
 */
43 Value repl = pop_value(vm);
/* All three operands must be strings; otherwise report a runtime type error
 * on stderr and terminate with exit(1). */
46 if (str.type != VAL_STRING || pattern.type != VAL_STRING || repl.type != VAL_STRING) {
47 fprintf(stderr, "Runtime type error: REGEX_REPLACE expects (string, string, string)\n");
48 exit(1);
49 }
/* Without __unix__ no regex work is attempted: the result is a copy of the
 * input string (a NULL str.s is treated as ""). */
50#ifndef __unix__
51 /* Not supported: return original string */
52 Value out = make_string(str.s ? str.s : "");
57 break;
58#else
/* Compile the pattern as a POSIX extended regex; a NULL pattern.s is treated
 * as the empty pattern "". */
59 regex_t rx;
60 int rc = regcomp(&rx, pattern.s ? pattern.s : "", REG_EXTENDED);
61 if (rc != 0) {
62 /* invalid regex -> return original */
63 Value out = make_string(str.s ? str.s : "");
64 free_value(repl);
67 push_value(vm, out);
68 break;
69 }
70
/* NULL string payloads are treated as empty strings from here on. */
71 const char *s = str.s ? str.s : "";
72 const char *r = repl.s ? repl.s : "";
73
/* Output buffer: starts at strlen(input) + 1 bytes and is grown on demand.
 * out_len is the number of bytes written so far; pos is the offset in `s`
 * where the next search starts.
 * NOTE(review): the malloc/realloc results below are used without a NULL
 * check, and each realloc overwrites the only pointer to the old buffer. */
74 size_t out_cap = strlen(s) + 1;
75 char *outbuf = (char *)malloc(out_cap);
76 size_t out_len = 0;
77 size_t pos = 0;
78
/* Room for 16 match slots is passed to regexec, but only caps[0] (the whole
 * match) is read in the visible code. */
79 enum { MAX_CAP = 16 };
80 regmatch_t caps[MAX_CAP];
81
82 while (1) {
/* Search the remaining input. The match offsets are used relative to s + pos
 * (see the prefix copy and `pos += mend` below).
 * NOTE(review): eflags is 0 on every call, including resumed searches, so
 * REG_NOTBOL is never set; per POSIX regexec, `^` can then match at each
 * resume position -- confirm this is intended. */
83 if (regexec(&rx, s + pos, MAX_CAP, caps, 0) != 0) {
84 /* no more matches: append the rest */
85 size_t rest = strlen(s + pos);
86 if (out_len + rest + 1 > out_cap) {
87 out_cap = out_len + rest + 1;
88 outbuf = (char *)realloc(outbuf, out_cap);
89 }
/* rest + 1 bytes are copied, so this also writes the NUL terminator. */
90 memcpy(outbuf + out_len, s + pos, rest + 1);
91 out_len += rest;
92 break;
93 }
/* NOTE(review): rm_so/rm_eo are narrowed to int here. */
94 int mstart = (int)caps[0].rm_so;
95 int mend = (int)caps[0].rm_eo;
96 if (mstart < 0 || mend < mstart) {
97 /* Shouldn't happen, avoid infinite loop */
/* NOTE(review): this break leaves the loop without writing a NUL terminator
 * to outbuf, yet outbuf is passed to make_string after the loop. */
98 break;
99 }
100 /* append prefix */
/* The prefix is the unmatched text between the search start and the match. */
101 size_t pre_len = (size_t)mstart;
102 if (out_len + pre_len + 1 > out_cap) {
103 out_cap = (out_len + pre_len + 1) * 2;
104 outbuf = (char *)realloc(outbuf, out_cap);
105 }
106 memcpy(outbuf + out_len, s + pos, pre_len);
107 out_len += pre_len;
108
109 /* append replacement (no backref expansion for simplicity) */
110 size_t rlen = strlen(r);
111 if (out_len + rlen + 1 > out_cap) {
112 out_cap = (out_len + rlen + 1) * 2;
113 outbuf = (char *)realloc(outbuf, out_cap);
114 }
115 memcpy(outbuf + out_len, r, rlen);
116 out_len += rlen;
117
118 /* advance */
/* Resume searching just past the end of this match. */
119 pos += (size_t)mend;
/* An empty match at the search start would not advance pos. In that case one
 * input character is copied through and skipped, or the loop stops at end of
 * input.
 * NOTE(review): the test is `mend == 0`, so an empty match located further
 * into the remaining text (mstart == mend > 0) is not handled by this branch. */
120 if (mend == 0) { /* prevent zero-length match infinite loop */
121 if (pos < strlen(s)) {
122 if (out_len + 1 > out_cap) {
123 out_cap = out_len + 2;
124 outbuf = (char *)realloc(outbuf, out_cap);
125 }
126 outbuf[out_len++] = s[pos++];
127 } else {
/* NOTE(review): as with the defensive break above, no NUL terminator has been
 * written to outbuf when the loop exits here (the replacement for the empty
 * match at end of input has already been appended). */
128 break;
129 }
130 }
131 }
132
/* Build the result Value from the buffer, then release the buffer and the
 * compiled regex. Freeing outbuf right after make_string relies on
 * make_string copying its argument -- presumably it does; confirm against
 * its definition. */
133 Value out = make_string(outbuf ? outbuf : "");
134 if (outbuf) free(outbuf);
135 regfree(&rx);
136 free_value(repl);
139 push_value(vm, out);
140 break;
141#endif
142}
Value out
Definition apop.c:38
uint32_t r
Definition band.c:33
@ OP_REGEX_REPLACE
Definition bytecode.h:109
CURLcode rc
Definition download.c:71
char * pattern
Definition findall.c:52
free(vals)
Value str
Definition regex_match.c:42
free_value(repl)
push_value(vm, out)
uint32_t s
Definition rol.c:31
Tagged union representing a Fun value.
Definition value.h:68
ValueType type
Definition value.h:69
char * s
Definition value.h:73
DWORD rlen
Definition transmit.c:77
Value make_string(const char *s)
Construct a string Value by duplicating the given C string.
Definition value.c:95
@ VAL_STRING
Definition value.h:53
static Value pop_value(VM *vm)
Pop a Value from the VM operand stack.
Definition vm.c:580
#define fprintf
Definition vm.c:200
#define exit(code)
Definition vm.c:230