Fun 0.41.5
The programming language that makes you have fun!
Loading...
Searching...
No Matches
regex_search.c
Go to the documentation of this file.
1/*
2 * This file is part of the Fun programming language.
3 * https://fun-lang.xyz/
4 *
5 * Copyright 2025 Johannes Findeisen <you@hanez.org>
6 * Licensed under the terms of the Apache-2.0 license.
7 * https://opensource.org/license/apache-2-0
8 */
9
10/**
11 * @file regex_search.c
12 * @brief VM opcode snippet for OP_REGEX_SEARCH (POSIX first-match search).
13 *
14 * Finds the first match of a POSIX regular expression within an input string
15 * and returns a map with details about the match and captured groups.
16 *
17 * Behavior (stack effects):
18 * - Pops: pattern (string), input (string)
19 * - Pushes: map with keys:
20 * - "match": string — the matched substring (empty if no match)
21 * - "start": int — start index of the match, or -1
22 * - "end": int — end index (exclusive) of the match, or -1
23 * - "groups": array<string> — captured groups (may be empty)
24 *
25 * Platform notes:
26 * - On non-UNIX platforms (no POSIX regex available), returns a default map
27 * with empty match, start=-1, end=-1, and empty groups.
28 *
29 * Errors:
30 * - If operands are not strings, the VM prints a runtime type error and exits.
31 * - If the pattern is an invalid regex, returns the default empty map.
32 *
33 * Example:
34 * - pattern = "h(ell)o", input = "oh hello!" ->
35 * { match: "hello", start: 3, end: 8, groups: ["ell"] }
36 */
37
38/* Regex search (first match) opcode using POSIX regex */
39#ifdef __unix__
40#include <regex.h>
41#include <stdlib.h>
42#include <string.h>
43#endif
44
/* Both operands must be strings; any other type is reported on stderr as a
 * runtime type error, followed by exit(1).
 * NOTE(review): the cross-reference list names fprintf and exit as macros
 * defined in vm.c - confirm there what this path actually does. */
48 if (str.type != VAL_STRING || pattern.type != VAL_STRING) {
49 fprintf(stderr, "Runtime type error: REGEX_SEARCH expects (string, string)\n");
50 exit(1);
51 }
52#ifndef __unix__
53 /* Built without __unix__: no regex is run; fill the default result
 * (match "", start -1, end -1, groups = emptyArr).
 * NOTE(review): the declarations of m and emptyArr, and whatever happens
 * between the last map_set and the break, are on lines this listing does
 * not show - confirm that m is pushed on this path. */
55 (void)map_set(&m, "match", make_string(""));
56 (void)map_set(&m, "start", make_int(-1));
57 (void)map_set(&m, "end", make_int(-1));
59 (void)map_set(&m, "groups", emptyArr);
63 break;
64#else
/* Compile the pattern with REG_EXTENDED; a NULL pattern.s is compiled as
 * the empty pattern. */
65 regex_t rx;
66 int rc = regcomp(&rx, pattern.s ? pattern.s : "", REG_EXTENDED);
67 if (rc != 0) {
68 /* invalid regex -> push the default empty result instead of raising an
 * error (match "", start -1, end -1, groups = emptyArr) */
70 (void)map_set(&m, "match", make_string(""));
71 (void)map_set(&m, "start", make_int(-1));
72 (void)map_set(&m, "end", make_int(-1));
74 (void)map_set(&m, "groups", emptyArr);
77 push_value(vm, m);
78 break;
79 }
80 /* capture up to, say, 10 groups (including whole match) */
81 enum { MAX_CAP = 16 };
82 regmatch_t caps[MAX_CAP];
83 int ok = regexec(&rx, str.s ? str.s : "", MAX_CAP, caps, 0) == 0;
84 Value outMap = make_map_empty();
85 if (!ok) {
86 (void)map_set(&outMap, "match", make_string(""));
87 (void)map_set(&outMap, "start", make_int(-1));
88 (void)map_set(&outMap, "end", make_int(-1));
90 (void)map_set(&outMap, "groups", emptyArr);
91 } else {
92 int s = (int)caps[0].rm_so;
93 int e = (int)caps[0].rm_eo;
94 char *matchStr = NULL;
95 if (str.s && s >= 0 && e >= s) {
96 int len = e - s;
97 matchStr = (char *)malloc((size_t)len + 1);
98 if (matchStr) {
99 memcpy(matchStr, str.s + s, (size_t)len);
100 matchStr[len] = '\0';
101 }
102 }
103 (void)map_set(&outMap, "match", make_string(matchStr ? matchStr : ""));
104 if (matchStr) free(matchStr);
105 (void)map_set(&outMap, "start", make_int(s));
106 (void)map_set(&outMap, "end", make_int(e));
107 /* groups 1..n */
108 Value groupsArr = make_array_from_values(NULL, 0);
109 /* Count groups available */
110 int groupCount = 0;
111 for (int i = 1; i < MAX_CAP; ++i) {
112 if (caps[i].rm_so == -1 || caps[i].rm_eo == -1) break;
113 groupCount++;
114 }
115 if (groupCount > 0) {
116 Value *vals = (Value *)calloc((size_t)groupCount, sizeof(Value));
117 int vi = 0;
118 for (int i = 1; i <= groupCount; ++i) {
119 int gs = (int)caps[i].rm_so;
120 int ge = (int)caps[i].rm_eo;
121 char *gstr = NULL;
122 if (str.s && gs >= 0 && ge >= gs) {
123 int gl = ge - gs;
124 gstr = (char *)malloc((size_t)gl + 1);
125 if (gstr) {
126 memcpy(gstr, str.s + gs, (size_t)gl);
127 gstr[gl] = '\0';
128 }
129 }
130 vals[vi++] = make_string(gstr ? gstr : "");
131 if (gstr) free(gstr);
132 }
133 groupsArr = make_array_from_values(vals, groupCount);
134 for (int i = 0; i < groupCount; ++i)
135 free_value(vals[i]);
136 free(vals);
137 }
138 (void)map_set(&outMap, "groups", groupsArr);
139 }
/* Release the compiled pattern, then push the result map onto the VM stack.
 * NOTE(review): two lines between regfree and push_value are missing from
 * this listing - check them in the real file before editing here. */
140 regfree(&rx);
143 push_value(vm, outMap);
144 break;
145#endif
146}
@ OP_REGEX_SEARCH
Definition bytecode.h:108
pcsc_ctx_entry * e
Definition connect.c:38
int ok
Definition contains.c:38
CURLcode rc
Definition download.c:71
char * pattern
Definition findall.c:52
Value m
Definition has_key.c:27
size_t len
Definition input_line.c:102
Value * vals
Definition make_array.c:39
free(vals)
int map_set(Value *vm, const char *key, Value v)
Insert or replace a key in the map.
Definition map.c:79
Value make_map_empty(void)
Construct a new empty map Value.
Definition map.c:35
Value str
Definition regex_match.c:42
free_value(pattern)
Value emptyArr
push_value(vm, m)
uint32_t s
Definition rol.c:31
int64_t vi
Definition sclamp.c:31
Tagged union representing a Fun value.
Definition value.h:68
Value make_string(const char *s)
Construct a string Value by duplicating the given C string.
Definition value.c:95
Value make_int(int64_t v)
Construct a Value representing a 64-bit integer.
Definition value.c:51
Value make_array_from_values(const Value *vals, int count)
Create an array Value by copying items from an input span.
Definition value.c:142
@ VAL_STRING
Definition value.h:53
static Value pop_value(VM *vm)
Pop a Value from the VM operand stack.
Definition vm.c:580
#define fprintf
Definition vm.c:200
#define exit(code)
Definition vm.c:230