-
Notifications
You must be signed in to change notification settings - Fork 90
/
Copy pathregex.c
94 lines (85 loc) · 3.21 KB
/
regex.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/* Compile with:
CFLAGS="-g -Wall -O3 --std=gnu11 -Dtest_regexes" make regex
*/
#define _GNU_SOURCE //cause stdio.h to include asprintf
#include "stopif.h"
#include <regex.h>
#include "regex_fns.h"
#include <string.h> //strlen
#include <stdlib.h> //malloc, memcpy
/* Count the unescaped closing parentheses in a regular expression.

   A backslash escapes exactly the one character after it, so "\)" is not
   counted while "\\)" (an escaped backslash followed by a bare paren) is.
   Bracket expressions are not treated specially: a ')' inside [...] is
   counted like any other.

   string: NUL-terminated regex text; must not be NULL.
   Returns the number of ')' characters not preceded by an active backslash. */
static int count_parens(const char *string){
    int closers = 0;
    int escaped = 0;    //nonzero iff the previous character was an unescaped backslash
    const char *cursor = string;
    while (*cursor != '\0'){
        char c = *cursor++;
        if (escaped){
            //This character is consumed by the escape, whatever it is.
            escaped = 0;
        } else if (c == '\\'){
            escaped = 1;
        } else if (c == ')'){
            closers++;
        }
    }
    return closers;
}
/* Match a POSIX extended regular expression against a string and, on request,
   hand back copies of the parenthesized submatches.

   Fields of `in` read here:
     string     - text to search; must not be NULL.
     regex      - pattern, compiled with REG_EXTENDED; must not be NULL.
     substrings - if non-NULL, on a successful match *substrings is set to a
                  freshly allocated array of NUL-terminated strings, one per
                  ')' counted by count_parens(). Groups that did not take part
                  in the match are returned as empty strings. The caller frees
                  each element and then the array itself. On no match,
                  *substrings is left untouched.
     use_case   - if zero, matching is case-insensitive (REG_ICASE).

   Returns:
     -1  string is NULL
     -2  regex is NULL
     -3  the regex did not compile
     -4  out of memory while copying submatches (*substrings is set to NULL)
      0  no match -- and also a successful match when substrings was not
         requested, because the count of groups is then zero
     >0  number of substrings stored in *substrings
*/
int regex_match_base(regex_fn_s in){
    Stopif(!in.string, return -1, "NULL string input");
    Stopif(!in.regex, return -2, "NULL regex input");

    regex_t re;
    int matchcount = 0;
    if (in.substrings) matchcount = count_parens(in.regex);
    regmatch_t result[matchcount+1];    //slot zero holds the whole match
    int compiled_ok = !regcomp(&re, in.regex, REG_EXTENDED
                                    | (in.use_case ? 0 : REG_ICASE)
                                    | (in.substrings ? 0 : REG_NOSUB));
    Stopif(!compiled_ok, return -3, "This regular expression didn't compile: \"%s\"", in.regex);

    int found = !regexec(&re, in.string, matchcount+1, result, 0);
    if (!found){
        regfree(&re);   //the compiled pattern must be released on every exit path
        return 0;
    }

    int out = matchcount;
    if (in.substrings){
        //Ask for at least one slot so that a NULL return always means failure,
        //even when the regex has no groups.
        char **substrings = malloc(sizeof *substrings * (matchcount > 0 ? matchcount : 1));
        int filled = 0;
        //match zero is the whole string; ignore it.
        for (; substrings && filled < matchcount; filled++){
            regmatch_t m = result[filled+1];
            //Unused groups are reported with offsets of -1; treat them as empty.
            size_t length_of_match = (m.rm_so >= 0 && m.rm_eo > m.rm_so)
                                        ? (size_t)(m.rm_eo - m.rm_so) : 0;
            substrings[filled] = malloc(length_of_match + 1);
            if (!substrings[filled]) break;
            if (length_of_match)
                memcpy(substrings[filled], in.string + m.rm_so, length_of_match);
            substrings[filled][length_of_match] = '\0';
        }
        if (!substrings || filled < matchcount){
            //Allocation failed part-way: undo everything and report the error.
            for (int i=0; i< filled; i++)
                free(substrings[i]);
            free(substrings);
            substrings = NULL;
            out = -4;
        }
        *in.substrings = substrings;
    }
    regfree(&re);
    Stopif(out == -4, return -4, "Out of memory while copying substrings");
    return out;
}
/* Replace one occurrence of a regular expression in a string.

   The pattern is wrapped as "(.*)(search)(.*)" and matched through
   regex_match(); the result is the first group, then `replace`, then the
   third group. Because the leading (.*) is greedy, the occurrence replaced is
   the last one in `base`. The third group is taken by position, so `search`
   must not contain parenthesized groups of its own.

   base:    text to search.
   search:  regular expression to look for.
   replace: literal text to put in its place.

   Returns a newly allocated string that the caller must free(), or NULL if
   there was no match, the match failed, or memory ran out.
*/
char * search_and_replace(char const *base, char const*search, char const *replace){
    char *regex = NULL, *out = NULL;
    char **substrings = NULL;   //stays NULL unless the match allocates it

    if (asprintf(&regex, "(.*)(%s)(.*)", search) < 0) return NULL;
    int match_ct = regex_match(base, regex, &substrings);
    free(regex);    //only needed for the match itself

    if (match_ct >= 3
            && asprintf(&out, "%s%s%s", substrings[0], replace, substrings[2]) < 0)
        out = NULL; //the contents of out are unspecified after a failed asprintf

    //Release the submatches whether or not we produced a result.
    for (int i=0; i< match_ct; i++)
        free(substrings[i]);
    free(substrings);
    return out;
}
#ifdef test_regexes
int main(){
char **substrings;
int match_ct = regex_match("Hedonism by the alps, savory foods at every meal.",
"([He]*)do.*a(.*)s, (.*)or.* ([em]*)al", &substrings);
printf("%i matches:\n", match_ct);
for (int i=0; i< match_ct; i++){
printf("[%s] ", substrings[i]);
free(substrings[i]);
}
free(substrings);
printf("\n\n");
match_ct = regex_match("", "([[:alpha:]]+) ([[:alpha:]]+)", &substrings);
Stopif(match_ct != 0, return 1, "Error: matched a blank");
printf("Without the L, Plants are: %s", search_and_replace("Plants\n", "l", ""));
}
#endif