-
Notifications
You must be signed in to change notification settings - Fork 2
/
tokens.c
105 lines (100 loc) · 2.27 KB
/
tokens.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#include "y.tab.h"
#include "tokens.h"
#include "lexer.h"
#include "sblist.h"
#include <stdio.h>
#include <assert.h>
/*
 * Drain the lexer (set up beforehand via lex_init()) and record one
 * struct list_item per token in a freshly created sblist.
 *
 * For every token the lexer context active after yylex() is stored as the
 * item type, and so/eo are filled from the positions reported by
 * lex_getpos():
 *   - CTX_DUP / CTX_BRACKET: the lexer is advanced until it leaves that
 *     context, and the whole group is stored as a single item spanning
 *     from the opening token to the current lexer position.
 *   - anything else: a one-position item; QUOTED_CHAR tokens start one
 *     position earlier (presumably to include the escaping backslash -
 *     confirm against the lexer).
 *
 * Returns the new list; the caller takes over the list.
 */
static sblist *lex_to_list(void) {
	struct list_item li;
	sblist *ret = sblist_new(sizeof li, 32);
	int c;

	while((c = yylex()) != EOF) {
		enum lex_context ctx = lex_getcontext();
		/* position of the token that was just consumed */
		size_t pos = lex_getpos()-1;

		switch(ctx) {
		case CTX_DUP:
		case CTX_BRACKET:
			/*
			 * Swallow everything up to the end of the group. Also stop
			 * on EOF, so an unterminated group cannot spin here should
			 * the lexer keep reporting the same context at end of input.
			 */
			do {
				c = yylex();
			} while(c != EOF && lex_getcontext() == ctx);
			/* the token that ended the group must be its closing char */
			assert(c == (ctx == CTX_DUP ? '}' : ']'));
			li.type = ctx;
			li.so = pos;
			li.eo = lex_getpos();
			sblist_add(ret, &li);
			break;
		default:
			li.type = ctx;
			/* quoted chars begin one position before the reported one */
			li.so = (c == QUOTED_CHAR) ? pos-1 : pos;
			li.eo = pos+1;
			sblist_add(ret, &li);
			break;
		}
	}
	return ret;
}
/*
 * Retag single-character repetition operators.
 *
 * Walks all items in `tokens`; every item of type CTX_NONE whose first
 * character in `org_regex` (at index `so`) is '?', '*' or '+' gets its
 * type changed to CTX_DUP. All other items are left untouched.
 * The list is modified in place.
 */
static void list_transform_dupchars(sblist* tokens, const char* org_regex) {
	size_t idx = 0;

	while(idx < sblist_getsize(tokens)) {
		struct list_item *tok = sblist_get(tokens, idx);
		idx++;

		if(tok->type != CTX_NONE)
			continue;

		char first = org_regex[tok->so];
		if(first == '?' || first == '*' || first == '+')
			tok->type = CTX_DUP;
	}
}
/* Return non-zero if `ch` is one of the characters that ends a literal run. */
static int join_literals_is_stopper(char ch) {
	switch(ch) {
	case '"': case '^': case '.': case '[': case '$':
	case '(': case ')': case '|': case '{':
		return 1;
	default:
		return 0;
	}
}

/*
 * Build a new list in which runs of adjacent plain tokens are collapsed
 * into a single item.
 *
 * A token belongs to a run when its type is CTX_NONE and the character at
 * its start offset in `org_regex` is not one of  " ^ . [ $ ( ) | {  .
 * A run of one or more such tokens is emitted as one item that starts at
 * the first token's `so`, ends at `so` plus the summed lengths of the run
 * members, and carries type 0xff (the marker this file uses for joined
 * runs). Every other token is copied through unchanged.
 *
 * The input list `tokens` is freed; the newly created list is returned.
 */
static sblist* list_join_literals(sblist* tokens, const char* org_regex) {
	sblist *joined = sblist_new(sizeof(struct list_item), sblist_getsize(tokens));
	size_t first = 0;

	while(first < sblist_getsize(tokens)) {
		size_t run_len = 0;
		size_t next = first;

		/* extend the run as far as joinable tokens continue */
		while(next < sblist_getsize(tokens)) {
			struct list_item *cur = sblist_get(tokens, next);
			if(cur->type != CTX_NONE || join_literals_is_stopper(org_regex[cur->so]))
				break;
			run_len += cur->eo-cur->so;
			next++;
		}

		struct list_item out = *((struct list_item *)sblist_get(tokens, first));
		if(next > first) {
			/* at least one token joined: emit the merged item */
			out.type = 0xff;
			out.eo = out.so+run_len;
			first = next;
		} else {
			/* non-joinable token: pass through as is */
			first++;
		}
		sblist_add(joined, &out);
	}

	sblist_free(tokens);
	return joined;
}
/*
 * Tokenize the regex in [re, re_end) and post-process the token list.
 *
 * Steps, in order:
 *   1. initialise the lexer on the given range with LEXFLAG_SILENT,
 *   2. collect all tokens into a list,
 *   3. retag single-character repetition operators,
 *   4. join runs of plain tokens (this replaces the list with a new one).
 *
 * Returns the resulting list; the intermediate list has already been
 * freed by the joining step.
 */
sblist *lex_and_transform(const char *re, const char *re_end) {
	sblist *raw;

	lex_init(re, re_end, LEXFLAG_SILENT);
	raw = lex_to_list();
	list_transform_dupchars(raw, re);
	return list_join_literals(raw, re);
}