English Language flag
// Log In
// CVSweb
Project: FreeWRT
// Summary // Activity // Search // Tracker // Lists // News // SCM // Wiki

SCM Repository

ViewVC logotype

Contents of /branches/common-nfo/tools/nfotiser/parser.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3574 - (show annotations) (download)
Wed Sep 12 12:59:14 2007 UTC (6 years, 5 months ago) by tg
File MIME type: text/plain
File size: 11545 byte(s)
use MAP_PRIVATE, so that this works on Linux as well
10x austriancoder@

1 /* $FreeWRT: src/share/misc/licence.template,v 1.20 2006/12/11 21:04:56 tg Rel $ */
2
3 /*-
4 * Copyright (c) 2007
5 * Thorsten Glaser <tg@mirbsd.de>
6 *
7 * Provided that these terms and disclaimer and all copyright notices
8 * are retained or reproduced in an accompanying document, permission
9 * is granted to deal in this work without restriction, including un-
10 * limited rights to use, publicly perform, distribute, sell, modify,
11 * merge, give away, or sublicence.
12 *
13 * Advertising materials mentioning features or use of this work must
14 * display the following acknowledgement:
15 * This product includes material provided by Thorsten Glaser.
16 * This acknowledgement does not need to be reprinted if this work is
17 * linked into a bigger work whose licence does not allow such clause
18 * and the author of this work is given due credit in the bigger work
19 * or its accompanying documents, where such information is generally
20 * kept, provided that said credits are retained.
21 *
22 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
23 * the utmost extent permitted by applicable law, neither express nor
24 * implied; without malicious intent or gross negligence. In no event
25 * may a licensor, author or contributor be held liable for indirect,
26 * direct, other damage, loss, or other issues arising in any way out
27 * of dealing in the work, even if advised of the possibility of such
28 * damage or existence of a defect, except proven that it results out
29 * of said person's immediate fault when using the work as intended.
30 */
31
32 #include <sys/param.h>
33 #include <sys/mman.h>
34 #include <sys/stat.h>
35
36 #include <err.h>
37 #include <errno.h>
38 #include <stdarg.h>
39 #include <stdint.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43
44 #include "nfotiser.h"
45
/*
 * Fatal syntax error reporter; defined further down in this file.
 * Takes the line number to report plus a printf-style format with
 * its arguments, and never returns.
 */
static void syntaxerr_(size_t lno, const char *fmt, ...)
    __attribute__((format (printf, 2, 3), noreturn));

/*
 * Convenience wrapper around syntaxerr_(): it passes along whatever
 * variable named "lineno" is visible at the point of use.
 */
#define syntaxerr(fmt, ...)	syntaxerr_(lineno, (fmt), ##__VA_ARGS__)
50
51 /*
52 * Parsing works as follows:
53 *
54 * - strip completely empty line
55 * - strip line beginning with hash mark
56 * - if line ends with backslash, get next line that is not
57 * + empty
58 * + beginning with a hash mark
59 * and look if it begins with a tab (if not: syntax error)
60 * if so, strip backslash + newline + tab and repeat
61 * - if line begins with a tab, strip it and append line to
62 * the last line, including the newline separator
63 * - match line with '([A-Za-z_][A-Za-z0-9_]*)\t(.*)$' and
64 * call \1 the key and \2 the value (else: syntax error)
65 * - uppercase the key
66 * - enter the key/value pair in the system
67 *
68 * The following is also part of parsing, but left to the caller:
69 * - replace ${foo} with the value of key "foo"
70 * - undouble all backslashes
71 *
72 * We enter the key into the system up to three-fold:
73 * - KWT_NORMAL => ([A-Za-z_][A-Za-z0-9_]*)
74 * + \1 = keyword (toupper'd)
75 * - KWT_MULTI => ([A-Za-z_][A-Za-z0-9_]*)_([A-Za-z0-9_]*)
76 * + \1 = keyword (toupper'd)
77 * + \2 = kw_multi (case preserving)
78 * - KWT_ITERATED => ([A-Za-z_][A-Za-z0-9_]*)_([0-9]*)
79 * + \1 = keyword (toupper'd)
80 * + \2 = kw_iter (unsigned integer value)
81 * - KWT_MULTITER => ([A-Za-z_][A-Za-z0-9_]*)_([0-9]*)_([A-Za-z0-9_]*)
82 * + \1 = keyword (toupper'd)
83 * + \2 = kw_iter (unsigned integer value)
84 * + \3 = kw_multi (case preserving)
85 * - KWT_MULTITOP => ([A-Za-z_][A-Za-z0-9_]*)_(([0-9]*)_)?([A-Za-z0-9_]*)
86 * + \1 = keyword (toupper'd)
87 * + \2 = kw_iter (unsigned integer value), 0 if not set
88 * + \3 = kw_multi (case preserving)
89 * All KWT_* can match as if they were KWT_NORMAL (if we have a perfect
90 * first match); kw_iter=0 and kw_multi=NULL in that case.
91 *
92 * Cf. https://www.freewrt.org/trac/wiki/Documentation/Specs/Freewrt_info_files
93 * for more examples and a more human-readable version of this specification.
94 */
95
96 struct parser_result *
97 nfo_parse(int fd, const struct parser_keywords *kws)
98 {
99 struct parser_result *res;
100 struct parser_res *entry;
101 const struct parser_keywords *kwp;
102 char *cp, *t, *tp, *buf, *buf_base;
103 size_t len, n, lineno = 0;
104 struct stat sb;
105 char *entry_multi;
106 unsigned entry_iter;
107 enum parser_kwtype entry_type;
108
109 res = xmalloc(sizeof (struct parser_result));
110 CIRCLEQ_INIT(res);
111
112 if (fstat(fd, &sb))
113 err(255, "cannot stat");
114
115 /* slurp whole file into mapped memory */
116 len = sb.st_size;
117 D(2, "trying to mmap %zu bytes...", len);
118 if ((cp = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_PRIVATE,
119 fd, 0)) == MAP_FAILED)
120 err(255, "cannot mmap %zu bytes", len);
121 D(2, "ok\n");
122 /* make a nice NUL-terminated copy (malloc'd) */
123 D(2, "copying %zu bytes...", len);
124 buf = buf_base = str_nsave(cp, len);
125 D(2, " munmap...");
126 if (munmap(cp, len))
127 err(255, "cannot munmap");
128 D(2, "ok\n");
129 /* don't need the file any more */
130
131 /* now we can operate on the NUL-terminated R/W string “buf” */
132 if (buf[len - 1] != '\n')
133 syntaxerr("file does not end with a newline!");
134
135 D(2, "entire string: «%s» (%zu)\n", buf, strlen(buf));
136 lineno = 1;
137
138 loop_newline:
139 /* completely new line buffer */
140 cp = NULL;
141
142 loop_getline:
143 /* get a line and add it to line buffer */
144 if (*buf == '\0') {
145 D(2, "D: [%4zu] read EOF\n", lineno);
146 goto loop_eof;
147 }
148 if (*buf == '\n') {
149 D(2, "D: [%4zu] read newline\n", lineno);
150 ++buf;
151 ++lineno;
152 goto loop_getline;
153 }
154 if (*buf == '#') {
155 D(2, "D: [%4zu] read comment ", lineno);
156 t = buf;
157 while (*t != '\n')
158 ++t;
159 *t++ = '\0';
160 D(2, "'%s'\n", buf);
161 buf = t;
162 ++lineno;
163 goto loop_getline;
164 }
165 if (*buf == '\t') {
166 D(2, "D: [%4zu] read trail line\n", lineno);
167 if (cp == NULL)
168 syntaxerr("expected lead line, got trail line!");
169 t = ++buf;
170 goto loop_storeline;
171 } else {
172 D(2, "D: [%4zu] read head line (%02X)\n", lineno, *buf);
173 }
174 if (cp != NULL) {
175 --lineno;
176 goto process_line;
177 }
178 if ((*buf >= 'A' && *buf <= 'Z') ||
179 (*buf >= 'a' && *buf <= 'z') || *buf == '_')
180 t = buf;
181 else
182 syntaxerr("line must begin with a letter or an underscore!");
183 loop_storeline:
184 while (*t++ != '\n')
185 ;
186 t = str_nsave(buf, (tp = t) - buf);
187 buf = tp;
188 if (cp != NULL) {
189 if (*(tp = cp + strlen(cp) - 2) == '\\')
190 *tp = '\0';
191 }
192 tp = t + strlen(t) - 1;
193 *tp = '\0';
194 D(2, "D: [%4zu] storing string '%s'\n", lineno, t);
195 *tp = '\n';
196 tp = str_add(cp, t);
197 free(t);
198 cp = tp;
199 ++lineno;
200 goto loop_getline;
201 process_line:
202 /* cp points to <line>[<nl><line>…][\]<nl> */
203 /* buf points to first byte of next line */
204 if (*(tp = cp + strlen(cp) - 2) == '\\')
205 syntaxerr("expected trail line, got lead line!");
206 process_lastline:
207 /* cut off final newline */
208 *++tp = '\0';
209 D(2, "D: [%4zu] processing «%s»\n", lineno, cp);
210
211 /* parse the meat out of there */
212 if ((tp = strchr(cp, '\t')) == NULL)
213 syntaxerr("expected keyword + tab + value!");
214 *tp++ = '\0';
215 /* cp points to keyword, tp points to value */
216 entry_multi = NULL;
217 entry_iter = 0;
218 entry_type = KWT_INVALID;
219 for (kwp = kws; kwp->kwprefix != NULL; ++kwp) {
220 char *np;
221
222 /* exact match? */
223 if (!strcasecmp(cp, kwp->kwprefix)) {
224 /* yep */
225 entry_type = KWT_NORMAL;
226 break;
227 }
228 /* prefix match allowed? */
229 if (kwp->kwtype == KWT_NORMAL)
230 /* nope */ continue;
231 /* prefix match? */
232 if (strncasecmp(cp, kwp->kwprefix, n = strlen(kwp->kwprefix)))
233 /* nope */ continue;
234 if (cp[n] != '_')
235 /* same */ continue;
236 /* okay, we got a prefix match, get args */
237 np = cp + n + 1;
238 entry_type = kwp->kwtype;
239 if (kwp->kwtype == KWT_ITERATED ||
240 (kwp->kwtype == KWT_MULTITOP &&
241 (*np >= '0' && *np <= '9')) ||
242 kwp->kwtype == KWT_MULTITER) {
243 char *zp = np;
244
245 if (zp[0] == '0' && (zp[1] == 'x' || zp[1] == 'X'))
246 zp += 2;
247 while (*zp >= '0' && *zp <= '9')
248 ++zp;
249 if (zp == np)
250 syntaxerr("iterator expected");
251 if (*zp != (char)(kwp->kwtype == KWT_ITERATED ?
252 '\0' : '_'))
253 syntaxerr("%s expected, got 0x%02X",
254 kwp->kwtype == KWT_ITERATED ?
255 "tab" : "underscore", *zp);
256 *zp++ = '\0';
257 entry_iter = (unsigned)strtoul(np, NULL, 0);
258 np = zp;
259 if (kwp->kwtype == KWT_MULTITOP)
260 entry_type = KWT_MULTITER;
261 }
262 if (kwp->kwtype == KWT_MULTITOP)
263 entry_type = KWT_MULTI;
264 if (kwp->kwtype == KWT_MULTI ||
265 kwp->kwtype == KWT_MULTITOP ||
266 kwp->kwtype == KWT_MULTITER)
267 entry_multi = str_save(np);
268 /* values filled out */
269 break;
270 }
271 if (kwp->kwprefix == NULL)
272 errx(1, "unknown keyword '%s'", cp);
273 if (entry_type == KWT_INVALID)
274 syntaxerr("internal error: invalid entry type");
275 entry = parser_new(kwp->kwnum, entry_type, entry_multi, entry_iter,
276 str_save(tp));
277 CIRCLEQ_INSERT_TAIL(res, entry, e);
278 free(cp);
279 ++lineno;
280 goto loop_newline;
281 loop_eof:
282 if (cp != NULL) {
283 if (*(tp = cp + strlen(cp) - 2) == '\\')
284 syntaxerr("expected trail line, read end of file!");
285 goto process_lastline;
286 }
287 free(buf_base);
288 return (res);
289 }
290
291 const struct parser_keywords *
292 parser_getkwbynum(parser_kwords num, const struct parser_keywords *kws)
293 {
294 const struct parser_keywords *kwp;
295
296 for (kwp = kws; kwp->kwprefix != NULL; ++kwp)
297 if (kwp->kwnum == num)
298 break;
299
300 return (kwp->kwprefix == NULL ? NULL : kwp);
301 }
302
303 void
304 parser_dump(struct parser_res *entry, const struct parser_keywords *kws)
305 {
306 const struct parser_keywords *kwp;
307
308 kwp = parser_getkwbynum(entry->keyword, kws);
309 if (kwp == NULL)
310 fputs("keyword <unknown> (type invalid)", stdout);
311 else {
312 printf("keyword %s (type %s", /*)*/ kwp->kwprefix,
313 entry->itype == KWT_NORMAL ? "normal" :
314 entry->itype == KWT_MULTI ? "multi" :
315 entry->itype == KWT_ITERATED ? "iterated" :
316 entry->itype == KWT_MULTITER ? "multiter" : "unknown");
317 if (kwp->kwtype != entry->itype)
318 printf(" orig %s",
319 kwp->kwtype == KWT_NORMAL ? "normal" :
320 kwp->kwtype == KWT_MULTI ? "multi" :
321 kwp->kwtype == KWT_ITERATED ? "iterated" :
322 kwp->kwtype == KWT_MULTITER ? "multiter" :
323 kwp->kwtype == KWT_MULTITOP ? "multitop" :
324 "unknown");
325 fputc(/*(*/ ')', stdout);
326 if (entry->itype == KWT_ITERATED ||
327 entry->itype == KWT_MULTITER)
328 printf(", iterator %u", entry->kw_iter);
329 if (entry->itype == KWT_MULTI ||
330 entry->itype == KWT_MULTITER) {
331 if (entry->kw_multi)
332 printf(", multi '%s'", entry->kw_multi);
333 else
334 fputs(", multibase", stdout);
335 }
336 }
337 if (entry->value) {
338 const uint8_t *cp = entry->value;
339
340 fputs(", value\n\t『", stdout);
341 while (*cp) {
342 while (*cp && *cp != '\n')
343 fputc(*cp++, stdout);
344 fputs(*cp ? (cp++, "\n\t ") : "』\n", stdout);
345 }
346 } else
347 fputs(", no value\n", stdout);
348 }
349
350 void
351 parser_free(struct parser_result *head)
352 {
353 struct parser_res *entry;
354
355 if (head == NULL)
356 return;
357
358 while (!CIRCLEQ_EMPTY(head)) {
359 entry = CIRCLEQ_FIRST(head);
360 CIRCLEQ_REMOVE(head, entry, e);
361 if (entry->kw_multi != NULL)
362 free(entry->kw_multi);
363 if (entry->value != NULL)
364 free(entry->value);
365 free(entry);
366 }
367
368 free(head);
369 }
370
371 static void
372 syntaxerr_(size_t lno, const char *fmt, ...)
373 {
374 va_list args;
375
376 va_start(args, fmt);
377 fflush(NULL);
378 fprintf(stderr, "syntax error [%s%s%4zu]: ",
379 parser_errpfx ? parser_errpfx : "",
380 parser_errpfx ? ":" : "", lno);
381 fflush(NULL);
382 verrx(1, fmt, args);
383 va_end(args);
384 }
385
386 struct parser_res *
387 parser_new(parser_kwords entry_kw, enum parser_kwtype entry_type,
388 char *entry_multi, unsigned entry_iter, char *value)
389 {
390 struct parser_res *entry;
391
392 entry = xmalloc(sizeof (struct parser_res));
393 bzero(entry, sizeof (struct parser_res));
394 entry->keyword = entry_kw;
395 entry->itype = entry_type;
396 entry->kw_multi = entry_multi;
397 entry->kw_iter = entry_iter;
398 entry->value = value;
399 return (entry);
400 }

root@freewrt.org:443
ViewVC Help
Powered by ViewVC 1.1.20