GCC Code Coverage Report

Directory:	./
File:	src/string.c
Date:	2021-09-04 00:13:15

	Exec	Total	Coverage
Lines:	166	168	98.8%
Branches:	81	87	93.1%

Line	Branch	Exec	Source
1			/***************************************************************************/ /**
2
3			@file string.c
4
5			@author Stephen Brennan
6
7			@date Created Tuesday, 8 December 2015
8
9			@brief Parsing strings.
10
11			@copyright Copyright (c) 2015, Stephen Brennan. Released under the
12			Revised BSD License. See LICENSE.txt for details.
13
14			This file contains the string parser. It is designed to be independent of
15			what you're parsing the string for. That is, it can be used in all these
16			situations:
17
18			- Recognizing string tokens when doing the initial tokenizing.
19			- Comparing string tokens against other strings.
20			- Loading string tokens into actual strings.
21
22			*******************************************************************************/
23
24			#include <assert.h>
25			#include <stdbool.h>
26
27			#include "json_private.h"
28			#include "nosj.h"
29
30			/*******************************************************************************
31
32			Private Declarations for the Parser
33
34			*******************************************************************************/
35
36			// forward declaration of struct parser_arg
37			struct parser_arg;
38
39			/**
40			@brief A function that is called for every parsed character.
41			@param a The parser arguments. Mostly for reference.
42			@param out The next parsed character in the string.
43			@param data Any data the setter might need.
44			*/
45			typedef void (*output_setter)(struct parser_arg *a, char out, void *data);
46
47			/**
48			@brief States of the parser.
49			*/
50			enum parser_st { START, INSTRING, ESCAPE, END, UESC0, UESC1, UESC2, UESC3 };
51
52			/**
53			@brief All the variables the parser needs to do its job.
54			*/
55			struct parser_arg {
56			/**
57			@brief The state of the parser.
58			*/
59			enum parser_st state;
60			/**
61			@brief Input text.
62			*/
63			const char *text;
64			/**
65			@brief Current index of the text we're parsing.
66			*/
67			size_t textidx;
68			/**
69			@brief Function to call for every character we parse.
70			*/
71			output_setter setter;
72			/**
73			@brief Argument to go to the output setting function.
74			*/
75			void *setter_arg;
76			/**
77			@brief Index in which to put the next output character.
78			*/
79			size_t outidx;
80			/**
81			@brief Previously parsed unicode escape character.
82
83			This is used due to the fact that JSON only does 2-byte Unicode
84			escapes. In order to escape characters beyond the BMP (besides just
85			putting them in literally), you have to do the UTF-16 surrogate pair.
86			What a pain.
87			*/
88			wchar_t prev;
89			/**
90			@brief Unicode escape character we are currently parsing.
91			*/
92			wchar_t curr;
93			/**
94			@brief Any error we want to report.
95			*/
96			enum json_error error;
97			};
98
99			/*******************************************************************************
100
101			Helper Functions
102
103			*******************************************************************************/
104
105			/**
106			@brief Return true if c is a valid character to come after a backslash.
107			*/
108		69	static char json_escape(char c)
109			{
110	7/7 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 3 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 3 times. ✓ Branch 4 taken 3 times. ✓ Branch 5 taken 3 times. ✓ Branch 6 taken 40 times.	69	switch (c) {
111		14	case '\"':
112			case '\\':
113			case '/':
114		14	return c;
115		3	case 'b':
116		3	return '\b';
117		3	case 'f':
118		3	return '\f';
119		3	case 'n':
120		3	return '\n';
121		3	case 'r':
122		3	return '\r';
123		3	case 't':
124		3	return '\t';
125		40	default:
126		40	return '\0';
127			}
128			}
129
130			/**
131			@brief Return true if c is a valid hexadecimal digit for JSON.
132
133			Although there is an iswxdigit function in the C standard library, it allows
134			for other hexadecimal other than just 0-9, a-f, A-F (depending on locale).
135			The JSON spec explicitly states that these are the only hex characters it
136			accepts, so I've written my own to explicitly cover only those.
137			*/
138		214	static unsigned char json_xdigit(char c)
139			{
140	4/4 ✓ Branch 0 taken 213 times. ✓ Branch 1 taken 1 times. ✓ Branch 2 taken 130 times. ✓ Branch 3 taken 83 times.	214	if ('0' <= c && c <= '9') {
141		130	return (unsigned char)(c - '0');
142	3/4 ✓ Branch 0 taken 20 times. ✓ Branch 1 taken 64 times. ✓ Branch 2 taken 20 times. ✗ Branch 3 not taken.	84	} else if ('a' <= c && c <= 'f') {
143		20	return (unsigned char)(10 + c - 'a');
144	4/4 ✓ Branch 0 taken 63 times. ✓ Branch 1 taken 1 times. ✓ Branch 2 taken 62 times. ✓ Branch 3 taken 1 times.	64	} else if ('A' <= c && c <= 'F') {
145		62	return (unsigned char)(10 + c - 'A');
146			} else {
147		2	return 0xFF;
148			}
149			}
150
151			/**
152			@brief Register the output character.
153			@param a Parser data.
154			@param out The output character.
155			@param from_uesc Whether this came from a unicode escape
156
157			The nosj approach to JSON is: all data is UTF-8. Unfortunately, JSON can
158			contain Unicode escape sequences, which we have to manually translate into
159			valid UTF-8 here. However, if we translated *all* bytes into UTF-8 naively,
160			then we'd end up botching valid UTF-8 multi-byte sequences which already
161			exist. So, when from_uesc is true, we treat the output as a potential
162			multibyte sequence to translate to UTF-8. When from_uesc is false, we treat
163			it as a byte.
164			*/
165		5842	static void set_output(struct parser_arg *a, wchar_t out, bool from_uesc)
166			{
167			// don't forget to flush the "buffered" potential surrogate pair
168			char bytes[4];
169		5842	int nbytes = 0;
170			int i;
171	3/4 ✓ Branch 0 taken 5841 times. ✓ Branch 1 taken 1 times. ✗ Branch 2 not taken. ✓ Branch 3 taken 5841 times.	5842	if (a->prev != 0 || out > 0x1FFFFF) {
172		1	a->state = END;
173		1	a->error = JSONERR_INVALID_SURROGATE;
174		1	return;
175			}
176	2/2 ✓ Branch 0 taken 5824 times. ✓ Branch 1 taken 17 times.	5841	if (!from_uesc) {
177		5824	bytes[0] = out & 0xFF;
178		5824	nbytes = 1;
179	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 13 times.	17	} else if (out > 0xFFFF) {
180		4	bytes[0] = ((out >> 18) & 0x7) | 0xF0;
181		4	nbytes = 4;
182	2/2 ✓ Branch 0 taken 7 times. ✓ Branch 1 taken 6 times.	13	} else if (out > 0x7FF) {
183		7	bytes[0] = ((out >> 12) & 0xF) | 0xE0;
184		7	nbytes = 3;
185	2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 4 times.	6	} else if (out > 0x7F) {
186		2	bytes[0] = ((out >> 6) & 0x1F) | 0xC0;
187		2	nbytes = 2;
188			} else {
189		4	bytes[0] = out & 0x7F;
190		4	nbytes = 1;
191			}
192	2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 5841 times.	5869	for (i = nbytes - 1; i > 0; i--) {
193		28	bytes[i] = (out & 0x3F) | 0x80;
194		28	out >>= 6;
195			}
196	2/2 ✓ Branch 0 taken 5869 times. ✓ Branch 1 taken 5841 times.	11710	for (i = 0; i < nbytes; i++) {
197	2/2 ✓ Branch 0 taken 2095 times. ✓ Branch 1 taken 3774 times.	5869	if (a->setter)
198		2095	a->setter(a, bytes[i], a->setter_arg);
199		5869	a->outidx++;
200			}
201			}
202
203		1080	static void set_state(struct parser_arg *a, enum parser_st state)
204			{
205	2/2 ✓ Branch 0 taken 1079 times. ✓ Branch 1 taken 1 times.	1080	if (a->state != END) {
206		1079	a->state = state;
207			}
208		1080	}
209
210			/*******************************************************************************
211
212			Parser Functions
213
214			*******************************************************************************/
215
216			/**
217			@brief Called by the parser when it is in the START state.
218			@param a Parser data.
219			@param wc Character.
220			*/
221		469	static void json_string_start(struct parser_arg *a, char wc)
222			{
223	2/2 ✓ Branch 0 taken 462 times. ✓ Branch 1 taken 7 times.	469	if (wc == '"') {
224		462	set_state(a, INSTRING);
225			} else {
226		7	set_state(a, END);
227		7	a->error = JSONERR_UNEXPECTED_TOKEN;
228		7	a->textidx--;
229			}
230		469	}
231
232			/**
233			@brief Called by the parser when it is in the INSTRING state.
234			@param a Parser data.
235			@param wc Character.
236			*/
237		6310	static void json_string_instring(struct parser_arg *a, char wc)
238			{
239	2/2 ✓ Branch 0 taken 69 times. ✓ Branch 1 taken 6241 times.	6310	if (wc == '\\') {
240		69	set_state(a, ESCAPE);
241	2/2 ✓ Branch 0 taken 444 times. ✓ Branch 1 taken 5797 times.	6241	} else if (wc == '"') {
242		444	set_state(a, END);
243	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 5796 times.	5797	} else if (wc == '\0') {
244		1	set_state(a, END);
245		1	a->error = JSONERR_PREMATURE_EOF;
246		1	a->textidx--;
247			} else {
248		5796	set_output(a, wc, false);
249			}
250		6310	}
251
252			/**
253			@brief Called by the parser when it is in the ESCAPE state.
254			@param a Parser data.
255			@param wc Character.
256			*/
257		69	static void json_string_escape(struct parser_arg *a, char wc)
258			{
259		69	char esc = json_escape(wc);
260	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 68 times.	69	if (wc == '\0') {
261		1	set_state(a, END);
262		1	a->error = JSONERR_PREMATURE_EOF;
263		1	a->textidx--;
264	2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 40 times.	68	} else if (wc == 'u') {
265		28	set_state(a, UESC0);
266	2/2 ✓ Branch 0 taken 29 times. ✓ Branch 1 taken 11 times.	40	} else if (esc != '\0') {
267		29	set_state(a, INSTRING);
268		29	set_output(a, esc, false);
269			} else {
270		11	set_state(a, END);
271		11	a->error = JSONERR_UNEXPECTED_TOKEN;
272		11	a->textidx--;
273			}
274		69	}
275
276			/**
277			@brief Called by the parser when it is in one of the UESC states.
278			@param a Parser data.
279			@param wc Character.
280			*/
281		109	static void json_string_uesc(struct parser_arg *a, char wc)
282			{
283	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 108 times.	109	if (wc == '\0') {
284		1	set_state(a, END);
285		1	a->error = JSONERR_PREMATURE_EOF;
286		1	a->textidx--;
287	2/2 ✓ Branch 1 taken 2 times. ✓ Branch 2 taken 106 times.	108	} else if (json_xdigit(wc) == 0xFF) {
288		2	set_state(a, END);
289		2	a->error = JSONERR_UNEXPECTED_TOKEN;
290		2	a->textidx--;
291			} else {
292		106	a->curr = a->curr << 4;
293		106	a->curr |= json_xdigit(wc);
294	2/2 ✓ Branch 0 taken 81 times. ✓ Branch 1 taken 25 times.	106	if (a->state < UESC3) {
295			// continue reading all the input
296		81	a->state += 1;
297			} else {
298			// time to "publish" our unicode escape
299	2/2 ✓ Branch 0 taken 20 times. ✓ Branch 1 taken 5 times.	25	if (a->prev == 0) {
300			// if there was no "prev", that means this might
301			// be the start of a surrogate pair. Check for
302			// that!
303	3/4 ✓ Branch 0 taken 7 times. ✓ Branch 1 taken 13 times. ✓ Branch 2 taken 7 times. ✗ Branch 3 not taken.	20	if (0xD800 <= a->curr && a->curr <= 0xDFFF) {
304			// yup, it's a surrogate pair!
305		7	a->prev = a->curr;
306			} else {
307			// nope, keep going
308		13	set_output(a, a->curr, true);
309			}
310			} else {
311			// there was a previous starting surrogate
312	3/4 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 1 times. ✓ Branch 2 taken 4 times. ✗ Branch 3 not taken.	5	if (0xD800 <= a->curr && a->curr <= 0xDFFF) {
313			// and this is also a surrogate
314		4	a->curr &= 0x03FF; // clear upper bits;
315			// keep lower 10
316		4	a->curr |= (a->prev & 0x03FF) << 10;
317		4	a->curr +=
318			0x10000; // apparently this
319			// needs to happen (?)
320		4	a->prev = 0;
321		4	set_output(a, a->curr, true);
322			} else {
323			// not a legal surrogate to match
324			// previous surrogate.
325		1	a->state = END;
326		1	a->error = JSONERR_INVALID_SURROGATE;
327			}
328			}
329		25	set_state(a, INSTRING);
330		25	a->curr = 0;
331			}
332			}
333		109	}
334
335			/**
336			@brief Parses JSON strings, in a very generic manner.
337			@param text Input text.
338			@param idx Starting index of the string.
339			@param setter Function to call with each character.
340			@param setarg Argument to give to the setter function.
341			*/
342		469	static struct parser_arg json_string(const char *text, size_t idx,
343			output_setter setter, void *setarg)
344			{
345			char wc;
346		469	struct parser_arg a = { .state = START,
347			.text = text,
348			.textidx = idx,
349			.outidx = 0,
350			.setter = setter,
351			.setter_arg = setarg,
352			.prev = 0,
353			.curr = 0,
354			.error = JSONERR_NO_ERROR };
355
356	2/2 ✓ Branch 0 taken 6957 times. ✓ Branch 1 taken 469 times.	7426	while (a.state != END) {
357		6957	wc = a.text[a.textidx];
358	4/6 ✓ Branch 0 taken 469 times. ✓ Branch 1 taken 6310 times. ✓ Branch 2 taken 69 times. ✓ Branch 3 taken 109 times. ✗ Branch 4 not taken. ✗ Branch 5 not taken.	6957	switch (a.state) {
359		469	case START:
360		469	json_string_start(&a, wc);
361		469	break;
362		6310	case INSTRING:
363		6310	json_string_instring(&a, wc);
364		6310	break;
365		69	case ESCAPE:
366		69	json_string_escape(&a, wc);
367		69	break;
368		109	case UESC0:
369			case UESC1:
370			case UESC2:
371			case UESC3:
372		109	json_string_uesc(&a, wc);
373		109	break;
374		✗	case END:
375			// never happens
376		✗	assert(false);
377			break;
378			}
379		6957	a.textidx++;
380			}
381	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 466 times.	469	if (a.prev != 0) {
382		3	a.error = JSONERR_INVALID_SURROGATE;
383			}
384		469	return a;
385			}
386
387			/*******************************************************************************
388
389			Application-Specific Parsers
390
391			*******************************************************************************/
392
393			/**
394			@brief Parse a string literal.
395			@param text The text we're parsing.
396			@param arr The token buffer.
397			@param maxtoken The length of the token buffer.
398			@param p The parser state.
399			@returns Parser state after parsing the string.
400			*/
401		276	struct json_parser json_parse_string(char *text, struct json_token *arr,
402			size_t maxtoken, struct json_parser p)
403			{
404			struct json_token tok;
405			struct parser_arg a;
406
407		276	tok.type = JSON_STRING;
408		276	tok.start = p.textidx;
409
410		276	a = json_string(text, p.textidx, NULL, NULL);
411
412		276	tok.end = a.textidx - 1;
413		276	tok.child = 0;
414		276	tok.next = 0;
415		276	tok.length = a.outidx;
416		276	json_settoken(arr, tok, p, maxtoken);
417
418		276	p.error = a.error;
419		276	p.tokenidx++;
420		276	p.textidx = a.textidx;
421		276	return p;
422			}
423
424			/**
425			@brief Argument passed to setter when we are doing json_string_match().
426			*/
427			struct string_compare_arg {
428			/**
429			@brief String we're comparing to.
430			*/
431			const char *other;
432			/**
433			@brief Whether or not the string has evaluated to equal so far.
434			*/
435			bool equal;
436			};
437
438			/**
439			@brief This is the "setter" function for json_string_match().
440			@param a Parser arguments.
441			@param wc Character to set.
442			@param arg The struct string_compare_arg.
443
444			This function just compares each output character to the corresponding
445			character in the other string. It stores the result in the arg, which will
446			be examined after the fact.
447			*/
448		2049	static void json_string_comparator(struct parser_arg *a, char wc, void *arg)
449			{
450		2049	struct string_compare_arg *ca = arg;
451			// we are depending on short-circuit evaluation here :)
452	4/4 ✓ Branch 0 taken 359 times. ✓ Branch 1 taken 1690 times. ✓ Branch 2 taken 204 times. ✓ Branch 3 taken 155 times.	2049	ca->equal = ca->equal && (wc == ca->other[a->outidx]);
453		2049	}
454
455		187	bool json_string_match(const char *json, const struct json_token *tokens,
456			size_t index, const char *other)
457			{
458		187	struct string_compare_arg ca = {
459			.other = other,
460			.equal = true,
461			};
462		187	struct parser_arg pa = json_string(json, tokens[index].start,
463			&json_string_comparator, &ca);
464
465			// They are equal if every previous character matches, and the next
466			// character in the other string is the null character, signifying the
467			// end.
468	4/4 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 155 times. ✓ Branch 2 taken 31 times. ✓ Branch 3 taken 1 times.	187	return ca.equal && (other[pa.outidx] == '\0');
469			}
470
471			/**
472			@brief This is the "setter" function for json_string_match().
473			@param a Parser arguments.
474			@param wc Character to set.
475			@param arg The struct string_compare_arg.
476
477			This function just compares each output character to the corresponding
478			character in the other string. It stores the result in the arg, which will
479			be examined after the fact.
480			*/
481		46	static void json_string_loader(struct parser_arg *a, char wc, void *arg)
482			{
483		46	char *str = arg;
484			// we are depending on short-circuit evaluation here :)
485		46	str[a->outidx] = wc;
486		46	}
487
488		6	void json_string_load(const char *json, const struct json_token *tokens,
489			size_t index, char *buffer)
490			{
491		6	struct parser_arg pa = json_string(json, tokens[index].start,
492			&json_string_loader, buffer);
493
494		6	buffer[pa.outidx] = '\0';
495		6	}
496

1

/***************************************************************************/ /**

@file string.c

@author Stephen Brennan

6

7

@date Created Tuesday, 8 December 2015

8

9

@brief Parsing strings.

10

11

12

Revised BSD License. See LICENSE.txt for details.

13

14

This file contains the string parser. It is designed to be independent of

15

what you're parsing the string for. That is, it can be used in all these

16

situations:

17

18

- Recognizing string tokens when doing the initial tokenizing.

19

- Comparing string tokens against other strings.

20

- Loading string tokens into actual strings.

21

22

*******************************************************************************/

#include <assert.h>

#include <stdbool.h>

#include "json_private.h"

28

#include "nosj.h"

29

30

/*******************************************************************************

31

32

Private Declarations for the Parser

33

34

*******************************************************************************/

35

36

// forward declaration of struct parser_arg

struct parser_arg;

/**

@brief A function that is called for every parsed character.

41

@param a The parser arguments. Mostly for reference.

42

@param out The next parsed character in the string.

43

@param data Any data the setter might need.

44

*/

45

typedef void (*output_setter)(struct parser_arg *a, char out, void *data);

46

47

/**

48

@brief States of the parser.

49

*/

50

enum parser_st { START, INSTRING, ESCAPE, END, UESC0, UESC1, UESC2, UESC3 };

51

52

/**

53

@brief All the variables the parser needs to do its job.

*/

struct parser_arg {

/**

@brief The state of the parser.

58

*/

59

enum parser_st state;

/**

@brief Input text.

*/

const char *text;

/**

@brief Current index of the text we're parsing.

*/

size_t textidx;

/**

@brief Function to call for every character we parse.

70

*/

71

output_setter setter;

72

/**

73

@brief Argument to go to the output setting function.

*/

void *setter_arg;

/**

@brief Index in which to put the next output character.

*/

size_t outidx;

/**

@brief Previously parsed unicode escape character.

82

83

This is used due to the fact that JSON only does 2-byte Unicode

84

escapes. In order to escape characters beyond the BMP (besides just

85

putting them in literally), you have to do the UTF-16 surrogate pair.

What a pain.

*/

wchar_t prev;

/**

@brief Unicode escape character we are currently parsing.

*/

wchar_t curr;

/**

@brief Any error we want to report.

95

*/

96

enum json_error error;

97

};

98

99

/*******************************************************************************

Helper Functions

*******************************************************************************/

104

105

/**

106

@brief Return true if c is a valid character to come after a backslash.

107

*/

108

69

static char json_escape(char c)

109

{

110

7/7

✓ Branch 0 taken 14 times.

✓ Branch 1 taken 3 times.

✓ Branch 2 taken 3 times.

✓ Branch 3 taken 3 times.

✓ Branch 4 taken 3 times.

✓ Branch 5 taken 3 times.

✓ Branch 6 taken 40 times.

69

switch (c) {

111

14

case '\"':

112

case '\\':

113

case '/':

114

14

return c;

115

3

case 'b':

116

3

return '\b';

117

3

case 'f':

118

3

return '\f';

119

3

case 'n':

120

3

return '\n';

121

3

case 'r':

122

3

return '\r';

123

3

case 't':

124

3

return '\t';

125

40

default:

126

40

return '\0';

}

}

/**

@brief Return true if c is a valid hexadecimal digit for JSON.

132

133

Although there is an iswxdigit function in the C standard library, it allows

134

for other hexadecimal other than just 0-9, a-f, A-F (depending on locale).

135

The JSON spec explicitly states that these are the only hex characters it

136

accepts, so I've written my own to explicitly cover only those.

137

*/

138

214

static unsigned char json_xdigit(char c)

139

{

140

4/4

✓ Branch 0 taken 213 times.

✓ Branch 1 taken 1 times.

✓ Branch 2 taken 130 times.

✓ Branch 3 taken 83 times.

214

if ('0' <= c && c <= '9') {

141

130

return (unsigned char)(c - '0');

142

3/4

✓ Branch 0 taken 20 times.

✓ Branch 1 taken 64 times.

✓ Branch 2 taken 20 times.

✗ Branch 3 not taken.

84

} else if ('a' <= c && c <= 'f') {

143

20

return (unsigned char)(10 + c - 'a');

144

4/4

✓ Branch 0 taken 63 times.

✓ Branch 1 taken 1 times.

✓ Branch 2 taken 62 times.

✓ Branch 3 taken 1 times.

64

} else if ('A' <= c && c <= 'F') {

145

62

return (unsigned char)(10 + c - 'A');

146

} else {

147

2

return 0xFF;

}

}

/**

@brief Register the output character.

153

@param a Parser data.

154

@param out The output character.

155

@param from_uesc Whether this came from a unicode escape

156

157

The nosj approach to JSON is: all data is UTF-8. Unfortunately, JSON can

158

contain Unicode escape sequences, which we have to manually translate into

159

valid UTF-8 here. However, if we translated *all* bytes into UTF-8 naively,

160

then we'd end up botching valid UTF-8 multi-byte sequences which already

161

exist. So, when from_uesc is true, we treat the output as a potential

162

multibyte sequence to translate to UTF-8. When from_uesc is false, we treat

163

it as a byte.

164

*/

165

5842

static void set_output(struct parser_arg *a, wchar_t out, bool from_uesc)

166

{

167

// don't forget to flush the "buffered" potential surrogate pair

168

char bytes[4];

169

5842

int nbytes = 0;

170

int i;

171

3/4

✓ Branch 0 taken 5841 times.

✓ Branch 1 taken 1 times.

✗ Branch 2 not taken.

✓ Branch 3 taken 5841 times.

5842

if (a->prev != 0 || out > 0x1FFFFF) {

172

1

a->state = END;

173

1

a->error = JSONERR_INVALID_SURROGATE;

174

1

return;

175

}

176

2/2

✓ Branch 0 taken 5824 times.

✓ Branch 1 taken 17 times.

5841

if (!from_uesc) {

177

5824

bytes[0] = out & 0xFF;

178

5824

nbytes = 1;

179

2/2

✓ Branch 0 taken 4 times.

✓ Branch 1 taken 13 times.

17

} else if (out > 0xFFFF) {

180

4

bytes[0] = ((out >> 18) & 0x7) | 0xF0;

181

4

nbytes = 4;

182

2/2

✓ Branch 0 taken 7 times.

✓ Branch 1 taken 6 times.

13

} else if (out > 0x7FF) {

183

7

bytes[0] = ((out >> 12) & 0xF) | 0xE0;

184

7

nbytes = 3;

185

2/2

✓ Branch 0 taken 2 times.

✓ Branch 1 taken 4 times.

6

} else if (out > 0x7F) {

186

2

bytes[0] = ((out >> 6) & 0x1F) | 0xC0;

187

2

nbytes = 2;

188

} else {

189

4

bytes[0] = out & 0x7F;

190

4

nbytes = 1;

191

}

192

2/2

✓ Branch 0 taken 28 times.

✓ Branch 1 taken 5841 times.

5869

for (i = nbytes - 1; i > 0; i--) {

193

28

bytes[i] = (out & 0x3F) | 0x80;

194

28

out >>= 6;

195

}

196

2/2

✓ Branch 0 taken 5869 times.

✓ Branch 1 taken 5841 times.

11710

for (i = 0; i < nbytes; i++) {

197

2/2

✓ Branch 0 taken 2095 times.

✓ Branch 1 taken 3774 times.

5869

if (a->setter)

198

2095

a->setter(a, bytes[i], a->setter_arg);

199

5869

a->outidx++;

}

}

1080

static void set_state(struct parser_arg *a, enum parser_st state)

204

{

205

2/2

✓ Branch 0 taken 1079 times.

✓ Branch 1 taken 1 times.

1080

if (a->state != END) {

206

1079

a->state = state;

207

}

208

1080

}

209

210

/*******************************************************************************

Parser Functions

*******************************************************************************/

215

216

/**

217

@brief Called by the parser when it is in the START state.

218

@param a Parser data.

219

@param wc Character.

220

*/

221

469

static void json_string_start(struct parser_arg *a, char wc)

222

{

223

2/2

✓ Branch 0 taken 462 times.

✓ Branch 1 taken 7 times.

469

if (wc == '"') {

224

462

set_state(a, INSTRING);

225

} else {

226

7

set_state(a, END);

227

7

a->error = JSONERR_UNEXPECTED_TOKEN;

228

7

a->textidx--;

229

}

230

469

}

231

232

/**

233

@brief Called by the parser when it is in the INSTRING state.

234

@param a Parser data.

235

@param wc Character.

236

*/

237

6310

static void json_string_instring(struct parser_arg *a, char wc)

238

{

239

2/2

✓ Branch 0 taken 69 times.

✓ Branch 1 taken 6241 times.

6310

if (wc == '\\') {

240

69

set_state(a, ESCAPE);

241

2/2

✓ Branch 0 taken 444 times.

✓ Branch 1 taken 5797 times.

6241

} else if (wc == '"') {

242

444

set_state(a, END);

243

2/2

✓ Branch 0 taken 1 times.

✓ Branch 1 taken 5796 times.

5797

} else if (wc == '\0') {

244

1

set_state(a, END);

245

1

a->error = JSONERR_PREMATURE_EOF;

246

1

a->textidx--;

247

} else {

248

5796

set_output(a, wc, false);

249

}

250

6310

}

251

252

/**

253

@brief Called by the parser when it is in the ESCAPE state.

254

@param a Parser data.

255

@param wc Character.

256

*/

257

69

static void json_string_escape(struct parser_arg *a, char wc)

258

{

259

69

char esc = json_escape(wc);

260

2/2

✓ Branch 0 taken 1 times.

✓ Branch 1 taken 68 times.

69

if (wc == '\0') {

261

1

set_state(a, END);

262

1

a->error = JSONERR_PREMATURE_EOF;

263

1

a->textidx--;

264

2/2

✓ Branch 0 taken 28 times.

✓ Branch 1 taken 40 times.

68

} else if (wc == 'u') {

265

28

set_state(a, UESC0);

266

2/2

✓ Branch 0 taken 29 times.

✓ Branch 1 taken 11 times.

40

} else if (esc != '\0') {

267

29

set_state(a, INSTRING);

268

29

set_output(a, esc, false);

269

} else {

270

11

set_state(a, END);

271

11

a->error = JSONERR_UNEXPECTED_TOKEN;

272

11

a->textidx--;

273

}

274

69

}

275

276

/**

277

@brief Called by the parser when it is in one of the UESC states.

278

@param a Parser data.

279

@param wc Character.

280

*/

281

109

static void json_string_uesc(struct parser_arg *a, char wc)

282

{

283

2/2

✓ Branch 0 taken 1 times.

✓ Branch 1 taken 108 times.

109

if (wc == '\0') {

284

1

set_state(a, END);

285

1

a->error = JSONERR_PREMATURE_EOF;

286

1

a->textidx--;

287

2/2

✓ Branch 1 taken 2 times.

✓ Branch 2 taken 106 times.

108

} else if (json_xdigit(wc) == 0xFF) {

288

2

set_state(a, END);

289

2

a->error = JSONERR_UNEXPECTED_TOKEN;

290

2

a->textidx--;

291

} else {

292

106

a->curr = a->curr << 4;

293

106

a->curr |= json_xdigit(wc);

294

2/2

✓ Branch 0 taken 81 times.

✓ Branch 1 taken 25 times.

106

if (a->state < UESC3) {

295

// continue reading all the input

296

81

a->state += 1;

297

} else {

298

// time to "publish" our unicode escape

299

2/2

✓ Branch 0 taken 20 times.

✓ Branch 1 taken 5 times.

25

if (a->prev == 0) {

300

// if there was no "prev", that means this might

301

// be the start of a surrogate pair. Check for

302

// that!

303

3/4

✓ Branch 0 taken 7 times.

✓ Branch 1 taken 13 times.

✓ Branch 2 taken 7 times.

✗ Branch 3 not taken.

20

if (0xD800 <= a->curr && a->curr <= 0xDFFF) {

304

// yup, it's a surrogate pair!

305

7

a->prev = a->curr;

306

} else {

307

// nope, keep going

308

13

set_output(a, a->curr, true);

309

}

310

} else {

311

// there was a previous starting surrogate

312

3/4

✓ Branch 0 taken 4 times.

✓ Branch 1 taken 1 times.

✓ Branch 2 taken 4 times.

✗ Branch 3 not taken.

5

if (0xD800 <= a->curr && a->curr <= 0xDFFF) {

313

// and this is also a surrogate

314

4

a->curr &= 0x03FF; // clear upper bits;

315

// keep lower 10

316

4

a->curr |= (a->prev & 0x03FF) << 10;

317

4

a->curr +=

318

0x10000; // apparently this

319

// needs to happen (?)

320

4

a->prev = 0;

321

4

set_output(a, a->curr, true);

322

} else {

323

// not a legal surrogate to match

324

// previous surrogate.

325

1

a->state = END;

326

1

a->error = JSONERR_INVALID_SURROGATE;

327

}

328

}

329

25

set_state(a, INSTRING);

330

25

a->curr = 0;

331

}

332

}

333

109

}

334

335

/**

336

@brief Parses JSON strings, in a very generic manner.

337

@param text Input text.

338

@param idx Starting index of the string.

339

@param setter Function to call with each character.

340

@param setarg Argument to give to the setter function.

341

*/

342

469

static struct parser_arg json_string(const char *text, size_t idx,

343

output_setter setter, void *setarg)

344

{

345

char wc;

346

469

struct parser_arg a = { .state = START,

.text = text,

.textidx = idx,

.outidx = 0,

.setter = setter,

.setter_arg = setarg,

352

.prev = 0,

353

.curr = 0,

354

.error = JSONERR_NO_ERROR };

355

356

2/2

✓ Branch 0 taken 6957 times.

✓ Branch 1 taken 469 times.

7426

while (a.state != END) {

357

6957

wc = a.text[a.textidx];

358

4/6

✓ Branch 0 taken 469 times.

✓ Branch 1 taken 6310 times.

✓ Branch 2 taken 69 times.

✓ Branch 3 taken 109 times.

✗ Branch 4 not taken.

✗ Branch 5 not taken.

6957

switch (a.state) {

359

469

case START:

360

469

json_string_start(&a, wc);

361

469

break;

362

6310

case INSTRING:

363

6310

json_string_instring(&a, wc);

364

6310

break;

365

69

case ESCAPE:

366

69

json_string_escape(&a, wc);

367

69

break;

368

109

case UESC0:

case UESC1:

case UESC2:

case UESC3:

109

json_string_uesc(&a, wc);

373

109

break;

374

✗

case END:

375

// never happens

376

✗

assert(false);

377

break;

378

}

379

6957

a.textidx++;

380

}

381

2/2

✓ Branch 0 taken 3 times.

✓ Branch 1 taken 466 times.

469

if (a.prev != 0) {

382

3

a.error = JSONERR_INVALID_SURROGATE;

383

}

384

469

return a;

385

}

386

387

/*******************************************************************************

388

389

Application-Specific Parsers

390

391

*******************************************************************************/

392

393

/**

394

@brief Parse a string literal.

395

@param text The text we're parsing.

396

@param arr The token buffer.

397

@param maxtoken The length of the token buffer.

398

@param p The parser state.

399

@returns Parser state after parsing the string.

400

*/

401

276

struct json_parser json_parse_string(char *text, struct json_token *arr,

402

size_t maxtoken, struct json_parser p)

403

{

404

struct json_token tok;

405

struct parser_arg a;

406

407

276

tok.type = JSON_STRING;

408

276

tok.start = p.textidx;

409

410

276

a = json_string(text, p.textidx, NULL, NULL);

411

412

276

tok.end = a.textidx - 1;

413

276

tok.child = 0;

414

276

tok.next = 0;

415

276

tok.length = a.outidx;

416

276

json_settoken(arr, tok, p, maxtoken);

417

418

276

p.error = a.error;

419

276

p.tokenidx++;

420

276

p.textidx = a.textidx;

421

276

return p;

}

/**

@brief Argument passed to setter when we are doing json_string_match().

426

*/

427

struct string_compare_arg {

428

/**

429

@brief String we're comparing to.

*/

const char *other;

/**

@brief Whether or not the string has evaluated to equal so far.

*/

bool equal;

};

/**

@brief This is the "setter" function for json_string_match().

440

@param a Parser arguments.

441

@param wc Character to set.

442

@param arg The struct string_compare_arg.

443

444

This function just compares each output character to the corresponding

445

character in the other string. It stores the result in the arg, which will

446

be examined after the fact.

447

*/

448

2049

static void json_string_comparator(struct parser_arg *a, char wc, void *arg)

449

{

450

2049

struct string_compare_arg *ca = arg;

451

// we are depending on short-circuit evaluation here :)

452

4/4

✓ Branch 0 taken 359 times.

✓ Branch 1 taken 1690 times.

✓ Branch 2 taken 204 times.

✓ Branch 3 taken 155 times.

2049

ca->equal = ca->equal && (wc == ca->other[a->outidx]);

453

2049

}

454

455

187

bool json_string_match(const char *json, const struct json_token *tokens,

456

size_t index, const char *other)

457

{

458

187

struct string_compare_arg ca = {

.other = other,

.equal = true,

};

187

struct parser_arg pa = json_string(json, tokens[index].start,

463

&json_string_comparator, &ca);

464

465

// They are equal if every previous character matches, and the next

466

// character in the other string is the null character, signifying the

467

// end.

468

4/4

✓ Branch 0 taken 32 times.

✓ Branch 1 taken 155 times.

✓ Branch 2 taken 31 times.

✓ Branch 3 taken 1 times.

187

return ca.equal && (other[pa.outidx] == '\0');

}

/**

@brief This is the "setter" function for json_string_match().

473

@param a Parser arguments.

474

@param wc Character to set.

475

@param arg The struct string_compare_arg.

476

477

This function just compares each output character to the corresponding

478

character in the other string. It stores the result in the arg, which will

479

be examined after the fact.

480

*/

481

46

static void json_string_loader(struct parser_arg *a, char wc, void *arg)

482

{

483

46

char *str = arg;

484

// we are depending on short-circuit evaluation here :)

485

46

str[a->outidx] = wc;

486

46

}

487

488

6

void json_string_load(const char *json, const struct json_token *tokens,

489

size_t index, char *buffer)

490

{

491

6

struct parser_arg pa = json_string(json, tokens[index].start,

492

&json_string_loader, buffer);

493

494

6

buffer[pa.outidx] = '\0';

495

6

}

496