Idea
- Build a JSON parser in c
- Instead of using free-standing functions: attach functions to a struct and use these as methods
- make it C issue family free (segfaults, leaks, stack overflows, etc…)
- provide an ergonomic API
Usage
1#include "json.h"
2#include <stdlib.h>
3
4int main(void) {
5 struct json json = json_new(JSON({
6 "object" : {},
7 "array" : [[]],
8 "atoms" : [ "string", 0.1, true, false, null ]
9 }));
10 struct json_value json_value = json.parse(&json);
11 json_print_value(&json_value);
12 puts("");
13 json_free_value(&json_value);
14 return EXIT_SUCCESS;
15}Tip - Compiling C projects the easy way
Don’t take this as a guide for using make, in my projects I just use it as a command runner.
Compiler flags
These flags can be specific to
gcc, I use gcc (GCC) 14.2.1 20250207, so take this with a grain of salt.
I use these flags in almost every c project I ever started.
1gcc -std=c23 \
2 -O2 \
3 -Wall \
4 -Wextra \
5 -Werror \
6 -fdiagnostics-color=always \
7 -fsanitize=address,undefined \
8 -fno-common \
9 -Winit-self \
10 -Wfloat-equal \
11 -Wundef \
12 -Wshadow \
13 -Wpointer-arith \
14 -Wcast-align \
15 -Wstrict-prototypes \
16 -Wstrict-overflow=5 \
17 -Wwrite-strings \
18 -Waggregate-return \
19 -Wswitch-default \
20 -Wno-discarded-qualifiers \
21 -Wno-aggregate-return \
22 main.c| Flag | Description |
|---|---|
-std=c23 | set lang standard, i use ISO C23 |
-O2 | optimize more than -O1 |
-Wall | enable a list of warnings |
-Wextra | enable more warnings than -Wall |
-Werror | convert all warnings to errors |
-fdiagnostics-color=always | use color in diagnostics |
-fsanitize=address,undefined | enable AddressSanitizer and UndefinedBehaviorSanitizer |
-fno-common | place uninitialized global variables in the BSS section |
-Winit-self | warn about uninitialized variables that are initialized with themselves |
-Wfloat-equal | warn if floating-point values are used in equality comparisons. |
-Wundef | warn if an undefined identifier is evaluated in an #if directive |
-Wshadow | warn whenever a local variable or type declaration shadows another variable, parameter, type |
-Wpointer-arith | warn about anything that depends on the “size of” a function type or of void |
-Wcast-align | warn whenever a pointer is cast such that the required alignment of the target is increased. |
-Wstrict-prototypes | warn if a function is declared or defined without specifying the argument types |
-Wstrict-overflow=5 | warns about cases where the compiler optimizes based on the assumption that signed overflow does not occur |
-Wwrite-strings | give string constants the type const char[length], warns on copy into non const char* |
-Wswitch-default | warn whenever a switch statement does not have a default case |
-Wno-discarded-qualifiers | do not warn if type qualifiers on pointers are being discarded. |
-Wno-aggregate-return | do not warn if any functions that return structures or unions are defined or called. |
Sourcing source files
I generally keep my header and source files in the same directory as the
makefile, so i use find to find them:
1shell find . -name "*.c"Make and Makefiles
I don’t define the
build target as .PHONY because I generally never have a build directory.
Putting it all together as a makefile:
1CFLAGS := -std=c23 \
2 -O2 \
3 -Wall \
4 -Wextra \
5 -Werror \
6 -fdiagnostics-color=always \
7 -fsanitize=address,undefined \
8 -fno-common \
9 -Winit-self \
10 -Wfloat-equal \
11 -Wundef \
12 -Wshadow \
13 -Wpointer-arith \
14 -Wcast-align \
15 -Wstrict-prototypes \
16 -Wstrict-overflow=5 \
17 -Wwrite-strings \
18 -Waggregate-return \
19 -Wcast-qual \
20 -Wswitch-default \
21 -Wno-discarded-qualifiers \
22 -Wno-aggregate-return
23
24FILES := $(shell find . -name "*.c")
25
26build:
27 $(CC) $(CFLAGS) $(FILES) -o jsoninc
Variadic macros to write inline raw JSON
This doesn’t really deserve its own section, but I use #<expression> to
stringify C expressions in conjunction with __VA_ARGS__:
1#define JSON(...) #__VA_ARGS__To enable:
1char *raw_json = JSON({ "array" : [ [], {}] });Inlines to:
1char *raw_json = "{ \"array\" : [ [], {}] }";
Representing JSON values in memory
I need a structure to hold a parsed JSON value, their types and their values.
Types of JSON values
JSON can be either one of:
- null
- true
- false
- number
- string
- array
- object
In C i use an enum to represent this:
1// json.h
2enum json_type {
3 json_number,
4 json_string,
5 json_boolean,
6 json_null,
7 json_object,
8 json_array,
9};
10
11extern char *json_type_map[];And i use json_type_map to map all json_type values to their char* representation:
1char *json_type_map[] = {
2 [json_number] = "json_number", [json_string] = "json_string",
3 [json_boolean] = "json_boolean", [json_null] = "json_null",
4 [json_object] = "json_object", [json_array] = "json_array",
5};json_value & unions for atoms, array elements or object values and object keys
The json_value struct holds the type, as defined above, a union sharing
memory space for either a boolean, a string or a number, a list of json_value
structures as array children or object values, a list of strings that are
object keys and the length for the three aforementioned fields.
1struct json_value {
2 enum json_type type;
3 union {
4 bool boolean;
5 char *string;
6 double number;
7 } value;
8 struct json_value *values;
9 char **object_keys;
10 size_t length;
11};Tearing values down
Since some of the fields in json_value are heap allocated, we have to destroy
/ free the structure upon either no longer using it or exiting the process.
json_free_value does exactly this:
1void json_free_value(struct json_value *json_value) {
2 switch (json_value->type) {
3 case json_string:
4 free(json_value->value.string);
5 break;
6 case json_object:
7 for (size_t i = 0; i < json_value->length; i++) {
8 free(json_value->object_keys[i]);
9 json_free_value(&json_value->values[i]);
10 }
11 if (json_value->object_keys != NULL) {
12 free(json_value->object_keys);
13 json_value->object_keys = NULL;
14 }
15 if (json_value->values != NULL) {
16 free(json_value->values);
17 json_value->values = NULL;
18 }
19 break;
20 case json_array:
21 for (size_t i = 0; i < json_value->length; i++) {
22 json_free_value(&json_value->values[i]);
23 }
24 if (json_value->values != NULL) {
25 free(json_value->values);
26 json_value->values = NULL;
27 }
28 break;
29 case json_number:
30 case json_boolean:
31 case json_null:
32 default:
33 break;
34 }
35 json_value->type = json_null;
36}As simple as that, we ignore stack allocated JSON value variants, such as
json_number, json_boolean and json_null, while freeing allocated memory
space for json_string, each json_array child and json_object keys and
values.
Printing json_values
Only a memory representation and no way to inspect it has no value to us, thus
I dumped print_json_value into main.c:
1void print_json_value(struct json_value *json_value) {
2 switch (json_value->type) {
3 case json_null:
4 printf("null");
5 break;
6 case json_number:
7 printf("%f", json_value->value.number);
8 break;
9 case json_string:
10 printf("\"%s\"", json_value->value.string);
11 break;
12 case json_boolean:
13 printf(json_value->value.boolean ? "true" : "false");
14 break;
15 case json_object:
16 printf("{");
17 for (size_t i = 0; i < json_value->length; i++) {
18 printf("\"%s\": ", json_value->object_keys[i]);
19 print_json_value(&json_value->values[i]);
20 if (i < json_value->length - 1) {
21 printf(", ");
22 }
23 }
24 printf("}");
25 break;
26 case json_array:
27 printf("[");
28 for (size_t i = 0; i < json_value->length; i++) {
29 print_json_value(&json_value->values[i]);
30 if (i < json_value->length - 1) {
31 printf(", ");
32 }
33 }
34 printf("]");
35 break;
36 default:
37 ASSERT(0, "Unimplemented json_value case");
38 break;
39 }
40}Calling this function:
1int main(void) {
2 struct json_value json_value = {
3 .type = json_array,
4 .length = 4,
5 .values =
6 (struct json_value[]){
7 (struct json_value){.type = json_string, .value.string = "hi"},
8 (struct json_value){.type = json_number, .value.number = 161},
9 (struct json_value){
10 .type = json_object,
11 .length = 1,
12 .object_keys =
13 (char *[]){
14 "key",
15 },
16 .values =
17 (struct json_value[]){
18 (struct json_value){.type = json_string,
19 .value.string = "value"},
20 },
21 },
22 (struct json_value){.type = json_null},
23 },
24 };
25 json_print_value(&json_value);
26 puts("");
27 return EXIT_SUCCESS;
28}Results in:
1["hi", 161.000000, {"key": "value"}, null]json Parser struct, Function pointers and how to use them (they suck)
As contrary as it sounds, one can attach functions to structures in c very easily, just define a field of a struct as a function pointer, assign a function to it and you got a method, as you would in Go or Rust.
1struct json {
2 char *input;
3 size_t pos;
4 size_t length;
5 char (*cur)(struct json *json);
6 bool (*is_eof)(struct json *json);
7 void (*advance)(struct json *json);
8 struct json_value (*atom)(struct json *json);
9 struct json_value (*array)(struct json *json);
10 struct json_value (*object)(struct json *json);
11 struct json_value (*parse)(struct json *json);
12};
Of course you have to define a function the C way (<return type> <name>(<list of params>);) and assign it to your method field - but it is not that
complicated:
1struct json json_new(char *input) {
2 ASSERT(input != NULL, "corrupted input");
3 struct json j = (struct json){
4 .input = input,
5 .length = strlen(input) - 1,
6 };
7
8 j.cur = cur;
9 j.is_eof = is_eof;
10 j.advance = advance;
11 j.parse = parse;
12 j.object = object;
13 j.array = array;
14 j.atom = atom;
15
16 return j;
17}cur, is_eof and advance are small helper functions:
1static char cur(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3 return json->is_eof(json) ? -1 : json->input[json->pos];
4}
5
6static bool is_eof(struct json *json) {
7 ASSERT(json != NULL, "corrupted internal state");
8 return json->pos > json->length;
9}
10
11static void advance(struct json *json) {
12 ASSERT(json != NULL, "corrupted internal state");
13 json->pos++;
14 skip_whitespace(json);
15}ASSERT is a simple assertion macro:
1#define ASSERT(EXP, context) \
2 if (!(EXP)) { \
3 fprintf(stderr, \
4 "jsoninc: ASSERT(" #EXP "): `" context \
5 "` failed at %s, line %d\n", \
6 __FILE__, __LINE__); \
7 exit(EXIT_FAILURE); \
8 }Failing for instance if the argument to the json_new function is a null pointer:
1int main(void) {
2 struct json json = json_new(NULL);
3 return EXIT_SUCCESS;
4}Even with a descriptive comment:
1jsoninc: ASSERT(input != NULL): `corrupted input` failed at ./json.c, line 16Parsing JSON with methods
Since we now have the whole setup out of the way, we can start with the crux of the project: parsing JSON. Normally I would have done a lexer and parser, but for the sake of simplicity - I combined these passes into a single parser architecture.
Warning
Also please don’t even think about standard compliance - I really can’t be bothered, see Parsing JSON is a Minefield 💣.
Ignoring Whitespace
As far as we are concerned, JSON does not say anything about whitespace - so we
just use the skip_whitespace function to ignore all and any whitespace:
1static void skip_whitespace(struct json *json) {
2 while (!json->is_eof(json) &&
3 (json->cur(json) == ' ' || json->cur(json) == '\t' ||
4 json->cur(json) == '\n')) {
5 json->pos++;
6 }
7}Parsing Atoms
Since JSON has five kinds of atoms (numbers, strings, true, false and null), we need to parse them into our
json_value struct using the json->atom method:
1static struct json_value atom(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3
4 skip_whitespace(json);
5
6 char cc = json->cur(json);
7 if ((cc >= '0' && cc <= '9') || cc == '.' || cc == '-') {
8 return number(json);
9 }
10
11 switch (cc) {
12 // ... all of the atoms ...
13 default:
14 printf("unknown character '%c' at pos %zu\n", json->cur(json), json->pos);
15 ASSERT(false, "unknown character");
16 return (struct json_value){.type = json_null};
17 }
18}numbers
Info
Technically numbers in JSON should include scientific notation and other fun stuff, but let's just remember the project's simplicity and my sanity, see json.org.
// Parse the number under the cursor and return it as a json_number value.
//
// The number is the longest run of '0'-'9', '_', '.' and '-'; scientific
// notation is deliberately not supported. Underscores are dropped before the
// conversion, so "1_000" yields 1000 instead of being cut off at the first
// '_'. The remaining text has to be consumed completely by strtod, otherwise
// the process terminates via ASSERT ("-", "1.2.3", ...).
//
// On return the cursor sits on the first non-whitespace character after the
// number.
static struct json_value number(struct json *json) {
  ASSERT(json != NULL, "corrupted internal state");
  size_t start = json->pos;
  // step with pos++ instead of advance(): advance() skips whitespace, which
  // would glue "1 2" into a single slice and hide the missing separator
  for (char cc = json->cur(json);
       ((cc >= '0' && cc <= '9') || cc == '_' || cc == '.' || cc == '-');
       json->pos++, cc = json->cur(json))
    ;

  size_t span = json->pos - start;
  ASSERT(span > 0, "expected a number");

  char *slice = malloc(span + 1);
  ASSERT(slice != NULL, "failed to allocate slice for number parsing");
  size_t kept = 0;
  for (size_t i = 0; i < span; i++) {
    char digit = json->input[start + i];
    if (digit != '_') {
      slice[kept++] = digit;
    }
  }
  slice[kept] = 0;

  char *end = NULL;
  double parsed = strtod(slice, &end);
  // valid only if strtod converted something and stopped at the terminator
  bool consumed = end != slice && *end == 0;
  free(slice);
  ASSERT(consumed, "invalid number");

  // rewind onto the last character of the number so that advance() steps
  // past it and skips any trailing whitespace
  json->pos--;
  json->advance(json);

  return (struct json_value){.type = json_number, .value = {.number = parsed}};
}
We keep track of the start of the number, advance as far as the number is still
considered a number (any of 0-9 | _ | . | -). Once we hit the end we allocate
a temporary string, copy the chars containing the number from the input string
and terminate the string with \0. strtod is used to convert this string to
a double. Once that is done we free the slice and return the result as a
json_value.
null, true and false
null, true and false are unique atoms and easy to reason about, regarding
constant size and characters, as such we can just assert their characters:
1static struct json_value atom(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3
4 skip_whitespace(json);
5
6 char cc = json->cur(json);
7 if ((cc >= '0' && cc <= '9') || cc == '.' || cc == '-') {
8 return number(json);
9 }
10
11 switch (cc) {
12 case 'n': // null
13 json->pos++;
14 ASSERT(json->cur(json) == 'u', "unknown atom 'n', wanted 'null'")
15 json->pos++;
16 ASSERT(json->cur(json) == 'l', "unknown atom 'nu', wanted 'null'")
17 json->pos++;
18 ASSERT(json->cur(json) == 'l', "unknown atom 'nul', wanted 'null'")
19 json->advance(json);
20 return (struct json_value){.type = json_null};
21 case 't': // true
22 json->pos++;
23 ASSERT(json->cur(json) == 'r', "unknown atom 't', wanted 'true'")
24 json->pos++;
25 ASSERT(json->cur(json) == 'u', "unknown atom 'tr', wanted 'true'")
26 json->pos++;
27 ASSERT(json->cur(json) == 'e', "unknown atom 'tru', wanted 'true'")
28 json->advance(json);
29 return (struct json_value){.type = json_boolean,
30 .value = {.boolean = true}};
31 case 'f': // false
32 json->pos++;
33 ASSERT(json->cur(json) == 'a', "invalid atom 'f', wanted 'false'")
34 json->pos++;
35 ASSERT(json->cur(json) == 'l', "invalid atom 'fa', wanted 'false'")
36 json->pos++;
37 ASSERT(json->cur(json) == 's', "invalid atom 'fal', wanted 'false'")
38 json->pos++;
39 ASSERT(json->cur(json) == 'e', "invalid atom 'fals', wanted 'false'")
40 json->advance(json);
41 return (struct json_value){.type = json_boolean,
42 .value = {.boolean = false}};
43 // ... strings ...
44 default:
45 printf("unknown character '%c' at pos %zu\n", json->cur(json), json->pos);
46 ASSERT(false, "unknown character");
47 return (struct json_value){.type = json_null};
48 }
49}strings
Info
Again, similarly to JSON numbers, JSON strings should include escapes for quotation marks and other fun stuff, but let's again just remember the project's simplicity and my sanity, see json.org.
// Parse the double-quoted string under the cursor and return its content as
// a heap-allocated, NUL-terminated copy. The caller owns the returned buffer
// and has to free() it.
//
// The cursor is expected to be on the opening quote (not checked here).
// Escape sequences are not interpreted. The string ends at the next '"';
// reaching a newline or the end of the input first terminates the process
// via ASSERT ("unterminated string").
//
// On return the cursor sits on the first non-whitespace character after the
// closing quote.
static char *string(struct json *json) {
  ASSERT(json != NULL, "corrupted internal state");
  // step over the opening quote with pos++ instead of advance(): advance()
  // skips whitespace and would drop leading blanks from the content
  json->pos++;
  size_t start = json->pos;
  // the is_eof() check is what ends the scan on an unterminated string;
  // cur() keeps returning -1 there, which is neither '"' nor '\n'
  while (!json->is_eof(json) && json->cur(json) != '"' &&
         json->cur(json) != '\n') {
    json->pos++;
  }
  // validate before copying, so nothing is allocated for a broken string
  ASSERT(json->cur(json) == '"', "unterminated string");

  size_t span = json->pos - start;
  char *slice = malloc(span + 1);
  ASSERT(slice != NULL, "failed to allocate slice for a string");
  memcpy(slice, json->input + start, span);
  slice[span] = 0;

  json->advance(json);
  return slice;
}
Pretty easy stuff, as long as we are inside of the string (before \",\n and
EOF) we advance, after that we copy it into a new slice and return that slice
(this function is especially useful for object keys - that’s why it is a
function).
Parsing Arrays
Since arrays are any amount of JSON values between [] and separated via , -
this one is not that hard to implement either:
1struct json_value array(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3 ASSERT(json->cur(json) == '[', "invalid array start");
4 json->advance(json);
5
6 struct json_value json_value = {.type = json_array};
7 json_value.values = malloc(sizeof(struct json_value));
8
9 while (!json->is_eof(json) && json->cur(json) != ']') {
10 if (json_value.length > 0) {
11 if (json->cur(json) != ',') {
12 json_free_value(&json_value);
13 }
14 ASSERT(json->cur(json) == ',',
15 "expected , as the separator between array members");
16 json->advance(json);
17 }
18 struct json_value member = json->parse(json);
19 json_value.values = realloc(json_value.values,
20 sizeof(json_value) * (json_value.length + 1));
21 json_value.values[json_value.length++] = member;
22 }
23
24 ASSERT(json->cur(json) == ']', "missing array end");
25 json->advance(json);
26 return json_value;
27}We start with a array length of one and reallocate for every new child we find.
We also check for the , between each child.
A growing array would probably be better to minimize allocations, but here we are, writing unoptimized C code - still, it works :)
Parsing Objects
1struct json_value object(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3 ASSERT(json->cur(json) == '{', "invalid object start");
4 json->advance(json);
5
6 struct json_value json_value = {.type = json_object};
7 json_value.object_keys = malloc(sizeof(char *));
8 json_value.values = malloc(sizeof(struct json_value));
9
10 while (!json->is_eof(json) && json->cur(json) != '}') {
11 if (json_value.length > 0) {
12 if (json->cur(json) != ',') {
13 json_free_value(&json_value);
14 }
15 ASSERT(json->cur(json) == ',',
16 "expected , as separator between object key value pairs");
17 json->advance(json);
18 }
19 ASSERT(json->cur(json) == '"',
20 "expected a string as the object key, did not get that")
21 char *key = string(json);
22 ASSERT(json->cur(json) == ':', "expected object key and value separator");
23 json->advance(json);
24
25 struct json_value member = json->parse(json);
26 json_value.values = realloc(json_value.values, sizeof(struct json_value) *
27 (json_value.length + 1));
28 json_value.values[json_value.length] = member;
29 json_value.object_keys = realloc(json_value.object_keys,
30 sizeof(char **) * (json_value.length + 1));
31 json_value.object_keys[json_value.length] = key;
32 json_value.length++;
33 }
34
35 ASSERT(json->cur(json) == '}', "missing object end");
36 json->advance(json);
37 return json_value;
38}Same as arrays, only instead of a single atom we have a string as the key, :
as a separator and a json_value as the value. Each pair is separated with
,.