Idea
- Build a JSON parser in c
- Instead of free-standing functions: attach functions to a struct and use them as methods
- keep it free of the usual family of C issues (segfaults, leaks, stack overflows, etc…)
- provide an ergonomic API
Usage
1#include "json.h"
2#include <stdlib.h>
3
4int main(void) {
5 struct json json = json_new(JSON({
6 "object" : {},
7 "array" : [[]],
8 "atoms" : [ "string", 0.1, true, false, null ]
9 }));
10 struct json_value json_value = json.parse(&json);
11 json_print_value(&json_value);
12 puts("");
13 json_free_value(&json_value);
14 return EXIT_SUCCESS;
15}
Tip - Compiling C projects the easy way
Don’t take this as a guide for using make, in my projects I just use it as a command runner.
Compiler flags
These flags can be specific to gcc; I use gcc (GCC) 14.2.1 20250207, so take this with a grain of salt.
I use these flags in almost every c project I ever started.
1gcc -std=c23 \
2 -O2 \
3 -Wall \
4 -Wextra \
5 -Werror \
6 -fdiagnostics-color=always \
7 -fsanitize=address,undefined \
8 -fno-common \
9 -Winit-self \
10 -Wfloat-equal \
11 -Wundef \
12 -Wshadow \
13 -Wpointer-arith \
14 -Wcast-align \
15 -Wstrict-prototypes \
16 -Wstrict-overflow=5 \
17 -Wwrite-strings \
18 -Waggregate-return \
19 -Wswitch-default \
20 -Wno-discarded-qualifiers \
21 -Wno-aggregate-return \
22 main.c
Flag | Description |
---|---|
-std=c23 | set lang standard, i use ISO C23 |
-O2 | optimize more than -O1 |
-Wall | enable a list of warnings |
-Wextra | enable more warnings than -Wall |
-Werror | convert all warnings to errors |
-fdiagnostics-color=always | use color in diagnostics |
-fsanitize=address,undefined | enable AddressSanitizer and UndefinedBehaviorSanitizer |
-fno-common | place uninitialized global variables in the BSS section |
-Winit-self | warn about uninitialized variables that are initialized with themselves |
-Wfloat-equal | warn if floating-point values are used in equality comparisons. |
-Wundef | warn if an undefined identifier is evaluated in an #if directive |
-Wshadow | warn whenever a local variable or type declaration shadows another variable, parameter, type |
-Wpointer-arith | warn about anything that depends on the “size of” a function type or of void |
-Wcast-align | warn whenever a pointer is cast such that the required alignment of the target is increased. |
-Wstrict-prototypes | warn if a function is declared or defined without specifying the argument types |
-Wstrict-overflow=5 | warns about cases where the compiler optimizes based on the assumption that signed overflow does not occur |
-Wwrite-strings | give string constants the type const char[length] , warns on copy into non const char* |
-Wswitch-default | warn whenever a switch statement does not have a default case |
-Wno-discarded-qualifiers | do not warn if type qualifiers on pointers are being discarded. |
-Wno-aggregate-return | do not warn if any functions that return structures or unions are defined or called. |
Sourcing source files
I generally keep my header and source files in the same directory as the
makefile, so i use find
to find them:
1shell find . -name "*.c"
Make and Makefiles
I don’t define the build target as .PHONY because I generally never have a build directory.
Putting it all together as a makefile:
1CFLAGS := -std=c23 \
2 -O2 \
3 -Wall \
4 -Wextra \
5 -Werror \
6 -fdiagnostics-color=always \
7 -fsanitize=address,undefined \
8 -fno-common \
9 -Winit-self \
10 -Wfloat-equal \
11 -Wundef \
12 -Wshadow \
13 -Wpointer-arith \
14 -Wcast-align \
15 -Wstrict-prototypes \
16 -Wstrict-overflow=5 \
17 -Wwrite-strings \
18 -Waggregate-return \
19 -Wcast-qual \
20 -Wswitch-default \
21 -Wno-discarded-qualifiers \
22 -Wno-aggregate-return
23
24FILES := $(shell find . -name "*.c")
25
26build:
27 $(CC) $(CFLAGS) $(FILES) -o jsoninc
Variadic macros to write inline raw JSON
This doesn’t really deserve its own section, but I use #<expression>
to
stringify C expressions in conjunction with __VA_ARGS__
:
1#define JSON(...) #__VA_ARGS__
To enable:
1char *raw_json = JSON({ "array" : [ [], {}] });
Inlines to:
// Stringification keeps the tokens as written and only escapes the quotes:
char *raw_json = "{ \"array\" : [ [], {}] }";
Representing JSON values in memory
I need a structure to hold a parsed JSON value: its type and its contents.
Types of JSON values
JSON can be either one of:
- null
- true
- false
- number
- string
- array
- object
In C i use an enum to represent this:
// json.h

/* Discriminant for struct json_value: which kind of JSON value is stored. */
enum json_type {
  json_number,  /* any JSON number */
  json_string,  /* a JSON string */
  json_boolean, /* true or false */
  json_null,    /* null */
  json_object,  /* { "key": value, ... } */
  json_array,   /* [ value, ... ] */
};

/* Printable name for every json_type, indexed by the enumerator itself. */
extern char *json_type_map[];
And i use json_type_map
to map all json_type
values to their char*
representation:
1char *json_type_map[] = {
2 [json_number] = "json_number", [json_string] = "json_string",
3 [json_boolean] = "json_boolean", [json_null] = "json_null",
4 [json_object] = "json_object", [json_array] = "json_array",
5};
json_value & unions for atoms, array elements or object values and object keys
The json_value
struct holds the type, as defined above, a union sharing
memory space for either a boolean, a string or a number, a list of json_value
structures as array children or object values, a list of strings that are
object keys and the length for the three aforementioned fields.
1struct json_value {
2 enum json_type type;
3 union {
4 bool boolean;
5 char *string;
6 double number;
7 } value;
8 struct json_value *values;
9 char **object_keys;
10 size_t length;
11};
Tearing values down
Since some of the fields in json_value
are heap allocated, we have to destroy
/ free the structure upon either no longer using it or exiting the process.
json_free_value
does exactly this:
1void json_free_value(struct json_value *json_value) {
2 switch (json_value->type) {
3 case json_string:
4 free(json_value->value.string);
5 break;
6 case json_object:
7 for (size_t i = 0; i < json_value->length; i++) {
8 free(json_value->object_keys[i]);
9 json_free_value(&json_value->values[i]);
10 }
11 if (json_value->object_keys != NULL) {
12 free(json_value->object_keys);
13 json_value->object_keys = NULL;
14 }
15 if (json_value->values != NULL) {
16 free(json_value->values);
17 json_value->values = NULL;
18 }
19 break;
20 case json_array:
21 for (size_t i = 0; i < json_value->length; i++) {
22 json_free_value(&json_value->values[i]);
23 }
24 if (json_value->values != NULL) {
25 free(json_value->values);
26 json_value->values = NULL;
27 }
28 break;
29 case json_number:
30 case json_boolean:
31 case json_null:
32 default:
33 break;
34 }
35 json_value->type = json_null;
36}
As simple as that: we ignore the JSON value variants that own no heap memory, such as json_number, json_boolean and json_null, while freeing the allocated memory for json_string, each json_array child and the json_object keys and values.
Printing json_values
A memory representation without a way to inspect it has little value to us, thus I dumped print_json_value into main.c:
1void print_json_value(struct json_value *json_value) {
2 switch (json_value->type) {
3 case json_null:
4 printf("null");
5 break;
6 case json_number:
7 printf("%f", json_value->value.number);
8 break;
9 case json_string:
10 printf("\"%s\"", json_value->value.string);
11 break;
12 case json_boolean:
13 printf(json_value->value.boolean ? "true" : "false");
14 break;
15 case json_object:
16 printf("{");
17 for (size_t i = 0; i < json_value->length; i++) {
18 printf("\"%s\": ", json_value->object_keys[i]);
19 print_json_value(&json_value->values[i]);
20 if (i < json_value->length - 1) {
21 printf(", ");
22 }
23 }
24 printf("}");
25 break;
26 case json_array:
27 printf("[");
28 for (size_t i = 0; i < json_value->length; i++) {
29 print_json_value(&json_value->values[i]);
30 if (i < json_value->length - 1) {
31 printf(", ");
32 }
33 }
34 printf("]");
35 break;
36 default:
37 ASSERT(0, "Unimplemented json_value case");
38 break;
39 }
40}
Calling this function:
1int main(void) {
2 struct json_value json_value = {
3 .type = json_array,
4 .length = 4,
5 .values =
6 (struct json_value[]){
7 (struct json_value){.type = json_string, .value.string = "hi"},
8 (struct json_value){.type = json_number, .value.number = 161},
9 (struct json_value){
10 .type = json_object,
11 .length = 1,
12 .object_keys =
13 (char *[]){
14 "key",
15 },
16 .values =
17 (struct json_value[]){
18 (struct json_value){.type = json_string,
19 .value.string = "value"},
20 },
21 },
22 (struct json_value){.type = json_null},
23 },
24 };
25 json_print_value(&json_value);
26 puts("");
27 return EXIT_SUCCESS;
28}
Results in:
1["hi", 161.000000, {"key": "value"}, null]
json
Parser struct, Function pointers and how to use them (they suck)
As contrary as it sounds, one can attach functions to structures in c very easily, just define a field of a struct as a function pointer, assign a function to it and you got a method, as you would in Go or Rust.
/* Parser state plus the function pointers that act as its "methods". */
struct json {
  /* text being parsed */
  char *input;
  /* index of the character the parser is currently looking at */
  size_t pos;
  /* index of the last character of `input`; json_new stores strlen - 1 */
  size_t length;

  /* character at `pos`, or -1 at end of input */
  char (*cur)(struct json *json);
  /* true once `pos` has moved past `length` */
  bool (*is_eof)(struct json *json);
  /* step one character forward, then skip whitespace */
  void (*advance)(struct json *json);

  /* parse null, true, false or a number */
  struct json_value (*atom)(struct json *json);
  /* parse a [...] array */
  struct json_value (*array)(struct json *json);
  /* parse a {...} object */
  struct json_value (*object)(struct json *json);
  /* parse any value; this is what callers of the API invoke */
  struct json_value (*parse)(struct json *json);
};
Of course you have to define a function the C way (<return type> <name>(<list of params>);) and assign it to your method field - but it is not that complicated:
1struct json json_new(char *input) {
2 ASSERT(input != NULL, "corrupted input");
3 struct json j = (struct json){
4 .input = input,
5 .length = strlen(input) - 1,
6 };
7
8 j.cur = cur;
9 j.is_eof = is_eof;
10 j.advance = advance;
11 j.parse = parse;
12 j.object = object;
13 j.array = array;
14 j.atom = atom;
15
16 return j;
17}
cur
, is_eof
and advance
are small helper functions:
1static char cur(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3 return json->is_eof(json) ? -1 : json->input[json->pos];
4}
5
6static bool is_eof(struct json *json) {
7 ASSERT(json != NULL, "corrupted internal state");
8 return json->pos > json->length;
9}
10
11static void advance(struct json *json) {
12 ASSERT(json != NULL, "corrupted internal state");
13 json->pos++;
14 skip_whitespace(json);
15}
ASSERT
is a simple assertion macro:
/*
 * ASSERT(EXP, context): when EXP is false, print the stringified expression,
 * `context`, the file and the line to stderr and exit with EXIT_FAILURE.
 *
 * The expression text and the context are passed as "%s" arguments instead of
 * being pasted into the format string, so a '%' inside either of them (for
 * example `ASSERT(n % 2 == 0, "...")`) is printed literally instead of being
 * read as a conversion specifier.
 *
 * The expansion is deliberately a bare `if` block rather than the usual
 * `do { } while (0)`: call sites in this project use the macro both with and
 * without a trailing semicolon. Do not use it as the unbraced body of an
 * `if` that has an `else`.
 */
#define ASSERT(EXP, context)                                                   \
  if (!(EXP)) {                                                                \
    fprintf(stderr, "jsoninc: ASSERT(%s): `%s` failed at %s, line %d\n", #EXP, \
            (context), __FILE__, __LINE__);                                    \
    exit(EXIT_FAILURE);                                                        \
  }
Failing for instance if the argument to the json_new
function is a null pointer:
1int main(void) {
2 struct json json = json_new(NULL);
3 return EXIT_SUCCESS;
4}
Even with a descriptive comment:
1jsoninc: ASSERT(input != NULL): `corrupted input` failed at ./json.c, line 16
Parsing JSON with methods
Since we now have the whole setup out of the way, we can start with the crux of the project: parsing JSON. Normally I would have done a lexer and parser, but for the sake of simplicity - I combined these passes into a single parser architecture.
Warning
Also please don’t even think about standard compliance - I really can’t be bothered, see Parsing JSON is a Minefield 💣.
Ignoring Whitespace
As far as we are concerned, whitespace between JSON tokens carries no meaning - so we
just use the skip_whitespace
function to ignore all and any whitespace:
1static void skip_whitespace(struct json *json) {
2 while (!json->is_eof(json) &&
3 (json->cur(json) == ' ' || json->cur(json) == '\t' ||
4 json->cur(json) == '\n')) {
5 json->pos++;
6 }
7}
Parsing Atoms
Since JSON has five kinds of atoms, we need to parse them into our json_value struct using the json->atom method:
1static struct json_value atom(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3
4 skip_whitespace(json);
5
6 char cc = json->cur(json);
7 if ((cc >= '0' && cc <= '9') || cc == '.' || cc == '-') {
8 return number(json);
9 }
10
11 switch (cc) {
12 // ... all of the atoms ...
13 default:
14 printf("unknown character '%c' at pos %zu\n", json->cur(json), json->pos);
15 ASSERT(false, "unknown character");
16 return (struct json_value){.type = json_null};
17 }
18}
numbers
Info
Technically numbers in JSON should include scientific notation and other fun stuff, but lets just remember the projects simplicity and my sanity, see json.org. 1static struct json_value number(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3 size_t start = json->pos;
4 // i don't give a fuck about scientific notation <3
5 for (char cc = json->cur(json);
6 ((cc >= '0' && cc <= '9') || cc == '_' || cc == '.' || cc == '-');
7 json->advance(json), cc = json->cur(json))
8 ;
9
10 char *slice = malloc(sizeof(char) * json->pos - start + 1);
11 ASSERT(slice != NULL, "failed to allocate slice for number parsing")
12 memcpy(slice, json->input + start, json->pos - start);
13 slice[json->pos - start] = 0;
14 double number = strtod(slice, NULL);
15 free(slice);
16
17 return (struct json_value){.type = json_number, .value = {.number = number}};
18}
We keep track of the start of the number, advance as far as the number is still
considered a number (any of 0-9 | _ | . | -
). Once we hit the end we allocate
a temporary string, copy the chars containing the number from the input string
and terminate the string with \0
. strtod
is used to convert this string to
a double. Once that is done we free the slice and return the result as a
json_value
.
null, true and false
null
, true
and false
are unique atoms and easy to reason about, regarding
constant size and characters, as such we can just assert their characters:
1static struct json_value atom(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3
4 skip_whitespace(json);
5
6 char cc = json->cur(json);
7 if ((cc >= '0' && cc <= '9') || cc == '.' || cc == '-') {
8 return number(json);
9 }
10
11 switch (cc) {
12 case 'n': // null
13 json->pos++;
14 ASSERT(json->cur(json) == 'u', "unknown atom 'n', wanted 'null'")
15 json->pos++;
16 ASSERT(json->cur(json) == 'l', "unknown atom 'nu', wanted 'null'")
17 json->pos++;
18 ASSERT(json->cur(json) == 'l', "unknown atom 'nul', wanted 'null'")
19 json->advance(json);
20 return (struct json_value){.type = json_null};
21 case 't': // true
22 json->pos++;
23 ASSERT(json->cur(json) == 'r', "unknown atom 't', wanted 'true'")
24 json->pos++;
25 ASSERT(json->cur(json) == 'u', "unknown atom 'tr', wanted 'true'")
26 json->pos++;
27 ASSERT(json->cur(json) == 'e', "unknown atom 'tru', wanted 'true'")
28 json->advance(json);
29 return (struct json_value){.type = json_boolean,
30 .value = {.boolean = true}};
31 case 'f': // false
32 json->pos++;
33 ASSERT(json->cur(json) == 'a', "invalid atom 'f', wanted 'false'")
34 json->pos++;
35 ASSERT(json->cur(json) == 'l', "invalid atom 'fa', wanted 'false'")
36 json->pos++;
37 ASSERT(json->cur(json) == 's', "invalid atom 'fal', wanted 'false'")
38 json->pos++;
39 ASSERT(json->cur(json) == 'e', "invalid atom 'fals', wanted 'false'")
40 json->advance(json);
41 return (struct json_value){.type = json_boolean,
42 .value = {.boolean = false}};
43 // ... strings ...
44 default:
45 printf("unknown character '%c' at pos %zu\n", json->cur(json), json->pos);
46 ASSERT(false, "unknown character");
47 return (struct json_value){.type = json_null};
48 }
49}
strings
Info
Again, similarly to JSON numbers, JSON strings should include escapes for quotation marks and other fun stuff, but lets again just remember the projects simplicity and my sanity, see json.org. 1static char *string(struct json *json) {
2 json->advance(json);
3 size_t start = json->pos;
4 for (char cc = json->cur(json); cc != '\n' && cc != '"';
5 json->advance(json), cc = json->cur(json))
6 ;
7
8 char *slice = malloc(sizeof(char) * json->pos - start + 1);
9 ASSERT(slice != NULL, "failed to allocate slice for a string")
10
11 memcpy(slice, json->input + start, json->pos - start);
12 slice[json->pos - start] = 0;
13
14 ASSERT(json->cur(json) == '"', "unterminated string");
15 json->advance(json);
16 return slice;
17}
Pretty easy stuff, as long as we are inside of the string (before \"
,\n
and
EOF
) we advance, after that we copy it into a new slice and return that slice
(this function is especially useful for object keys - that’s why it is a
function).
Parsing Arrays
Since arrays are any number of JSON values between [] separated by , - this one is not that hard to implement either:
1struct json_value array(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3 ASSERT(json->cur(json) == '[', "invalid array start");
4 json->advance(json);
5
6 struct json_value json_value = {.type = json_array};
7 json_value.values = malloc(sizeof(struct json_value));
8
9 while (!json->is_eof(json) && json->cur(json) != ']') {
10 if (json_value.length > 0) {
11 if (json->cur(json) != ',') {
12 json_free_value(&json_value);
13 }
14 ASSERT(json->cur(json) == ',',
15 "expected , as the separator between array members");
16 json->advance(json);
17 }
18 struct json_value member = json->parse(json);
19 json_value.values = realloc(json_value.values,
20 sizeof(json_value) * (json_value.length + 1));
21 json_value.values[json_value.length++] = member;
22 }
23
24 ASSERT(json->cur(json) == ']', "missing array end");
25 json->advance(json);
26 return json_value;
27}
We start with an array capacity of one and reallocate for every new child we find.
We also check for the ,
between each child.
A growing array would probably be better to minimize allocations, but here we are, writing unoptimized C code - still, it works :)
Parsing Objects
1struct json_value object(struct json *json) {
2 ASSERT(json != NULL, "corrupted internal state");
3 ASSERT(json->cur(json) == '{', "invalid object start");
4 json->advance(json);
5
6 struct json_value json_value = {.type = json_object};
7 json_value.object_keys = malloc(sizeof(char *));
8 json_value.values = malloc(sizeof(struct json_value));
9
10 while (!json->is_eof(json) && json->cur(json) != '}') {
11 if (json_value.length > 0) {
12 if (json->cur(json) != ',') {
13 json_free_value(&json_value);
14 }
15 ASSERT(json->cur(json) == ',',
16 "expected , as separator between object key value pairs");
17 json->advance(json);
18 }
19 ASSERT(json->cur(json) == '"',
20 "expected a string as the object key, did not get that")
21 char *key = string(json);
22 ASSERT(json->cur(json) == ':', "expected object key and value separator");
23 json->advance(json);
24
25 struct json_value member = json->parse(json);
26 json_value.values = realloc(json_value.values, sizeof(struct json_value) *
27 (json_value.length + 1));
28 json_value.values[json_value.length] = member;
29 json_value.object_keys = realloc(json_value.object_keys,
30 sizeof(char **) * (json_value.length + 1));
31 json_value.object_keys[json_value.length] = key;
32 json_value.length++;
33 }
34
35 ASSERT(json->cur(json) == '}', "missing object end");
36 json->advance(json);
37 return json_value;
38}
Same as arrays, only instead of a single atom we have a string as the key, :
as a separator and a json_value
as the value. Each pair is separated with
,
.