diff --git a/changelogs/appendices/newsfragments/2368.clarification b/changelogs/appendices/newsfragments/2368.clarification new file mode 100644 index 00000000..b5412248 --- /dev/null +++ b/changelogs/appendices/newsfragments/2368.clarification @@ -0,0 +1,2 @@ +Update the canonical JSON grammar with case sensitive strings and ABNF builtins +to be easier to understand. diff --git a/content/appendices.md b/content/appendices.md index 8a51cf02..703dd051 100644 --- a/content/appendices.md +++ b/content/appendices.md @@ -131,32 +131,37 @@ def canonical_json(value): #### Grammar -Adapted from the grammar in -removing insignificant whitespace, fractions, exponents and redundant -character escapes. +Adapted from the grammar in [RFC 7159](https://datatracker.ietf.org/doc/html/rfc7159), removing +insignificant whitespace, fractions, exponents and redundant character escapes. +Written in [ABNF](https://datatracker.ietf.org/doc/html/rfc5234) with +[case sensitive strings](https://datatracker.ietf.org/doc/html/rfc7405). - value = false / null / true / object / array / number / string - false = %x66.61.6C.73.65 - null = %x6E.75.6C.6C - true = %x74.72.75.65 - object = %x7B [ member *( %x2C member ) ] %x7D - member = string %x3A value - array = %x5B [ value *( %x2C value ) ] %x5D - number = [ %x2D ] int - int = %x30 / ( %x31-39 *digit ) - digit = %x30-39 - string = %x22 *char %x22 - char = unescaped / %x5C escaped - unescaped = %x20-21 / %x23-5B / %x5D-10FFFF - escaped = %x22 ; " quotation mark U+0022 - / %x5C ; \ reverse solidus U+005C - / %x62 ; b backspace U+0008 - / %x66 ; f form feed U+000C - / %x6E ; n line feed U+000A - / %x72 ; r carriage return U+000D - / %x74 ; t tab U+0009 - / %x75.30.30.30 (%x30-37 / %x62 / %x65-66) ; u000X - / %x75.30.30.31 (%x30-39 / %x61-66) ; u001X +``` +value = false / null / true / object / array / number / string +false = %s"false" +null = %s"null" +true = %s"true" +object = "{" [ member *( "," member ) ] "}" +member = string ":" value +array = "[" [ value *( "," value ) ] "]" +number = [ "-" ] int +int = %x30 
/ ( %x31-39 *DIGIT ) ; Integer without leading zeros +string = DQUOTE *char DQUOTE ; Quoted characters +char = unescaped / "\" escaped +unescaped = %x20-21 / %x23-5B / %x5D-10FFFF ; All Unicode codepoints except the control + ; characters U+0000-U+001F, " and \ +escaped = %x62 ; b backspace U+0008 + / %x74 ; t tab U+0009 + / %x6E ; n line feed U+000A + / %x66 ; f form feed U+000C + / %x72 ; r carriage return U+000D + / %x22 ; " quotation mark U+0022 + / %x5C ; \ reverse solidus U+005C + / %s"u000" (%x30-37 / %x62 / %x65-66) ; Control characters U+0000-U+001F that lack a + ; dedicated escape sequence (such as \n) above. + ; u000X, where X is [0-7, b, e, f] + / %s"u001" (%x30-39 / %x61-66) ; u001X, where X is [0-9, a-f] +``` #### Examples