您的位置:首页 > 产品设计 > UI/UE

PostgreSQL json jsonb 支持的value数据类型,如何构造一个jsonb

2015-10-14 22:07 555 查看


Postgres2015全国用户大会将于11月20至21日在北京丽亭华苑酒店召开。本次大会嘉宾阵容强大,国内顶级PostgreSQL数据库专家将悉数到场,并特邀欧洲、俄罗斯、日本、美国等国家和地区的数据库方面专家助阵:

Postgres-XC项目的发起人铃木市一(SUZUKI Koichi)
Postgres-XL的项目发起人Mason Sharp
pgpool的作者石井达夫(Tatsuo Ishii)
PG-Strom的作者海外浩平(Kaigai Kohei)
Greenplum研发总监姚延栋
周正中(德哥), PostgreSQL中国用户会创始人之一
汪洋,平安科技数据库技术部经理
……

2015年度PG大象会报名地址:http://postgres2015.eventdove.com/
PostgreSQL中国社区: http://postgres.cn/ PostgreSQL专业1群: 3336901(已满)
PostgreSQL专业2群: 100910388
PostgreSQL专业3群: 150657323


PostgreSQL 支持JSON已久,那么JSON的value中支持哪些类型呢?
目前支持string, numeric, boolean (true, false), null。 区分大小写哦。

/* *      Recursive Descent parse routines. There is one for each structural *      element in a json document: *        - scalar (string, number, true, false, null) *        - array  ( [ ] ) *        - array element *        - object ( { } ) *        - object field */static inline voidparse_scalar(JsonLexContext *lex, JsonSemAction *sem){        char       *val = NULL;        json_scalar_action sfunc = sem->scalar;        char      **valaddr;        JsonTokenType tok = lex_peek(lex);
valaddr = sfunc == NULL ? NULL : &val;
/* a scalar must be a string, a number, true, false, or null */        switch (tok)        {                case JSON_TOKEN_TRUE:                        lex_accept(lex, JSON_TOKEN_TRUE, valaddr);                        break;                case JSON_TOKEN_FALSE:                        lex_accept(lex, JSON_TOKEN_FALSE, valaddr);                        break;                case JSON_TOKEN_NULL:                        lex_accept(lex, JSON_TOKEN_NULL, valaddr);                        break;                case JSON_TOKEN_NUMBER:                        lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);                        break;                case JSON_TOKEN_STRING:                        lex_accept(lex, JSON_TOKEN_STRING, valaddr);                        break;                default:                        report_parse_error(JSON_PARSE_VALUE, lex);        }
if (sfunc != NULL)                (*sfunc) (sem->semstate, val, tok);}


例子:

postgres=# select jsonb '{"a": TRUE}';ERROR:  invalid input syntax for type jsonLINE 1: select jsonb '{"a": TRUE}';                     ^DETAIL:  Token "TRUE" is invalid.CONTEXT:  JSON data, line 1: {"a": TRUE...postgres=# select jsonb '{"a": true}';    jsonb    ------------- {"a": true}(1 row)
postgres=# select jsonb '{"a": false}';    jsonb     -------------- {"a": false}(1 row)
postgres=# select jsonb '{"a": NULL}';ERROR:  invalid input syntax for type jsonLINE 1: select jsonb '{"a": NULL}';                     ^DETAIL:  Token "NULL" is invalid.CONTEXT:  JSON data, line 1: {"a": NULL...postgres=# select jsonb '{"a": null}';    jsonb    ------------- {"a": null}(1 row)
postgres=# select jsonb '{"a": 10.001}';     jsonb     --------------- {"a": 10.001}(1 row)
postgres=# select jsonb '{"a": "10.001"}';      jsonb      ----------------- {"a": "10.001"}(1 row)


src/backend/utils/adt/json.c
解析json,jsonb时,定义的token类型,这个和JSONB里面能存什么类型没有关系:

/* * The context of the parser is maintained by the recursive descent * mechanism, but is passed explicitly to the error reporting routine * for better diagnostics. */typedef enum                                    /* contexts of JSON parser */{        JSON_PARSE_VALUE,                       /* expecting a value */        JSON_PARSE_STRING,                      /* expecting a string (for a field name) */        JSON_PARSE_ARRAY_START,         /* saw '[', expecting value or ']' */        JSON_PARSE_ARRAY_NEXT,          /* saw array element, expecting ',' or ']' */        JSON_PARSE_OBJECT_START,        /* saw '{', expecting label or '}' */        JSON_PARSE_OBJECT_LABEL,        /* saw object label, expecting ':' */        JSON_PARSE_OBJECT_NEXT         /* saw object value, expecting ',' or '}' */        JSON_PARSE_OBJECT_COMMA,        /* saw object ',', expecting next label */        JSON_PARSE_END                          /* saw the end of a document, expect nothing */} JsonParseContext;


实际上jsonb里没有类型的概念,你前面看到的TOKEN实际上也就是字符串匹配而已,你可以理解为jsonb内部就是一个有一定规则的字符串。

那么我们怎么来构造一个JSONB类型,并且在使用时可以很好的处理里面的数据类型呢?

这里需要用到PostgreSQL的输入输出函数,一般每个类型都会有对应的in和out函数,例如:

pg_catalog | anyrange_in                     | anyrange         | cstring, oid, integer                                          | normal pg_catalog | anyrange_out                    | cstring          | anyrange                                                       | normal pg_catalog | timestamptz_out               | cstring                     | timestamp with time zone                                                                                  | normal pg_catalog | timestamptz_in                | timestamp with time zone    | cstring, oid, integer                                                                                     | normal


/* timestamptz_in() * Convert a string to internal form. */Datumtimestamptz_in(PG_FUNCTION_ARGS){        char       *str = PG_GETARG_CSTRING(0);
#ifdef NOT_USED        Oid                     typelem = PG_GETARG_OID(1);#endif        int32           typmod = PG_GETARG_INT32(2);


postgres=# select timestamptz_out(now());        timestamptz_out        ------------------------------- 2015-09-24 19:44:16.076233+08(1 row)
postgres=# select oid,* from pg_type where typname='timestamptz';-[ RECORD 1 ]--+---------------------oid            | 1184typname        | timestamptztypnamespace   | 11typowner       | 10typlen         | 8typbyval       | ttyptype        | btypcategory    | Dtypispreferred | ttypisdefined   | ttypdelim       | ,typrelid       | 0typelem        | 0typarray       | 1185typinput       | timestamptz_intypoutput      | timestamptz_outtypreceive     | timestamptz_recvtypsend        | timestamptz_sendtypmodin       | timestamptztypmodintypmodout      | timestamptztypmodouttypanalyze     | -typalign       | dtypstorage     | ptypnotnull     | ftypbasetype    | 0typtypmod      | -1typndims       | 0typcollation   | 0typdefaultbin  | typdefault     | typacl         |


typmode表示精度

postgres=# select timestamptz_in('2015-09-24 19:44:16.076233+08',1184,1);-[ RECORD 1 ]--+-------------------------timestamptz_in | 2015-09-24 19:44:16.1+08postgres=# select timestamptz_in('2015-09-24 19:44:16.076233+08',1184,6);-[ RECORD 1 ]--+------------------------------timestamptz_in | 2015-09-24 19:44:16.076233+08


使用format格式来构造jsonb类型是比较靠谱的。

代码见varlena.c

/* * Returns a formatted string */Datumtext_format(PG_FUNCTION_ARGS){......


例子

构造一个jsonb的cstring

postgres=# select format('{"K": "%s"}', int8range(1,10));     format      ----------------- {"K": "[1,10)"}(1 row)


使用jsonb_in把cstring转换为JSONB

postgres=# select jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring );    jsonb_in     ----------------- {"K": "[1,10)"}(1 row)


取出K的element

postgres=# select jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring ) ->> 'K'; ?column? ---------- [1,10)(1 row)


将element转换回去

postgres=# select (jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring ) ->> 'K') :: int8range; int8range ----------- [1,10)(1 row)


再举个例子

postgres=# select format('{"K": "%s"}', '你好'::bytea);         format          ------------------------- {"K": "\xe4bda0e5a5bd"}(1 row)postgres=# select convert_from('\xe4bda0e5a5bd', 'utf8'); convert_from -------------- 你好(1 row)postgres=# \set VERBOSITY verbosepostgres=# select jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring );ERROR:  22P02: invalid input syntax for type jsonDETAIL:  Escape sequence "\x" is invalid.CONTEXT:  JSON data, line 1: {"K": "\x...LOCATION:  json_lex_string, json.c:890


非常遗憾,JSONB会处理escape,而\x是不合法的,报错了。这块代码可以改一改。

代码如下:

/* * The next token in the input stream is known to be a string; lex it. */static inline voidjson_lex_string(JsonLexContext *lex){......                                switch (*s)                                {                                        case '"':                                        case '\\':                                        case '/':                                                appendStringInfoChar(lex->strval, *s);                                                break;                                        case 'b':                                                appendStringInfoChar(lex->strval, '\b');                                                break;                                        case 'f':                                                appendStringInfoChar(lex->strval, '\f');                                                break;                                        case 'n':                                                appendStringInfoChar(lex->strval, '\n');                                                break;                                        case 'r':                                                appendStringInfoChar(lex->strval, '\r');                                                break;                                        case 't':                                                appendStringInfoChar(lex->strval, '\t');                                                break;                                        default:                                                /* Not a valid string escape, so error out. */                                                lex->token_terminator = s + pg_mblen(s);                                                ereport(ERROR,                                                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),                                                                 errmsg("invalid input syntax for type json"),                                                        errdetail("Escape sequence \"\\%s\" is invalid.",                                                                          extract_mb_char(s)),                                                                 report_json_context(lex)));                                }


修改后,可以实现存储BYTEA,但是输出时还需要转换一下。

postgres=# select jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring ) ->> 'K';   ?column?   -------------- e4bda0e5a5bd(1 row)
postgres=# select convert_from( byteain (('\x'||(jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring ) ->> 'K'))::cstring), 'utf8'::name); convert_from -------------- 你好(1 row)


用format来构造一个字符串,在里面使用%s填充需要的cstring,从而达到构造jsonb的目的。

这种方法同样适用构造JSONB中包含postgis的几何类型。

[参考]

1. http://www.postgresql.org/docs/9.4/static/functions-string.html

2. src/backend/utils/adt/json.c

/* * Lex one token from the input stream. */static inline voidjson_lex(JsonLexContext *lex){        char       *s;        int                     len;
/* Skip leading whitespace. */        s = lex->token_terminator;        len = s - lex->input;        while (len < lex->input_length &&                   (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))        {                if (*s == '\n')                        ++lex->line_number;                ++s;                ++len;        }        lex->token_start = s;
/* Determine token type. */        if (len >= lex->input_length)        {                lex->token_start = NULL;                lex->prev_token_terminator = lex->token_terminator;                lex->token_terminator = s;                lex->token_type = JSON_TOKEN_END;        }        else                switch (*s)                {                                /* Single-character token, some kind of punctuation mark. */                        case '{':                                lex->prev_token_terminator = lex->token_terminator;                                lex->token_terminator = s + 1;                                lex->token_type = JSON_TOKEN_OBJECT_START;                                lex->token_terminator = s + 1;                                lex->token_type = JSON_TOKEN_OBJECT_START;                                break;                        case '}':                                lex->prev_token_terminator = lex->token_terminator;                                lex->token_terminator = s + 1;                                lex->token_type = JSON_TOKEN_OBJECT_END;                                break;                        case '[':                                lex->prev_token_terminator = lex->token_terminator;                                lex->token_terminator = s + 1;                                lex->token_type = JSON_TOKEN_ARRAY_START;                                break;                        case ']':                                lex->prev_token_terminator = lex->token_terminator;                                lex->token_terminator = s + 1;                                lex->token_type = JSON_TOKEN_ARRAY_END;                                break;                        case ',':                                lex->prev_token_terminator = lex->token_terminator;                                lex->token_terminator = s + 1;                                lex->token_type = JSON_TOKEN_COMMA;                                break;                        case ':':                                lex->prev_token_terminator = lex->token_terminator;                                lex->token_terminator = s + 1;                                lex->token_type = JSON_TOKEN_COLON;                                break;                        case '"':                                /* string */                                json_lex_string(lex);                                lex->token_type = JSON_TOKEN_STRING;                                break;                        case '-':                                /* Negative number. */                                json_lex_number(lex, s + 1, NULL);                                lex->token_type = JSON_TOKEN_NUMBER;                                break;                        case '0':                        case '1':                        case '2':                        case '3':                        case '4':                        case '5':                        case '6':                        case '7':                        case '8':                        case '9':                                /* Positive number. */                                json_lex_number(lex, s, NULL);                                lex->token_type = JSON_TOKEN_NUMBER;                                break;                        default:                                {                                        char       *p;                                {                                        char       *p;
/*                                         * We're not dealing with a string, number, legal                                         * punctuation mark, or end of string.  The only legal                                         * tokens we might find here are true, false, and null,                                         * but for error reporting purposes we scan until we see a                                         * non-alphanumeric character.  That way, we can report                                         * the whole word as an unexpected token, rather than just                                         * some unintuitive prefix thereof.                                         */                                        for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)                                                 /* skip */ ;
/*                                         * We got some sort of unexpected punctuation or an                                         * otherwise unexpected character, so just complain about                                         * that one character.                                         */                                        if (p == s)                                        {                                                lex->prev_token_terminator = lex->token_terminator;                                                lex->token_terminator = s + 1;                                                report_invalid_token(lex);                                        }
/*                                         * We've got a real alphanumeric token here.  If it                                         * happens to be true, false, or null, all is well.  If                                         * not, error out.                                         */                                        lex->prev_token_terminator = lex->token_terminator;                                        lex->token_terminator = p;                                        if (p - s == 4)                                        {                                                if (memcmp(s, "true", 4) == 0)                                                        lex->token_type = JSON_TOKEN_TRUE;                                                else if (memcmp(s, "null", 4) == 0)                                                        lex->token_type = JSON_TOKEN_NULL;                                                else                                                        report_invalid_token(lex);                                        }                                        else if (p - s == 5 && memcmp(s, "false", 5) == 0)                                                lex->token_type = JSON_TOKEN_FALSE;                                        else                                                report_invalid_token(lex);
}                }                                               /* end of switch */}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息