PostgreSQL json jsonb 支持的value数据类型,如何构造一个jsonb
2015-10-14 22:07
555 查看
Postgres2015全国用户大会将于11月20至21日在北京丽亭华苑酒店召开。本次大会嘉宾阵容强大,国内顶级PostgreSQL数据库专家将悉数到场,并特邀欧洲、俄罗斯、日本、美国等国家和地区的数据库方面专家助阵:
Postgres-XC项目的发起人铃木市一(SUZUKI Koichi)
Postgres-XL的项目发起人Mason Sharp
pgpool的作者石井达夫(Tatsuo Ishii)
PG-Strom的作者海外浩平(Kaigai Kohei)
Greenplum研发总监姚延栋
周正中(德哥), PostgreSQL中国用户会创始人之一
汪洋,平安科技数据库技术部经理
……
2015年度PG大象会报名地址:http://postgres2015.eventdove.com/ PostgreSQL中国社区: http://postgres.cn/ PostgreSQL专业1群: 3336901(已满) PostgreSQL专业2群: 100910388 PostgreSQL专业3群: 150657323 |
目前支持string, numeric, boolean (true, false), null。 区分大小写哦。
/* * Recursive Descent parse routines. There is one for each structural * element in a json document: * - scalar (string, number, true, false, null) * - array ( [ ] ) * - array element * - object ( { } ) * - object field */static inline voidparse_scalar(JsonLexContext *lex, JsonSemAction *sem){ char *val = NULL; json_scalar_action sfunc = sem->scalar; char **valaddr; JsonTokenType tok = lex_peek(lex); valaddr = sfunc == NULL ? NULL : &val; /* a scalar must be a string, a number, true, false, or null */ switch (tok) { case JSON_TOKEN_TRUE: lex_accept(lex, JSON_TOKEN_TRUE, valaddr); break; case JSON_TOKEN_FALSE: lex_accept(lex, JSON_TOKEN_FALSE, valaddr); break; case JSON_TOKEN_NULL: lex_accept(lex, JSON_TOKEN_NULL, valaddr); break; case JSON_TOKEN_NUMBER: lex_accept(lex, JSON_TOKEN_NUMBER, valaddr); break; case JSON_TOKEN_STRING: lex_accept(lex, JSON_TOKEN_STRING, valaddr); break; default: report_parse_error(JSON_PARSE_VALUE, lex); } if (sfunc != NULL) (*sfunc) (sem->semstate, val, tok);}
例子:
postgres=# select jsonb '{"a": TRUE}';ERROR: invalid input syntax for type jsonLINE 1: select jsonb '{"a": TRUE}'; ^DETAIL: Token "TRUE" is invalid.CONTEXT: JSON data, line 1: {"a": TRUE...postgres=# select jsonb '{"a": true}'; jsonb ------------- {"a": true}(1 row) postgres=# select jsonb '{"a": false}'; jsonb -------------- {"a": false}(1 row) postgres=# select jsonb '{"a": NULL}';ERROR: invalid input syntax for type jsonLINE 1: select jsonb '{"a": NULL}'; ^DETAIL: Token "NULL" is invalid.CONTEXT: JSON data, line 1: {"a": NULL...postgres=# select jsonb '{"a": null}'; jsonb ------------- {"a": null}(1 row) postgres=# select jsonb '{"a": 10.001}'; jsonb --------------- {"a": 10.001}(1 row) postgres=# select jsonb '{"a": "10.001"}'; jsonb ----------------- {"a": "10.001"}(1 row)
src/backend/utils/adt/json.c
解析json,jsonb时,定义的token类型,这个和JSONB里面能存什么类型没有关系:
/* * The context of the parser is maintained by the recursive descent * mechanism, but is passed explicitly to the error reporting routine * for better diagnostics. */typedef enum /* contexts of JSON parser */{ JSON_PARSE_VALUE, /* expecting a value */ JSON_PARSE_STRING, /* expecting a string (for a field name) */ JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */ JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */ JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */ JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */ JSON_PARSE_OBJECT_NEXT /* saw object value, expecting ',' or '}' */ JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */ JSON_PARSE_END /* saw the end of a document, expect nothing */} JsonParseContext;
实际上jsonb里没有类型的概念,你前面看到的TOKEN实际上也就是字符串匹配而已,你可以理解为jsonb内部就是一个有一定规则的字符串。
那么我们怎么来构造一个JSONB类型,并且在使用时可以很好的处理里面的数据类型呢?
这里需要用到PostgreSQL的输入输出函数,一般每个类型都会有对应的in和out函数,例如:
pg_catalog | anyrange_in | anyrange | cstring, oid, integer | normal pg_catalog | anyrange_out | cstring | anyrange | normal pg_catalog | timestamptz_out | cstring | timestamp with time zone | normal pg_catalog | timestamptz_in | timestamp with time zone | cstring, oid, integer | normal
/* timestamptz_in() * Convert a string to internal form. */Datumtimestamptz_in(PG_FUNCTION_ARGS){ char *str = PG_GETARG_CSTRING(0); #ifdef NOT_USED Oid typelem = PG_GETARG_OID(1);#endif int32 typmod = PG_GETARG_INT32(2);
postgres=# select timestamptz_out(now()); timestamptz_out ------------------------------- 2015-09-24 19:44:16.076233+08(1 row) postgres=# select oid,* from pg_type where typname='timestamptz';-[ RECORD 1 ]--+---------------------oid | 1184typname | timestamptztypnamespace | 11typowner | 10typlen | 8typbyval | ttyptype | btypcategory | Dtypispreferred | ttypisdefined | ttypdelim | ,typrelid | 0typelem | 0typarray | 1185typinput | timestamptz_intypoutput | timestamptz_outtypreceive | timestamptz_recvtypsend | timestamptz_sendtypmodin | timestamptztypmodintypmodout | timestamptztypmodouttypanalyze | -typalign | dtypstorage | ptypnotnull | ftypbasetype | 0typtypmod | -1typndims | 0typcollation | 0typdefaultbin | typdefault | typacl |
typmode表示精度
postgres=# select timestamptz_in('2015-09-24 19:44:16.076233+08',1184,1);-[ RECORD 1 ]--+-------------------------timestamptz_in | 2015-09-24 19:44:16.1+08postgres=# select timestamptz_in('2015-09-24 19:44:16.076233+08',1184,6);-[ RECORD 1 ]--+------------------------------timestamptz_in | 2015-09-24 19:44:16.076233+08
使用format格式来构造jsonb类型是比较靠谱的。
代码见varlena.c
/* * Returns a formatted string */Datumtext_format(PG_FUNCTION_ARGS){......
例子
构造一个jsonb的cstring
postgres=# select format('{"K": "%s"}', int8range(1,10)); format ----------------- {"K": "[1,10)"}(1 row)
使用jsonb_in把cstring转换为JSONB
postgres=# select jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring ); jsonb_in ----------------- {"K": "[1,10)"}(1 row)
取出K的element
postgres=# select jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring ) ->> 'K'; ?column? ---------- [1,10)(1 row)
将element转换回去
postgres=# select (jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring ) ->> 'K') :: int8range; int8range ----------- [1,10)(1 row)
再举个例子
postgres=# select format('{"K": "%s"}', '你好'::bytea); format ------------------------- {"K": "\xe4bda0e5a5bd"}(1 row)postgres=# select convert_from('\xe4bda0e5a5bd', 'utf8'); convert_from -------------- 你好(1 row)postgres=# \set VERBOSITY verbosepostgres=# select jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring );ERROR: 22P02: invalid input syntax for type jsonDETAIL: Escape sequence "\x" is invalid.CONTEXT: JSON data, line 1: {"K": "\x...LOCATION: json_lex_string, json.c:890
非常遗憾,JSONB会处理escape,而\x是不合法的,报错了。这块代码可以改一改。
代码如下:
/* * The next token in the input stream is known to be a string; lex it. */static inline voidjson_lex_string(JsonLexContext *lex){...... switch (*s) { case '"': case '\\': case '/': appendStringInfoChar(lex->strval, *s); break; case 'b': appendStringInfoChar(lex->strval, '\b'); break; case 'f': appendStringInfoChar(lex->strval, '\f'); break; case 'n': appendStringInfoChar(lex->strval, '\n'); break; case 'r': appendStringInfoChar(lex->strval, '\r'); break; case 't': appendStringInfoChar(lex->strval, '\t'); break; default: /* Not a valid string escape, so error out. */ lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), errdetail("Escape sequence \"\\%s\" is invalid.", extract_mb_char(s)), report_json_context(lex))); }
修改后,可以实现存储BYTEA,但是输出时还需要转换一下。
postgres=# select jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring ) ->> 'K'; ?column? -------------- e4bda0e5a5bd(1 row) postgres=# select convert_from( byteain (('\x'||(jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring ) ->> 'K'))::cstring), 'utf8'::name); convert_from -------------- 你好(1 row)
用format来构造一个字符串,在里面使用%s填充需要的cstring,从而达到构造jsonb的目的。
这种方法同样适用构造JSONB中包含postgis的几何类型。
[参考]
1. http://www.postgresql.org/docs/9.4/static/functions-string.html
2. src/backend/utils/adt/json.c
/* * Lex one token from the input stream. */static inline voidjson_lex(JsonLexContext *lex){ char *s; int len; /* Skip leading whitespace. */ s = lex->token_terminator; len = s - lex->input; while (len < lex->input_length && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')) { if (*s == '\n') ++lex->line_number; ++s; ++len; } lex->token_start = s; /* Determine token type. */ if (len >= lex->input_length) { lex->token_start = NULL; lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s; lex->token_type = JSON_TOKEN_END; } else switch (*s) { /* Single-character token, some kind of punctuation mark. */ case '{': lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; lex->token_type = JSON_TOKEN_OBJECT_START; lex->token_terminator = s + 1; lex->token_type = JSON_TOKEN_OBJECT_START; break; case '}': lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; lex->token_type = JSON_TOKEN_OBJECT_END; break; case '[': lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; lex->token_type = JSON_TOKEN_ARRAY_START; break; case ']': lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; lex->token_type = JSON_TOKEN_ARRAY_END; break; case ',': lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; lex->token_type = JSON_TOKEN_COMMA; break; case ':': lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; lex->token_type = JSON_TOKEN_COLON; break; case '"': /* string */ json_lex_string(lex); lex->token_type = JSON_TOKEN_STRING; break; case '-': /* Negative number. */ json_lex_number(lex, s + 1, NULL); lex->token_type = JSON_TOKEN_NUMBER; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* Positive number. */ json_lex_number(lex, s, NULL); lex->token_type = JSON_TOKEN_NUMBER; break; default: { char *p; { char *p; /* * We're not dealing with a string, number, legal * punctuation mark, or end of string. The only legal * tokens we might find here are true, false, and null, * but for error reporting purposes we scan until we see a * non-alphanumeric character. That way, we can report * the whole word as an unexpected token, rather than just * some unintuitive prefix thereof. */ for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++) /* skip */ ; /* * We got some sort of unexpected punctuation or an * otherwise unexpected character, so just complain about * that one character. */ if (p == s) { lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; report_invalid_token(lex); } /* * We've got a real alphanumeric token here. If it * happens to be true, false, or null, all is well. If * not, error out. */ lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = p; if (p - s == 4) { if (memcmp(s, "true", 4) == 0) lex->token_type = JSON_TOKEN_TRUE; else if (memcmp(s, "null", 4) == 0) lex->token_type = JSON_TOKEN_NULL; else report_invalid_token(lex); } else if (p - s == 5 && memcmp(s, "false", 5) == 0) lex->token_type = JSON_TOKEN_FALSE; else report_invalid_token(lex); } } /* end of switch */}
相关文章推荐
- MySQL中的integer 数据类型
- Android之获取手机上的图片和视频缩略图thumbnails
- 数据库链接字符串查询网站
- MySQL 数据类型
- DB2实例管理
- DB2实例管理
- XML 与 JSON 优劣对比
- 保障MySQL数据安全的14个最佳方法
- mysql问答汇集
- VBA将excel数据表生成JSON文件
- 创建一个空的IBM DB2 ECO数据库的方法
- Access 2000 数据库 80 万记录通用快速分页类
- 开通一个数据库失败的原因的和解决办法
- 一个简单的asp数据库操作类
- CentOS下DB2数据库安装过程详解
- PostgreSQL新手入门教程
- EasyASP v1.5发布(包含数据库操作类,原clsDbCtrl.asp)第1/2页
- PostgreSQL教程(十):性能提升技巧