revision-id: 656dff97582cca416ecf5030ed114a610f8698df (mariadb-10.6.1-155-g656dff97582) parent(s): da8bb4b470389d1bee4d9f52fa3d29cee269e7b0 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-09-29 20:11:48 +0300 message: MDEV-26711: Values in JSON histograms are not properly quoted Escape values when serializing to JSON. Un-escape when reading back. --- mysql-test/main/statistics.result | 56 +++-- mysql-test/main/statistics.test | 22 +- mysql-test/main/statistics_json.result | 445 +++++++++++++++++++++++++++++++-- mysql-test/main/statistics_json.test | 11 + sql/my_json_writer.h | 2 + sql/opt_histogram_json.cc | 152 +++++++++-- 6 files changed, 608 insertions(+), 80 deletions(-) diff --git a/mysql-test/main/statistics.result b/mysql-test/main/statistics.result index 7cedb22cfac..7556900f4a2 100644 --- a/mysql-test/main/statistics.result +++ b/mysql-test/main/statistics.result @@ -223,15 +223,16 @@ test.t1 analyze status Table is already up to date SELECT db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, -hist_size, hist_type, HEX(histogram) -FROM mysql.column_stats; -db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) -test t1 a 0 49 0.0000 1.0000 4 SINGLE_PREC_HB 2E62A1D0 -test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 SINGLE_PREC_HB 003FBFFF -test t1 c aaaa dddddddd 0.1250 7.0000 4 SINGLE_PREC_HB 0055AAFF -test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 SINGLE_PREC_HB 001919FF -test t1 e 0.01 0.112 0.2250 6.2000 4 SINGLE_PREC_HB 000564E1 -test t1 f 1 5 0.2000 6.4000 4 SINGLE_PREC_HB 3F7FBFBF +hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; +db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram) +test t1 a 0 49 0.0000 1.0000 4 SINGLE_PREC_HB 0.180,0.204,0.247,0.184,0.184 +test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 SINGLE_PREC_HB 0.000,0.247,0.502,0.251,0.000 +test t1 c aaaa dddddddd 0.1250 7.0000 4 SINGLE_PREC_HB 0.000,0.333,0.333,0.333,0.000 +test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 SINGLE_PREC_HB 0.000,0.098,0.000,0.902,0.000 +test t1 e 0.01 0.112 0.2250 6.2000 4 SINGLE_PREC_HB 0.000,0.020,0.373,0.490,0.118 +test t1 f 1 5 0.2000 6.4000 4 SINGLE_PREC_HB 0.247,0.251,0.251,0.000,0.251 DELETE FROM mysql.column_stats; set histogram_size=8; set histogram_type=@DOUBLE_PREC_TYPE; @@ -242,15 +243,16 @@ test.t1 analyze status Table is already up to date SELECT db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, -hist_size, hist_type, HEX(histogram) -FROM mysql.column_stats; -db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) -test t1 a 0 49 0.0000 1.0000 8 DOUBLE_PREC_HB 052F4363F4A1F9D0 -test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 8 DOUBLE_PREC_HB 0000FF3FFFBFFFFF -test t1 c aaaa dddddddd 0.1250 7.0000 8 DOUBLE_PREC_HB 00005555AAAAFFFF -test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 8 DOUBLE_PREC_HB 0000031A031AFFFF -test t1 e 0.01 0.112 0.2250 6.2000 8 DOUBLE_PREC_HB 000005056464E1E1 -test t1 f 1 5 0.2000 6.4000 8 DOUBLE_PREC_HB FF3FFF7FFFBFFFBF +hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; +db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram) +test t1 a 0 49 0.0000 1.0000 8 DOUBLE_PREC_HB 0.18367,0.20407,0.24489,0.18367,0.18369 +test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 8 DOUBLE_PREC_HB 0.00000,0.24999,0.50001,0.25000,0.00000 +test t1 c aaaa dddddddd 0.1250 7.0000 8 DOUBLE_PREC_HB 0.00000,0.33333,0.33333,0.33333,0.00000 +test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 8 DOUBLE_PREC_HB 0.00000,0.10161,0.00000,0.89839,0.00000 +test t1 e 0.01 0.112 0.2250 6.2000 8 DOUBLE_PREC_HB 0.00000,0.01961,0.37255,0.49020,0.11765 +test t1 f 1 5 0.2000 6.4000 8 DOUBLE_PREC_HB 0.24999,0.25000,0.25000,0.00000,0.25000 DELETE FROM mysql.column_stats; set histogram_size= 0; set histogram_type=@SINGLE_PREC_TYPE; @@ -1578,10 +1580,11 @@ test.t1 analyze status OK select db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, -hist_size, hist_type, HEX(histogram) -FROM mysql.column_stats; -db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) -test t1 a 1 3 0.0000 1.0000 10 SINGLE_PREC_HB 0000007F7F7F7FFFFFFF +hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; +db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram) +test t1 a 1 3 0.0000 1.0000 10 SINGLE_PREC_HB 0.000,0.000,0.000,0.498,0.000,0.000,0.000,0.502,0.000,0.000,0.000 set histogram_size=default; drop table t1; # @@ -1603,10 +1606,11 @@ test.t1 analyze status OK select db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, -hist_size, hist_type, HEX(histogram) -FROM mysql.column_stats; -db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) -test t1 a 1 5 0.0000 1.0000 10 DOUBLE_PREC_HB 0000FF3FFF7FFFBFFFFF +hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; +db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram) +test t1 a 1 5 0.0000 1.0000 10 DOUBLE_PREC_HB 0.00000,0.24999,0.25000,0.25000,0.25000,0.00000 set histogram_size=0; set histogram_type=@SINGLE_PREC_TYPE; drop table t1; diff --git a/mysql-test/main/statistics.test b/mysql-test/main/statistics.test index a002006b9f5..cbb923bee66 100644 --- a/mysql-test/main/statistics.test +++ b/mysql-test/main/statistics.test @@ -187,12 +187,12 @@ DELETE FROM mysql.column_stats; set histogram_size=4; ANALYZE TABLE t1; ---sorted_result SELECT db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, - hist_size, hist_type, HEX(histogram) - FROM mysql.column_stats; + hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; DELETE FROM mysql.column_stats; @@ -200,12 +200,12 @@ set histogram_size=8; set histogram_type=@DOUBLE_PREC_TYPE; ANALYZE TABLE t1; ---sorted_result SELECT db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, - hist_size, hist_type, HEX(histogram) - FROM mysql.column_stats; + hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; DELETE FROM mysql.column_stats; @@ -718,8 +718,9 @@ analyze table t1 persistent for all; select db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, - hist_size, hist_type, HEX(histogram) - FROM mysql.column_stats; + hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; set histogram_size=default; @@ -743,8 +744,9 @@ analyze table t1 persistent for all; select db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, - hist_size, hist_type, HEX(histogram) - FROM mysql.column_stats; + hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; set histogram_size=0; set histogram_type=@SINGLE_PREC_TYPE; diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index dba562aadc9..478fd1aefc2 100644 --- a/mysql-test/main/statistics_json.result +++ b/mysql-test/main/statistics_json.result @@ -229,15 +229,160 @@ test.t1 analyze status Table is already up to date SELECT db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, -hist_size, hist_type, HEX(histogram) -FROM mysql.column_stats; -db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) -test t1 a 0 49 0.0000 1.0000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202230222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A2031300A202020207D2C0A202020207B0A202020202020227374617274223A20223131222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A2031300A202020207D2C0A202020207B0A202020202020227374617274223A20223234222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A2031300A202020207D2C0A202020207B0A202020202020227374617274223A20223338222C0A20202020202022656E64223A20223439222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A2031300A202020207D0A20205D0A7D -test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202276767676767676767676767676222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202277777777777777777777777777777777777777777777777777777777222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A20227878787878787878787878787878787878787878787878787878222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A2022797979222C0A20202020202022656E64223A20227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D0A20205D0A7D -test t1 c aaaa dddddddd 0.1250 7.0000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202261616161222C0A2020202020202273697A65223A20302E3235373134323835372C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022626262222C0A2020202020202273697A65223A20302E3232383537313432392C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A2022636363636363636363222C0A2020202020202273697A65223A20302E3232383537313432392C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A20226464646464646464222C0A20202020202022656E64223A20226464646464646464222C0A2020202020202273697A65223A20302E3238353731343238362C0A202020202020226E6476223A20310A202020207D0A20205D0A7D -test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A2022313938392D30332D3132222C0A2020202020202273697A65223A20302E3332333532393431322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022313939302D30352D3135222C0A2020202020202273697A65223A20302E3431313736343730362C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022313939382D30382D3238222C0A2020202020202273697A65223A20302E3233353239343131382C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A2022313939392D30372D3233222C0A20202020202022656E64223A2022313939392D30372D3233222C0A2020202020202273697A65223A20302E3032393431313736352C0A202020202020226E6476223A20310A202020207D0A20205D0A7D -test t1 e 0.01 0.112 0.2250 6.2000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A2022302E3031222C0A2020202020202273697A65223A20302E3338373039363737342C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E303132222C0A2020202020202273697A65223A20302E3232353830363435322C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A2022302E31222C0A2020202020202273697A65223A20302E3235383036343531362C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E313132222C0A20202020202022656E64223A2022302E313132222C0A2020202020202273697A65223A20302E3132393033323235382C0A202020202020226E6476223A20310A202020207D0A20205D0A7D -test t1 f 1 5 0.2000 6.4000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202201222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A202202222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A202203222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A202204222C0A20202020202022656E64223A202205222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D0A20205D0A7D +hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; +db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram) +test t1 a 0 49 0.0000 1.0000 4 JSON_HB { + "histogram_hb_v2": [ + { + "start": "0", + "size": 0.25, + "ndv": 10 + }, + { + "start": "11", + "size": 0.25, + "ndv": 10 + }, + { + "start": "24", + "size": 0.25, + "ndv": 10 + }, + { + "start": "38", + "end": "49", + "size": 0.25, + "ndv": 10 + } + ] +} +test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 JSON_HB { + "histogram_hb_v2": [ + { + "start": "vvvvvvvvvvvvv", + "size": 0.25, + "ndv": 1 + }, + { + "start": "wwwwwwwwwwwwwwwwwwwwwwwwwwww", + "size": 0.25, + "ndv": 2 + }, + { + "start": "xxxxxxxxxxxxxxxxxxxxxxxxxx", + "size": 0.25, + "ndv": 2 + }, + { + "start": "yyy", + "end": "zzzzzzzzzzzzzzzzzz", + "size": 0.25, + "ndv": 2 + } + ] +} +test t1 c aaaa dddddddd 0.1250 7.0000 4 JSON_HB { + "histogram_hb_v2": [ + { + "start": "aaaa", + "size": 0.257142857, + "ndv": 1 + }, + { + "start": "bbb", + "size": 0.228571429, + "ndv": 2 + }, + { + "start": "ccccccccc", + "size": 0.228571429, + "ndv": 1 + }, + { + "start": "dddddddd", + "end": "dddddddd", + "size": 0.285714286, + "ndv": 1 + } + ] +} +test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON_HB { + "histogram_hb_v2": [ + { + "start": "1989-03-12", + "size": 0.323529412, + "ndv": 1 + }, + { + "start": "1990-05-15", + "size": 0.411764706, + "ndv": 1 + }, + { + "start": "1998-08-28", + "size": 0.235294118, + "ndv": 2 + }, + { + "start": "1999-07-23", + "end": "1999-07-23", + "size": 0.029411765, + "ndv": 1 + } + ] +} +test t1 e 0.01 0.112 0.2250 6.2000 4 JSON_HB { + "histogram_hb_v2": [ + { + "start": "0.01", + "size": 0.387096774, + "ndv": 1 + }, + { + "start": "0.012", + "size": 0.225806452, + "ndv": 2 + }, + { + "start": "0.1", + "size": 0.258064516, + "ndv": 1 + }, + { + "start": "0.112", + "end": "0.112", + "size": 0.129032258, + "ndv": 1 + } + ] +} +test t1 f 1 5 0.2000 6.4000 4 JSON_HB { + "histogram_hb_v2": [ + { + "start": "\u0001", + "size": 0.25, + "ndv": 2 + }, + { + "start": "\u0002", + "size": 0.25, + "ndv": 2 + }, + { + "start": "\u0003", + "size": 0.25, + "ndv": 2 + }, + { + "start": "\u0004", + "end": "\u0005", + "size": 0.25, + "ndv": 2 + } + ] +} DELETE FROM mysql.column_stats; set histogram_size=8; set histogram_type=@DOUBLE_PREC_TYPE; @@ -248,15 +393,200 @@ test.t1 analyze status Table is already up to date SELECT db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, -hist_size, hist_type, HEX(histogram) -FROM mysql.column_stats; -db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) -test t1 a 0 49 0.0000 1.0000 8 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202230222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A202235222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223131222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223138222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223234222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223332222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223338222C0A202020202020227369 7A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223433222C0A20202020202022656E64223A20223439222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D0A20205D0A7D -test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 5 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202276767676767676767676767676222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202277777777777777777777777777777777777777777777777777777777222C0A2020202020202273697A65223A20302E32313837352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A20227878787878787878787878787878787878787878787878787878222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022797979222C0A2020202020202273697A65223A20302E313837352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A20227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A222C0A20202020202022656E64223A20227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A222C0A2020202020202273697A65223A20302E3231 3837352C0A202020202020226E6476223A20310A202020207D0A20205D0A7D -test t1 c aaaa dddddddd 0.1250 7.0000 5 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202261616161222C0A2020202020202273697A65223A20302E3235373134323835372C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022626262222C0A2020202020202273697A65223A20302E3032383537313432392C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022626262626262222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022636363636363636363222C0A2020202020202273697A65223A20302E3232383537313432392C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A20226464646464646464222C0A20202020202022656E64223A20226464646464646464222C0A2020202020202273697A65223A20302E3238353731343238362C0A202020202020226E6476223A20310A202020207D0A20205D0A7D -test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A2022313938392D30332D3132222C0A2020202020202273697A65223A20302E3332333532393431322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022313939302D30352D3135222C0A2020202020202273697A65223A20302E3431313736343730362C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022313939382D30382D3238222C0A2020202020202273697A65223A20302E3032393431313736352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022313939392D30372D3233222C0A20202020202022656E64223A2022313939392D30372D3233222C0A2020202020202273697A65223A20302E3233353239343131382C0A202020202020226E6476223A20310A202020207D0A20205D0A7D -test t1 e 0.01 0.112 0.2250 6.2000 5 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A2022302E3031222C0A2020202020202273697A65223A20302E3338373039363737342C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E303132222C0A2020202020202273697A65223A20302E3036343531363132392C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E3035222C0A2020202020202273697A65223A20302E3136313239303332332C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E31222C0A2020202020202273697A65223A20302E3235383036343531362C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E313132222C0A20202020202022656E64223A2022302E313132222C0A2020202020202273697A65223A20302E3132393033323235382C0A202020202020226E6476223A20310A202020207D0A20205D0A7D -test t1 f 1 5 0.2000 6.4000 5 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202201222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202202222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202203222C0A2020202020202273697A65223A20302E313837352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202204222C0A2020202020202273697A65223A20302E333132352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202205222C0A20202020202022656E64223A202205222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20310A202020207D0A20205D0A7D +hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; +db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram) +test t1 a 0 49 0.0000 1.0000 8 JSON_HB { + "histogram_hb_v2": [ + { + "start": "0", + "size": 0.125, + "ndv": 5 + }, + { + "start": "5", + "size": 0.125, + "ndv": 5 + }, + { + "start": "11", + "size": 0.125, + "ndv": 5 + }, + { + "start": "18", + "size": 0.125, + "ndv": 5 + }, + { + "start": "24", + "size": 0.125, + "ndv": 5 + }, + { + "start": "32", + "size": 0.125, + "ndv": 5 + }, + { + "start": "38", + "size": 0.125, + "ndv": 5 + }, + { + "start": "43", + "end": "49", + "size": 0.125, + "ndv": 5 + } + ] +} +test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 5 JSON_HB { + "histogram_hb_v2": [ + { + "start": "vvvvvvvvvvvvv", + "size": 0.25, + "ndv": 1 + }, + { + "start": "wwwwwwwwwwwwwwwwwwwwwwwwwwww", + "size": 0.21875, + "ndv": 1 + }, + { + "start": "xxxxxxxxxxxxxxxxxxxxxxxxxx", + "size": 0.125, + "ndv": 1 + }, + { + "start": "yyy", + "size": 0.1875, + "ndv": 1 + }, + { + "start": "zzzzzzzzzzzzzzzzzz", + "end": "zzzzzzzzzzzzzzzzzz", + "size": 0.21875, + "ndv": 1 + } + ] +} +test t1 c aaaa dddddddd 0.1250 7.0000 5 JSON_HB { + "histogram_hb_v2": [ + { + "start": "aaaa", + "size": 0.257142857, + "ndv": 1 + }, + { + "start": "bbb", + "size": 0.028571429, + "ndv": 1 + }, + { + "start": "bbbbbb", + "size": 0.2, + "ndv": 1 + }, + { + "start": "ccccccccc", + "size": 0.228571429, + "ndv": 1 + }, + { + "start": "dddddddd", + "end": "dddddddd", + "size": 0.285714286, + "ndv": 1 + } + ] +} +test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON_HB { + "histogram_hb_v2": [ + { + "start": "1989-03-12", + "size": 0.323529412, + "ndv": 1 + }, + { + "start": "1990-05-15", + "size": 0.411764706, + "ndv": 1 + }, + { + "start": "1998-08-28", + "size": 0.029411765, + "ndv": 1 + }, + { + "start": "1999-07-23", + "end": "1999-07-23", + "size": 0.235294118, + "ndv": 1 + } + ] +} +test t1 e 0.01 0.112 0.2250 6.2000 5 JSON_HB { + "histogram_hb_v2": [ + { + "start": "0.01", + "size": 0.387096774, + "ndv": 1 + }, + { + "start": "0.012", + "size": 0.064516129, + "ndv": 1 + }, + { + "start": "0.05", + "size": 0.161290323, + "ndv": 1 + }, + { + "start": "0.1", + "size": 0.258064516, + "ndv": 1 + }, + { + "start": "0.112", + "end": "0.112", + "size": 0.129032258, + "ndv": 1 + } + ] +} +test t1 f 1 5 0.2000 6.4000 5 JSON_HB { + "histogram_hb_v2": [ + { + "start": "\u0001", + "size": 0.125, + "ndv": 1 + }, + { + "start": "\u0002", + "size": 0.25, + "ndv": 1 + }, + { + "start": "\u0003", + "size": 0.1875, + "ndv": 1 + }, + { + "start": "\u0004", + "size": 0.3125, + "ndv": 1 + }, + { + "start": "\u0005", + "end": "\u0005", + "size": 0.125, + "ndv": 1 + } + ] +} DELETE FROM mysql.column_stats; set histogram_size= 0; set histogram_type=@SINGLE_PREC_TYPE; @@ -3332,10 +3662,30 @@ test.t1 analyze status OK select db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, -hist_size, hist_type, HEX(histogram) -FROM mysql.column_stats; -db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) -test t1 a 1 3 0.0000 1.0000 3 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202231222C0A2020202020202273697A65223A20302E3333333333333333332C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202232222C0A2020202020202273697A65223A20302E3333333333333333332C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202233222C0A20202020202022656E64223A202233222C0A2020202020202273697A65223A20302E3333333333333333332C0A202020202020226E6476223A20310A202020207D0A20205D0A7D +hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; +db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram) +test t1 a 1 3 0.0000 1.0000 3 JSON_HB { + "histogram_hb_v2": [ + { + "start": "1", + "size": 0.333333333, + "ndv": 1 + }, + { + "start": "2", + "size": 0.333333333, + "ndv": 1 + }, + { + "start": "3", + "end": "3", + "size": 0.333333333, + "ndv": 1 + } + ] +} set histogram_size=default; drop table t1; # @@ -3357,10 +3707,40 @@ test.t1 analyze status OK select db_name, table_name, column_name, min_value, max_value, nulls_ratio, avg_frequency, -hist_size, hist_type, HEX(histogram) -FROM mysql.column_stats; -db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) -test t1 a 1 5 0.0000 1.0000 5 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202231222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202232222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202233222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202234222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202235222C0A20202020202022656E64223A202235222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D0A20205D0A7D +hist_size, hist_type, decode_histogram(hist_type,histogram) +FROM mysql.column_stats +ORDER BY db_name, table_name, column_name; +db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram) +test t1 a 1 5 0.0000 1.0000 5 JSON_HB { + "histogram_hb_v2": [ + { + "start": "1", + "size": 0.2, + "ndv": 1 + }, + { + "start": "2", + "size": 0.2, + "ndv": 1 + }, + { + "start": "3", + "size": 0.2, + "ndv": 1 + }, + { + "start": "4", + "size": 0.2, + "ndv": 1 + }, + { + "start": "5", + "end": "5", + "size": 0.2, + "ndv": 1 + } + ] +} set histogram_size=0; set histogram_type=@SINGLE_PREC_TYPE; drop table t1; @@ -7460,3 +7840,16 @@ b a 1 foo 2 bar drop table t1; +# +# MDEV-26711: Values in JSON histograms are not properly quoted +# +create table t1 (a varchar(32)); +insert into t1 values ('this is "quoted" text'); +set histogram_type= JSON_HB; +analyze table t1 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +select * from t1 where a = 'foo'; +a +drop table t1; diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test index c5bac951f21..c8fa05c834a 100644 --- a/mysql-test/main/statistics_json.test +++ b/mysql-test/main/statistics_json.test @@ -182,3 +182,14 @@ SET histogram_type= JSON_HB; ANALYZE TABLE t1 PERSISTENT FOR ALL; SELECT * FROM t1; drop table t1; + +--echo # +--echo # MDEV-26711: Values in JSON histograms are not properly quoted +--echo # +create table t1 (a varchar(32)); +insert into t1 values ('this is "quoted" text'); +set histogram_type= JSON_HB; +analyze table t1 persistent for all; +select * from t1 where a = 'foo'; +drop table t1; + diff --git a/sql/my_json_writer.h b/sql/my_json_writer.h index 27aec74d08d..24d404535ec 100644 --- a/sql/my_json_writer.h +++ b/sql/my_json_writer.h @@ -187,6 +187,8 @@ class Json_writer Json_writer& add_member(const char *name, size_t len); /* Add atomic values */ + + /* Note: the add_str methods do not do escapes. Should this change? */ void add_str(const char* val); void add_str(const char* val, size_t num_bytes); void add_str(const String &str); diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc index 66377d55aec..dc56a32d000 100644 --- a/sql/opt_histogram_json.cc +++ b/sql/opt_histogram_json.cc @@ -20,6 +20,73 @@ #include "sql_statistics.h" #include "opt_histogram_json.h" + +/* + Un-escape a JSON string and save it into *out. +*/ + +static bool json_unescape_to_string(const char *val, int val_len, String* out) +{ + // Make sure 'out' has some memory allocated. + if (!out->alloced_length() && out->alloc(128)) + return true; + + while (1) + { + uchar *buf= (uchar*)out->ptr(); + out->length(out->alloced_length()); + + int res= json_unescape(&my_charset_utf8mb4_bin, + (const uchar*)val, + (const uchar*)val + val_len, + &my_charset_utf8mb4_bin, + buf, buf + out->length()); + if (res > 0) + { + out->length(res); + return false; // Ok + } + + // We get here if the unescaped string didn't fit into memory. + if (out->alloc(out->alloced_length()*2)) + return true; + } +} + + +/* + Escape a JSON string and save it into *out. +*/ + +static bool json_escape_to_string(const char *val, int val_len, String* out) +{ + // Make sure 'out' has some memory allocated. + if (!out->alloced_length() && out->alloc(128)) + return true; + + while (1) + { + uchar *buf= (uchar*)out->ptr(); + out->length(out->alloced_length()); + + int res= json_escape(&my_charset_utf8mb4_bin, + (const uchar*)val, + (const uchar*)val + val_len, + &my_charset_utf8mb4_bin, + buf, buf + out->length()); + if (res > 0) + { + out->length(res); + return false; // Ok + } + + // We get here if the escaped string didn't fit into memory. + if (out->alloc(out->alloced_length()*2)) + return true; + } +} + + class Histogram_json_builder : public Histogram_builder { Histogram_json_hb *histogram; @@ -72,6 +139,7 @@ class Histogram_json_builder : public Histogram_builder ~Histogram_json_builder() override = default; +private: bool bucket_is_empty() { return bucket.ndv == 0; } /* @@ -92,13 +160,13 @@ class Histogram_json_builder : public Histogram_builder /* Same as finalize_bucket() but also provide the bucket's end value. */ - void finalize_bucket_with_end_value(void *elem) + bool finalize_bucket_with_end_value(void *elem) { - column->store_field_value((uchar*) elem, col_length); - StringBuffer<MAX_FIELD_WIDTH> val; - String *str= column->val_str(&val); - writer.add_member("end").add_str(str->c_ptr_safe()); + writer.add_member("end"); + if (append_column_value(elem)) + return true; finalize_bucket(); + return false; } /* @@ -106,18 +174,38 @@ class Histogram_json_builder : public Histogram_builder @param elem The value we are writing @param cnt The number of such values. */ - void start_bucket(void *elem, longlong cnt) + bool start_bucket(void *elem, longlong cnt) { DBUG_ASSERT(bucket.size == 0); - column->store_field_value((uchar*) elem, col_length); - StringBuffer<MAX_FIELD_WIDTH> val; - String *str= column->val_str(&val); - writer.start_object(); - writer.add_member("start").add_str(str->c_ptr_safe()); + writer.add_member("start"); + if (append_column_value(elem)) + return true; bucket.ndv= 1; bucket.size= cnt; + return false; + } + + /* + Append the passed value into the JSON writer as string value + */ + bool append_column_value(void *elem) + { + StringBuffer<MAX_FIELD_WIDTH> val; + + // Get the text representation of the value + column->store_field_value((uchar*) elem, col_length); + String *str= column->val_str(&val); + + // Escape the value for JSON + StringBuffer<MAX_FIELD_WIDTH> escaped_val; + if (json_escape_to_string(str->ptr(), str->length(), &escaped_val)) + return true; + + // Note: The Json_writer does NOT do escapes (perhaps this should change?) + writer.add_str(escaped_val.c_ptr_safe()); + return false; } /* @@ -129,6 +217,7 @@ class Histogram_json_builder : public Histogram_builder bucket.size += cnt; } +public: /* @brief Add data to the histogram. @@ -169,9 +258,14 @@ class Histogram_json_builder : public Histogram_builder finalize_bucket(); // Start/end the separate bucket for this value group. - start_bucket(elem, elem_cnt); + if (start_bucket(elem, elem_cnt)) + return 1; // OOM + if (records == count) - finalize_bucket_with_end_value(elem); + { + if (finalize_bucket_with_end_value(elem)) + return 1; + } else finalize_bucket(); } @@ -191,21 +285,28 @@ class Histogram_json_builder : public Histogram_builder */ append_to_bucket(bucket_capacity - bucket.size); if (records == count && !overflow) - finalize_bucket_with_end_value(elem); + { + if (finalize_bucket_with_end_value(elem)) + return 1; + } else finalize_bucket(); if (overflow > 0) { // Then, start the new bucket with the remaining values. - start_bucket(elem, overflow); + if (start_bucket(elem, overflow)) + return 1; } } else { // Case #3: there's not enough values to fill the current bucket. if (bucket_is_empty()) - start_bucket(elem, elem_cnt); + { + if (start_bucket(elem, elem_cnt)) + return 1; + } else append_to_bucket(elem_cnt); } @@ -273,6 +374,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, double cumulative_size= 0.0; size_t end_member_index= (size_t)-1; StringBuffer<128> value_buf; + StringBuffer<128> unescape_buf; if (JSV_OBJECT != json_type(hist_data, hist_data + hist_data_len, &obj1, &obj1_len)) @@ -372,7 +474,14 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, } uint len_to_copy= field->key_length(); - field->store_text(val, val_len, &my_charset_bin); + if (json_unescape_to_string(val, val_len, &unescape_buf)) + { + err_pos= ndv; + err= "Out of memory"; + goto error; + } + field->store_text(unescape_buf.ptr(), unescape_buf.length(), + &my_charset_bin); value_buf.alloc(field->pack_length()); uint bytes= field->get_key_image((uchar*)value_buf.ptr(), len_to_copy, Field::itRAW); @@ -392,7 +501,14 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, } if (ret != JSV_NOTHING) { - field->store_text(end_val, end_val_len, &my_charset_bin); + if (json_unescape_to_string(end_val, end_val_len, &unescape_buf)) + { + err_pos= bucket_info; + err= "Out of memory"; + goto error; + } + field->store_text(unescape_buf.ptr(), unescape_buf.length(), + &my_charset_bin); value_buf.alloc(field->pack_length()); uint bytes= field->get_key_image((uchar*)value_buf.ptr(), len_to_copy, Field::itRAW);