Skip to content

Commit

Permalink
fix: chunk and merge json objects on postgres (#4555)
Browse files Browse the repository at this point in the history
Functions in PostgreSQL can only accept up to 100 arguments, which means that we can't build an object with more than 50 fields using `JSON_BUILD_OBJECT`. To work around that, we chunk the fields into subsets of 50 fields or less, build one or more JSONB objects using one or more `JSONB_BUILD_OBJECT` invocations, and merge them together using the `||` operator (which is not possible with plain JSON).

Another alternative that was considered and prototyped first was using `ROW_TO_JSON`, but it turned out not to be a suitable replacement for several reasons. The final deal breaker was [the limit on the length of field names](hasura/graphql-engine#4004 (comment)) (63 characters). Other problems included the lack of support for `ROW_TO_JSON` on MySQL, which would have required conditional logic in the query builder at the `sql-query-connector` level, introducing logic dependent on connector capabilities at an inappropriate abstraction layer. It was also difficult to build a query compatible with `ROW_TO_JSON` without overfetching data: we would need to select additional fields (e.g. for filtering and ordering) in order to forward them to the query above, with no easy way to exclude them from being added to the JSON object.

The workaround with JSONB doesn't suffer from these issues, and is completely isolated on the quaint level without leaking to the query engine.


Fixes: prisma/prisma#22298
Closes: #4550
  • Loading branch information
aqrln authored Dec 13, 2023
1 parent ea511f9 commit d8af2bb
Show file tree
Hide file tree
Showing 10 changed files with 353 additions and 18 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions quaint/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ metrics = "0.18"
futures = "0.3"
url = "2.1"
hex = "0.4"
itertools = "0.10"

either = { version = "1.6" }
base64 = { version = "0.12.3" }
Expand Down
2 changes: 2 additions & 0 deletions quaint/src/ast/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ pub(crate) enum FunctionType<'a> {
JsonExtractFirstArrayElem(JsonExtractFirstArrayElem<'a>),
#[cfg(any(feature = "postgresql", feature = "mysql"))]
JsonUnquote(JsonUnquote<'a>),
#[cfg(feature = "postgresql")]
JsonArrayAgg(JsonArrayAgg<'a>),
#[cfg(feature = "postgresql")]
JsonBuildObject(JsonBuildObject<'a>),
#[cfg(any(feature = "postgresql", feature = "mysql"))]
TextSearch(TextSearch<'a>),
Expand Down
27 changes: 10 additions & 17 deletions quaint/src/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ pub trait Visitor<'a> {
#[cfg(any(feature = "postgresql", feature = "mysql"))]
fn visit_json_unquote(&mut self, json_unquote: JsonUnquote<'a>) -> Result;

/// Writes the SQL for a JSON array aggregation (`JsonArrayAgg`).
///
/// Only compiled when the `postgresql` feature is enabled. Each visitor has to
/// provide its own implementation; there is no default body.
#[cfg(feature = "postgresql")]
fn visit_json_array_agg(&mut self, array_agg: JsonArrayAgg<'a>) -> Result;

/// Writes the SQL that builds a JSON object from the name/expression pairs in
/// a `JsonBuildObject`.
///
/// Only compiled when the `postgresql` feature is enabled. Each visitor has to
/// provide its own implementation; there is no default body.
#[cfg(feature = "postgresql")]
fn visit_json_build_object(&mut self, build_obj: JsonBuildObject<'a>) -> Result;

#[cfg(any(feature = "postgresql", feature = "mysql"))]
fn visit_text_search(&mut self, text_search: TextSearch<'a>) -> Result;

Expand Down Expand Up @@ -1132,26 +1138,13 @@ pub trait Visitor<'a> {
FunctionType::Concat(concat) => {
self.visit_concat(concat)?;
}
#[cfg(feature = "postgresql")]
FunctionType::JsonArrayAgg(array_agg) => {
self.write("JSON_AGG")?;
self.surround_with("(", ")", |s| s.visit_expression(*array_agg.expr))?;
self.visit_json_array_agg(array_agg)?;
}
#[cfg(feature = "postgresql")]
FunctionType::JsonBuildObject(build_obj) => {
let len = build_obj.exprs.len();

self.write("JSON_BUILD_OBJECT")?;
self.surround_with("(", ")", |s| {
for (i, (name, expr)) in build_obj.exprs.into_iter().enumerate() {
s.visit_raw_value(Value::text(name))?;
s.write(", ")?;
s.visit_expression(expr)?;
if i < (len - 1) {
s.write(", ")?;
}
}

Ok(())
})?;
self.visit_json_build_object(build_obj)?;
}
};

Expand Down
12 changes: 11 additions & 1 deletion quaint/src/visitor/mssql.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::Visitor;
#[cfg(any(feature = "postgresql", feature = "mysql"))]
use crate::prelude::{JsonExtract, JsonType, JsonUnquote};
use crate::prelude::{JsonArrayAgg, JsonBuildObject, JsonExtract, JsonType, JsonUnquote};
use crate::{
ast::{
Column, Comparable, Expression, ExpressionKind, Insert, IntoRaw, Join, JoinData, Joinable, Merge, OnConflict,
Expand Down Expand Up @@ -656,6 +656,16 @@ impl<'a> Visitor<'a> for Mssql<'a> {
unimplemented!("JSON filtering is not yet supported on MSSQL")
}

/// JSON array aggregation is not implemented by the MSSQL visitor.
///
/// # Panics
///
/// Always panics via `unimplemented!`.
#[cfg(feature = "postgresql")]
fn visit_json_array_agg(&mut self, _array_agg: JsonArrayAgg<'a>) -> visitor::Result {
unimplemented!("JSON_AGG is not yet supported on MSSQL")
}

/// Building JSON objects is not implemented by the MSSQL visitor.
///
/// # Panics
///
/// Always panics via `unimplemented!`.
#[cfg(feature = "postgresql")]
fn visit_json_build_object(&mut self, _build_obj: JsonBuildObject<'a>) -> visitor::Result {
unimplemented!("JSON_BUILD_OBJECT is not yet supported on MSSQL")
}

#[cfg(feature = "postgresql")]
fn visit_text_search(&mut self, _text_search: crate::prelude::TextSearch<'a>) -> visitor::Result {
unimplemented!("Full-text search is not yet supported on MSSQL")
Expand Down
10 changes: 10 additions & 0 deletions quaint/src/visitor/mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,16 @@ impl<'a> Visitor<'a> for Mysql<'a> {
Ok(())
}

/// JSON array aggregation is not implemented by the MySQL visitor.
///
/// # Panics
///
/// Always panics via `unimplemented!`.
#[cfg(feature = "postgresql")]
fn visit_json_array_agg(&mut self, _array_agg: JsonArrayAgg<'a>) -> visitor::Result {
unimplemented!("JSON_ARRAYAGG is not yet supported on MySQL")
}

/// Building JSON objects is not implemented by the MySQL visitor.
///
/// # Panics
///
/// Always panics via `unimplemented!`.
#[cfg(feature = "postgresql")]
fn visit_json_build_object(&mut self, _build_obj: JsonBuildObject<'a>) -> visitor::Result {
unimplemented!("JSON_OBJECT is not yet supported on MySQL")
}

fn visit_ordering(&mut self, ordering: Ordering<'a>) -> visitor::Result {
let len = ordering.0.len();

Expand Down
89 changes: 89 additions & 0 deletions quaint/src/visitor/postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::{
ast::*,
visitor::{self, Visitor},
};
use itertools::Itertools;
use std::{
fmt::{self, Write},
ops::Deref,
Expand Down Expand Up @@ -499,6 +500,57 @@ impl<'a> Visitor<'a> for Postgres<'a> {
}
}

/// Renders a JSON array aggregation as `JSONB_AGG(<expr>)`.
#[cfg(feature = "postgresql")]
fn visit_json_array_agg(&mut self, array_agg: JsonArrayAgg<'a>) -> visitor::Result {
    // Move the aggregated expression out of its box up front so that the
    // closure below only has to render it.
    let aggregated = *array_agg.expr;

    self.write("JSONB_AGG")?;
    self.surround_with("(", ")", |visitor| visitor.visit_expression(aggregated))?;

    Ok(())
}

/// Renders a JSON object constructor as one or more `JSONB_BUILD_OBJECT(...)`
/// calls merged with the `||` operator.
///
/// Every `(name, expr)` pair in `build_obj.exprs` becomes a `'name', <expr>`
/// argument pair. An object without any fields is rendered as
/// `JSONB_BUILD_OBJECT()`.
///
/// # Errors
///
/// Propagates any error returned while writing to the query or while visiting
/// one of the field expressions.
#[cfg(feature = "postgresql")]
fn visit_json_build_object(&mut self, build_obj: JsonBuildObject<'a>) -> visitor::Result {
    // Functions in PostgreSQL can only accept up to 100 arguments, which means that we can't
    // build an object with more than 50 fields using `JSON_BUILD_OBJECT`. To work around
    // that, we chunk the fields into subsets of 50 fields or less, build one or more JSONB
    // objects using one or more `JSONB_BUILD_OBJECT` invocations, and merge them together
    // using the `||` operator (which is not possible with plain JSON).
    //
    // See <https://github.com/prisma/prisma/issues/22298>.
    //
    // Another alternative that was considered for the specific use case of loading relations
    // in Query Engine was using `ROW_TO_JSON` but it turned out to not be a suitable
    // replacement for several reasons, the main one being the limit of the length of field
    // names (63 characters).
    const MAX_FIELDS: usize = 50;
    let num_chunks = build_obj.exprs.len().div_ceil(MAX_FIELDS);

    // Without any fields the chunking loop below never runs and nothing would be written,
    // leaving a hole in the generated SQL. Emit an explicit empty object instead.
    if num_chunks == 0 {
        self.write("JSONB_BUILD_OBJECT()")?;
        return Ok(());
    }

    for (i, chunk) in build_obj.exprs.into_iter().chunks(MAX_FIELDS).into_iter().enumerate() {
        // Peeking tells us whether another field follows, i.e. whether a separator is needed.
        let mut chunk = chunk.peekable();

        self.write("JSONB_BUILD_OBJECT")?;

        self.surround_with("(", ")", |s| {
            while let Some((name, expr)) = chunk.next() {
                s.visit_raw_value(Value::text(name))?;
                s.write(", ")?;
                s.visit_expression(expr)?;
                if chunk.peek().is_some() {
                    s.write(", ")?;
                }
            }

            Ok(())
        })?;

        // Merge with the next partial object, if there is one.
        if i < num_chunks - 1 {
            self.write(" || ")?;
        }
    }

    Ok(())
}

fn visit_text_search(&mut self, text_search: crate::prelude::TextSearch<'a>) -> visitor::Result {
let len = text_search.exprs.len();
self.surround_with("to_tsvector(concat_ws(' ', ", "))", |s| {
Expand Down Expand Up @@ -1209,4 +1261,41 @@ mod tests {

assert_eq!("SELECT MIN(\"enum\")::text, MAX(\"enum\")::text FROM \"User\"", sql);
}

mod test_json_build_object {
    use super::*;

    /// Creates an object with `num_fields` fields named `f1`, `f2`, ... whose
    /// values are the matching integers.
    fn build_json_object(num_fields: u32) -> JsonBuildObject<'static> {
        let exprs = (1..=num_fields)
            .map(|n| {
                let name = format!("f{n}");
                let value = Expression::from(i64::from(n));

                (name.into(), value)
            })
            .collect();

        JsonBuildObject { exprs }
    }

    /// A small object fits into a single `JSONB_BUILD_OBJECT` call.
    #[test]
    fn simple() {
        let select = Select::default().value(build_json_object(3));
        let (sql, _) = Postgres::build(select).unwrap();

        assert_eq!("SELECT JSONB_BUILD_OBJECT('f1', $1, 'f2', $2, 'f3', $3)", sql);
    }

    /// 110 fields are split into chunks of 50, 50 and 10 merged with `||`.
    #[test]
    fn chunked() {
        let expected = concat!(
            "SELECT JSONB_BUILD_OBJECT('f1', $1, 'f2', $2, 'f3', $3, 'f4', $4, 'f5', $5, 'f6', $6, 'f7', $7, 'f8', $8, 'f9', $9, 'f10', $10, 'f11', $11, 'f12', $12, 'f13', $13, 'f14', $14, 'f15', $15, 'f16', $16, 'f17', $17, 'f18', $18, 'f19', $19, 'f20', $20, 'f21', $21, 'f22', $22, 'f23', $23, 'f24', $24, 'f25', $25, 'f26', $26, 'f27', $27, 'f28', $28, 'f29', $29, 'f30', $30, 'f31', $31, 'f32', $32, 'f33', $33, 'f34', $34, 'f35', $35, 'f36', $36, 'f37', $37, 'f38', $38, 'f39', $39, 'f40', $40, 'f41', $41, 'f42', $42, 'f43', $43, 'f44', $44, 'f45', $45, 'f46', $46, 'f47', $47, 'f48', $48, 'f49', $49, 'f50', $50)",
            " || JSONB_BUILD_OBJECT('f51', $51, 'f52', $52, 'f53', $53, 'f54', $54, 'f55', $55, 'f56', $56, 'f57', $57, 'f58', $58, 'f59', $59, 'f60', $60, 'f61', $61, 'f62', $62, 'f63', $63, 'f64', $64, 'f65', $65, 'f66', $66, 'f67', $67, 'f68', $68, 'f69', $69, 'f70', $70, 'f71', $71, 'f72', $72, 'f73', $73, 'f74', $74, 'f75', $75, 'f76', $76, 'f77', $77, 'f78', $78, 'f79', $79, 'f80', $80, 'f81', $81, 'f82', $82, 'f83', $83, 'f84', $84, 'f85', $85, 'f86', $86, 'f87', $87, 'f88', $88, 'f89', $89, 'f90', $90, 'f91', $91, 'f92', $92, 'f93', $93, 'f94', $94, 'f95', $95, 'f96', $96, 'f97', $97, 'f98', $98, 'f99', $99, 'f100', $100)",
            " || JSONB_BUILD_OBJECT('f101', $101, 'f102', $102, 'f103', $103, 'f104', $104, 'f105', $105, 'f106', $106, 'f107', $107, 'f108', $108, 'f109', $109, 'f110', $110)"
        );

        let select = Select::default().value(build_json_object(110));
        let (sql, _) = Postgres::build(select).unwrap();

        assert_eq!(expected, sql);
    }
}
}
10 changes: 10 additions & 0 deletions quaint/src/visitor/sqlite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,16 @@ impl<'a> Visitor<'a> for Sqlite<'a> {
unimplemented!("JSON filtering is not yet supported on SQLite")
}

/// JSON array aggregation is not implemented by the SQLite visitor.
///
/// # Panics
///
/// Always panics via `unimplemented!`.
#[cfg(feature = "postgresql")]
fn visit_json_array_agg(&mut self, _array_agg: JsonArrayAgg<'a>) -> visitor::Result {
unimplemented!("JSON_AGG is not yet supported on SQLite")
}

/// Building JSON objects is not implemented by the SQLite visitor.
///
/// # Panics
///
/// Always panics via `unimplemented!`.
#[cfg(feature = "postgresql")]
fn visit_json_build_object(&mut self, _build_obj: JsonBuildObject<'a>) -> visitor::Result {
unimplemented!("JSON_BUILD_OBJECT is not yet supported on SQLite")
}

fn visit_ordering(&mut self, ordering: Ordering<'a>) -> visitor::Result {
let len = ordering.0.len();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ mod prisma_20799;
mod prisma_21182;
mod prisma_21369;
mod prisma_21901;
mod prisma_22298;
mod prisma_5952;
mod prisma_6173;
mod prisma_7010;
Expand Down
Loading

0 comments on commit d8af2bb

Please sign in to comment.