|
| 1 | +DataLoader |
| 2 | +========== |
| 3 | + |
| 4 | +The DataLoader pattern, named after the corresponding [`dataloader` NPM package][0], represents a mechanism of batching and caching data requests in a delayed manner for solving the [N+1 problem](n_plus_1.md). |
| 5 | + |
| 6 | +> A port of the "Loader" API originally developed by [@schrockn] at Facebook in 2010 as a simplifying force to coalesce the sundry key-value store back-end APIs which existed at the time. At Facebook, "Loader" became one of the implementation details of the "Ent" framework, a privacy-aware data entity loading and caching layer within web server product code. This ultimately became the underpinning for Facebook's GraphQL server implementation and type definitions. |
| 7 | +
|
| 8 | +In the [Rust] ecosystem, the DataLoader pattern is introduced by the [`dataloader` crate][1], which is naturally usable with [Juniper]. |
| 9 | + |
| 10 | +Let's remake our [example of the N+1 problem](n_plus_1.md), so that it's solved by applying the DataLoader pattern: |
| 11 | +```rust |
| 12 | +# extern crate anyhow; |
| 13 | +# extern crate dataloader; |
| 14 | +# extern crate juniper; |
| 15 | +# use std::{collections::HashMap, sync::Arc}; |
| 16 | +# use anyhow::anyhow; |
| 17 | +# use dataloader::non_cached::Loader; |
| 18 | +# use juniper::{graphql_object, GraphQLObject}; |
| 19 | +# |
| 20 | +# type CultId = i32; |
| 21 | +# type UserId = i32; |
| 22 | +# |
| 23 | +# struct Repository; // Stand-in data source: both of its methods are `unimplemented!()` stubs. |
| 24 | +# |
| 25 | +# impl Repository { |
| 26 | +# async fn load_cults_by_ids(&self, cult_ids: &[CultId]) -> anyhow::Result<HashMap<CultId, Cult>> { unimplemented!() } |
| 27 | +# async fn load_all_persons(&self) -> anyhow::Result<Vec<Person>> { unimplemented!() } |
| 28 | +# } |
| 29 | +# |
| 30 | +struct Context { |
| 31 | + repo: Repository, // Queried directly by `Query::persons()`. |
| 32 | + cult_loader: CultLoader, // Batches the `Cult` look-ups issued by `Person::cult()`. |
| 33 | +} |
| 34 | + |
| 35 | +impl juniper::Context for Context {} // Empty marker impl, so `Context` can be named in `#[graphql(context = Context)]`. |
| 36 | + |
| 37 | +#[derive(Clone, GraphQLObject)] |
| 38 | +struct Cult { // Selected as `cult { id name }` by the GraphQL query shown after this example. |
| 39 | + id: CultId, |
| 40 | + name: String, |
| 41 | +} |
| 42 | + |
| 43 | +struct CultBatcher { // Implements `dataloader::BatchFn` to load `Cult`s in batches. |
| 44 | + repo: Repository, // Source of the batched `load_cults_by_ids()` call. |
| 45 | +} |
| 46 | + |
| 47 | +// Since `BatchFn` doesn't provide any notion of fallible loading, like |
| 48 | +// `try_load()` returning `Result<HashMap<K, V>, E>`, we handle possible |
| 49 | +// errors as loaded values and unpack them later in the resolver. |
| 50 | +impl dataloader::BatchFn<CultId, Result<Cult, Arc<anyhow::Error>>> for CultBatcher { |
| 51 | + async fn load( |
| 52 | + &mut self, |
| 53 | + cult_ids: &[CultId], |
| 54 | + ) -> HashMap<CultId, Result<Cult, Arc<anyhow::Error>>> { |
| 55 | + // Effectively performs the following SQL query: |
| 56 | + // SELECT id, name FROM cults WHERE id IN (${cult_id1}, ${cult_id2}, ...) |
| 57 | + match self.repo.load_cults_by_ids(cult_ids).await { |
| 58 | + Ok(found_cults) => { |
| 59 | + found_cults.into_iter().map(|(id, cult)| (id, Ok(cult))).collect() |
| 60 | + } |
| 61 | + // One could choose a different strategy to deal with fallible loads, |
| 62 | + // like consider values that failed to load as absent, or just panic. |
| 63 | + // See cksac/dataloader-rs#35 for details: |
| 64 | + // https://github.com/cksac/dataloader-rs/issues/35 |
| 65 | + Err(e) => { |
| 66 | + // Since `anyhow::Error` doesn't implement `Clone`, we share one |
| 67 | + // `Arc`-wrapped error between all the requested IDs instead. |
| 68 | + let e = Arc::new(e); |
| 69 | + cult_ids.iter().map(|&id| (id, Err(Arc::clone(&e)))).collect() |
| 70 | + } |
| 71 | + } |
| 72 | + } |
| 73 | +} |
| 74 | + |
| 75 | +type CultLoader = Loader<CultId, Result<Cult, Arc<anyhow::Error>>, CultBatcher>; |
| 76 | + |
| 77 | +fn new_cult_loader(repo: Repository) -> CultLoader { |
| 78 | + let batcher = CultBatcher { repo }; |
| 79 | + // By default, a `Loader` coalesces all the individual loads occurring |
| 80 | + // within a single frame of execution, and only then calls |
| 81 | + // `BatchFn::load()` with all the collected keys. Sometimes this is not |
| 82 | + // desirable or optimal (perhaps, a request is expected to be spread out |
| 83 | + // over a few subsequent ticks). |
| 84 | + // A larger yield count lets more keys join the batch, at the price of |
| 85 | + // waiting longer before the actual load happens. For more details see: |
| 86 | + // https://github.com/cksac/dataloader-rs/issues/12 |
| 87 | + // https://github.com/graphql/dataloader#batch-scheduling |
| 88 | + CultLoader::new(batcher).with_yield_count(100) |
| 89 | +} |
| 90 | + |
| 91 | +struct Person { |
| 92 | + id: UserId, |
| 93 | + name: String, |
| 94 | + cult_id: CultId, // Key passed to `cult_loader.try_load()` in `Person::cult()`. |
| 95 | +} |
| 96 | + |
| 97 | +#[graphql_object] |
| 98 | +#[graphql(context = Context)] |
| 99 | +impl Person { |
| 100 | + fn id(&self) -> UserId { |
| 101 | + self.id |
| 102 | + } |
| 103 | + |
| 104 | + fn name(&self) -> &str { |
| 105 | + self.name.as_str() |
| 106 | + } |
| 107 | + |
| 108 | + async fn cult(&self, ctx: &Context) -> anyhow::Result<Cult> { |
| 109 | + ctx.cult_loader |
| 110 | + // Here, we don't run the `CultBatcher::load()` eagerly, but rather |
| 111 | + // only register the `self.cult_id` value in the `cult_loader` and |
| 112 | + // wait for other concurrent resolvers to do the same. |
| 113 | + // The actual batch loading happens once all the resolvers register |
| 114 | + // their IDs and there is nothing more to execute. |
| 115 | + .try_load(self.cult_id) |
| 116 | + .await |
| 117 | + // The outer error is the `io::Error` returned by `try_load()` if |
| 118 | + // no value is present in the `HashMap` for the specified |
| 119 | + // `self.cult_id`, meaning that there is no `Cult` with such ID |
| 120 | + // in the `Repository`. |
| 121 | + .map_err(|_| anyhow!("No cult exists for ID `{}`", self.cult_id))? |
| 122 | + // The inner error is the one returned by the `CultBatcher::load()` |
| 123 | + // if the `Repository::load_cults_by_ids()` fails, meaning that |
| 124 | + // running the SQL query failed. |
| 125 | + .map_err(|arc_err| anyhow!("{arc_err}")) |
| 126 | + } |
| 127 | +} |
| 128 | + |
| 129 | +struct Query; // Root query type: exposes the top-level `persons` field. |
| 130 | + |
| 131 | +#[graphql_object] |
| 132 | +#[graphql(context = Context)] |
| 133 | +impl Query { |
| 134 | + async fn persons(ctx: &Context) -> anyhow::Result<Vec<Person>> { |
| 135 | + // Effectively performs the following SQL query: |
| 136 | + // SELECT id, name, cult_id FROM persons |
| 137 | + ctx.repo.load_all_persons().await |
| 138 | + } |
| 139 | +} |
| 140 | + |
| 141 | +fn main() { |
| 142 | + // Left empty: this example only declares types and resolvers. |
| 143 | +} |
| 144 | +``` |
| 145 | + |
| 146 | +And now, performing the [GraphQL query which led to the N+1 problem](n_plus_1.md) |
| 147 | +```graphql |
| 148 | +query { |
| 149 | + persons { |
| 150 | + id |
| 151 | + name |
| 152 | + cult { |
| 153 | + id |
| 154 | + name |
| 155 | + } |
| 156 | + } |
| 157 | +} |
| 158 | +``` |
| 159 | +will lead to efficient [SQL] queries, just as expected: |
| 160 | +```sql |
| 161 | +SELECT id, name, cult_id FROM persons; |
| 162 | +SELECT id, name FROM cults WHERE id IN (1, 2, 3, 4); |
| 163 | +``` |
| 164 | + |
| 165 | + |
| 166 | + |
| 167 | + |
| 168 | +## Caching |
| 169 | + |
| 170 | +[`dataloader::cached`] provides a [memoization][2] cache: after `BatchFn::load()` is called once with given keys, the resulting values are cached to eliminate redundant loads. |
| 171 | + |
| 172 | +DataLoader caching does not replace [Redis], [Memcached], or any other shared application-level cache. DataLoader is first and foremost a data loading mechanism, and its cache only serves the purpose of not repeatedly loading the same data [in the context of a single request][3]. |
| 173 | + |
| 174 | +> **WARNING**: A DataLoader should be created per-request to avoid risk of bugs where one client is able to load cached/batched data from another client outside its authenticated scope. Creating a DataLoader within an individual resolver will prevent batching from occurring and will nullify any benefits of it. |
| 175 | +
|
| 176 | + |
| 177 | + |
| 178 | + |
| 179 | +## Full example |
| 180 | + |
| 181 | +For a full example using DataLoaders in [Juniper] check out the [`jayy-lmao/rust-graphql-docker` repository][4]. |
| 182 | + |
| 183 | + |
| 184 | + |
| 185 | + |
| 186 | +[`dataloader::cached`]: https://docs.rs/dataloader/latest/dataloader/cached/index.html |
| 187 | +[@schrockn]: https://github.com/schrockn |
| 188 | +[Juniper]: https://docs.rs/juniper |
| 189 | +[Memcached]: https://memcached.org |
| 190 | +[Redis]: https://redis.io |
| 191 | +[Rust]: https://www.rust-lang.org |
| 192 | +[SQL]: https://en.wikipedia.org/wiki/SQL |
| 193 | + |
| 194 | +[0]: https://github.com/graphql/dataloader |
| 195 | +[1]: https://docs.rs/crate/dataloader |
| 196 | +[2]: https://en.wikipedia.org/wiki/Memoization |
| 197 | +[3]: https://github.com/graphql/dataloader#caching |
| 198 | +[4]: https://github.com/jayy-lmao/rust-graphql-docker |
0 commit comments