Skip to content

Commit 1adf5b6

Browse files
committed
Implement review
1 parent e85023d commit 1adf5b6

File tree

8 files changed

+59
-54
lines changed

8 files changed

+59
-54
lines changed

README.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -706,15 +706,15 @@ This macro returns the sql required to build a date spine. The spine will includ
706706
}}
707707
```
708708

709-
#### dedupe ([source](macros/sql/dedupe.sql))
710-
This macro returns the sql required to remove deduplicate rows from a model or source.
709+
#### deduplicate ([source](macros/sql/deduplicate.sql))
710+
This macro returns the sql required to remove duplicate rows from a model or source.
711711

712712
**Usage:**
713713

714714
```
715-
{{ dbt_utils.dedupe(
716-
source('my_source', 'my_table'),
717-
"user_id, cast(timestamp as day)",
715+
{{ dbt_utils.deduplicate(
716+
relation=source('my_source', 'my_table'),
717+
group_by="user_id, cast(timestamp as day)",
718718
order_by="timestamp desc"
719719
)
720720
}}

integration_tests/models/sql/schema.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,4 @@ models:
146146
- name: test_dedupe
147147
tests:
148148
- dbt_utils.equality:
149-
compare_model: ref('data_dedupe_expected')
149+
compare_model: ref('data_deduplicate_expected')

integration_tests/models/sql/test_dedupe.sql

-7
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{# Test model: keeps one row per `user_id` from `data_deduplicate`, preferring the highest `version` (order_by='version desc'). #}
with deduped as (
2+
3+
{{ dbt_utils.deduplicate(ref('data_deduplicate'), group_by='user_id', order_by='version desc') | indent }}
4+
5+
)
6+
7+
select * from deduped

macros/sql/dedupe.sql

-41
This file was deleted.

macros/sql/deduplicate.sql

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{#
-- Public entry point: returns the SQL that removes duplicate rows from
-- `relation`. The work is delegated through `adapter.dispatch` to a
-- `<adapter>__deduplicate` implementation (this file provides
-- `default__deduplicate` and `bigquery__deduplicate`).
--
-- Args:
--   relation: the model or source to deduplicate; rendered into the
--             `from` clause by the implementations.
--   group_by: SQL expression(s) defining a duplicate group; rows sharing
--             these values collapse to a single row.
--   order_by: optional SQL ordering used to pick which row of each group
--             is kept (the first one). Defaults to none.
#}
{%- macro deduplicate(relation, group_by, order_by=none) -%}
2+
{{ return(adapter.dispatch('deduplicate', 'dbt_utils')(relation, group_by, order_by=order_by)) }}
3+
{% endmacro %}
4+
5+
{#
-- Default implementation of `deduplicate`.
--
-- Numbers the rows of `relation` with `row_number()` partitioned by
-- `group_by` (ordered by `order_by` when one is given) and keeps only the
-- rows where that number is 1. The outer select list comes from
-- `dbt_utils.star(relation, relation_alias='deduped')` rather than `*`,
-- presumably so the helper `rn` column is not returned -- confirm against
-- the `star` macro.
--
-- NOTE(review): when `order_by` is none the window has no `order by`, so
-- the surviving row in each group is arbitrary; some warehouses may reject
-- `row_number()` without an ordering -- confirm per adapter.
#}
{%- macro default__deduplicate(relation, group_by, order_by=none) -%}
6+
7+
select
8+
{{ dbt_utils.star(relation, relation_alias='deduped') | indent }}
9+
from (
10+
select
11+
_inner.*,
12+
row_number() over (
13+
partition by {{ group_by }}
14+
{% if order_by is not none -%}
15+
order by {{ order_by }}
16+
{%- endif %}
17+
) as rn
18+
from {{ relation }} as _inner
19+
) as deduped
20+
where deduped.rn = 1
21+
22+
{%- endmacro -%}
23+
24+
{#
25+
-- It is more performant to deduplicate using `array_agg` with a limit
26+
-- clause in BigQuery:
27+
-- https://github.com/dbt-labs/dbt-utils/issues/335#issuecomment-788157572
28+
#}
29+
{#
-- BigQuery implementation of `deduplicate`.
--
-- Groups `relation` by `group_by` and, for each group, aggregates the whole
-- row (`original`) with `array_agg(... limit 1)`, ordered by `order_by` when
-- one is given, then takes element `[offset(0)]` as the struct `deduped`.
-- The outer select list is built by
-- `dbt_utils.star(relation, relation_alias='deduped')`, presumably yielding
-- `deduped.<column>` references that unpack the struct -- confirm against
-- the `star` macro.
--
-- Args:
--   relation: the model or source to deduplicate.
--   group_by: SQL expression(s) defining a duplicate group.
--   order_by: optional SQL ordering selecting which row of each group is
--             kept; when none, the kept row is whichever `array_agg`
--             returns first.
#}
{%- macro bigquery__deduplicate(relation, group_by, order_by=none) -%}
30+
31+
select
32+
{{ dbt_utils.star(relation, relation_alias='deduped') | indent }}
33+
from (
34+
select
35+
array_agg (
36+
original
37+
{% if order_by is not none -%}
38+
order by {{ order_by }}
39+
{%- endif %}
40+
limit 1
41+
)[offset(0)] as deduped
42+
from {{ relation }} as original
43+
group by {{ group_by }}
44+
)
45+
46+
{%- endmacro -%}

0 commit comments

Comments
 (0)