Skip to content

Commit

Permalink
1119 filter metadata from api (#1642)
Browse files Browse the repository at this point in the history
* Building sql query based on columns rather than SELECT *
  • Loading branch information
RyanRConaway authored Apr 3, 2023
1 parent dd8b01a commit f0b2500
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 21 deletions.
20 changes: 18 additions & 2 deletions apps/discovery_api/lib/discovery_api/services/presto_service.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ defmodule DiscoveryApi.Services.PrestoService do
|> Prestige.query!("show columns from #{dataset_system_name}")
|> Map.get(:rows)
|> Enum.map(fn [column_name | _tail] -> column_name end)
|> remove_metadata_columns()
end

def is_select_statement?(statement) do
Expand Down Expand Up @@ -131,10 +132,11 @@ defmodule DiscoveryApi.Services.PrestoService do
[] -> {:error, "Table #{system_name} not found"}
names -> {:ok, names}
end
|> remove_metadata_columns()
end

def build_query(params, system_name) do
column_string = Map.get(params, "columns", "*")
def build_query(params, system_name, columns) do
column_string = Map.get(params, "columns", Enum.join(columns, ", "))

["SELECT"]
|> build_columns(column_string)
Expand Down Expand Up @@ -179,4 +181,18 @@ defmodule DiscoveryApi.Services.PrestoService do
|> String.split(",", trim: true)
|> Enum.map(&String.trim/1)
end

defp remove_metadata_columns({:error, _reason} = error) do
error
end

defp remove_metadata_columns({:ok, columns}) do
{:ok, remove_metadata_columns(columns)}
end

defp remove_metadata_columns(columns) do
metadata_columns = ["_extraction_start_time", "_ingestion_id", "os_partition"]

columns |> Enum.reject(fn column -> column in metadata_columns end)
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ defmodule DiscoveryApiWeb.DataController do
api_key = Plug.Conn.get_req_header(conn, "api_key")

with {:ok, columns} <- PrestoService.get_column_names(session, dataset_name, Map.get(params, "columns")),
{:ok, query} <- PrestoService.build_query(params, dataset_name),
{:ok, query} <- PrestoService.build_query(params, dataset_name, columns),
{:ok, affected_models} <- QueryAccessUtils.get_affected_models(query),
true <- QueryAccessUtils.user_is_authorized?(affected_models, current_user, api_key) do
data_stream =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,28 @@ defmodule DiscoveryApi.Services.PrestoServiceTest do
assert list_of_columns == result
end

test "preview_columns should filter out any metadata columns" do
dataset = "things_in_the_fire"

list_of_columns = ["col_a"]

unprocessed_columns = %Prestige.Result{
columns: :doesnt_matter,
presto_headers: :doesnt_matter,
rows: [
["col_a", "varchar", "", ""],
["_extraction_start_time", "varchar", "", ""],
["_ingestion_id", "varchar", "", ""],
["os_partition", "varchar", "", ""]
]
}

allow(Prestige.query!(:connection, "show columns from #{dataset}"), return: unprocessed_columns)

result = PrestoService.preview_columns(:connection, dataset)
assert list_of_columns == result
end

describe "get_affected_tables/1" do
setup do
public_one_model =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ defmodule DiscoveryApiWeb.DataController.ContentTest do
allow(PrestoService.get_column_names(any(), any(), any()), return: {:ok, ["feature"]})
allow(PrestoService.preview_columns(any(), @system_name), return: ["feature"])
allow(PrestoService.preview(any(), @system_name), return: @geo_json_features)
allow(PrestoService.build_query(any(), any()), return: {:ok, "select * from #{@system_name}"})
allow(PrestoService.build_query(any(), any(), any()), return: {:ok, "select * from #{@system_name}"})

allow(Prestige.new_session(any()), return: :connect)
allow(Prestige.query!(any(), "select * from #{@system_name}"), return: :result)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,47 @@ defmodule DiscoveryApiWeb.DataController.PreviewTest do
assert expected == actual
end

test "preview controller does not return any metadata columns", %{conn: conn, model: model} do
list_of_maps = [
%{
"id" => Faker.UUID.v4(),
"_ingestion_id" => "will",
"_extraction_id" => "be",
"os_partition" => "removed",
"json_encoded" => "{\"json_encoded\": \"tony\"}",
"other" => "foo"
},
%{
"id" => Faker.UUID.v4(),
"_ingestion_id" => "will",
"_extraction_id" => "be",
"os_partition" => "removed",
"json_encoded" => "{\"json_encoded\": \"andy\"}"
},
%{
"id" => Faker.UUID.v4(),
"_ingestion_id" => "will",
"_extraction_id" => "be",
"os_partition" => "removed",
"json_encoded" => "{\"json_encoded\": \"smith\"}"
}
]

schema = model.schema
encoded_maps = Enum.map(list_of_maps, &JsonFieldDecoder.decode_one_datum(schema, &1))

list_of_columns = ["id", "json_encoded", "other"]

expected = %{"data" => encoded_maps, "meta" => %{"columns" => list_of_columns}}

expect(PrestoService.preview(any(), @system_name), return: list_of_maps)
expect(PrestoService.preview_columns(any(), @system_name), return: list_of_columns)

actual = conn |> put_req_header("accept", "application/json") |> get("/api/v1/dataset/#{@dataset_id}/preview") |> json_response(200)

assert expected == actual
end

test "preview controller returns an empty list for an existing dataset with no data", %{conn: conn} do
list_of_columns = ["id", "json_encoded"]
expected = %{"data" => [], "meta" => %{"columns" => list_of_columns}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
conn |> put_req_header("accept", "text/csv") |> get(url) |> response(200)

assert_called(Prestige.query!(:connection, "describe #{@system_name}"), once())
assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name}"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name}"), once())

where(
url: [
Expand All @@ -75,7 +75,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
data_test "selects using the where clause provided", %{conn: conn} do
conn |> put_req_header("accept", "text/csv") |> get(url, where: "name='Robby'") |> response(200)

assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name} WHERE name='Robby'"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name} WHERE name='Robby'"), once())

where(
url: [
Expand All @@ -88,7 +88,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
data_test "selects using the order by clause provided", %{conn: conn} do
conn |> put_req_header("accept", "text/csv") |> get(url, orderBy: "id") |> response(200)

assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name} ORDER BY id"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name} ORDER BY id"), once())

where(
url: [
Expand All @@ -101,7 +101,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
data_test "selects using the limit clause provided", %{conn: conn} do
conn |> put_req_header("accept", "text/csv") |> get(url, limit: "200") |> response(200)

assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name} LIMIT 200"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name} LIMIT 200"), once())

where(
url: [
Expand All @@ -114,7 +114,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
data_test "selects using the group by clause provided", %{conn: conn} do
conn |> put_req_header("accept", "text/csv") |> get(url, groupBy: "one") |> response(200)

assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name} GROUP BY one"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name} GROUP BY one"), once())

where(
url: [
Expand All @@ -130,7 +130,10 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
|> get(url, where: "id=1", orderBy: "name", limit: "200", groupBy: "name")
|> response(200)

assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name} WHERE id=1 GROUP BY name ORDER BY name LIMIT 200"), once())
assert_called(
Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name} WHERE id=1 GROUP BY name ORDER BY name LIMIT 200"),
once()
)

where(
url: [
Expand Down Expand Up @@ -198,29 +201,29 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
test "json queries cannot contain semicolons", %{conn: conn} do
conn
|> put_req_header("accept", "application/json")
|> get("/api/v1/organization/org1/dataset/data1/query", columns: "id,one; select * from system; two")
|> get("/api/v1/organization/org1/dataset/data1/query", columns: "id,one; select id, name from system; two")
|> response(400)

assert_called(
Prestige.stream!(:connection, "SELECT id, one; select * from system; two FROM coda__test_dataset"),
Prestige.stream!(:connection, "SELECT id, one; select id, name from system; two FROM coda__test_dataset"),
times(0)
)
end

test "csv queries cannot contain semicolons", %{conn: conn} do
conn
|> put_req_header("accept", "text/csv")
|> get("/api/v1/organization/org1/dataset/data1/query", columns: "id,one; select * from system; two")
|> get("/api/v1/organization/org1/dataset/data1/query", columns: "id,one; select id, name from system; two")
|> response(400)

assert_called(
Prestige.stream!(:connection, "SELECT id, one; select * from system; two FROM coda__test_dataset"),
Prestige.stream!(:connection, "SELECT id, one; select id, name from system; two FROM coda__test_dataset"),
times(0)
)
end

test "queries cannot contain block comments", %{conn: conn} do
query_string = "SELECT * FROM coda__test_dataset ORDER BY /* This is a comment */"
query_string = "SELECT id, name FROM coda__test_dataset ORDER BY /* This is a comment */"

conn
|> put_req_header("accept", "text/csv")
Expand All @@ -231,7 +234,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
end

test "queries cannot contain single-line comments", %{conn: conn} do
query_string = "SELECT * FROM coda__test_dataset ORDER BY -- This is a comment"
query_string = "SELECT id, name FROM coda__test_dataset ORDER BY -- This is a comment"

conn
|> put_req_header("accept", "text/csv")
Expand Down Expand Up @@ -260,14 +263,14 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
allow(Model.get(any()), return: model)
allow(Model.get_all(), return: [model])

allow(Prestige.stream!(:connection, "SELECT * FROM geojson"),
allow(Prestige.stream!(:connection, "SELECT id, name FROM geojson"),
return: [:any]
)

allow(Redix.command!(any(), any()), return: :doesnt_matter)

allow(PrestoService.get_column_names(any(), any(), any()), return: {:ok, ["feature"]})
allow(PrestoService.build_query(any(), any()), return: {:ok, "SELECT * FROM geojson"})
allow(PrestoService.build_query(any(), any(), any()), return: {:ok, "SELECT id, name FROM geojson"})
allow(PrestoService.is_select_statement?(any()), return: true)
allow(PrestoService.get_affected_tables(any(), any()), return: {:ok, ["geojson__geojson"]})
allow(ModelAccessUtils.has_access?(any(), any()), return: true)
Expand Down Expand Up @@ -307,7 +310,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
"type" => @feature_type
}

assert_called(Prestige.stream!(:connection, "SELECT * FROM geojson"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM geojson"), once())

where(
url: [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ defmodule DiscoveryApiWeb.DataController.RestrictedTest do
allow(PrestoService.get_column_names(any(), any(), any()), return: {:ok, ["id", "name"]})
allow(PrestoService.preview_columns(any(), @system_name), return: ["id", "name"])
allow(PrestoService.preview(any(), @system_name), return: [[1, "Joe"], [2, "Robby"]])
allow(PrestoService.build_query(any(), any()), return: {:ok, "select * from #{@system_name}"})
allow(PrestoService.build_query(any(), any(), any()), return: {:ok, "select * from #{@system_name}"})
allow(PrestoService.is_select_statement?("select * from #{@system_name}"), return: true)
allow(PrestoService.get_affected_tables(any(), "select * from #{@system_name}"), return: {:ok, ["#{@system_name}"]})

Expand Down
2 changes: 1 addition & 1 deletion apps/raptor_service/lib/raptor_service.ex
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ defmodule RaptorService do
end

def check_auth0_role(raptor_url, api_key, role) do
case HTTPoison.get(url_for_checking_role(raptor_url, api_key, role) |> IO.inspect(label: "RYAN - URL")) do
case HTTPoison.get(url_for_checking_role(raptor_url, api_key, role)) do
{:ok, %{body: body, status_code: status_code}} when status_code in 200..399 ->
{:ok, Jason.decode!(body)["has_role"]}

Expand Down

0 comments on commit f0b2500

Please sign in to comment.