Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1119 filter metadata from api #1642

Merged
merged 5 commits into from
Apr 3, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions apps/discovery_api/lib/discovery_api/services/presto_service.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ defmodule DiscoveryApi.Services.PrestoService do
|> Prestige.query!("show columns from #{dataset_system_name}")
|> Map.get(:rows)
|> Enum.map(fn [column_name | _tail] -> column_name end)
|> remove_metadata_columns()
end

def is_select_statement?(statement) do
Expand Down Expand Up @@ -131,10 +132,11 @@ defmodule DiscoveryApi.Services.PrestoService do
[] -> {:error, "Table #{system_name} not found"}
names -> {:ok, names}
end
|> remove_metadata_columns()
end

def build_query(params, system_name) do
column_string = Map.get(params, "columns", "*")
def build_query(params, system_name, columns) do
column_string = Map.get(params, "columns", Enum.join(columns, ", "))

["SELECT"]
|> build_columns(column_string)
Expand Down Expand Up @@ -179,4 +181,18 @@ defmodule DiscoveryApi.Services.PrestoService do
|> String.split(",", trim: true)
|> Enum.map(&String.trim/1)
end

defp remove_metadata_columns({:error, _reason} = error) do
error
end

defp remove_metadata_columns({:ok, columns}) do
{:ok, remove_metadata_columns(columns)}
end

defp remove_metadata_columns(columns) do
metadata_columns = ["_extraction_start_time", "_ingestion_id", "os_partition"]

columns |> Enum.reject(fn column -> column in metadata_columns end)
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ defmodule DiscoveryApiWeb.DataController do
api_key = Plug.Conn.get_req_header(conn, "api_key")

with {:ok, columns} <- PrestoService.get_column_names(session, dataset_name, Map.get(params, "columns")),
{:ok, query} <- PrestoService.build_query(params, dataset_name),
{:ok, query} <- PrestoService.build_query(params, dataset_name, columns),
{:ok, affected_models} <- QueryAccessUtils.get_affected_models(query),
true <- QueryAccessUtils.user_is_authorized?(affected_models, current_user, api_key) do
data_stream =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,23 @@ defmodule DiscoveryApi.Services.PrestoServiceTest do
assert list_of_columns == result
end

test "preview_columns should filter out any metadata columns" do
dataset = "things_in_the_fire"

list_of_columns = ["col_a"]

unprocessed_columns = %Prestige.Result{
columns: :doesnt_matter,
presto_headers: :doesnt_matter,
rows: [["col_a", "varchar", "", ""], ["_extraction_start_time", "varchar", "", ""], ["_ingestion_id", "varchar", "", ""], ["os_partition", "varchar", "", ""]]
}

allow(Prestige.query!(:connection, "show columns from #{dataset}"), return: unprocessed_columns)

result = PrestoService.preview_columns(:connection, dataset)
assert list_of_columns == result
end

describe "get_affected_tables/1" do
setup do
public_one_model =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ defmodule DiscoveryApiWeb.DataController.ContentTest do
allow(PrestoService.get_column_names(any(), any(), any()), return: {:ok, ["feature"]})
allow(PrestoService.preview_columns(any(), @system_name), return: ["feature"])
allow(PrestoService.preview(any(), @system_name), return: @geo_json_features)
allow(PrestoService.build_query(any(), any()), return: {:ok, "select * from #{@system_name}"})
allow(PrestoService.build_query(any(), any(), any()), return: {:ok, "select * from #{@system_name}"})

allow(Prestige.new_session(any()), return: :connect)
allow(Prestige.query!(any(), "select * from #{@system_name}"), return: :result)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,28 @@ defmodule DiscoveryApiWeb.DataController.PreviewTest do
assert expected == actual
end

test "preview controller does not return any metadata columns", %{conn: conn, model: model} do
list_of_maps = [
%{"id" => Faker.UUID.v4(), "_ingestion_id" => "will", "_extraction_id" => "be", "os_partition" => "removed", "json_encoded" => "{\"json_encoded\": \"tony\"}", "other" => "foo"},
%{"id" => Faker.UUID.v4(), "_ingestion_id" => "will", "_extraction_id" => "be", "os_partition" => "removed", "json_encoded" => "{\"json_encoded\": \"andy\"}"},
%{"id" => Faker.UUID.v4(), "_ingestion_id" => "will", "_extraction_id" => "be", "os_partition" => "removed", "json_encoded" => "{\"json_encoded\": \"smith\"}"}
]

schema = model.schema
encoded_maps = Enum.map(list_of_maps, &JsonFieldDecoder.decode_one_datum(schema, &1))

list_of_columns = ["id", "json_encoded", "other"]

expected = %{"data" => encoded_maps, "meta" => %{"columns" => list_of_columns}}

expect(PrestoService.preview(any(), @system_name), return: list_of_maps)
expect(PrestoService.preview_columns(any(), @system_name), return: list_of_columns)

actual = conn |> put_req_header("accept", "application/json") |> get("/api/v1/dataset/#{@dataset_id}/preview") |> json_response(200)

assert expected == actual
end

test "preview controller returns an empty list for an existing dataset with no data", %{conn: conn} do
list_of_columns = ["id", "json_encoded"]
expected = %{"data" => [], "meta" => %{"columns" => list_of_columns}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
conn |> put_req_header("accept", "text/csv") |> get(url) |> response(200)

assert_called(Prestige.query!(:connection, "describe #{@system_name}"), once())
assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name}"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name}"), once())

where(
url: [
Expand All @@ -75,7 +75,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
data_test "selects using the where clause provided", %{conn: conn} do
conn |> put_req_header("accept", "text/csv") |> get(url, where: "name='Robby'") |> response(200)

assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name} WHERE name='Robby'"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name} WHERE name='Robby'"), once())

where(
url: [
Expand All @@ -88,7 +88,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
data_test "selects using the order by clause provided", %{conn: conn} do
conn |> put_req_header("accept", "text/csv") |> get(url, orderBy: "id") |> response(200)

assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name} ORDER BY id"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name} ORDER BY id"), once())

where(
url: [
Expand All @@ -101,7 +101,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
data_test "selects using the limit clause provided", %{conn: conn} do
conn |> put_req_header("accept", "text/csv") |> get(url, limit: "200") |> response(200)

assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name} LIMIT 200"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name} LIMIT 200"), once())

where(
url: [
Expand All @@ -114,7 +114,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
data_test "selects using the group by clause provided", %{conn: conn} do
conn |> put_req_header("accept", "text/csv") |> get(url, groupBy: "one") |> response(200)

assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name} GROUP BY one"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name} GROUP BY one"), once())

where(
url: [
Expand All @@ -130,7 +130,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
|> get(url, where: "id=1", orderBy: "name", limit: "200", groupBy: "name")
|> response(200)

assert_called(Prestige.stream!(:connection, "SELECT * FROM #{@system_name} WHERE id=1 GROUP BY name ORDER BY name LIMIT 200"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM #{@system_name} WHERE id=1 GROUP BY name ORDER BY name LIMIT 200"), once())

where(
url: [
Expand Down Expand Up @@ -198,29 +198,29 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
test "json queries cannot contain semicolons", %{conn: conn} do
conn
|> put_req_header("accept", "application/json")
|> get("/api/v1/organization/org1/dataset/data1/query", columns: "id,one; select * from system; two")
|> get("/api/v1/organization/org1/dataset/data1/query", columns: "id,one; select id, name from system; two")
|> response(400)

assert_called(
Prestige.stream!(:connection, "SELECT id, one; select * from system; two FROM coda__test_dataset"),
Prestige.stream!(:connection, "SELECT id, one; select id, name from system; two FROM coda__test_dataset"),
times(0)
)
end

test "csv queries cannot contain semicolons", %{conn: conn} do
conn
|> put_req_header("accept", "text/csv")
|> get("/api/v1/organization/org1/dataset/data1/query", columns: "id,one; select * from system; two")
|> get("/api/v1/organization/org1/dataset/data1/query", columns: "id,one; select id, name from system; two")
|> response(400)

assert_called(
Prestige.stream!(:connection, "SELECT id, one; select * from system; two FROM coda__test_dataset"),
Prestige.stream!(:connection, "SELECT id, one; select id, name from system; two FROM coda__test_dataset"),
times(0)
)
end

test "queries cannot contain block comments", %{conn: conn} do
query_string = "SELECT * FROM coda__test_dataset ORDER BY /* This is a comment */"
query_string = "SELECT id, name FROM coda__test_dataset ORDER BY /* This is a comment */"

conn
|> put_req_header("accept", "text/csv")
Expand All @@ -231,7 +231,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
end

test "queries cannot contain single-line comments", %{conn: conn} do
query_string = "SELECT * FROM coda__test_dataset ORDER BY -- This is a comment"
query_string = "SELECT id, name FROM coda__test_dataset ORDER BY -- This is a comment"

conn
|> put_req_header("accept", "text/csv")
Expand Down Expand Up @@ -260,14 +260,14 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
allow(Model.get(any()), return: model)
allow(Model.get_all(), return: [model])

allow(Prestige.stream!(:connection, "SELECT * FROM geojson"),
allow(Prestige.stream!(:connection, "SELECT id, name FROM geojson"),
return: [:any]
)

allow(Redix.command!(any(), any()), return: :doesnt_matter)

allow(PrestoService.get_column_names(any(), any(), any()), return: {:ok, ["feature"]})
allow(PrestoService.build_query(any(), any()), return: {:ok, "SELECT * FROM geojson"})
allow(PrestoService.build_query(any(), any(), any()), return: {:ok, "SELECT id, name FROM geojson"})
allow(PrestoService.is_select_statement?(any()), return: true)
allow(PrestoService.get_affected_tables(any(), any()), return: {:ok, ["geojson__geojson"]})
allow(ModelAccessUtils.has_access?(any(), any()), return: true)
Expand Down Expand Up @@ -307,7 +307,7 @@ defmodule DiscoveryApiWeb.DataController.QueryTest do
"type" => @feature_type
}

assert_called(Prestige.stream!(:connection, "SELECT * FROM geojson"), once())
assert_called(Prestige.stream!(:connection, "SELECT id, name FROM geojson"), once())

where(
url: [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ defmodule DiscoveryApiWeb.DataController.RestrictedTest do
allow(PrestoService.get_column_names(any(), any(), any()), return: {:ok, ["id", "name"]})
allow(PrestoService.preview_columns(any(), @system_name), return: ["id", "name"])
allow(PrestoService.preview(any(), @system_name), return: [[1, "Joe"], [2, "Robby"]])
allow(PrestoService.build_query(any(), any()), return: {:ok, "select * from #{@system_name}"})
allow(PrestoService.build_query(any(), any(), any()), return: {:ok, "select * from #{@system_name}"})
allow(PrestoService.is_select_statement?("select * from #{@system_name}"), return: true)
allow(PrestoService.get_affected_tables(any(), "select * from #{@system_name}"), return: {:ok, ["#{@system_name}"]})

Expand Down
2 changes: 1 addition & 1 deletion apps/raptor_service/lib/raptor_service.ex
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ defmodule RaptorService do
end

def check_auth0_role(raptor_url, api_key, role) do
case HTTPoison.get(url_for_checking_role(raptor_url, api_key, role) |> IO.inspect(label: "RYAN - URL")) do
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

real quick IO inspect

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uhhh, that's not mine? I've never seen that IO inspect in my life!

case HTTPoison.get(url_for_checking_role(raptor_url, api_key, role)) do
{:ok, %{body: body, status_code: status_code}} when status_code in 200..399 ->
{:ok, Jason.decode!(body)["has_role"]}

Expand Down