From 228ae2b593dd33d46bcf4dbe50955a01f0f21d2d Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Mon, 30 Jan 2023 16:54:14 -0600
Subject: [PATCH 01/17] wip: vlmd aggmds support

---
 .gitignore                                    |  1 +
 src/mds/agg_mds/adapters.py                   |  4 ++++
 src/mds/agg_mds/commons.py                    |  2 ++
 .../agg_mds/datastore/elasticsearch_dao.py    | 24 +++++++++++++------
 src/mds/config.py                             |  3 +++
 src/mds/populate.py                           |  8 ++++++-
 tests/test_agg_mds_commons.py                 | 17 +++++++++++--
 tests/test_agg_mds_elasticsearch_dao.py       | 18 ++++++++++----
 tests/test_populate.py                        |  2 +-
 9 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2645ee69..e587c450 100644
--- a/.gitignore
+++ b/.gitignore
@@ -109,3 +109,4 @@ postgres-data/
 
 # VSCode
 .vscode/
+.dccache
diff --git a/src/mds/agg_mds/adapters.py b/src/mds/agg_mds/adapters.py
index 8a1c18ae..351f9de0 100644
--- a/src/mds/agg_mds/adapters.py
+++ b/src/mds/agg_mds/adapters.py
@@ -1001,6 +1001,7 @@ def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]:
         mappings = kwargs.get("mappings", None)
         config = kwargs.get("config", {})
         study_field = config.get("study_field", "gen3_discovery")
+        data_dict_field = config.get("data_dict_field", None)
         keepOriginalFields = kwargs.get("keepOriginalFields", True)
         globalFieldFilters = kwargs.get("globalFieldFilters", [])
         schema = kwargs.get("schema", {})
@@ -1021,6 +1022,9 @@ def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]:
                 "_guid_type": "discovery_metadata",
                 "gen3_discovery": item,
             }
+            # VLMD: copy the record's data dictionary field into the AggMDS record
+            if data_dict_field is not None and data_dict_field in record:
+                results[guid][data_dict_field] = record[data_dict_field]
 
         perItemValues = kwargs.get("perItemValues", None)
         if perItemValues is not None:
diff --git a/src/mds/agg_mds/commons.py b/src/mds/agg_mds/commons.py
index 69677a1e..6dddbb00 100644
--- a/src/mds/agg_mds/commons.py
+++ b/src/mds/agg_mds/commons.py
@@ -197,6 +197,7 @@ class MDSInstance:
     ] = None
     study_data_field: str = "gen3_discovery"
     guid_type: str = "discovery_metadata"
+    data_dict_field: Optional[str] = None
     select_field: Optional[Dict[str, str]] = None
 
     def __post_init__(self):
@@ -219,6 +220,7 @@ class AdapterMDSInstance:
     field_mappings: Optional[Dict[str, Any]] = None
     per_item_values: Optional[Dict[str, Any]] = None
     study_data_field: str = "gen3_discovery"
+    data_dict_field: Optional[str] = None
     keep_original_fields: bool = True
     global_field_filters: List[str] = field(default_factory=list)
     commons_name: Optional[str] = None
diff --git a/src/mds/agg_mds/datastore/elasticsearch_dao.py b/src/mds/agg_mds/datastore/elasticsearch_dao.py
index cc2a8319..7d145810 100644
--- a/src/mds/agg_mds/datastore/elasticsearch_dao.py
+++ b/src/mds/agg_mds/datastore/elasticsearch_dao.py
@@ -2,7 +2,12 @@
 from typing import Any, List, Dict, Union, Optional, Tuple
 from math import ceil
 from mds import logger
-from mds.config import AGG_MDS_NAMESPACE, ES_RETRY_LIMIT, ES_RETRY_INTERVAL
+from mds.config import (
+    AGG_MDS_NAMESPACE,
+    ES_RETRY_LIMIT,
+    ES_RETRY_INTERVAL,
+    AGG_MDS_DEFAULT_STUDY_DATA_FIELD,
+)
 
 AGG_MDS_INDEX = f"{AGG_MDS_NAMESPACE}-commons-index"
 AGG_MDS_TYPE = "commons"
@@ -189,7 +194,7 @@ async def update_metadata(
     guid_arr: List[str],
     tags: Dict[str, List[str]],
     info: Dict[str, str],
-    study_data_field: str,
+    data_dict_field: str = None,
     use_temp_index: bool = False,
 ):
     index_to_update = AGG_MDS_INFO_INDEX_TEMP if use_temp_index else AGG_MDS_INFO_INDEX
@@ -201,10 +206,15 @@ async def update_metadata(
     )
 
     index_to_update = AGG_MDS_INDEX_TEMP if use_temp_index else AGG_MDS_INDEX
-    for doc in data:
-        key = list(doc.keys())[0]
+    for d in data:
+        key = list(d.keys())[0]
         # Flatten out this structure
-        doc = doc[key][study_data_field]
+        doc = {
+            AGG_MDS_DEFAULT_STUDY_DATA_FIELD: d[key][AGG_MDS_DEFAULT_STUDY_DATA_FIELD]
+        }
+        if data_dict_field in d[key]:
+            doc[data_dict_field] = d[key][data_dict_field]
+        print(doc)
 
         try:
             elastic_search_client.index(
@@ -295,11 +305,11 @@ async def get_all_metadata(limit, offset, counts: Optional[str] = None, flatten=
     counts: converts the count of the entry[count] if it is a dict or array
     returns:
 
-    flattend == true
+    flattened == true
     results : MDS results as a dict
               paging info
 
-    flattend == false
+    flattened == false
     results : {
         commonsA: metadata
         commonsB: metadata
diff --git a/src/mds/config.py b/src/mds/config.py
index 83eebdbe..367de8aa 100644
--- a/src/mds/config.py
+++ b/src/mds/config.py
@@ -19,6 +19,9 @@ def __init__(self, value):
 URL_PREFIX = config("URL_PREFIX", default="/" if DEBUG else "/mds")
 USE_AGG_MDS = config("USE_AGG_MDS", cast=bool, default=False)
 AGG_MDS_NAMESPACE = config("AGG_MDS_NAMESPACE", default="default_namespace")
+AGG_MDS_DEFAULT_STUDY_DATA_FIELD = config(
+    "AGG_MDS_DEFAULT_STUDY_DATA_FIELD", cast=str, default="gen3_discovery"
+)
 ES_ENDPOINT = config("GEN3_ES_ENDPOINT", default="http://localhost:9200")
 
 # Database
diff --git a/src/mds/populate.py b/src/mds/populate.py
index ea8a367c..dc4fb8b1 100644
--- a/src/mds/populate.py
+++ b/src/mds/populate.py
@@ -44,6 +44,12 @@ async def populate_metadata(name: str, common, results, use_temp_index=False):
         entry = next(iter(x.values()))
 
         def normalize(entry: dict) -> Any:
+            # rename a commons-specific study-level metadata field to the default name
+            if common.study_data_field != config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD:
+                entry[config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD] = entry.pop(
+                    common.study_data_field
+                )
+
             if (
                 not hasattr(common, "columns_to_fields")
                 or common.columns_to_fields is None
@@ -91,7 +97,7 @@ def normalize(entry: dict) -> Any:
     info = {"commons_url": common.commons_url}
 
     await datastore.update_metadata(
-        name, mds_arr, keys, tags, info, common.study_data_field, use_temp_index
+        name, mds_arr, keys, tags, info, common.data_dict_field, use_temp_index
     )
 
 
diff --git a/tests/test_agg_mds_commons.py b/tests/test_agg_mds_commons.py
index 807d2d7d..6aceb7fc 100644
--- a/tests/test_agg_mds_commons.py
+++ b/tests/test_agg_mds_commons.py
@@ -227,7 +227,6 @@ def test_parse_config():
                     }
                 }
             },
-
             "gen3_commons": {
                 "my_gen3_commons": {
                     "mds_url": "http://mds",
@@ -247,6 +246,13 @@ def test_parse_config():
                     "mds_url": "http://non-gen3",
                     "commons_url": "non-gen3",
                     "adapter": "icpsr"
+                },
+                "another_gen3_commons": {
+                    "mds_url": "http://another-gen3",
+                    "commons_url": "another-gen3",
+                    "adapter": "gen3",
+                    "study_data_field" : "my_metadata",
+                    "data_dict_field" : "my_data_dict"
                 }
             }
         }
@@ -295,7 +301,14 @@ def test_parse_config():
                 "http://non-gen3",
                 "non-gen3",
                 "icpsr",
-            )
+            ),
+            "another_gen3_commons": AdapterMDSInstance(
+                "http://another-gen3",
+                "another-gen3",
+                "gen3",
+                study_data_field="my_metadata",
+                data_dict_field="my_data_dict",
+            ),
         },
     )
 
diff --git a/tests/test_agg_mds_elasticsearch_dao.py b/tests/test_agg_mds_elasticsearch_dao.py
index e5637f21..e26638bb 100644
--- a/tests/test_agg_mds_elasticsearch_dao.py
+++ b/tests/test_agg_mds_elasticsearch_dao.py
@@ -221,7 +221,6 @@ async def test_update_metadata():
             [],
             {},
             {},
-            "gen3_discovery",
         )
     mock_index.assert_has_calls(
         [
@@ -232,7 +231,13 @@ async def test_update_metadata():
                 index=AGG_MDS_INFO_INDEX,
             ),
             call(
-                body={"some_field": "some_value", "__manifest": {}, "sites": ""},
+                body={
+                    "gen3_discovery": {
+                        "some_field": "some_value",
+                        "__manifest": {},
+                        "sites": "",
+                    }
+                },
                 doc_type="commons",
                 id="my_id",
                 index=AGG_MDS_INDEX,
@@ -264,7 +269,6 @@ async def test_update_metadata_to_temp_index():
             [],
             {},
             {},
-            "gen3_discovery",
             use_temp_index=True,
         )
     mock_index.assert_has_calls(
@@ -276,7 +280,13 @@ async def test_update_metadata_to_temp_index():
                 index=AGG_MDS_INFO_INDEX_TEMP,
             ),
             call(
-                body={"some_field": "some_value", "__manifest": {}, "sites": ""},
+                body={
+                    "gen3_discovery": {
+                        "some_field": "some_value",
+                        "__manifest": {},
+                        "sites": "",
+                    }
+                },
                 doc_type="commons",
                 id="my_id",
                 index=AGG_MDS_INDEX_TEMP,
diff --git a/tests/test_populate.py b/tests/test_populate.py
index cf666570..7c50b0ea 100644
--- a/tests/test_populate.py
+++ b/tests/test_populate.py
@@ -81,7 +81,7 @@ async def test_populate_metadata():
             ["id1"],
             {"my_category": ["my_name"]},
             {"commons_url": "http://commons"},
-            "gen3_discovery",
+            None,
             False,
         )
 

From 61d141b31449a096d051ad82f5e85c2f1f92f8a4 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Mon, 30 Jan 2023 21:05:53 -0600
Subject: [PATCH 02/17] wip: add config field

---
 .../agg_mds/datastore/elasticsearch_dao.py    |  9 +++---
 src/mds/config.py                             |  3 ++
 src/mds/populate.py                           | 28 +++++++++++--------
 tests/test_populate.py                        |  1 -
 4 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/src/mds/agg_mds/datastore/elasticsearch_dao.py b/src/mds/agg_mds/datastore/elasticsearch_dao.py
index 7d145810..d912665a 100644
--- a/src/mds/agg_mds/datastore/elasticsearch_dao.py
+++ b/src/mds/agg_mds/datastore/elasticsearch_dao.py
@@ -7,6 +7,7 @@
     ES_RETRY_LIMIT,
     ES_RETRY_INTERVAL,
     AGG_MDS_DEFAULT_STUDY_DATA_FIELD,
+    AGG_MDS_DEFAULT_DATA_DICT_FIELD,
 )
 
 AGG_MDS_INDEX = f"{AGG_MDS_NAMESPACE}-commons-index"
@@ -194,7 +195,6 @@ async def update_metadata(
     guid_arr: List[str],
     tags: Dict[str, List[str]],
     info: Dict[str, str],
-    data_dict_field: str = None,
     use_temp_index: bool = False,
 ):
     index_to_update = AGG_MDS_INFO_INDEX_TEMP if use_temp_index else AGG_MDS_INFO_INDEX
@@ -212,9 +212,10 @@ async def update_metadata(
         doc = {
             AGG_MDS_DEFAULT_STUDY_DATA_FIELD: d[key][AGG_MDS_DEFAULT_STUDY_DATA_FIELD]
         }
-        if data_dict_field in d[key]:
-            doc[data_dict_field] = d[key][data_dict_field]
-        print(doc)
+        if AGG_MDS_DEFAULT_DATA_DICT_FIELD in d[key]:
+            doc[AGG_MDS_DEFAULT_DATA_DICT_FIELD] = d[key][
+                AGG_MDS_DEFAULT_DATA_DICT_FIELD
+            ]
 
         try:
             elastic_search_client.index(
diff --git a/src/mds/config.py b/src/mds/config.py
index 367de8aa..977a034e 100644
--- a/src/mds/config.py
+++ b/src/mds/config.py
@@ -22,6 +22,9 @@ def __init__(self, value):
 AGG_MDS_DEFAULT_STUDY_DATA_FIELD = config(
     "AGG_MDS_DEFAULT_STUDY_DATA_FIELD", cast=str, default="gen3_discovery"
 )
+AGG_MDS_DEFAULT_DATA_DICT_FIELD = config(
+    "AGG_MDS_DEFAULT_DATA_DICT_FIELD", cast=str, default="data_dictionaries"
+)
 ES_ENDPOINT = config("GEN3_ES_ENDPOINT", default="http://localhost:9200")
 
 # Database
diff --git a/src/mds/populate.py b/src/mds/populate.py
index dc4fb8b1..f2d279b7 100644
--- a/src/mds/populate.py
+++ b/src/mds/populate.py
@@ -49,6 +49,14 @@ def normalize(entry: dict) -> Any:
                 entry[config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD] = entry.pop(
                     common.study_data_field
                 )
+            # rename the variable-level metadata field to its default name, if set
+            if (
+                common.data_dict_field is not None
+                and common.data_dict_field != config.AGG_MDS_DEFAULT_DATA_DICT_FIELD
+            ):
+                entry[config.AGG_MDS_DEFAULT_DATA_DICT_FIELD] = entry.pop(
+                    common.data_dict_field
+                )
 
             if (
                 not hasattr(common, "columns_to_fields")
@@ -60,13 +68,13 @@ def normalize(entry: dict) -> Any:
                 if field == column:
                     continue
                 if isinstance(field, ColumnsToFields):
-                    entry[common.study_data_field][column] = field.get_value(
-                        entry[common.study_data_field]
-                    )
+                    entry[config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD][
+                        column
+                    ] = field.get_value(entry[config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD])
                 else:
-                    if field in entry[common.study_data_field]:
-                        entry[common.study_data_field][column] = entry[
-                            common.study_data_field
+                    if field in entry[config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD]:
+                        entry[config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD][column] = entry[
+                            config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD
                         ][field]
             return entry
 
@@ -74,14 +82,14 @@ def normalize(entry: dict) -> Any:
 
         # add the common field, selecting the name or an override (i.e. commons_name) and url to the entry
 
-        entry[common.study_data_field]["commons_name"] = (
+        entry[config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD]["commons_name"] = (
             common.commons_name
             if hasattr(common, "commons_name") and common.commons_name is not None
             else name
         )
 
         # add to tags
-        for t in entry[common.study_data_field].get("tags") or {}:
+        for t in entry[config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD].get("tags") or {}:
             if "category" not in t:
                 continue
             if t["category"] not in tags:
@@ -96,9 +104,7 @@ def normalize(entry: dict) -> Any:
     keys = list(results.keys())
     info = {"commons_url": common.commons_url}
 
-    await datastore.update_metadata(
-        name, mds_arr, keys, tags, info, common.data_dict_field, use_temp_index
-    )
+    await datastore.update_metadata(name, mds_arr, keys, tags, info, use_temp_index)
 
 
 async def populate_info(commons_config: Commons, use_temp_index=False) -> None:
diff --git a/tests/test_populate.py b/tests/test_populate.py
index 7c50b0ea..11e8c3f9 100644
--- a/tests/test_populate.py
+++ b/tests/test_populate.py
@@ -81,7 +81,6 @@ async def test_populate_metadata():
             ["id1"],
             {"my_category": ["my_name"]},
             {"commons_url": "http://commons"},
-            None,
             False,
         )
 

From dd7490d1c9fbca47e398213db8e4564ef9d6d096 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Mon, 30 Jan 2023 22:44:39 -0600
Subject: [PATCH 03/17] wip: query

---
 .../agg_mds/datastore/elasticsearch_dao.py    |  21 +--
 tests/test_agg_mds_query.py                   | 134 +++++++++---------
 2 files changed, 82 insertions(+), 73 deletions(-)

diff --git a/src/mds/agg_mds/datastore/elasticsearch_dao.py b/src/mds/agg_mds/datastore/elasticsearch_dao.py
index d912665a..c55c69ff 100644
--- a/src/mds/agg_mds/datastore/elasticsearch_dao.py
+++ b/src/mds/agg_mds/datastore/elasticsearch_dao.py
@@ -293,9 +293,12 @@ def process_record(record: dict, counts: Optional[List[str]]) -> Tuple[str, dict
     """
     _id = record["_id"]
     normalized = record["_source"]
-    for c in counts:
-        if c in normalized:
-            normalized[c] = count(normalized[c])
+    if AGG_MDS_DEFAULT_STUDY_DATA_FIELD in normalized:
+        for c in counts:
+            if c in normalized[AGG_MDS_DEFAULT_STUDY_DATA_FIELD]:
+                normalized[AGG_MDS_DEFAULT_STUDY_DATA_FIELD][c] = count(
+                    normalized[AGG_MDS_DEFAULT_STUDY_DATA_FIELD][c]
+                )
     return _id, normalized
 
 
@@ -356,7 +359,7 @@ async def get_all_metadata(limit, offset, counts: Optional[str] = None, flatten=
             flat = []
             for record in res["hits"]["hits"]:
                 rid, normalized = process_record(record, toReduce)
-                flat.append({rid: {"gen3_discovery": normalized}})
+                flat.append({rid: normalized})
             return {
                 "results": flat,
                 "pagination": {
@@ -378,12 +381,12 @@ async def get_all_metadata(limit, offset, counts: Optional[str] = None, flatten=
             }
             for record in res["hits"]["hits"]:
                 rid, normalized = process_record(record, toReduce)
-                commons_name = normalized["commons_name"]
+                commons_name = normalized[AGG_MDS_DEFAULT_STUDY_DATA_FIELD][
+                    "commons_name"
+                ]
                 if commons_name not in byCommons["results"]:
                     byCommons["results"][commons_name] = []
-                byCommons["results"][commons_name].append(
-                    {rid: {"gen3_discovery": normalized}}
-                )
+                byCommons["results"][commons_name].append({rid: normalized})
 
             return byCommons
     except Exception as error:
@@ -455,7 +458,7 @@ async def get_commons_attribute(name):
                 }
             },
         )
-        return data["hits"]["hits"][0]["_source"]
+        return data["hits"]["hits"][0][AGG_MDS_DEFAULT_STUDY_DATA_FIELD]["_source"]
     except Exception as error:
         logger.error(error)
         return None
diff --git a/tests/test_agg_mds_query.py b/tests/test_agg_mds_query.py
index 1e34d75d..240cf173 100644
--- a/tests/test_agg_mds_query.py
+++ b/tests/test_agg_mds_query.py
@@ -107,28 +107,30 @@ async def test_aggregate_metadata_paged_flat(client):
                     "_id": "815616c0-dfsdfjjj",
                     "_score": 1.0,
                     "_source": {
-                        "link": "",
-                        "tags": [
-                            {"name": "restricted", "category": "Access"},
-                            {"name": "genomic", "category": "category"},
-                        ],
-                        "commons": "LI",
-                        "_unique_id": "815616c0-c4a4-4883-9107-a05694499a36",
-                        "dataset_code": "LI",
-                        "brief_summary": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
-                        "dataset_title": "Lorem ipsum dolor sit amet",
-                        "samples_count": "",
-                        "subjects_count": "",
-                        "data_files_count": 11062,
-                        "_subjects_count": "",
-                        "study_description": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ultricies tristique nulla aliquet enim tortor at auctor.",
-                        "short_name": "Lorem ipsum dolor sit amet",
-                        "full_name": "Lorem ipsum dolor sit amet, consectetur adipiscing elit",
-                        "commons_name": "Lorem ipsum",
-                        "__manifest": [
-                            {"filename": "foo2.txt"},
-                            {"filename": "foo3.txt"},
-                        ],
+                        "gen3_discovery": {
+                            "link": "",
+                            "tags": [
+                                {"name": "restricted", "category": "Access"},
+                                {"name": "genomic", "category": "category"},
+                            ],
+                            "commons": "LI",
+                            "_unique_id": "815616c0-c4a4-4883-9107-a05694499a36",
+                            "dataset_code": "LI",
+                            "brief_summary": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
+                            "dataset_title": "Lorem ipsum dolor sit amet",
+                            "samples_count": "",
+                            "subjects_count": "",
+                            "data_files_count": 11062,
+                            "_subjects_count": "",
+                            "study_description": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ultricies tristique nulla aliquet enim tortor at auctor.",
+                            "short_name": "Lorem ipsum dolor sit amet",
+                            "full_name": "Lorem ipsum dolor sit amet, consectetur adipiscing elit",
+                            "commons_name": "Lorem ipsum",
+                            "__manifest": [
+                                {"filename": "foo2.txt"},
+                                {"filename": "foo3.txt"},
+                            ],
+                        },
                     },
                 }
             ],
@@ -194,28 +196,30 @@ async def test_aggregate_metadata_counts(client):
                     "_id": "815616c0-dfsdfjjj",
                     "_score": 1.0,
                     "_source": {
-                        "link": "",
-                        "tags": [
-                            {"name": "restricted", "category": "Access"},
-                            {"name": "genomic", "category": "category"},
-                        ],
-                        "commons": "LI",
-                        "_unique_id": "815616c0-c4a4-4883-9107-a05694499a36",
-                        "dataset_code": "LI",
-                        "brief_summary": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
-                        "dataset_title": "Lorem ipsum dolor sit amet",
-                        "samples_count": "",
-                        "subjects_count": "",
-                        "data_files_count": 11062,
-                        "_subjects_count": "",
-                        "study_description": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ultricies tristique nulla aliquet enim tortor at auctor.",
-                        "short_name": "Lorem ipsum dolor sit amet",
-                        "full_name": "Lorem ipsum dolor sit amet, consectetur adipiscing elit",
-                        "commons_name": "Lorem ipsum",
-                        "__manifest": [
-                            {"filename": "foo2.txt"},
-                            {"filename": "foo3.txt"},
-                        ],
+                        "gen3_discovery": {
+                            "link": "",
+                            "tags": [
+                                {"name": "restricted", "category": "Access"},
+                                {"name": "genomic", "category": "category"},
+                            ],
+                            "commons": "LI",
+                            "_unique_id": "815616c0-c4a4-4883-9107-a05694499a36",
+                            "dataset_code": "LI",
+                            "brief_summary": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
+                            "dataset_title": "Lorem ipsum dolor sit amet",
+                            "samples_count": "",
+                            "subjects_count": "",
+                            "data_files_count": 11062,
+                            "_subjects_count": "",
+                            "study_description": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ultricies tristique nulla aliquet enim tortor at auctor.",
+                            "short_name": "Lorem ipsum dolor sit amet",
+                            "full_name": "Lorem ipsum dolor sit amet, consectetur adipiscing elit",
+                            "commons_name": "Lorem ipsum",
+                            "__manifest": [
+                                {"filename": "foo2.txt"},
+                                {"filename": "foo3.txt"},
+                            ],
+                        },
                     },
                 }
             ],
@@ -261,7 +265,7 @@ async def test_aggregate_metadata_counts(client):
         assert resp.json() == results
 
     # test multiple counts field
-    mock_data["hits"]["hits"][0]["_source"]["__manifest"] = [
+    mock_data["hits"]["hits"][0]["_source"]["gen3_discovery"]["__manifest"] = [
         {"filename": "foo2.txt"},
         {"filename": "foo3.txt"},
     ]
@@ -291,25 +295,27 @@ async def test_aggregate_metadata_counts_null(client):
                     "_id": "815616c0-dfsdfjjj",
                     "_score": 1.0,
                     "_source": {
-                        "link": "",
-                        "tags": [
-                            {"name": "restricted", "category": "Access"},
-                            {"name": "genomic", "category": "category"},
-                        ],
-                        "commons": "LI",
-                        "_unique_id": "815616c0-c4a4-4883-9107-a05694499a36",
-                        "dataset_code": "LI",
-                        "brief_summary": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
-                        "dataset_title": "Lorem ipsum dolor sit amet",
-                        "samples_count": "",
-                        "subjects_count": "",
-                        "data_files_count": 11062,
-                        "_subjects_count": "",
-                        "study_description": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ultricies tristique nulla aliquet enim tortor at auctor.",
-                        "short_name": "Lorem ipsum dolor sit amet",
-                        "full_name": "Lorem ipsum dolor sit amet, consectetur adipiscing elit",
-                        "commons_name": "Lorem ipsum",
-                        "__manifest": None,
+                        "gen3_discovery": {
+                            "link": "",
+                            "tags": [
+                                {"name": "restricted", "category": "Access"},
+                                {"name": "genomic", "category": "category"},
+                            ],
+                            "commons": "LI",
+                            "_unique_id": "815616c0-c4a4-4883-9107-a05694499a36",
+                            "dataset_code": "LI",
+                            "brief_summary": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
+                            "dataset_title": "Lorem ipsum dolor sit amet",
+                            "samples_count": "",
+                            "subjects_count": "",
+                            "data_files_count": 11062,
+                            "_subjects_count": "",
+                            "study_description": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ultricies tristique nulla aliquet enim tortor at auctor.",
+                            "short_name": "Lorem ipsum dolor sit amet",
+                            "full_name": "Lorem ipsum dolor sit amet, consectetur adipiscing elit",
+                            "commons_name": "Lorem ipsum",
+                            "__manifest": None,
+                        },
                     },
                 }
             ],

From 546757f061cda63776304f2c00f3917c4e0c7397 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Mon, 30 Jan 2023 22:52:07 -0600
Subject: [PATCH 04/17] wip: fix test

---
 tests/test_agg_mds_elasticsearch_dao.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_agg_mds_elasticsearch_dao.py b/tests/test_agg_mds_elasticsearch_dao.py
index e26638bb..4ad90a0a 100644
--- a/tests/test_agg_mds_elasticsearch_dao.py
+++ b/tests/test_agg_mds_elasticsearch_dao.py
@@ -410,11 +410,11 @@ def test_count_value_none():
 
 def test_process_records():
     _id = "123"
-    _source = {"count": [1, 2, 3, 4], "name": "my_name"}
+    _source = {"gen3_discovery": {"count": [1, 2, 3, 4], "name": "my_name"}}
     record = {"_id": _id, "_source": _source}
     rid, normalized = process_record(record, ["count"])
     assert rid == _id
-    assert normalized == {"count": 4, "name": "my_name"}
+    assert normalized == {"gen3_discovery": {"count": 4, "name": "my_name"}}
 
     # test if passed dict field is not array
     rid, normalized = process_record(record, ["name"])

From 3592b832459d976e2efcc57060b33bfb5e102af5 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Tue, 31 Jan 2023 14:26:59 -0600
Subject: [PATCH 05/17] wip: es update

---
 .../agg_mds/datastore/elasticsearch_dao.py    | 22 +++++++++++++++----
 src/mds/populate.py                           |  9 ++++++--
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/mds/agg_mds/datastore/elasticsearch_dao.py b/src/mds/agg_mds/datastore/elasticsearch_dao.py
index c55c69ff..5266ef45 100644
--- a/src/mds/agg_mds/datastore/elasticsearch_dao.py
+++ b/src/mds/agg_mds/datastore/elasticsearch_dao.py
@@ -260,7 +260,13 @@ async def get_commons():
             index=AGG_MDS_INDEX,
             body={
                 "size": 0,
-                "aggs": {"commons_names": {"terms": {"field": "commons_name.keyword"}}},
+                "aggs": {
+                    "commons_names": {
+                        "terms": {
+                            "field": f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.commons_name.keyword"
+                        }
+                    }
+                },
             },
         )
         return {
@@ -398,7 +404,13 @@ async def get_all_named_commons_metadata(name):
     try:
         res = elastic_search_client.search(
             index=AGG_MDS_INDEX,
-            body={"query": {"match": {"commons_name.keyword": name}}},
+            body={
+                "query": {
+                    "match": {
+                        f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.commons_name.keyword": name
+                    }
+                }
+            },
         )
         return [x["_source"] for x in res["hits"]["hits"]]
     except Exception as error:
@@ -458,7 +470,7 @@ async def get_commons_attribute(name):
                 }
             },
         )
-        return data["hits"]["hits"][0][AGG_MDS_DEFAULT_STUDY_DATA_FIELD]["_source"]
+        return data["hits"]["hits"][0]["_source"]
     except Exception as error:
         logger.error(error)
         return None
@@ -473,7 +485,9 @@ async def get_aggregations(name):
                 "query": {
                     "constant_score": {
                         "filter": {
-                            "match": {"commons_name": name},
+                            "match": {
+                                f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.commons_name": name
+                            },
                         }
                     }
                 },
diff --git a/src/mds/populate.py b/src/mds/populate.py
index f2d279b7..269f58c0 100644
--- a/src/mds/populate.py
+++ b/src/mds/populate.py
@@ -178,8 +178,13 @@ async def main(commons_config: Commons) -> None:
         "mappings": {
             "commons": {
                 "properties": {
-                    k: v.to_schema(True)
-                    for k, v in commons_config.configuration.schema.items()
+                    config.AGG_MDS_DEFAULT_STUDY_DATA_FIELD: {
+                        "type": "nested",
+                        "properties": {
+                            k: v.to_schema(True)
+                            for k, v in commons_config.configuration.schema.items()
+                        },
+                    }
                 }
             }
         }

From 1ef12b24a79f9e939a524b79cd7e720304d85b21 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Tue, 31 Jan 2023 15:42:18 -0600
Subject: [PATCH 06/17] wip: use nested query for named commons metadata; prefix tags aggregation paths

---
 src/mds/agg_mds/datastore/elasticsearch_dao.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/mds/agg_mds/datastore/elasticsearch_dao.py b/src/mds/agg_mds/datastore/elasticsearch_dao.py
index 5266ef45..a5f1e869 100644
--- a/src/mds/agg_mds/datastore/elasticsearch_dao.py
+++ b/src/mds/agg_mds/datastore/elasticsearch_dao.py
@@ -406,8 +406,13 @@ async def get_all_named_commons_metadata(name):
             index=AGG_MDS_INDEX,
             body={
                 "query": {
-                    "match": {
-                        f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.commons_name.keyword": name
+                    "nested": {
+                        "path": AGG_MDS_DEFAULT_STUDY_DATA_FIELD,
+                        "query": {
+                            "match": {
+                                f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.commons_name.keyword": "HEAL"
+                            }
+                        },
                     }
                 }
             },
@@ -426,14 +431,16 @@ async def metadata_tags():
                 "size": 0,
                 "aggs": {
                     "tags": {
-                        "nested": {"path": "tags"},
+                        "nested": {"path": f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.tags"},
                         "aggs": {
                             "categories": {
-                                "terms": {"field": "tags.category.keyword"},
+                                "terms": {
+                                    "field": f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.tags.category.keyword"
+                                },
                                 "aggs": {
                                     "name": {
                                         "terms": {
-                                            "field": "tags.name.keyword",
+                                            "field": f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.tags.name.keyword"
                                         }
                                     }
                                 },

From 9eafb0c45f6e7a95f33add9c28d596548e233961 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Tue, 31 Jan 2023 16:06:28 -0600
Subject: [PATCH 07/17] wip: wrap get_commons terms aggregation in a nested aggregation

---
 src/mds/agg_mds/datastore/elasticsearch_dao.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/mds/agg_mds/datastore/elasticsearch_dao.py b/src/mds/agg_mds/datastore/elasticsearch_dao.py
index a5f1e869..f72055ce 100644
--- a/src/mds/agg_mds/datastore/elasticsearch_dao.py
+++ b/src/mds/agg_mds/datastore/elasticsearch_dao.py
@@ -261,10 +261,15 @@ async def get_commons():
             body={
                 "size": 0,
                 "aggs": {
-                    "commons_names": {
-                        "terms": {
-                            "field": f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.commons_name.keyword"
-                        }
+                    AGG_MDS_DEFAULT_STUDY_DATA_FIELD: {
+                        "nested": {"path": AGG_MDS_DEFAULT_STUDY_DATA_FIELD},
+                        "aggs": {
+                            "commons_names": {
+                                "terms": {
+                                    "field": f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.commons_name.keyword"
+                                }
+                            }
+                        },
                     }
                 },
             },

From d0c333450e1fac1bc3e3468c99d5689fe81e28ce Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Tue, 31 Jan 2023 16:07:36 -0600
Subject: [PATCH 08/17] wip: read get_commons buckets from the nested aggregation result

---
 src/mds/agg_mds/datastore/elasticsearch_dao.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mds/agg_mds/datastore/elasticsearch_dao.py b/src/mds/agg_mds/datastore/elasticsearch_dao.py
index f72055ce..6b9fb3b7 100644
--- a/src/mds/agg_mds/datastore/elasticsearch_dao.py
+++ b/src/mds/agg_mds/datastore/elasticsearch_dao.py
@@ -276,7 +276,10 @@ async def get_commons():
         )
         return {
             "commons": [
-                x["key"] for x in res["aggregations"]["commons_names"]["buckets"]
+                x["key"]
+                for x in res["aggregations"][AGG_MDS_DEFAULT_STUDY_DATA_FIELD][
+                    "commons_names"
+                ]["buckets"]
             ]
         }
     except Exception as error:

From 626c967b00d518179c25220bf9c6734a5d1120de Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Tue, 31 Jan 2023 16:35:32 -0600
Subject: [PATCH 09/17] wip: update ES DAO tests for the nested study data field

---
 tests/test_agg_mds_elasticsearch_dao.py | 61 ++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 12 deletions(-)

diff --git a/tests/test_agg_mds_elasticsearch_dao.py b/tests/test_agg_mds_elasticsearch_dao.py
index 4ad90a0a..99f4823c 100644
--- a/tests/test_agg_mds_elasticsearch_dao.py
+++ b/tests/test_agg_mds_elasticsearch_dao.py
@@ -12,6 +12,7 @@
     AGG_MDS_INFO_INDEX_TEMP,
     AGG_MDS_CONFIG_INDEX_TEMP,
     AGG_MDS_INFO_TYPE,
+    AGG_MDS_DEFAULT_STUDY_DATA_FIELD,
     count,
     process_record,
 )
@@ -210,7 +211,7 @@ async def test_update_metadata():
             [
                 {
                     "my_id": {
-                        "gen3_discovery": {
+                        AGG_MDS_DEFAULT_STUDY_DATA_FIELD: {
                             "some_field": "some_value",
                             "__manifest": {},
                             "sites": "",
@@ -232,7 +233,7 @@ async def test_update_metadata():
             ),
             call(
                 body={
-                    "gen3_discovery": {
+                    AGG_MDS_DEFAULT_STUDY_DATA_FIELD: {
                         "some_field": "some_value",
                         "__manifest": {},
                         "sites": "",
@@ -258,7 +259,7 @@ async def test_update_metadata_to_temp_index():
             [
                 {
                     "my_id": {
-                        "gen3_discovery": {
+                        AGG_MDS_DEFAULT_STUDY_DATA_FIELD: {
                             "some_field": "some_value",
                             "__manifest": {},
                             "sites": "",
@@ -281,7 +282,7 @@ async def test_update_metadata_to_temp_index():
             ),
             call(
                 body={
-                    "gen3_discovery": {
+                    AGG_MDS_DEFAULT_STUDY_DATA_FIELD: {
                         "some_field": "some_value",
                         "__manifest": {},
                         "sites": "",
@@ -377,7 +378,18 @@ async def test_get_commons():
             index=AGG_MDS_INDEX,
             body={
                 "size": 0,
-                "aggs": {"commons_names": {"terms": {"field": "commons_name.keyword"}}},
+                "aggs": {
+                    AGG_MDS_DEFAULT_STUDY_DATA_FIELD: {
+                        "nested": {"path": AGG_MDS_DEFAULT_STUDY_DATA_FIELD},
+                        "aggs": {
+                            "commons_names": {
+                                "terms": {
+                                    "field": f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.commons_name.keyword"
+                                }
+                            }
+                        },
+                    }
+                },
             },
         )
 
@@ -410,11 +422,15 @@ def test_count_value_none():
 
 def test_process_records():
     _id = "123"
-    _source = {"gen3_discovery": {"count": [1, 2, 3, 4], "name": "my_name"}}
+    _source = {
+        AGG_MDS_DEFAULT_STUDY_DATA_FIELD: {"count": [1, 2, 3, 4], "name": "my_name"}
+    }
     record = {"_id": _id, "_source": _source}
     rid, normalized = process_record(record, ["count"])
     assert rid == _id
-    assert normalized == {"gen3_discovery": {"count": 4, "name": "my_name"}}
+    assert normalized == {
+        AGG_MDS_DEFAULT_STUDY_DATA_FIELD: {"count": 4, "name": "my_name"}
+    }
 
     # test if passed dict field is not array
     rid, normalized = process_record(record, ["name"])
@@ -453,7 +469,18 @@ async def test_get_all_named_commons_metadata():
         await elasticsearch_dao.get_all_named_commons_metadata("my-commons")
         mock_client.search.assert_called_with(
             index=AGG_MDS_INDEX,
-            body={"query": {"match": {"commons_name.keyword": "my-commons"}}},
+            body={
+                "query": {
+                    "nested": {
+                        "path": AGG_MDS_DEFAULT_STUDY_DATA_FIELD,
+                        "query": {
+                            "match": {
+                                f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.commons_name.keyword": "HEAL"
+                            }
+                        },
+                    }
+                }
+            },
         )
 
     with patch(
@@ -477,12 +504,18 @@ async def test_metadata_tags():
                 "size": 0,
                 "aggs": {
                     "tags": {
-                        "nested": {"path": "tags"},
+                        "nested": {"path": f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.tags"},
                         "aggs": {
                             "categories": {
-                                "terms": {"field": "tags.category.keyword"},
+                                "terms": {
+                                    "field": f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.tags.category.keyword"
+                                },
                                 "aggs": {
-                                    "name": {"terms": {"field": "tags.name.keyword"}}
+                                    "name": {
+                                        "terms": {
+                                            "field": f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.tags.name.keyword"
+                                        }
+                                    }
                                 },
                             }
                         },
@@ -528,7 +561,11 @@ async def test_get_aggregations():
                 "size": 0,
                 "query": {
                     "constant_score": {
-                        "filter": {"match": {"commons_name": "my-commons"}}
+                        "filter": {
+                            "match": {
+                                f"{AGG_MDS_DEFAULT_STUDY_DATA_FIELD}.commons_name": "my-commons"
+                            }
+                        }
                     }
                 },
                 "aggs": {"_subjects_count": {"sum": {"field": "_subjects_count"}}},

From c7f1f3e1f40287f5d49b458ce03f98e5b895f824 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Tue, 31 Jan 2023 16:47:10 -0600
Subject: [PATCH 10/17] wip: await update_config_info check; unescape wildcard in query test

---
 tests/test_populate.py | 2 +-
 tests/test_query.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_populate.py b/tests/test_populate.py
index 11e8c3f9..b50af1eb 100644
--- a/tests/test_populate.py
+++ b/tests/test_populate.py
@@ -275,7 +275,7 @@ async def test_populate_config():
             )
         config = parse_config_from_file(Path(fp.name))
         await populate_config(config)
-        mock_datastore.update_config_info.called_with(["_subjects_count"])
+        await mock_datastore.update_config_info.called_with(["_subjects_count"])
 
 
 @pytest.mark.asyncio
diff --git a/tests/test_query.py b/tests/test_query.py
index 20ce9d16..8269a84d 100644
--- a/tests/test_query.py
+++ b/tests/test_query.py
@@ -171,7 +171,7 @@ def test_query_filter_all_values(client):
         assert list(sorted(client.get("/metadata?a.b=*").json())) == ["tq_5"]
 
         # query all records with a == "*"
-        assert list(sorted(client.get("/metadata?a=\*").json())) == ["tq_4"]
+        assert list(sorted(client.get("/metadata?a=*").json())) == ["tq_4"]
     finally:
         for i in range(1, 8):
             client.delete(f"/metadata/tq_{i}")

From 10cc1d1abd3aad003cf2bceb6dc3e532303646f0 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Tue, 31 Jan 2023 16:50:34 -0600
Subject: [PATCH 11/17] wip: restore escaped wildcard in query filter test

---
 tests/test_query.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_query.py b/tests/test_query.py
index 8269a84d..20ce9d16 100644
--- a/tests/test_query.py
+++ b/tests/test_query.py
@@ -171,7 +171,7 @@ def test_query_filter_all_values(client):
         assert list(sorted(client.get("/metadata?a.b=*").json())) == ["tq_5"]
 
         # query all records with a == "*"
-        assert list(sorted(client.get("/metadata?a=*").json())) == ["tq_4"]
+        assert list(sorted(client.get("/metadata?a=\*").json())) == ["tq_4"]
     finally:
         for i in range(1, 8):
             client.delete(f"/metadata/tq_{i}")

From db7be3a40c1fe2bb0f92ee40e8058f1b11e4d512 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Wed, 1 Feb 2023 11:20:01 -0600
Subject: [PATCH 12/17] update version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 428d0c5d..7862cead 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "mds"
-version = "2.0.1"
+version = "3.0.0"
 description = "Metadata Service"
 authors = ["CTDS UChicago <cdis@uchicago.edu>"]
 license = "Apache-2.0"

From 4a651c79e6ee2a081c06bdd838bc72d7de8f4c19 Mon Sep 17 00:00:00 2001
From: mfshao <mfshao@users.noreply.github.com>
Date: Wed, 1 Feb 2023 17:20:55 +0000
Subject: [PATCH 13/17] Apply automatic documentation changes

---
 docs/openapi.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/openapi.yaml b/docs/openapi.yaml
index e27f9a19..e6f32a34 100644
--- a/docs/openapi.yaml
+++ b/docs/openapi.yaml
@@ -92,7 +92,7 @@ components:
       type: http
 info:
   title: Framework Services Object Management Service
-  version: 2.0.1
+  version: 3.0.0
 openapi: 3.0.2
 paths:
   /_status:

From d851a5f0bbe7da3f35bf6adabb2367279d576a62 Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Wed, 1 Feb 2023 14:48:25 -0600
Subject: [PATCH 14/17] update get_commons_info docstring example

---
 src/mds/agg_mds/query.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/mds/agg_mds/query.py b/src/mds/agg_mds/query.py
index 365acc6c..ccf805b6 100644
--- a/src/mds/agg_mds/query.py
+++ b/src/mds/agg_mds/query.py
@@ -29,12 +29,7 @@ async def get_commons_info(what: str):
 
     Example:
 
-        {
-          schema: {
-                ...
-                ...
-                }
-        }
+        {"__manifest":{"type":"array","properties":{"file_name":{"type":"string","description":""},"file_size":{"type":"integer","description":""}},"description":"","default":[]},"commons_url":{"type":"string","description":""}}
 
     """
     res = await datastore.get_commons_attribute(what)

From bc32f94fc7032081d0fc606e9e54e76cee95279c Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Wed, 1 Feb 2023 14:56:33 -0600
Subject: [PATCH 15/17] update doc

---
 src/mds/agg_mds/query.py | 54 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/src/mds/agg_mds/query.py b/src/mds/agg_mds/query.py
index ccf805b6..4bac360b 100644
--- a/src/mds/agg_mds/query.py
+++ b/src/mds/agg_mds/query.py
@@ -29,7 +29,30 @@ async def get_commons_info(what: str):
 
     Example:
 
-        {"__manifest":{"type":"array","properties":{"file_name":{"type":"string","description":""},"file_size":{"type":"integer","description":""}},"description":"","default":[]},"commons_url":{"type":"string","description":""}}
+        {
+            "__manifest":{
+                "type":"array",
+                "properties":{
+                    "file_name":{
+                        "type":"string",
+                        "description":""
+                    },
+                    "file_size":{
+                        "type":"integer",
+                        "description":""
+                    }
+                },
+                "description":"",
+                "default":[
+
+                ]
+            },
+            "commons_url":{
+                "type":"string",
+                "description":""
+            },
+            ...
+        }
 
     """
     res = await datastore.get_commons_attribute(what)
@@ -131,7 +154,26 @@ async def get_aggregate_metadata_for_commons(
 
     Example:
 
-        [ { id2: { name: "bear" } } , { id3: { name: "cat" } }]
+        [
+            {
+                "gen3_discovery": {
+                    "name": "bear",
+                    "type": "study",
+                    ...
+                },
+                "data_dictionaries": {
+                    ...
+                }
+            },
+            {
+                "gen3_discovery": {
+                    "name": "cat",
+                    "type": "study",
+                    ...
+                }
+            },
+            ...
+        ]
 
     """
     res = await datastore.get_all_named_commons_metadata(name)
@@ -206,7 +248,13 @@ async def get_aggregate_metadata_guid(guid: str):
 
     Example:
 
-         { id2: { name: "bear" } }
+        {
+            "gen3_discovery": {
+                "name": "cat",
+                "type": "study",
+                ...
+            }
+        }
     """
     res = await datastore.get_by_guid(guid)
     if res:

From 316e9db05a8c18e7d8b567a961adf63e1a25cc6d Mon Sep 17 00:00:00 2001
From: mfshao <mfshao@users.noreply.github.com>
Date: Wed, 1 Feb 2023 20:57:22 +0000
Subject: [PATCH 16/17] Apply automatic documentation changes

---
 docs/openapi.yaml | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/docs/openapi.yaml b/docs/openapi.yaml
index e6f32a34..81054513 100644
--- a/docs/openapi.yaml
+++ b/docs/openapi.yaml
@@ -126,8 +126,14 @@ paths:
     get:
       description: "Returns status and configuration information about aggregate metadata\
         \ service.\n\nReturn configuration information. Currently supports only 1\
-        \ information type:\n**schema**\n\nExample:\n\n    {\n      schema: {\n  \
-        \          ...\n            ...\n            }\n    }"
+        \ information type:\n**schema**\n\nExample:\n\n{\n    \"__manifest\":{\n \
+        \       \"type\":\"array\",\n        \"properties\":{\n            \"file_name\"\
+        :{\n                \"type\":\"string\",\n                \"description\"\
+        :\"\"\n            },\n            \"file_size\":{\n                \"type\"\
+        :\"integer\",\n                \"description\":\"\"\n            }\n     \
+        \   },\n        \"description\":\"\",\n        \"default\":[\n\n        ]\n\
+        \    },\n    \"commons_url\":{\n        \"type\":\"string\",\n        \"description\"\
+        :\"\"\n    },\n    ...\n}"
       operationId: get_commons_info_aggregate_info__what__get
       parameters:
       - in: path
@@ -238,8 +244,9 @@ paths:
       - Aggregate
   /aggregate/metadata/guid/{guid}:
     get:
-      description: "Returns a metadata record by GUID\n\nExample:\n\n     { id2: {\
-        \ name: \"bear\" } }"
+      description: "Returns a metadata record by GUID\n\nExample:\n\n     {\n    \
+        \    \"gen3_discovery\": {\n            \"name\": \"cat\",\n            \"\
+        type\": \"study\",\n            ...\n        }\n    }"
       operationId: get_aggregate_metadata_guid_aggregate_metadata_guid__guid__get
       parameters:
       - in: path
@@ -267,8 +274,13 @@ paths:
     get:
       description: "et all metadata records from a commons by name\n\nReturns an array\
         \ containing all the metadata entries for a single commons.\nThere are no\
-        \ limit/offset parameters.\n\nExample:\n\n    [ { id2: { name: \"bear\" }\
-        \ } , { id3: { name: \"cat\" } }]"
+        \ limit/offset parameters.\n\nExample:\n\n    [\n        {\n            \"\
+        gen3_discovery\": {\n                \"name\": \"bear\",\n               \
+        \ \"type\": \"study\",\n                ...\n            },\n            \"\
+        data_dictionaries\": {\n                ...\n            }\n        },\n \
+        \       {\n            \"gen3_discovery\": {\n                \"name\": \"\
+        cat\",\n                \"type\": \"study\",\n                ...\n      \
+        \      }\n        },\n        ...\n    ]"
       operationId: get_aggregate_metadata_for_commons_aggregate_metadata__name__get
       parameters:
       - in: path

From 90e605a025a1bb88f6c1be818b7d6a39042b4bfc Mon Sep 17 00:00:00 2001
From: Mingfei Shao <mshao1@uchicago.edu>
Date: Wed, 1 Feb 2023 21:34:03 -0600
Subject: [PATCH 17/17] dummy