PGScatalog
diff --git a/pgscatalog.calclib/src/pgscatalog/calclib/ancestry/__init__.py b/pgscatalog.calclib/src/pgscatalog/calclib/_ancestry/__init__.py
diff --git a/pgscatalog.calclib/src/pgscatalog/calclib/ancestry/read.py b/pgscatalog.calclib/src/pgscatalog/calclib/_ancestry/read.py
diff --git a/pgscatalog.calclib/src/pgscatalog/calclib/ancestry/tools.py b/pgscatalog.calclib/src/pgscatalog/calclib/_ancestry/tools.py
+1 -2
diff --git a/pgscatalog.calclib/src/pgscatalog/calclib/polygenicscore.py b/pgscatalog.calclib/src/pgscatalog/calclib/polygenicscore.py
+11 -7
diff --git a/pgscatalog.calclib/src/pgscatalog/calclib/principalcomponents.py b/pgscatalog.calclib/src/pgscatalog/calclib/principalcomponents.py
+22 -4
@@ -280,8 +280,7 @@ def pgs_adjust(
     :param norm2_2step: boolean (default=False) whether to use the two-step model vs. the full-fit
     :param ref_train_col: column name with true/false labels of samples that should be included in training PGS methods
     :param n_pcs: number of genetic PCs that will be used for PGS-adjustment
-    :return: [results_ref:df , results_target:df , results_models: dict] adjusted dfs for reference and target
-        populations, and a dictionary with model fit/parameters.
+    :return: [results_ref: df, results_target: df, results_models: dict] adjusted dfs for reference and target populations, and a dictionary with model fit/parameters.
     """
     # Check that datasets have the correct columns
     ## Check that score is in both dfs
 
@@ -6,14 +6,14 @@
 
 import pandas as pd
 
-from .ancestry.tools import (
+from ._ancestry.tools import (
     compare_ancestry,
     choose_pval_threshold,
     pgs_adjust,
     write_model,
 )
 from .principalcomponents import PopulationType
-from .ancestry import read
+from ._ancestry import read
 
 
 logger = logging.getLogger(__name__)
@@ -41,7 +41,7 @@ def __post_init__(self):
 
 @dataclasses.dataclass(frozen=True)
 class AdjustResults:
-    """Results returned by the adjust method of a PolygenicScore"""
+    """Results returned by :meth:`AggregatedPGS.adjust`"""
 
     target_label: str
     models: pd.DataFrame
@@ -51,6 +51,7 @@ class AdjustResults:
     scorecols: list[str]
 
     def write(self, directory):
+        """Write model, PGS, and PCA data to a directory"""
         self._write_model(directory)
         self._write_pgs(directory)
         self._write_pca(directory)
@@ -105,7 +106,9 @@ def _write_pgs(self, directory):
 
 
 class AggregatedPGS:
-    """A PGS that's been aggregated and melted, and may contain a reference panel and a target set
+    """A PGS that's been aggregated and melted, and that may contain samples from a reference panel and a target population.
+
+    The most useful method in this class adjusts PGS based on :meth:`genetic ancestry similarity estimation <pgscatalog.calclib.AggregatedPGS.adjust>`.
 
     >>> from ._config import Config
     >>> score_path = Config.ROOT_DIR / "tests" / "aggregated_scores.txt.gz"
@@ -156,9 +159,9 @@ def _check_overlap(self, ref_pc, target_pc):
             raise ValueError
 
     def adjust(self, *, ref_pc, target_pc, adjust_arguments=None):
-        """Adjust a PGS based on genetic ancestry similarity.
+        """Adjust a PGS based on genetic ancestry similarity estimations.
 
-        Adjusting a PGS returns AdjustResults:
+        :returns: :class:`AdjustResults`
 
         >>> from ._config import Config
         >>> from .principalcomponents import PrincipalComponents
@@ -255,7 +258,7 @@ def adjust(self, *, ref_pc, target_pc, adjust_arguments=None):
 
 
 class PolygenicScore:
-    """Represents the output of plink2 --score written to a file
+    """Represents the output of ``plink2 --score`` written to a file
 
     >>> from ._config import Config
     >>> import reprlib
@@ -340,6 +343,7 @@ def __add__(self, other):
 
     @property
     def df(self):
+        """A generator that yields dataframe chunks."""
         if self.path is not None:
             self._df = self.lazy_read()
         elif self._bigdf is not None:
 
@@ -2,19 +2,25 @@
 import itertools
 import logging
 
-from .ancestry import read
+from ._ancestry import read
 
 logger = logging.getLogger(__name__)
 
 
 class PopulationType(enum.Enum):
+    """PGS can be calculated on a reference panel or target population.
+
+    This enum mostly helps to disambiguate instances of :class:`PrincipalComponents`."""
+
     TARGET = "target"
     REFERENCE = "reference"
 
 
 class PrincipalComponents:
     """
-    This class represents principal components data calculated by fraposa-pgsc
+    This class represents principal components analysis (PCA) data calculated by ``fraposa-pgsc``.
+
+    PCA data may come from a reference population or a target population. Target populations have been projected onto the reference population.
 
     >>> from ._config import Config
     >>> related_path = Config.ROOT_DIR / "tests" / "ref.king.cutoff.id"
@@ -24,7 +30,6 @@ class PrincipalComponents:
     PrincipalComponents(dataset='reference', pop_type=PopulationType.REFERENCE, pcs_path=[PosixPath('.../pgscatalog.calclib/tests/ref.pcs')], psam_path=PosixPath('.../pgscatalog.calclib/tests/ref.psam'))
     >>> ref_pc.df.to_dict()
     {'PC1': {('reference', 'HG00096'): -23.8212, ('reference', 'HG00097'): -24.8106, ...
-
     >>> target_pcs = PrincipalComponents(pcs_path=Config.ROOT_DIR / "tests" / "target.pcs", dataset="target", pop_type=PopulationType.TARGET)
     >>> target_pcs
     PrincipalComponents(dataset='target', pop_type=PopulationType.TARGET, pcs_path=[PosixPath('.../pgscatalog.calclib/tests/target.pcs')], psam_path=None)
@@ -65,18 +70,25 @@ def __repr__(self):
 
     @property
     def pop_type(self):
+        """See :class:`PopulationType`"""
         return self._pop_type
 
     @property
     def psam_path(self):
+        """Path to a plink2 sample information file for the reference population"""
         return self._psam_path
 
     @property
     def related_path(self):
+        """Path to a plink2 kinship cutoff file
+
+        Related reference samples are removed from analysis
+        """
         return self._related_path
 
     @property
     def poplabel(self):
+        """The group label used to assign target samples that are similar to reference population groups, e.g. SAS/EUR/AFR"""
         return self._poplabel
 
     @property
@@ -98,7 +110,7 @@ def npcs_popcomp(self, value):
 
     @property
     def npcs_norm(self):
-        """Number of PCs used for population NORMALIZATION (default = 4)"""
+        """Number of PCs used for population normalization (default = 4)"""
         return self._npcs_norm
 
     @npcs_norm.setter
@@ -110,6 +122,12 @@ def npcs_norm(self, value):
 
     @property
     def df(self):
+        """A pandas dataframe that contains PCA data.
+
+        Reference data also contains population label columns loaded from sample information files.
+
+        :raises ValueError: If the reference population consists of fewer than 100 samples
+        """
         if self._df is None:
             df = read.read_pcs(
                 loc_pcs=self.pcs_path,