Add initial data dict

This commit is contained in:
Nick Touran 2025-12-12 09:40:42 -05:00
parent fb28c6c5c5
commit 36fcb5f260
19 changed files with 784 additions and 116 deletions

228
src/nrsk/models.py Normal file
View file

@ -0,0 +1,228 @@
"""
Define the Data Dictionary.
Implementation of Data Dictionary
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. impl:: Maintain the Data Dictionary base data using Pydantic
:id: I_DATA_DICT
:links: R_DATA_DICT
The data dictionary is managed using Pydantic. Pydantic allows for
concise Python code to richly define data models and their fields. From a single
class definition, it provides data validation, automatic rich documentation (via
an automatic Sphinx plugin), an integration with FastAPI for data exchange, and
relatively easy integration with sqlalchemy for database persistence. Changes to
the schema can be managed and controlled via the revision control system, and
changes to a single source (the Python code) will automatically propagate the
rendered documentation and, potentially, the database (e.g. using *alembic*).
Using SQLAlchemy as the database engine enables wide flexibility in underlying
database technology, including PostgreSQL, MySQL, SQLite, Oracle, and MS SQL
Server. Pydantic models allow us to validate data loaded from a database,
directly from structured text files, or from JSON data delivered via the network.
Analysis of Alternatives
^^^^^^^^^^^^^^^^^^^^^^^^
SQLModel :cite:p:`SQLModel` was considered as the data layer base, but it was
determined to be less mature than pydantic and sqlalchemy, with inadequate
documentation related to field validation. It was determined to use Pydantic
directly for schema definitions.
.. _data-dict:
Data Dictionary
^^^^^^^^^^^^^^^
This is the official Data Dictionary discussed in :ref:`the Information
Management Plan <info-mgmt-data-dict>`.
"""
import re
import uuid
from datetime import date, datetime
from typing import Annotated, Optional
from pydantic import (
AnyUrl,
BaseModel,
EmailStr,
Field,
FieldValidationInfo,
PositiveInt,
ValidationError,
field_validator,
)
ALL_CAPS = re.compile("^[A-Z]$")
# Reusable annotated type for primary-key fields: a random UUID generated at
# object creation (default_factory) and immutable afterwards (frozen=True).
# NOTE(review): ``primary_key`` is not a standard pydantic Field argument;
# presumably it is consumed by the sqlalchemy persistence layer mentioned in
# the module docstring -- TODO confirm.
UUID_PK = Annotated[
    uuid.UUID,
    Field(
        default_factory=uuid.uuid4,
        description="The unique ID of this object. Used as a primary key in the database.",
        examples=["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
        frozen=True,
        primary_key=True,
    ),
]
class User(BaseModel):
    """A person involved in the Project."""

    uuid: UUID_PK
    given_name: str
    family_name: str
    # Name the person prefers to go by, when it differs from given_name.
    preferred_name: Optional[str] = None
    email: EmailStr
    # Defaults added: these were previously annotated Optional but had no
    # default, which in pydantic v2 makes them *required* (nullable) fields --
    # inconsistent with every other optional timestamp in this module
    # (e.g. OpenItem.closed_on). Adding defaults is backward compatible.
    joined_on: Optional[datetime] = None
    deactivated_on: Optional[datetime] = None
class OpenItem(BaseModel):
    """A tracked open item with creation/closure lifecycle timestamps."""

    uuid: UUID_PK
    name: str
    # Free-form status string; no closed set of values is enforced here.
    status: str
    created_on: datetime
    # None while the item remains open.
    closed_on: Optional[datetime] = None
class SSC(BaseModel):
    """
    A Structure, System, or Component in the plant.

    This is a generic hierarchical object that can represent plants, units,
    buildings and their structures, systems, subsystems, components,
    subcomponents, etc.

    A physical tree of buildings/structures/rooms may have overlapping
    contents in terms of systems/components/equipment/parts
    """

    uuid: UUID_PK
    name: str
    # PBS code is tied closely to the structure of the PBS, obviously. If 1.2
    # is a category level, that's ok, but that doesn't imply that the second
    # level of PBS 2 is also a category level; it may be systems. Since this
    # can change in major PBS reorganizations, it should not be used for
    # cross-referencing (use UUID).
    pbs_code: Optional[str] = Field(
        description="An integer sequence that determines the 'system number' and also the ordering in printouts",
        examples=["1.2.3", "20.5.11"],
        default="",
    )
    abbrev: str = Field(
        description="A human-friendly abbreviation uniquely defining the system"
    )
    parent: Optional["SSC"] = None
    functions: Optional[list[str]] = Field(
        description="Functions of this system", default=None
    )

    @field_validator("abbrev")
    @classmethod
    def abbrev_must_be_all_caps(cls, v: str, info: FieldValidationInfo) -> str:  # noqa: D102
        # Raise ValueError rather than assert: asserts are stripped under
        # ``python -O``, silently disabling validation. Also *return* the
        # value -- the previous implementation returned None, which pydantic
        # would store as the field value.
        if not ALL_CAPS.match(v):
            raise ValueError(f"{info.field_name} must be all CAPS")
        return v

    @field_validator("pbs_code")
    @classmethod
    def pbs_must_be_int_sequence(cls, v: str, info: FieldValidationInfo) -> str:  # noqa: D102
        # Empty string is allowed (the field default); otherwise require a
        # dotted integer sequence. Return the value (see note above).
        if v and not re.match(r"^(\d+\.?)+$", v):
            raise ValueError(
                f"{info.field_name} must be an integer sequence, like 1.2.3"
            )
        return v
class SystemsList(BaseModel):
    """A flat list of Systems in the plant.

    Can be used e.g. to render a snapshot of the Master Systems List.
    Does not include categories like "Nuclear Island" or "Primary Systems".

    We may want another structure that represents the whole tree in a
    well-defined manner, or we may want to add a 'path' attr
    to systems that define where they live.
    """

    # Flat (non-hierarchical) collection; parent links inside each SSC still
    # carry the hierarchy if present.
    systems: list[SSC]
class ParamDef(BaseModel):
    """A parameter class defining an aspect of plant design."""

    uuid: UUID_PK
    name: str = Field(
        description="Name of parameter class", examples=["Nominal gross power"]
    )
    description: str = Field(
        description="Detailed description of what parameters of this type represent"
    )
    # None means no unit restriction is imposed for this parameter class.
    valid_units: Optional[list[str]] = Field(
        description="List of units allowed", examples=["MW", "W", "shp"], default=None
    )
class ParamVal(BaseModel):
    """A particular value of a Parameter, assigned to a particular SSC."""

    # The SSC this value applies to and the parameter class it instantiates.
    ssc: SSC
    pdef: ParamDef
    # Stored as a string; numeric interpretation is left to consumers.
    value: str
    # Unit string; not validated here against pdef.valid_units.
    units: Optional[str] = None
    pedigree: str = Field(
        description="Indication of how well it is known (rough estimate, final design, as-built)."
    )
    source: str = Field(description="Where this version of the value came from")
class ITSystem(BaseModel):
    """An IT system used by the project."""

    uuid: UUID_PK
    name: str
    vendor: str
    version: Optional[str] = None
    use_cases: list[str] = Field(
        description="One or more use cases this system is used for.",
        examples=[
            [
                "Document management",
            ]
        ],
    )
    physical_location: str = Field(description="Where the system is physically located")
    url: Optional[AnyUrl] = Field(description="Full URL to the system", default=None)
    custodian: Optional[User] = Field(
        description="Person currently in charge of system", default=None
    )
    # Lifecycle timestamps; None when not yet launched/retired.
    launched_on: Optional[datetime] = None
    retired_on: Optional[datetime] = None
    # Whether the system is quality-related (no default: must be stated explicitly).
    quality_related: bool
class Document(BaseModel):
    """A controlled project document (e.g. a calculation or a procedure)."""

    uuid: UUID_PK
    title: str = Field(
        description="Descriptive title explaining the contents",
        examples=["CNSG Development and Status 1966-1977"],
    )
    """
    .. impl:: Document title

        This is how doc titles are done.
    """
    revision: str = Field(
        description="Revision number",
        examples=["0", "1", "1a", "A"],
    )
    type: str
    originators: list[str]
    status: str

    @field_validator("type")
    @classmethod
    def type_must_be_valid(cls, v: str, info: FieldValidationInfo) -> str:
        # Raise ValueError rather than assert (asserts are stripped under
        # ``python -O``), and *return* the value: the previous implementation
        # returned None, which pydantic would store as the field value.
        if v not in ["CALC", "PROC"]:
            raise ValueError(f"{info.field_name} must be within the list of doctypes")
        return v

View file

View file

@ -0,0 +1,66 @@
"""
Read plant information like systems, equipment, & params from a folder structure.
This reads it into the standard data structures defined via Pydantic,
which can then be used for any other purpose (reporting, etc.)
The structure here is path/to/system where the folders define the
functional hierarchy (i.e. plant, 'island', system, subsystem).
Some files can exist in the hierarchy:
* System data files *.yaml
* System documents *.rst
The documents often make use of the data in the yaml file through
system-level (or other) ``datatemplate`` directives, e.g. to print
out a list of System Functions or Parameters.
This module parses the directory tree and YAML files, combining them into one
big tree of data.
Future considerations:
* It may make sense to have ``system.yaml`` (or ``equipment.yaml``) and
``parameters.yaml`` in each of these folders for longer-term efficient
loading of just the Systems List vs. the entire Equipment List (which
will end up being more efficient in a proper database). Or not... I mean
we could just statically render everything and it'd be pretty performant
during reads. Maybe just have system, equipment, and param info in the
yaml file.
"""
import logging
from pathlib import Path
from ruamel.yaml import YAML
logger = logging.getLogger(__name__)
def load_yaml_tree(root_path: str | Path) -> dict:
    """Load a directory tree of files to represent the Plant systems and params.

    Walks ``root_path`` recursively and builds a nested dict keyed by folder
    name. Any ``*.yaml`` file found in a folder is parsed and merged into that
    folder's dict. Folders that end up empty still appear in the tree with at
    least a ``name`` entry.

    Parameters
    ----------
    root_path : str | Path
        Root of the plant-data directory tree.

    Returns
    -------
    dict
        Nested mapping mirroring the folder hierarchy with YAML contents merged in.
    """
    root_path = Path(root_path)
    yaml = YAML(typ="safe")
    tree: dict = {}
    # Path.walk() requires Python 3.12+.
    for root, _dirs, files in root_path.walk():
        # Drill into the part of the tree where we are, creating empty dicts
        # along the way so empty folders get included in the tree.
        current = tree
        parts = root.relative_to(root_path).parts
        logger.info("loading %s", parts)
        for part in parts:
            current = current.setdefault(part, {})
        for file in files:
            if file.endswith(".yaml"):
                data = yaml.load(root / file)
                # An empty YAML file loads as None; the previous code crashed
                # with TypeError on current.update(None).
                if data:
                    current.update(data)
        if parts and not current:
            # Ensure empty folders still carry a human-readable name.
            current.update({"name": parts[-1]})
        logger.info("%s", current)
    return tree

View file

@ -0,0 +1,138 @@
"""Sphinx directive that makes tables of Plant Data from the PBS tree.
Since individual system-level data can be nicely handled with datatemplates,
this custom directive just looks at the whole tree and makes the PBS
structure.
This is somewhat duplicative of the TOC directive in the Plant folder,
but the automatic sphinx numbering and lack of abbrev is a bit sad.
"""
import os
from pathlib import Path
from docutils import nodes
from docutils.parsers.rst.directives.tables import Table
from sphinx.util import logging
from nrsk.plant.load_plant_data import load_yaml_tree
logger = logging.getLogger("[plant_data_table]")
class PlantBreakdownStructureTable(Table):
    """Plant Breakdown Structure Table.

    Loads the plant YAML tree from the path given as the directive argument
    and renders one row per node, recursing until ``max-depth`` is reached.
    """

    has_content = False
    required_arguments = 1
    optional_arguments = 0
    option_spec = {
        "start-node": str,
        # "key-path" was read in run() but previously missing here; docutils
        # rejects options not registered in option_spec, so the drill-down
        # branch was unreachable.
        "key-path": str,
        "columns": lambda x: [c.strip() for c in x.split(",")],
        "max-depth": int,
        "hide-empty": lambda x: True,
    }

    def get_default_columns(self):
        """Default header labels; must match the 5 cells add_row() emits.

        The previous default (["Path", "Value", "Tags"]) declared 3 columns
        while each body row carried 5 cells, producing a malformed table.
        """
        return ["Path", "Name", "Abbrev", "Description", "Tags"]

    def run(self):
        """Load the YAML tree and build the docutils table nodes."""
        env = self.state.document.settings.env
        pbs_path = Path(env.srcdir) / Path(self.arguments[0])
        logger.info(f"[plant-data-table] Loading data from: {pbs_path}")
        if not pbs_path.exists():
            # Degrade gracefully: emit a visible paragraph instead of failing the build.
            logger.warning(f"Input data not found: {pbs_path}")
            return [nodes.paragraph(text=f"PBS data not found: {pbs_path}")]
        data = load_yaml_tree(pbs_path)

        # Drill down to optional key-path
        if "key-path" in self.options:
            keys = self.options["key-path"].split(".")
            logger.info(f"Using subkey: {keys}")
            for k in keys:
                data = data[k]

        max_depth = int(self.options.get("max-depth", 10))
        hide_empty = "hide-empty" in self.options
        columns = self.options.get("columns")
        if not columns:
            columns = self.get_default_columns()

        # Build table
        table_node = nodes.table()
        classes = table_node.get("classes", [])  # want table wider: this doesn't work
        classes.append("full-width")
        table_node["classes"] = classes
        tgroup = nodes.tgroup(cols=len(columns))
        table_node += tgroup

        # Header
        for _ in columns:
            tgroup += nodes.colspec(colwidth=10)
        head = nodes.thead()
        tgroup += head
        row = nodes.row()
        for col in columns:
            row += nodes.entry("", nodes.paragraph(text=col))
        head += row

        # Body
        tbody = nodes.tbody()
        tgroup += tbody

        def walk(obj, path="", depth=0):
            if depth >= max_depth:
                return
            if not isinstance(obj, dict):
                return
            for k, v in obj.items():
                current_path = f"{path}.{k}" if path else k
                if hide_empty and self.is_empty(v):
                    continue
                if not isinstance(v, dict):
                    continue
                self.add_row(tbody, columns, current_path, v, depth)
                # stop if you hit a system with functions
                # NOTE(review): this tests the *parent* (obj), not the child
                # (v); children of a functions-bearing node are still listed
                # but not recursed into. Looks like it may have been meant to
                # test v -- confirm intended traversal before changing.
                if "functions" not in obj:
                    walk(v, current_path, depth + 1)

        walk(data)
        return [table_node]

    def is_empty(self, value):
        """Return True for values the hide-empty option should suppress."""
        return value in ({}, [], "", None)

    def add_row(self, tbody, columns, path, value, depth):
        """Add a row to the table."""
        row = nodes.row()
        # Em spaces for indentation. Previously multiplied "" (always empty)
        # and was never applied to any cell.
        indent = "\u2003" * depth * 2
        cols = [
            indent + path,
            # Previously multiplied by 10 (debug leftover), repeating the
            # name ten times in the rendered cell.
            value.get("name", "(noname)"),
            value.get("abbrev", ""),
            value.get("desc", ""),
            value.get("tags", ""),
        ]
        for col in cols:
            entry = nodes.entry()
            para = nodes.paragraph()
            para += nodes.Text(col)
            entry += para
            row += entry
        tbody += row
def setup(app):
    """Register the ``plant-data-table`` directive with Sphinx.

    Returns the standard extension metadata dict declaring parallel-safety.
    """
    app.add_directive("plant-data-table", PlantBreakdownStructureTable)
    metadata = {
        "version": "0.1",
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
    return metadata