Add initial data dict

This commit is contained in:
Nick Touran 2025-12-12 09:40:42 -05:00
parent fb28c6c5c5
commit 36fcb5f260
19 changed files with 784 additions and 116 deletions

228
src/nrsk/models.py Normal file
View file

@ -0,0 +1,228 @@
"""
Define the Data Dictionary.
Implementation of Data Dictionary
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. impl:: Maintain the Data Dictionary base data using Pydantic
:id: I_DATA_DICT
:links: R_DATA_DICT
The data dictionary is managed using Pydantic. Pydantic allows for
concise Python code to richly define data models and their fields. From a single
class definition, it provides data validation, automatic rich documentation (via
an automatic Sphinx plugin), an integration with FastAPI for data exchange, and
relatively easy integration with sqlalchemy for database persistence. Changes to
the schema can be managed and controlled via the revision control system, and
changes to a single source (the Python code) will automatically propagate the
rendered documentation and, potentially, the database (e.g. using *alembic*).
Using SQLAlchemy as the database engine enables wide flexibility in underlying
database technology, including PostgreSQL, MySQL, SQLite, Oracle, and MS SQL
Server. Pydantic models allow us to validate data loaded from a database,
directly from structured text files, or from JSON data delivered via the network.
Analysis of Alternatives
^^^^^^^^^^^^^^^^^^^^^^^^
SQLModel :cite:p:`SQLModel` was considered as the data layer base, but it was
determined to be less mature than pydantic and sqlalchemy, with inadequate
documentation related to field validation. It was determined to use Pydantic
directly for schema definitions.
.. _data-dict:
Data Dictionary
^^^^^^^^^^^^^^^
This is the official Data Dictionary discussed in :ref:`the Information
Management Plan <info-mgmt-data-dict>`.
"""
import re
import uuid
from datetime import date, datetime
from typing import Annotated, Optional
from pydantic import (
AnyUrl,
BaseModel,
EmailStr,
Field,
FieldValidationInfo,
PositiveInt,
ValidationError,
field_validator,
)
ALL_CAPS = re.compile("^[A-Z]$")
# Reusable annotated type for primary-key fields: a random UUID generated at
# object creation (default_factory) and immutable afterwards (frozen=True).
# NOTE(review): ``primary_key`` is not a standard pydantic Field argument;
# presumably it is consumed by the sqlalchemy persistence layer mentioned in
# the module docstring -- TODO confirm.
UUID_PK = Annotated[
    uuid.UUID,
    Field(
        default_factory=uuid.uuid4,
        description="The unique ID of this object. Used as a primary key in the database.",
        examples=["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
        frozen=True,
        primary_key=True,
    ),
]
class User(BaseModel):
    """A person involved in the Project."""

    uuid: UUID_PK
    given_name: str
    family_name: str
    # Name the person prefers to go by, when it differs from given_name.
    preferred_name: Optional[str] = None
    email: EmailStr
    # Defaults added: these were previously annotated Optional but had no
    # default, which in pydantic v2 makes them *required* (nullable) fields --
    # inconsistent with every other optional timestamp in this module
    # (e.g. OpenItem.closed_on). Adding defaults is backward compatible.
    joined_on: Optional[datetime] = None
    deactivated_on: Optional[datetime] = None
class OpenItem(BaseModel):
    """A tracked open item with creation/closure lifecycle timestamps."""

    uuid: UUID_PK
    name: str
    # Free-form status string; no closed set of values is enforced here.
    status: str
    created_on: datetime
    # None while the item remains open.
    closed_on: Optional[datetime] = None
class SSC(BaseModel):
    """
    A Structure, System, or Component in the plant.

    This is a generic hierarchical object that can represent plants, units,
    buildings and their structures, systems, subsystems, components,
    subcomponents, etc.

    A physical tree of buildings/structures/rooms may have overlapping
    contents in terms of systems/components/equipment/parts
    """

    uuid: UUID_PK
    name: str
    # PBS code is tied closely to the structure of the PBS, obviously. If 1.2
    # is a category level, that's ok, but that doesn't imply that the second
    # level of PBS 2 is also a category level; it may be systems. Since this
    # can change in major PBS reorganizations, it should not be used for
    # cross-referencing (use UUID).
    pbs_code: Optional[str] = Field(
        description="An integer sequence that determines the 'system number' and also the ordering in printouts",
        examples=["1.2.3", "20.5.11"],
        default="",
    )
    abbrev: str = Field(
        description="A human-friendly abbreviation uniquely defining the system"
    )
    parent: Optional["SSC"] = None
    functions: Optional[list[str]] = Field(
        description="Functions of this system", default=None
    )

    @field_validator("abbrev")
    @classmethod
    def abbrev_must_be_all_caps(cls, v: str, info: FieldValidationInfo) -> str:  # noqa: D102
        # Raise ValueError rather than assert: asserts are stripped under
        # ``python -O``, silently disabling validation. Also *return* the
        # value -- the previous implementation returned None, which pydantic
        # would store as the field value.
        if not ALL_CAPS.match(v):
            raise ValueError(f"{info.field_name} must be all CAPS")
        return v

    @field_validator("pbs_code")
    @classmethod
    def pbs_must_be_int_sequence(cls, v: str, info: FieldValidationInfo) -> str:  # noqa: D102
        # Empty string is allowed (the field default); otherwise require a
        # dotted integer sequence. Return the value (see note above).
        if v and not re.match(r"^(\d+\.?)+$", v):
            raise ValueError(
                f"{info.field_name} must be an integer sequence, like 1.2.3"
            )
        return v
class SystemsList(BaseModel):
    """A flat list of Systems in the plant.

    Can be used e.g. to render a snapshot of the Master Systems List.
    Does not include categories like "Nuclear Island" or "Primary Systems".

    We may want another structure that represents the whole tree in a
    well-defined manner, or we may want to add a 'path' attr
    to systems that define where they live.
    """

    # Flat (non-hierarchical) collection; parent links inside each SSC still
    # carry the hierarchy if present.
    systems: list[SSC]
class ParamDef(BaseModel):
    """A parameter class defining an aspect of plant design."""

    uuid: UUID_PK
    name: str = Field(
        description="Name of parameter class", examples=["Nominal gross power"]
    )
    description: str = Field(
        description="Detailed description of what parameters of this type represent"
    )
    # None means no unit restriction is imposed for this parameter class.
    valid_units: Optional[list[str]] = Field(
        description="List of units allowed", examples=["MW", "W", "shp"], default=None
    )
class ParamVal(BaseModel):
    """A particular value of a Parameter, assigned to a particular SSC."""

    # The SSC this value applies to and the parameter class it instantiates.
    ssc: SSC
    pdef: ParamDef
    # Stored as a string; numeric interpretation is left to consumers.
    value: str
    # Unit string; not validated here against pdef.valid_units.
    units: Optional[str] = None
    pedigree: str = Field(
        description="Indication of how well it is known (rough estimate, final design, as-built)."
    )
    source: str = Field(description="Where this version of the value came from")
class ITSystem(BaseModel):
    """An IT system used by the project."""

    uuid: UUID_PK
    name: str
    vendor: str
    version: Optional[str] = None
    use_cases: list[str] = Field(
        description="One or more use cases this system is used for.",
        examples=[
            [
                "Document management",
            ]
        ],
    )
    physical_location: str = Field(description="Where the system is physically located")
    url: Optional[AnyUrl] = Field(description="Full URL to the system", default=None)
    custodian: Optional[User] = Field(
        description="Person currently in charge of system", default=None
    )
    # Lifecycle timestamps; None when not yet launched/retired.
    launched_on: Optional[datetime] = None
    retired_on: Optional[datetime] = None
    # Whether the system is quality-related (no default: must be stated explicitly).
    quality_related: bool
class Document(BaseModel):
    """A controlled project document (e.g. a calculation or a procedure)."""

    uuid: UUID_PK
    title: str = Field(
        description="Descriptive title explaining the contents",
        examples=["CNSG Development and Status 1966-1977"],
    )
    """
    .. impl:: Document title

        This is how doc titles are done.
    """
    revision: str = Field(
        description="Revision number",
        examples=["0", "1", "1a", "A"],
    )
    type: str
    originators: list[str]
    status: str

    @field_validator("type")
    @classmethod
    def type_must_be_valid(cls, v: str, info: FieldValidationInfo) -> str:
        # Raise ValueError rather than assert (asserts are stripped under
        # ``python -O``), and *return* the value: the previous implementation
        # returned None, which pydantic would store as the field value.
        if v not in ["CALC", "PROC"]:
            raise ValueError(f"{info.field_name} must be within the list of doctypes")
        return v

View file

View file

@ -0,0 +1,66 @@
"""
Read plant information like systems, equipment, & params from a folder structure.
This reads it into the standard data structures defined via Pydantic,
which can then be used for any other purpose (reporting, etc.)
The structure here is path/to/system where the folders define the
functional hierarchy (i.e. plant, 'island', system, subsystem).
Some files can exist in the hierarchy:
* System data files *.yaml
* System documents *.rst
The documents often make use of the data in the yaml file through
system-level (or other) ``datatemplate`` directives, e.g. to print
out a list of System Functions or Parameters.
This module parses the directory tree and YAML files, combining them into one
big tree of data.
Future considerations:
* It may make sense to have ``system.yaml`` (or ``equipment.yaml``) and
``parameters.yaml`` in each of these folders for longer-term efficient
loading of just the Systems List vs. the entire Equipment List (which
will end up being more efficient in a proper database). Or not... I mean
we could just statically render everything and it'd be pretty performant
during reads. Maybe just have system, equipment, and param info in the
yaml file.
"""
import logging
from pathlib import Path
from ruamel.yaml import YAML
logger = logging.getLogger(__name__)
def load_yaml_tree(root_path: str | Path) -> dict:
    """Load a directory tree of files to represent the Plant systems and params.

    Walks ``root_path`` recursively and builds a nested dict keyed by folder
    name. Any ``*.yaml`` file found in a folder is parsed and merged into that
    folder's dict. Folders that end up empty still appear in the tree with at
    least a ``name`` entry.

    Parameters
    ----------
    root_path : str | Path
        Root of the plant-data directory tree.

    Returns
    -------
    dict
        Nested mapping mirroring the folder hierarchy with YAML contents merged in.
    """
    root_path = Path(root_path)
    yaml = YAML(typ="safe")
    tree: dict = {}
    # Path.walk() requires Python 3.12+.
    for root, _dirs, files in root_path.walk():
        # Drill into the part of the tree where we are, creating empty dicts
        # along the way so empty folders get included in the tree.
        current = tree
        parts = root.relative_to(root_path).parts
        logger.info("loading %s", parts)
        for part in parts:
            current = current.setdefault(part, {})
        for file in files:
            if file.endswith(".yaml"):
                data = yaml.load(root / file)
                # An empty YAML file loads as None; the previous code crashed
                # with TypeError on current.update(None).
                if data:
                    current.update(data)
        if parts and not current:
            # Ensure empty folders still carry a human-readable name.
            current.update({"name": parts[-1]})
        logger.info("%s", current)
    return tree

View file

@ -0,0 +1,138 @@
"""Sphinx directive that makes tables of Plant Data from the PBS tree.
Since individual system-level data can be nicely handled with datatemplates,
this custom directive just looks at the whole tree and makes the PBS
structure.
This is somewhat duplicative of the TOC directive in the Plant folder,
but the automatic sphinx numbering and lack of abbrev is a bit sad.
"""
import os
from pathlib import Path
from docutils import nodes
from docutils.parsers.rst.directives.tables import Table
from sphinx.util import logging
from nrsk.plant.load_plant_data import load_yaml_tree
logger = logging.getLogger("[plant_data_table]")
class PlantBreakdownStructureTable(Table):
    """Plant Breakdown Structure Table.

    Loads the plant YAML tree from the path given as the directive argument
    and renders one row per node, recursing until ``max-depth`` is reached.
    """

    has_content = False
    required_arguments = 1
    optional_arguments = 0
    option_spec = {
        "start-node": str,
        # "key-path" was read in run() but previously missing here; docutils
        # rejects options not registered in option_spec, so the drill-down
        # branch was unreachable.
        "key-path": str,
        "columns": lambda x: [c.strip() for c in x.split(",")],
        "max-depth": int,
        "hide-empty": lambda x: True,
    }

    def get_default_columns(self):
        """Default header labels; must match the 5 cells add_row() emits.

        The previous default (["Path", "Value", "Tags"]) declared 3 columns
        while each body row carried 5 cells, producing a malformed table.
        """
        return ["Path", "Name", "Abbrev", "Description", "Tags"]

    def run(self):
        """Load the YAML tree and build the docutils table nodes."""
        env = self.state.document.settings.env
        pbs_path = Path(env.srcdir) / Path(self.arguments[0])
        logger.info(f"[plant-data-table] Loading data from: {pbs_path}")
        if not pbs_path.exists():
            # Degrade gracefully: emit a visible paragraph instead of failing the build.
            logger.warning(f"Input data not found: {pbs_path}")
            return [nodes.paragraph(text=f"PBS data not found: {pbs_path}")]
        data = load_yaml_tree(pbs_path)

        # Drill down to optional key-path
        if "key-path" in self.options:
            keys = self.options["key-path"].split(".")
            logger.info(f"Using subkey: {keys}")
            for k in keys:
                data = data[k]

        max_depth = int(self.options.get("max-depth", 10))
        hide_empty = "hide-empty" in self.options
        columns = self.options.get("columns")
        if not columns:
            columns = self.get_default_columns()

        # Build table
        table_node = nodes.table()
        classes = table_node.get("classes", [])  # want table wider: this doesn't work
        classes.append("full-width")
        table_node["classes"] = classes
        tgroup = nodes.tgroup(cols=len(columns))
        table_node += tgroup

        # Header
        for _ in columns:
            tgroup += nodes.colspec(colwidth=10)
        head = nodes.thead()
        tgroup += head
        row = nodes.row()
        for col in columns:
            row += nodes.entry("", nodes.paragraph(text=col))
        head += row

        # Body
        tbody = nodes.tbody()
        tgroup += tbody

        def walk(obj, path="", depth=0):
            if depth >= max_depth:
                return
            if not isinstance(obj, dict):
                return
            for k, v in obj.items():
                current_path = f"{path}.{k}" if path else k
                if hide_empty and self.is_empty(v):
                    continue
                if not isinstance(v, dict):
                    continue
                self.add_row(tbody, columns, current_path, v, depth)
                # stop if you hit a system with functions
                # NOTE(review): this tests the *parent* (obj), not the child
                # (v); children of a functions-bearing node are still listed
                # but not recursed into. Looks like it may have been meant to
                # test v -- confirm intended traversal before changing.
                if "functions" not in obj:
                    walk(v, current_path, depth + 1)

        walk(data)
        return [table_node]

    def is_empty(self, value):
        """Return True for values the hide-empty option should suppress."""
        return value in ({}, [], "", None)

    def add_row(self, tbody, columns, path, value, depth):
        """Add a row to the table."""
        row = nodes.row()
        # Em spaces for indentation. Previously multiplied "" (always empty)
        # and was never applied to any cell.
        indent = "\u2003" * depth * 2
        cols = [
            indent + path,
            # Previously multiplied by 10 (debug leftover), repeating the
            # name ten times in the rendered cell.
            value.get("name", "(noname)"),
            value.get("abbrev", ""),
            value.get("desc", ""),
            value.get("tags", ""),
        ]
        for col in cols:
            entry = nodes.entry()
            para = nodes.paragraph()
            para += nodes.Text(col)
            entry += para
            row += entry
        tbody += row
def setup(app):
    """Register the ``plant-data-table`` directive with Sphinx.

    Returns the standard extension metadata dict declaring parallel-safety.
    """
    app.add_directive("plant-data-table", PlantBreakdownStructureTable)
    metadata = {
        "version": "0.1",
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
    return metadata