Add initial data dict
This commit is contained in:
parent
fb28c6c5c5
commit
36fcb5f260
19 changed files with 784 additions and 116 deletions
228
src/nrsk/models.py
Normal file
228
src/nrsk/models.py
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
"""
|
||||
Define the Data Dictionary.
|
||||
|
||||
Implementation of Data Dictionary
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. impl:: Maintain the Data Dictionary base data using Pydantic
|
||||
:id: I_DATA_DICT
|
||||
:links: R_DATA_DICT
|
||||
|
||||
The data dictionary is managed using Pydantic. Pydantic allows for
|
||||
concise Python code to richly define data models and their fields. From a single
|
||||
class definition, it provides data validation, automatic rich documentation (via
|
||||
a Sphinx plugin), an integration with FastAPI for data exchange, and
|
||||
relatively easy integration with sqlalchemy for database persistence. Changes to
|
||||
the schema can be managed and controlled via the revision control system, and
|
||||
changes to a single source (the Python code) will automatically propagate the
|
||||
rendered documentation and, potentially the database (e.g. using *alembic*)
|
||||
|
||||
Using SQLAlchemy as the database engine enables wide flexibility in underlying
|
||||
database technology, including PostgreSQL, MySQL, SQLite, Oracle, and MS SQL
|
||||
Server. Pydantic models allow us to validate data loaded from a database,
|
||||
directly from structured text file, or from JSON data delivered via the network.
|
||||
|
||||
Analysis of Alternatives
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
SQLModel :cite:p:`SQLModel` was considered as the data layer base, but it was
|
||||
determined to be less mature than pydantic and sqlalchemy, with inadequate
|
||||
documentation related to field validation. It was determined to use Pydantic
|
||||
directly for schema definitions.
|
||||
|
||||
.. _data-dict:
|
||||
|
||||
Data Dictionary
|
||||
^^^^^^^^^^^^^^^
|
||||
This is the official Data Dictionary discussed in :ref:`the Information
|
||||
Management Plan <info-mgmt-data-dict>`.
|
||||
"""
|
||||
|
||||
import re
|
||||
import uuid
|
||||
from datetime import date, datetime
|
||||
from typing import Annotated, Optional
|
||||
|
||||
from pydantic import (
|
||||
AnyUrl,
|
||||
BaseModel,
|
||||
EmailStr,
|
||||
Field,
|
||||
FieldValidationInfo,
|
||||
PositiveInt,
|
||||
ValidationError,
|
||||
field_validator,
|
||||
)
|
||||
|
||||
# Abbreviations are one or more capital letters (e.g. "RCS").
# BUG FIX: the previous pattern "^[A-Z]$" only matched a single character,
# so any multi-letter abbreviation would fail validation.
ALL_CAPS = re.compile(r"^[A-Z]+$")
|
||||
# Reusable annotated type for the primary-key field shared by the models
# below (used as ``uuid: UUID_PK``). Generates a fresh UUID4 per instance.
UUID_PK = Annotated[
    uuid.UUID,
    Field(
        default_factory=uuid.uuid4,
        description="The unique ID of this object. Used as a primary key in the database.",
        examples=["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
        frozen=True,
        # NOTE(review): ``primary_key`` is not a standard Pydantic Field
        # argument; presumably consumed by the SQLAlchemy/persistence
        # integration mentioned in the module docstring — confirm.
        primary_key=True,
    ),
]
|
||||
|
||||
|
||||
class User(BaseModel):
    """A person involved in the Project."""

    uuid: UUID_PK
    given_name: str
    family_name: str
    # Display name, when different from the given/family name.
    preferred_name: Optional[str] = None
    email: EmailStr
    # BUG FIX: these were ``Optional[...]`` with no default, which Pydantic
    # treats as *required* (nullable) fields. Default to None for consistency
    # with the other models in this module (e.g. OpenItem.closed_on).
    joined_on: Optional[datetime] = None
    deactivated_on: Optional[datetime] = None
|
||||
|
||||
|
||||
class OpenItem(BaseModel):
    """A tracked item with a lifecycle from creation to closure."""

    uuid: UUID_PK
    name: str
    # NOTE(review): free-form string; presumably drawn from a small status
    # vocabulary defined elsewhere — confirm.
    status: str
    created_on: datetime
    # Remains None while the item is still open.
    closed_on: Optional[datetime] = None
|
||||
|
||||
|
||||
class SSC(BaseModel):
    """
    A Structure, System, or Component in the plant.

    This is a generic hierarchical object that can represent plants, units,
    buildings and their structures, systems, subsystems, components,
    subcomponents, etc.

    A physical tree of buildings/structures/rooms may have overlapping
    contents in terms of systems/components/equipment/parts
    """

    uuid: UUID_PK
    name: str
    pbs_code: Optional[str] = Field(
        description="An integer sequence that determines the 'system number' and also the ordering in printouts",
        examples=["1.2.3", "20.5.11"],
        default="",
    )
    """PBS code is tied closely to the structure of the PBS, obviously. If 1.2
    is a category level, that's ok, but that doesn't imply that the second level
    of PBS 2 is also a category level; it may be systems.

    Since this can change in major PBS reorganizations, it should not be used
    for cross-referencing (use UUID).
    """

    abbrev: str = Field(
        description="A human-friendly abbreviation uniquely defining the system"
    )
    parent: Optional["SSC"] = None
    functions: Optional[list[str]] = Field(
        description="Functions of this system", default=None
    )

    @field_validator("abbrev")
    @classmethod
    def abbrev_must_be_all_caps(cls, v: str, info: FieldValidationInfo) -> str:
        """Require ``abbrev`` to match ALL_CAPS; return the validated value."""
        # Raise instead of assert: asserts are stripped under ``python -O``.
        if not re.match(ALL_CAPS, v):
            raise ValueError(f"{info.field_name} must be all CAPS")
        # BUG FIX: a field_validator must return the value; falling through
        # returned None, silently replacing the field with None.
        return v

    @field_validator("pbs_code")
    @classmethod
    def pbs_must_be_int_sequence(cls, v: str, info: FieldValidationInfo) -> str:
        """Require ``pbs_code`` to be empty or dot-separated integers; return it."""
        # NOTE(review): the pattern also accepts a trailing dot ("1.2.") —
        # confirm whether that is intended.
        if v and not re.match(r"^(\d+\.?)+$", v):
            raise ValueError(
                f"{info.field_name} must be an integer sequence, like 1.2.3"
            )
        # BUG FIX: return the value (see abbrev_must_be_all_caps).
        return v
|
||||
|
||||
|
||||
class SystemsList(BaseModel):
    """A flat list of Systems in the plant.

    Can be used e.g. to render a snapshot of the Master Systems List.

    Does not include categories like "Nuclear Island" or "Primary Systems".

    We may want another structure that represents the whole tree in a
    well-defined manner, or we may want to add a 'path' attr
    to systems that define where they live.
    """

    # Flat (non-hierarchical) collection; hierarchy lives on SSC.parent.
    systems: list[SSC]
|
||||
|
||||
|
||||
class ParamDef(BaseModel):
    """Definition of a parameter class describing one aspect of plant design.

    Concrete values of a parameter are held by :class:`ParamVal`.
    """

    uuid: UUID_PK
    name: str = Field(
        examples=["Nominal gross power"],
        description="Name of parameter class",
    )
    description: str = Field(
        description="Detailed description of what parameters of this type represent",
    )
    valid_units: Optional[list[str]] = Field(
        default=None,
        examples=["MW", "W", "shp"],
        description="List of units allowed",
    )
|
||||
|
||||
|
||||
class ParamVal(BaseModel):
    """One concrete value of a parameter, attached to a particular SSC."""

    ssc: SSC
    pdef: ParamDef
    value: str
    units: Optional[str] = None
    pedigree: str = Field(
        description=(
            "Indication of how well it is known (rough estimate, final design, as-built)."
        ),
    )
    source: str = Field(
        description="Where this version of the value came from",
    )
|
||||
|
||||
|
||||
class ITSystem(BaseModel):
    """An IT system used by the project."""

    uuid: UUID_PK
    name: str
    vendor: str
    version: Optional[str] = None
    use_cases: list[str] = Field(
        examples=[["Document management"]],
        description="One or more use cases this system is used for.",
    )
    physical_location: str = Field(
        description="Where the system is physically located",
    )
    url: Optional[AnyUrl] = Field(
        default=None,
        description="Full URL to the system",
    )
    custodian: Optional[User] = Field(
        default=None,
        description="Person currently in charge of system",
    )
    launched_on: Optional[datetime] = None
    retired_on: Optional[datetime] = None
    quality_related: bool
|
||||
|
||||
|
||||
class Document(BaseModel):
    """A controlled document tracked by the project."""

    uuid: UUID_PK
    title: str = Field(
        description="Descriptive title explaining the contents",
        examples=["CNSG Development and Status 1966-1977"],
    )
    """
    .. impl:: Document title

        This is how doc titles are done.
    """
    revision: str = Field(
        description="Revision number",
        examples=["0", "1", "1a", "A"],
    )
    # NOTE: shadows the ``type`` builtin, but renaming would change the schema.
    type: str
    originators: list[str]
    status: str

    @field_validator("type")
    @classmethod
    def type_must_be_valid(cls, v: str, info: FieldValidationInfo) -> str:
        """Require ``type`` to be a known doctype; return the validated value."""
        # TODO(review): doctype list is hard-coded; consider sourcing it from
        # configuration or an Enum shared with the rest of the project.
        # Raise instead of assert: asserts are stripped under ``python -O``.
        if v not in ["CALC", "PROC"]:
            raise ValueError(f"{info.field_name} must be within the list of doctypes")
        # BUG FIX: a field_validator must return the value; falling through
        # returned None, silently replacing the field with None.
        return v
|
||||
0
src/nrsk/plant/__init__.py
Normal file
0
src/nrsk/plant/__init__.py
Normal file
66
src/nrsk/plant/load_plant_data.py
Normal file
66
src/nrsk/plant/load_plant_data.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
"""
|
||||
Read plant information like systems, equipment, & params from a folder structure.
|
||||
|
||||
This reads it into the standard data structures defined via Pydantic,
|
||||
which can then be used for any other purpose (reporting, etc.)
|
||||
|
||||
The structure here is path/to/system where the folders define the
|
||||
functional hierarchy (i.e. plant, 'island', system, subsystem).
|
||||
|
||||
Some files can exist in the hierarchy:
|
||||
|
||||
* System data files *.yaml
|
||||
* System documents *.rst
|
||||
|
||||
The documents often make use of the data in the yaml file through
|
||||
system-level (or other) ``datatemplate`` directives, e.g. to print
|
||||
out a list of System Functions or Parameters.
|
||||
|
||||
This module parses the directory tree and YAML files, combining them into one
|
||||
big tree of data.
|
||||
|
||||
Future considerations:
|
||||
|
||||
* It may make sense to have ``system.yaml`` (or ``equipment.yaml``) and
|
||||
``parameters.yaml`` in each of these folders for longer-term efficient
|
||||
loading of just the Systems List vs. the entire Equipment List (which
|
||||
will end up being more efficient in a proper database). Or not... I mean
|
||||
we could just statically render everything and it'd be pretty performant
|
||||
during reads. Maybe just have system, equipment, and param info in the
|
||||
yaml file.
|
||||
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from ruamel.yaml import YAML
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def load_yaml_tree(root_path: str | Path) -> dict:
    """Load a directory tree of files to represent the Plant systems and params.

    Walks ``root_path`` recursively; each directory becomes a nested dict keyed
    by its name, and the contents of any ``*.yaml`` file are merged into the
    dict for the directory containing it.

    :param root_path: Root of the plant-data folder hierarchy.
    :return: Nested dict mirroring the folder structure plus YAML contents.
    """
    root_path = Path(root_path)
    yaml = YAML(typ="safe")
    tree: dict = {}

    # Note: Path.walk() requires Python 3.12+.
    for root, _dirs, files in root_path.walk():
        rel = root.relative_to(root_path)
        parts = rel.parts
        # Lazy %-style args so formatting is skipped when INFO is disabled.
        logger.info("loading %s", parts)
        # Drill into (and create) the node for the current directory, so that
        # empty folders still get included in the tree.
        current = tree
        for part in parts:
            current = current.setdefault(part, {})
        for file in files:
            if file.endswith(".yaml"):
                data = yaml.load(root / file)
                # BUG FIX: an empty YAML file loads as None, and a non-mapping
                # top level (e.g. a list) cannot be merged; both previously
                # crashed ``current.update(data)``.
                if data is None:
                    continue
                if not isinstance(data, dict):
                    logger.warning(
                        "Skipping %s: top-level YAML is not a mapping", root / file
                    )
                    continue
                current.update(data)
        if parts and not current:
            # Leaf folder with no data files: record at least its name.
            current.update({"name": parts[-1]})
        logger.info("%s", current)
    return tree
|
||||
138
src/nrsk/plant/plant_data_table.py
Normal file
138
src/nrsk/plant/plant_data_table.py
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
"""Sphinx directive that makes tables of Plant Data from the PBS tree.
|
||||
|
||||
Since individual system-level data can be nicely handled with datatemplates,
|
||||
this custom directive just looks at the whole tree and makes the PBS
|
||||
structure.
|
||||
|
||||
This is somewhat duplicative of the TOC directive in the Plant folder,
|
||||
but the automatic sphinx numbering and lack of abbrev is a bit sad.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from docutils import nodes
|
||||
from docutils.parsers.rst.directives.tables import Table
|
||||
from sphinx.util import logging
|
||||
|
||||
from nrsk.plant.load_plant_data import load_yaml_tree
|
||||
|
||||
logger = logging.getLogger("[plant_data_table]")
|
||||
|
||||
|
||||
class PlantBreakdownStructureTable(Table):
    """Plant Breakdown Structure Table.

    Loads a folder tree of YAML data (via :func:`load_yaml_tree`) from the
    path given as the single directive argument and renders one table row per
    dict node in the tree, indented by depth.
    """

    has_content = False
    required_arguments = 1
    optional_arguments = 0
    option_spec = {
        # NOTE(review): "start-node" is declared but never read in run().
        "start-node": str,
        # BUG FIX: "key-path" was consumed in run() but missing here, so the
        # option could never be supplied by document authors.
        "key-path": str,
        "columns": lambda x: [c.strip() for c in x.split(",")],
        "max-depth": int,
        "hide-empty": lambda x: True,
    }

    def get_default_columns(self):
        """Default column headers, matching the cells emitted by add_row."""
        # BUG FIX: add_row emits path/name/abbrev/desc/tags per row, so the
        # previous 3-header default produced a malformed docutils table.
        return ["Path", "Name", "Abbrev", "Description", "Tags"]

    def run(self):
        """Build the table from the YAML tree; return docutils nodes."""
        env = self.state.document.settings.env
        pbs_path = Path(env.srcdir) / Path(self.arguments[0])
        logger.info(f"[plant-data-table] Loading data from: {pbs_path}")

        if not pbs_path.exists():
            # Degrade gracefully: emit a visible paragraph instead of failing
            # the whole build.
            logger.warning(f"Input data not found: {pbs_path}")
            return [nodes.paragraph(text=f"PBS data not found: {pbs_path}")]

        data = load_yaml_tree(pbs_path)

        # Drill down to optional key-path (dotted keys into the loaded tree).
        if "key-path" in self.options:
            keys = self.options["key-path"].split(".")
            logger.info(f"Using subkey: {keys}")
            for k in keys:
                data = data[k]

        max_depth = int(self.options.get("max-depth", 10))
        hide_empty = "hide-empty" in self.options
        columns = self.options.get("columns")
        if not columns:
            columns = self.get_default_columns()

        # Build table
        table_node = nodes.table()
        classes = table_node.get("classes", [])  # want table wider: this doesn't work
        classes.append("full-width")
        table_node["classes"] = classes
        tgroup = nodes.tgroup(cols=len(columns))
        table_node += tgroup

        # Header
        for _ in columns:
            tgroup += nodes.colspec(colwidth=10)
        head = nodes.thead()
        tgroup += head
        row = nodes.row()
        for col in columns:
            row += nodes.entry("", nodes.paragraph(text=col))
        head += row

        # Body
        tbody = nodes.tbody()
        tgroup += tbody

        def walk(obj, path="", depth=0):
            """Emit one row per dict node, recursing up to max_depth."""
            if depth >= max_depth:
                return
            if not isinstance(obj, dict):
                return
            for k, v in obj.items():
                current_path = f"{path}.{k}" if path else k
                if hide_empty and self.is_empty(v):
                    continue
                if not isinstance(v, dict):
                    continue
                self.add_row(tbody, columns, current_path, v, depth)
                # NOTE(review): this tests the *parent* for "functions"; the
                # comment below suggests it may have been meant to test ``v``
                # (don't descend below a system) — confirm intended rule.
                if "functions" not in obj:
                    # stop if you hit a system with functions
                    walk(v, current_path, depth + 1)

        walk(data)

        return [table_node]

    def is_empty(self, value):
        """Return True for values treated as empty by :option:`hide-empty`."""
        return value in ({}, [], "", None)

    def add_row(self, tbody, columns, path, value, depth):
        """Add a row to the table, truncated to the configured column count."""
        row = nodes.row()
        indent = " " * depth * 2  # em spaces for indentation

        cols = [
            path,
            # BUG FIX: was ``value.get("name", ...) * 10`` (a debug leftover
            # that repeated the name ten times), and ``indent`` was computed
            # but never applied.
            indent + value.get("name", "(noname)"),
            value.get("abbrev", ""),
            value.get("desc", ""),
            value.get("tags", ""),
        ]

        # BUG FIX: emit exactly len(columns) cells so the row always matches
        # the tgroup's colspec count.
        for text in cols[: len(columns)]:
            entry = nodes.entry()
            para = nodes.paragraph()
            para += nodes.Text(text)
            entry += para
            row += entry

        tbody += row
|
||||
|
||||
|
||||
def setup(app):
    """Register the plant-data-table directive with Sphinx."""
    app.add_directive("plant-data-table", PlantBreakdownStructureTable)

    metadata = {
        "version": "0.1",
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
    return metadata
|
||||
Loading…
Add table
Add a link
Reference in a new issue