starter-kit/src/nrsk/models.py
Nick Touran 8c73123862 Add postgresql persistence of data in data dict
Moved back to sqlmodel because we do need some way
of putting this info into a database.
2026-01-05 12:04:22 -05:00

666 lines
23 KiB
Python

"""
Define the Data Dictionary.
Implementation of Data Dictionary
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. impl:: Maintain the Data Dictionary base data using Pydantic
:id: I_DATA_DICT
:links: R_DATA_DICT
The data dictionary is managed using Pydantic. Pydantic allows for
concise Python code to richly define data models and their fields. From a single
class definition, it provides data validation, automatic rich documentation (via
a Sphinx plugin), an integration with FastAPI for data exchange, and
relatively easy integration with SQLAlchemy for database persistence. Changes to
the schema can be managed and controlled via the revision control system, and
changes to a single source (the Python code) will automatically propagate to the
rendered documentation and, potentially, the database (e.g. using *alembic*).
Using SQLAlchemy as the database engine enables wide flexibility in underlying
database technology, including PostgreSQL, MySQL, SQLite, Oracle, and MS SQL
Server. Pydantic models allow us to validate data loaded from a database,
directly from a structured text file, or from JSON data delivered via the network.
Analysis of Alternatives
^^^^^^^^^^^^^^^^^^^^^^^^
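SQLModel :cite:p:`SQLModel` was considered as the data layer base, but it was
initially determined to be less mature than pydantic and sqlalchemy, with inadequate
documentation related to field validation, so it was first determined to use Pydantic
directly for schema definitions. The models in this module have since been moved
onto SQLModel (which builds on Pydantic and SQLAlchemy) because the data also
needs to be persisted to a database.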
.. _data-dict:
Data Dictionary
^^^^^^^^^^^^^^^
This is the official Data Dictionary discussed in :ref:`the Information
Management Plan <info-mgmt-data-dict>`.
"""
import re
from datetime import datetime, timedelta
from enum import StrEnum
from typing import Annotated, Any, Optional
from uuid import UUID, uuid4
# _PK_TYPE = UUID
# Moving away from UUID, at least temporarily, because SQLite doesn't
# really support it, which makes adding new data via DBeaver harder.
# _PK_TYPE is the type used below for ``id`` primary keys and for the
# foreign-key fields that point at them.
_PK_TYPE = int
from pydantic import (
AnyUrl,
BaseModel,
ConfigDict,
EmailStr,
PositiveInt,
TypeAdapter,
ValidationError,
computed_field,
field_validator,
model_validator,
)
from sqlalchemy import text
from sqlmodel import JSON, Column, Field, Relationship, SQLModel
# Matches a non-empty string consisting solely of ASCII capital letters.
# The "+" quantifier is required: without it the pattern only accepts
# one-letter strings and rejects abbreviations such as "RCS".
ALL_CAPS = re.compile(r"^[A-Z]+$")
class NRSKModel(SQLModel):
    """Shared base class for the data dictionary models defined in this module.

    It contributes the ``id`` field, which subclasses either inherit as-is or
    override with their own definition.
    """

    id: _PK_TYPE = Field(
        primary_key=True,
        description="The unique ID of this object. Used as a primary key in the database.",
        # Kept for reference from when the key type was UUID:
        # default_factory=uuid4,
        # schema_extra={
        #     "examples": ["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
        # },
    )
class DocumentUserLink(NRSKModel, table=True):
    """Association rows tying users to documents.

    Used as the ``link_model`` of the ``User.contributed`` and
    ``Document.contributors`` relationships.
    """

    # NOTE(review): ``id`` inherited from NRSKModel is also flagged as a
    # primary key, so together with the two foreign keys below the table's
    # primary key appears to span three columns -- confirm this is intended.

    position: int = Field(default=0)
    """Integer indicating order of people"""

    role_note: str = Field(default="")
    """Extra information about role such as 'lead' or 'section 2.4'"""

    document_id: _PK_TYPE | None = Field(
        default=None,
        primary_key=True,
        foreign_key="document.id",
    )
    user_id: _PK_TYPE | None = Field(
        default=None,
        primary_key=True,
        foreign_key="user.id",
    )
class User(NRSKModel, table=True):
    """A person involved in the Project."""

    # --- names ---
    given_name: str
    family_name: str
    preferred_name: str | None = None
    previous_name: str | None = None

    # --- contact ---
    email: EmailStr

    # --- membership details (may be None, but no default is declared) ---
    joined_on: datetime | None
    deactivated_on: datetime | None
    organization: str | None
    title: str | None

    # Documents this user is linked to through DocumentUserLink; the other
    # side of the relationship is ``Document.contributors``.
    contributed: list["Document"] = Relationship(
        link_model=DocumentUserLink,
        back_populates="contributors",
    )
class OpenItem(NRSKModel):
    """A named item with a status, a creation time and an optional closing time."""

    name: str
    status: str
    created_on: datetime
    # Left as None until a closing time is recorded.
    closed_on: datetime | None = None
class SSC(NRSKModel):
    """
    A Structure, System, or Component in the plant.

    This is a generic hierarchical object that can represent plants, units,
    buildings and their structures, systems, subsystems, components,
    subcomponents, etc.

    A physical tree of buildings/structures/rooms may have overlapping
    contents in terms of systems/components/equipment/parts
    """

    name: str
    pbs_code: str | None = Field(
        description="An integer sequence that determines the 'system number' and also the ordering in printouts",
        schema_extra={
            "examples": ["1.2.3", "20.5.11"],
        },
        default="",
    )
    """PBS code is tied closely to the structure of the PBS, obviously. If 1.2
    is a category level, that's ok, but that doesn't imply that the second level
    of PBS 2 is also a category level; it may be systems.

    Since this can change in major PBS reorganizations, it should not be used
    for cross-referencing (use ID).
    """
    abbrev: str = Field(
        description="A human-friendly abbreviation uniquely defining the system"
    )
    parent: Optional["SSC"] = None
    # NOTE(review): the annotation allows None *items* inside the list while
    # the default is None for the list itself; ``list[str] | None`` may be what
    # was intended -- confirm before changing the schema.
    functions: list[str | None] = Field(
        description="Functions of this system", default=None
    )

    @field_validator("abbrev", mode="after")
    @classmethod
    def abbrev_must_be_all_caps(cls, v: str) -> str:
        """Check ``abbrev`` against the module-level ``ALL_CAPS`` pattern.

        Returns the value unchanged when it matches. The return matters:
        whatever a field validator returns becomes the stored field value.

        Raises:
            ValueError: if ``v`` does not match ``ALL_CAPS``.
        """
        if not ALL_CAPS.match(v):
            raise ValueError(f"{v} must be all CAPS")
        return v

    @field_validator("pbs_code", mode="after")
    @classmethod
    def pbs_must_be_int_sequence(cls, v: str | None) -> str | None:
        """Check that a non-empty ``pbs_code`` is dot-separated integers.

        ``None`` and the empty string are passed through untouched because
        the field is optional and defaults to ``""``.

        Raises:
            ValueError: if ``v`` is non-empty and is not of the form
                ``<int>(.<int>)*``, e.g. ``1.2.3``.
        """
        # Each dot must be followed by digits, so "1.2." is rejected, and the
        # pattern has no nested optional quantifier that could backtrack badly.
        if v and not re.fullmatch(r"\d+(?:\.\d+)*", v):
            raise ValueError(f"{v} must be an integer sequence, like 1.2.3")
        return v
class SystemsList(BaseModel):
    """Flat (non-hierarchical) collection of the plant's Systems.

    One use is rendering a snapshot of the Master Systems List. Category
    entries such as "Nuclear Island" or "Primary Systems" are not included.

    A separate structure representing the whole tree in a well-defined way may
    be wanted later, or a 'path' attribute could be added to systems to define
    where they live.
    """

    systems: list[SSC]
class ParamDef(NRSKModel):
    """A parameter class defining an aspect of plant design."""

    name: str = Field(schema_extra={"examples": ["Nominal gross power"]})
    """Name of the parameter class."""

    description: str
    """Detailed description of what parameters of this type represent"""

    valid_units: list[str | None] = Field(
        default=None,
        schema_extra={"examples": ["MW", "W", "shp"]},
    )
    """List of units allowed"""
class ParamVal(NRSKModel):
    """One concrete value of a parameter, attached to one SSC.

    ``pdef`` names the parameter definition the value belongs to and ``ssc``
    the structure/system/component it is assigned to.
    """

    ssc: SSC
    pdef: ParamDef

    # The value is held as text; ``units`` is optional.
    value: str
    units: str | None = None

    pedigree: str = Field(
        description="Indication of how well it is known (rough estimate, final design, as-built)."
    )
    source: str = Field(
        description="Where this version of the value came from",
    )
class ITSystem(NRSKModel):
    """An IT system used by the project."""

    name: str
    vendor: str
    version: str | None = None

    use_cases: list[str] = Field(
        schema_extra={"examples": [["Document management"]]},
    )
    """One or more use cases this system is used for."""

    physical_location: str = Field(
        description="Where the system is physically located",
    )
    url: AnyUrl | None = Field(default=None, description="Full URL to the system")
    custodian: User | None = Field(
        default=None,
        description="Person currently in charge of system",
    )

    # Lifecycle timestamps; both default to None.
    launched_on: datetime | None = None
    retired_on: datetime | None = None

    quality_related: bool
class InformationType(NRSKModel, table=True):
    """A type/kind/class of Information, Document, or Record."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    name: str
    abbrev: str
    # Stored in a single JSON column.
    examples: list[str] | None = Field(
        sa_column=Column(JSON),
        default=None,
    )
    description: str = ""
    retention: str | None = ""
    record: bool = True
    use_cases: str = ""
    notes: str = ""

    # Self-referencing foreign key: a type may sit under a parent type.
    parent_id: _PK_TYPE | None = Field(
        foreign_key="informationtype.id",
        default=None,
    )

    # Add these two relationships for easier DB parsing in code
    parent: Optional["InformationType"] = Relationship(
        sa_relationship_kwargs={"remote_side": "InformationType.id"},
        back_populates="subtypes",
    )
    subtypes: list["InformationType"] = Relationship(back_populates="parent")


InformationTypes = TypeAdapter(list[InformationType])
"""A list of document types."""
class Document(NRSKModel, table=True):
    """
    Data dictionary entry for Documents and Records.

    Document data is designed to satisfy the needs defined in :ref:`rmdc-proc`.

    See Also
    --------
    * Some of the field definitions come from CFIHOS
      https://www.jip36-cfihos.org/wp-content/uploads/2023/08/v.1.5.1-CFIHOS-Specification-Document-1.docx
    * ISO-19650 has different Status Codes defining suitability level (for information, as-built)
      https://ukbimframework.org/wp-content/uploads/2020/05/ISO19650-2Edition4.pdf
    """

    class STATUS(StrEnum):
        """Document Status options."""

        # Much of the wording here comes from cloverDocumentControlRecords2010.
        # NOTE: if you add or remove a status, be sure to also update the
        # category property below AND :ref:`rmdc-doc-status`!

        ## Not Yet Approved:
        RESERVED = "RESERVED"
        """
        A Document ID has been assigned, but the document is in development or
        has not yet been started (default).
        """
        IN_PROGRESS = "IN PROGRESS"
        """One or more authors are creating or revising the document."""
        IN_REVIEW = "IN REVIEW"
        """A completed draft of the document has been submitted and is pending review."""
        REJECTED = "REJECTED"
        """A draft that was rejected by the review team and may be revised and resubmitted."""
        AUTHORIZED = "AUTHORIZED"
        """A controlled revision that has been signed but is not yet effective.
        Such documents may be used for training, etc. Documents with this status may
        be used for plant modifications in a work package, but not for normal operations."""
        REFERENCE = "REFERENCE"
        """Document is stored in EDMS for ease of access and reference, but
        there is no assertion that the information is the latest available.
        Useful for Standards, engineering handbook excerpts, vendor notices."""
        NATIVE = "NATIVE"
        """A document file that may be in EDMS in the native file format. Not
        used in the field because they (a) may require special software to view
        and (b) may not be controlled for field use (i.e. not quarantined if
        errors are discovered)."""

        ## Approved:
        APPROVED = "APPROVED"
        """A document revision that has been submitted by the releasing
        organization and that is authorized for the use case defined in
        the suitability code.

        * A drawing with this status during operation reflects the plant configuration
        * A drawing with this status before or during construction reflects that it is
          ready to be fabricated/built
        * A procedure with this status is effective.
        """

        ## No longer Approved:
        QUARANTINED = "QUARANTINED"
        """(On hold, Suspended) A document revision that was previously
        authorized and has been placed on hold, e.g. a procedure that cannot be
        performed as written or a design that is known to have pending changes."""
        SUPERSEDED = "SUPERSEDED"
        """A document that has been replaced by another document. The new
        document is to be recorded in the index."""
        REVISED = "REVISED"
        """A document that has been replaced by a subsequent revision of that
        document."""
        VOIDED = "VOIDED"
        """A document or revision that is no longer needed and there is no
        revision or superseding document. This would also be used for documents
        that have reached a predetermined expiration date, such as a temporary
        procedure."""
        CLOSED = "CLOSED"
        """(Archived) A document for which the work has been completed."""

        @property
        def category(self) -> str:
            """High-level status category: Not yet approved, Approved, or No Longer Approved."""
            if self.value in {
                self.RESERVED,
                self.IN_PROGRESS,
                self.IN_REVIEW,
                self.REJECTED,
                self.AUTHORIZED,
                self.REFERENCE,
                self.NATIVE,
            }:
                return "Not Yet Approved"
            if self.value in {self.APPROVED}:
                return "Approved"
            # Every status not listed above falls into the last category.
            return "No Longer Approved"

    class USAGE(StrEnum):
        """Usage options.

        Usage governs what use cases a document may be used for. It is a notion
        derived from the ISO 19650 'suitability' idea, but used in combination
        with the NIRMA status codes. It allows a document to be approved for
        e.g. a conceptual design stage without letting it inadvertently be
        released for bid or manufacture. Releasing organizations can update the
        suitability as needed.

        See https://ukbimframework.org/wp-content/uploads/2020/09/Guidance-Part-C_Facilitating-the-common-data-environment-workflow-and-technical-solutions_Edition-1.pdf
        """

        FOR_INFORMATION = "FOR INFORMATION"
        """A document revision that may be used for information only, not for
        any contractual purpose."""
        FOR_STAGE_APPROVAL = "FOR STAGE APPROVAL"
        """A document revision that is considered complete for the contractual stage in
        which it was created. For example, in a Preliminary Design phase, this
        usage would indicate that it is at the expected usage level for
        preliminary design. Most design-phase documents that are not yet ready
        for bid or construction will be marked for this usage."""
        FOR_BID = "FOR BID"
        """A document revision that is ready to be sent to external parties for bid.
        During the bid process, changes may be expected based on vendor feedback."""
        FOR_CONSTRUCTION = "FOR CONSTRUCTION"
        """A document revision that is ready to be sent to the field for manufacture,
        fabrication, construction. An approved document with this usage implies
        that all the quality, regulatory, and design aspects are in place, and
        that work can proceed. However, what is constructed is not yet
        authorized for operation."""
        FOR_OPERATION = "FOR OPERATION"
        """A document revision that can be used to operate the business and/or plant.
        Procedures of this usage may be used to do work or operate equipment."""
        AS_BUILT = "AS BUILT"
        """A document revision that is an as-built record of construction or manufacture.
        Documents of this usage may be used to operate the plant."""

    class RETENTION(StrEnum):
        """Retention plan options.

        Retention plans define how long the document or record is to be
        kept before it is destroyed.

        .. note:: May want this to actually be a timedelta
        """

        LIFETIME = "LIFETIME"
        """Lifetime of the plant."""

    # use_attribute_docstrings allows us to just use docstrings and get
    # the same info in both the JSON Schema and also the Sphinx render
    model_config = ConfigDict(use_attribute_docstrings=True)

    number: str
    """The identification number meeting the document numbering rules"""
    title: str = Field(
        schema_extra={
            "examples": ["CNSG Development and Status 1966-1977"],
        },
    )
    """Descriptive title explaining the contents"""
    revision: str = Field(
        schema_extra={
            "examples": ["0", "1", "1a", "A"],
        },
    )
    """Revision code"""
    originating_organization_id: _PK_TYPE | None = Field(
        foreign_key="organization.id",
        description="The organization that owns or issued this document",
        default=None,
    )
    # This allows you to do `my_document.originating_organization` in Python.
    # Optional because the foreign key above is nullable, so there may be no
    # related Organization.
    originating_organization: Optional["Organization"] = Relationship()
    originator_number: str | None = None
    """The originating organization's document number (if originated externally)."""
    originator_revision: str | None = None
    """The originating organization's revision code (if originated externally)."""
    type_id: _PK_TYPE = Field(
        foreign_key="informationtype.id",
        description="The ID of the InformationType",
    )
    # type: "InformationType" = Relationship()
    contributors: list[User] = Relationship(
        back_populates="contributed",
        link_model=DocumentUserLink,
        sa_relationship_kwargs={
            "order_by": "DocumentUserLink.position",
            "lazy": "selectin",
        },
    )
    """Holds all relationships with users but does not show up in JSON Schema"""

    @computed_field
    @property
    def authors(self) -> list[dict[str, Any]]:
        """List of author info for the UI.

        Returns one ``{"id": ..., "name": ...}`` dict per entry in
        ``contributors`` (all contributors, in relationship order).
        """
        # User declares given_name/family_name but no ``name`` attribute, so
        # the display name is assembled here.
        return [
            {"id": a.id, "name": f"{a.given_name} {a.family_name}"}
            for a in self.contributors
        ]

    @computed_field
    @property
    def reviewers(self) -> list[dict[str, Any]]:
        """List of reviewer info for the UI.

        Returns one ``{"id": ..., "name": ...}`` dict per contributor whose
        ``role`` attribute equals ``"reviewer"``.
        """
        # NOTE(review): neither User nor DocumentUserLink declares a ``role``
        # field (the link only carries ``role_note``), so this currently
        # yields an empty list instead of raising AttributeError. Wire this
        # up once reviewer links exist (see the commented-out relationships
        # below).
        return [
            {"id": a.id, "name": f"{a.given_name} {a.family_name}"}
            for a in self.contributors
            if getattr(a, "role", None) == "reviewer"
        ]

    # revision_reviewers: list[RevisionReviewerLink] = Relationship(
    #     back_populates="reviewed",
    #     link_model=RevisionReviewerLink,
    #     sa_relationship_kwargs={
    #         "order_by": "RevisionReviewerLink.position",
    #         "cascade": "all, delete-orphan",
    #     },
    # )
    # """The reviewer(s), if any."""
    # revision_approvers: list[RevisionApproverLink] = Relationship(
    #     back_populates="approved",
    #     link_model=RevisionApproverLink,
    #     sa_relationship_kwargs={
    #         "order_by": "RevisionApproverLink.position",
    #         "cascade": "all, delete-orphan",
    #     },
    # )
    # """The approver(s), if any."""
    revision_comment: str | None = None
    """Explanation of what changed in this revision"""
    status: STATUS = STATUS.RESERVED
    usage: USAGE = USAGE.FOR_INFORMATION
    retention_plan: RETENTION = RETENTION.LIFETIME
    restriction_codes: str = Field(
        description="Markings for export control, legal, etc.", default=""
    )
    actual_reviewed_date: datetime | None = None
    actual_approved_date: datetime | None = None
    # filenames may be empty at first, i.e. for RESERVED docs
    filenames: list[str] = Field(
        description="Filenames of files attached to this Document. Main file should be the first.",
        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    file_notes: list[str] = Field(
        description="Short description of each file represented in filenames.",
        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    checksums: list[str] = Field(
        description="SHA-256 checksum of each file for data integrity",
        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    """Checksums are used to verify long-term data integrity against tampering
    and data degradation. While BLAKE3 checksums are faster, SHA-256 is more standard
    and built-in at this point. In the future, switching to BLAKE3 may make sense for
    easier periodic re-verification of large data libraries."""
    physical_location: str | None = Field(
        description="Location of a media (only valid when not stored as an electronic file).",
        default=None,
    )
    notes: str = Field(
        description="Additional information about the Document/Record", default=""
    )

    @computed_field
    @property
    def status_category(self) -> str:
        """The top-level status category, derived from Document Status"""
        return self.status.category

    @model_validator(mode="after")
    def cant_have_electronic_and_physical_location(self) -> "Document":
        """Reject a document that has both a physical location and files.

        Raises:
            ValueError: if ``physical_location`` is set and ``filenames``
                contains at least one entry.
        """
        has_physical_location = self.physical_location is not None
        # ``filenames`` defaults to an empty list, so test for emptiness; an
        # ``is not None`` check would treat every document as having a file.
        has_file = bool(self.filenames)
        if has_physical_location and has_file:
            raise ValueError(
                "Cannot provide both physical_location and filename(s). They are mutually exclusive."
            )
        return self
class Organization(NRSKModel, table=True):
    """An organization of people: companies, departments, governments, etc."""

    name: str = Field(index=True)
    """Organization Name"""

    abbreviation: str | None = Field(index=True, default=None)
    website: str | None = None
    is_active: bool = Field(default=True)

    # allow it to be hierarchical to capture full org trees and refer to
    # divisions
    parent_id: _PK_TYPE | None = Field(
        foreign_key="organization.id",
        default=None,
    )
    """The parent organization this org reports to"""

    # Both ends of the self-referencing relationship; ``remote_side`` marks
    # ``id`` as the parent end.
    parent: Optional["Organization"] = Relationship(
        sa_relationship_kwargs={"remote_side": "Organization.id"},
        back_populates="child_orgs",
    )
    child_orgs: list["Organization"] = Relationship(back_populates="parent")
class PredecessorTask(NRSKModel):
    """Reference from a task to one of its predecessor tasks."""

    class PRED_TYPE(StrEnum):  # noqa: N801
        """Predecessor relationship type."""

        FS = "FS"
        """Finish-to-start: predecessor finishes before successor starts (very common)"""
        FF = "FF"
        """Finish-to-finish: predecessor finishes before successor can finish"""
        SS = "SS"
        """Start-to-start: predecessor starts before successor starts"""
        SF = "SF"
        """Start-to-finish: predecessor starts before successor finishes (uncommon, maybe shift change)"""

    # Redeclares the inherited ``id`` as a string.
    id: str
    """ID of the predecessor task."""

    type: PRED_TYPE = PRED_TYPE.FS

    lag: timedelta | None = Field(
        default=None,
        description="Lag time. Negative timedelta implies negative lag "
        "(lead time, starts before predecessor ends)",
    )
class ScheduledTask(NRSKModel):
    """Scheduled task, e.g. in P6."""

    name: str
    # Redeclares the inherited ``id`` as an optional string.
    id: str | None = None
    is_milestone: bool = False
    predecessors: list[PredecessorTask] = []
    duration: timedelta | None = None
    actual_start: datetime | None = None
    actual_end: datetime | None = None
    scheduled_start: datetime | None = None
    scheduled_end: datetime | None = None

    @model_validator(mode="before")
    @classmethod
    def convert_days_to_duration(cls, data: Any) -> Any:
        """Allow input of duration_days, but convert on way in.

        When ``data`` is a dict holding a non-None ``duration_days``, a new
        dict is returned in which that key is replaced by ``duration`` (a
        ``timedelta`` of that many days, overriding any ``duration`` already
        present). Any other input is returned unchanged.

        The caller's dict is never modified, so the same raw input can be
        validated again or reused elsewhere.
        """
        if not isinstance(data, dict):
            return data
        days = data.get("duration_days")
        if days is None:
            return data
        # Build a copy without the alias key rather than editing in place.
        converted = {k: v for k, v in data.items() if k != "duration_days"}
        converted["duration"] = timedelta(days=float(days))
        return converted
class ScheduleLane(BaseModel):
    """One named section of a schedule, holding that section's tasks."""

    name: str
    # Optional; defaults to None when no color is given.
    color: str | None = None
    tasks: list[ScheduledTask]


ScheduleInput = TypeAdapter(list[ScheduleLane])
"""A list of lanes, representing full schedule input."""