starter-kit/src/nrsk/models.py

"""
Define the Data Dictionary.

Implementation of Data Dictionary
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. impl:: Maintain the Data Dictionary base data using Pydantic
    :id: I_DATA_DICT
    :links: R_DATA_DICT

    The data dictionary is managed using Pydantic. Pydantic allows for
    concise Python code to richly define data models and their fields. From a single
    class definition, it provides data validation, automatic rich documentation (via
    a Sphinx plugin), an integration with FastAPI for data exchange, and
    relatively easy integration with sqlalchemy for database persistence. Changes to
    the schema can be managed and controlled via the revision control system, and
    changes to a single source (the Python code) will automatically propagate to the
    rendered documentation and, potentially, the database (e.g. using *alembic*).

    Using SQLAlchemy as the database engine enables wide flexibility in underlying
    database technology, including PostgreSQL, MySQL, SQLite, Oracle, and MS SQL
    Server.  Pydantic models allow us to validate data loaded from a database,
    directly from a structured text file, or from JSON data delivered via the network.

Analysis of Alternatives
^^^^^^^^^^^^^^^^^^^^^^^^
SQLModel :cite:p:`SQLModel` was considered as the data layer base, but it was
determined to be less mature than pydantic and sqlalchemy, with inadequate
documentation related to field validation. It was determined to use Pydantic
directly for schema definitions.

.. _data-dict:

Data Dictionary
^^^^^^^^^^^^^^^
This is the official Data Dictionary discussed in :ref:`the Information
Management Plan <info-mgmt-data-dict>`.
"""

import re
from datetime import datetime, timedelta
from enum import StrEnum
from typing import Annotated, Any, Optional
from uuid import UUID, uuid4

# Type used for primary keys and for the foreign keys that reference them.
# Previous setting, kept for reference:
# _PK_TYPE = UUID
# moving away from UUID at least temporarily b/c SQLite doesn't
# really support it, which makes adding new data via DBeaver harder
_PK_TYPE = int

from pydantic import (
    AnyUrl,
    BaseModel,
    ConfigDict,
    EmailStr,
    PositiveInt,
    TypeAdapter,
    ValidationError,
    computed_field,
    field_validator,
    model_validator,
)
from sqlalchemy import text
from sqlmodel import JSON, Column, Field, Relationship, SQLModel

# Matches a string made up of one or more uppercase ASCII letters and nothing
# else. The "+" matters: without it only single-letter strings would match.
ALL_CAPS = re.compile(r"^[A-Z]+$")


class NRSKModel(SQLModel):
    """Common base class that gives every data dictionary model an ``id`` field."""

    # The commented-out arguments below are the UUID-era settings; they are
    # disabled while _PK_TYPE is int.
    # NOTE(review): no default is declared here, so ``id`` looks required
    # whenever a subclass is validated from input data -- confirm intended.
    id: _PK_TYPE = Field(
        # default_factory=uuid4,
        description="The unique ID of this object. Used as a primary key in the database.",
        primary_key=True,
        # schema_extra={
        #    "examples": ["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
        # },
    )


class DocumentUserLink(NRSKModel, table=True):
    """Linkages between users and documents.

    Used as the ``link_model`` of ``User.contributed`` and
    ``Document.contributors``.
    """

    # NOTE(review): ``id`` is inherited from NRSKModel with primary_key=True and
    # both foreign keys below are also declared primary_key=True, so the
    # primary key appears to span three columns -- confirm this is intended.

    position: int = Field(default=0)
    """Integer indicating order of people"""

    role_note: str = Field(
        default="",
    )
    """Extra information about role such as 'lead' or 'section 2.4'"""

    # Foreign key to the linked row in the "document" table.
    document_id: _PK_TYPE | None = Field(
        foreign_key="document.id", primary_key=True, default=None
    )
    # Foreign key to the linked row in the "user" table.
    user_id: _PK_TYPE | None = Field(
        foreign_key="user.id", primary_key=True, default=None
    )


class User(NRSKModel, table=True):
    """A person involved in the Project."""

    given_name: str
    family_name: str
    preferred_name: str | None = None
    previous_name: str | None = None
    email: EmailStr
    # The next four fields accept None but, unlike the optional name fields
    # above, declare no default value.
    joined_on: datetime | None
    deactivated_on: datetime | None
    organization: str | None
    title: str | None

    # Documents linked to this user through DocumentUserLink; the other side
    # of this relationship is Document.contributors.
    contributed: list["Document"] = Relationship(
        back_populates="contributors", link_model=DocumentUserLink
    )


class OpenItem(NRSKModel):
    """A named item with a status, a creation time, and an optional closing time."""

    name: str
    # NOTE(review): status is free-form text here; no set of allowed values is
    # enforced by this class.
    status: str
    created_on: datetime
    # Defaults to None, i.e. no closing time recorded.
    closed_on: datetime | None = None


class SSC(NRSKModel):
    """
    A Structure, System, or Component in the plant.

    This is a generic hierarchical object that can represent plants, units,
    buildings and their structures, systems, subsystems, components,
    subcomponents, etc.

    A physical tree of buildings/structures/rooms may have overlapping
    contents in terms of systems/components/equipment/parts
    """

    name: str
    pbs_code: str | None = Field(
        description="An integer sequence that determines the 'system number' and also the ordering in printouts",
        schema_extra={
            "examples": ["1.2.3", "20.5.11"],
        },
        default="",
    )
    """PBS code is tied closely to the structure of the PBS, obviously. If 1.2
    is a category level, that's ok, but that doesn't imply that the second level
    of PBS 2 is also a category level; it may be systems.
    Since this can change in major PBS reorganizations, it should not be used
    for cross-referencing (use ID).
    """

    abbrev: str = Field(
        description="A human-friendly abbreviation uniquely defining the system"
    )
    parent: Optional["SSC"] = None
    # NOTE(review): the annotation admits None *elements* while the default is
    # None for the whole field; ``list[str] | None`` may be what was meant.
    functions: list[str | None] = Field(
        description="Functions of this system", default=None
    )

    @field_validator("abbrev", mode="after")
    @classmethod
    def abbrev_must_be_all_caps(cls, v: str) -> str:
        """Reject abbreviations that do not match the module-level ``ALL_CAPS`` pattern.

        Returns the value unchanged when it is acceptable; a field validator
        must return the value, otherwise the field would be set to None.

        Raises
        ------
        ValueError
            If ``v`` does not match ``ALL_CAPS``.
        """
        if not ALL_CAPS.match(v):
            raise ValueError(f"{v} must be all CAPS")
        return v

    @field_validator("pbs_code", mode="after")
    @classmethod
    def pbs_must_be_int_sequence(cls, v: str | None) -> str | None:
        """Require a dot-separated sequence of integers such as ``1.2.3``.

        Empty strings and None are accepted because the field is optional and
        defaults to an empty string.

        Raises
        ------
        ValueError
            If ``v`` is non-empty and is not a dot-separated integer sequence.
        """
        # fullmatch with an explicit "digits, then (dot digits)*" shape rejects
        # trailing dots and avoids a nested quantifier.
        if v and not re.fullmatch(r"\d+(\.\d+)*", v):
            raise ValueError(f"{v} must be an integer sequence, like 1.2.3")
        return v


class SystemsList(BaseModel):
    """A flat list of Systems in the plant.

    Can be used e.g. to render a snapshot of the Master Systems List.

    Does not include categories like "Nuclear Island" or "Primary Systems".

    We may want another structure that represents the whole tree in a
    well-defined manner, or we may want to add a 'path' attr
    to systems that define where they live.
    """

    # The only field: the SSC entries that make up the list.
    systems: list[SSC]


class ParamDef(NRSKModel):
    """A parameter class defining an aspect of plant design."""

    name: str = Field(
        schema_extra={"examples": ["Nominal gross power"]},
    )
    """Name of the parameter class."""
    description: str
    """Detailed description of what parameters of this type represent"""

    # Each entry of "examples" is one complete example value for the field, so
    # the example for this list-valued field is itself a list (the same shape
    # ITSystem.use_cases uses).
    # NOTE(review): the annotation admits None *elements* while the default is
    # None for the whole field; ``list[str] | None`` may be what was meant.
    valid_units: list[str | None] = Field(
        schema_extra={"examples": [["MW", "W", "shp"]]}, default=None
    )
    """List of units allowed"""
    """List of units allowed"""


class ParamVal(NRSKModel):
    """A particular value of a Parameter, assigned to a particular SSC."""

    # The SSC this value is assigned to.
    ssc: SSC
    # The parameter definition this value belongs to.
    pdef: ParamDef
    # The value is held as a string, with its units in a separate optional field.
    # NOTE(review): nothing in this class checks ``units`` against
    # ``pdef.valid_units``.
    value: str
    units: str | None = None
    pedigree: str = Field(
        description="Indication of how well it is known (rough estimate, final design, as-built)."
    )
    source: str = Field(description="Where this version of the value came from")


class ITSystem(NRSKModel):
    """An IT system used by the project."""

    name: str
    vendor: str
    version: str | None = None
    # The schema example is a single example value, which is itself a list.
    use_cases: list[str] = Field(
        schema_extra={
            "examples": [
                [
                    "Document management",
                ]
            ],
        }
    )
    """One or more use cases this system is used for."""

    physical_location: str = Field(description="Where the system is physically located")
    url: AnyUrl | None = Field(description="Full URL to the system", default=None)
    custodian: User | None = Field(
        description="Person currently in charge of system", default=None
    )
    # Both dates are optional and default to None.
    launched_on: datetime | None = None
    retired_on: datetime | None = None
    # Declared without a default value.
    quality_related: bool


class InformationType(NRSKModel, table=True):
    """A type/kind/class of Information, Document, or Record.

    Types form a tree: each row may point at a parent type through
    ``parent_id``.
    """

    # Pydantic's extra="forbid" rejects input fields not declared on the model.
    model_config = ConfigDict(extra="forbid")

    name: str
    abbrev: str
    # Stored in a single JSON column rather than a separate table.
    examples: list[str] | None = Field(
        default=None,
        sa_column=Column(JSON),
    )
    description: str = ""
    retention: str | None = ""
    record: bool = True
    use_cases: str = ""
    notes: str = ""
    # Self-referential foreign key to this same table.
    parent_id: _PK_TYPE | None = Field(default=None, foreign_key="informationtype.id")
    # Add these two relationships for easier DB parsing in code
    # remote_side marks ``id`` as the remote column of this self-referential
    # relationship.
    parent: Optional["InformationType"] = Relationship(
        back_populates="subtypes",
        sa_relationship_kwargs={"remote_side": "InformationType.id"},
    )
    subtypes: list["InformationType"] = Relationship(back_populates="parent")


# Pydantic TypeAdapter for handling a whole list of InformationType objects
# as one value.
InformationTypes = TypeAdapter(list[InformationType])
"""A list of document types."""


class Document(NRSKModel, table=True):
    """
    Data dictionary entry for Documents and Records.

    Document data is designed to satisfy the needs defined in :ref:`rmdc-proc`.

    See Also
    --------
    * Some of the field definitions come from CFIHOS
      https://www.jip36-cfihos.org/wp-content/uploads/2023/08/v.1.5.1-CFIHOS-Specification-Document-1.docx
    * ISO-19650 has different Status Codes defining suitability level (for information, as-built)
      https://ukbimframework.org/wp-content/uploads/2020/05/ISO19650-2Edition4.pdf
    """

    class STATUS(StrEnum):
        """Document Status options."""

        # Much of the wording here comes from cloverDocumentControlRecords2010.

        # NOTE: if you add or remove a status, be sure to also update the
        # category property below AND :ref:`rmdc-doc-status`!

        ## Not Yet Approved:
        RESERVED = "RESERVED"
        """
        A Document ID has been assigned, but the document is in development or
        has not yet been started (default).
        """

        IN_PROGRESS = "IN PROGRESS"
        """One or more authors are creating or revising the document."""

        IN_REVIEW = "IN REVIEW"
        """A completed draft of the document has been submitted and is pending review."""

        REJECTED = "REJECTED"
        """A draft that was rejected by the review team and may be revised and resubmitted."""

        AUTHORIZED = "AUTHORIZED"
        """A controlled revision that has been signed but is not yet effective.
        Such documents may be used for training, etc. Documents with this status may
        be used for plant modifications in a work package, but not for normal operations."""

        REFERENCE = "REFERENCE"
        """Document is stored in EDMS for ease of access and reference, but
        there is no assertion that the information is the latest available.
        Useful for Standards, engineering handbook excerpts, vendor notices."""

        NATIVE = "NATIVE"
        """A document file that may be in EDMS in the native file format. Not
        used in the field because they (a) may require special software to view
        and (b) may not be controlled for field use (i.e. not quarantined if
        errors are discovered)."""

        ## Approved:
        APPROVED = "APPROVED"
        """A document revision that has been submitted by the releasing
        organization and that is authorized for the use case defined in
        the suitability code.

        * A drawing with this status during operation reflects the plant configuration
        * A drawing with this status before or during construction reflects that it is
          ready to be fabricated/built
        * A procedure with this status is effective.
        """

        ## No longer Approved:
        QUARANTINED = "QUARANTINED"
        """(On hold, Suspended) A document revision that was previously
        authorized and has been placed on hold, e.g. a procedure that cannot be
        performed as written or a design that is known to have pending changes."""

        SUPERSEDED = "SUPERSEDED"
        """A document that has been replaced by another document. The new
        document is to be recorded in the index."""

        REVISED = "REVISED"
        """A document that has been replaced by a subsequent revision of that
        document."""

        VOIDED = "VOIDED"
        """A document or revision that is no longer needed and there is no
        revision or superseding document. This would also be used for documents
        that have reached a predetermined expiration date, such as a temporary
        procedure."""

        CLOSED = "CLOSED"
        """(Archived) A document for which the work has been completed."""

        @property
        def category(self) -> str:
            """High-level status category: Not yet approved, Approved, or No Longer Approved."""
            if self.value in {
                self.RESERVED,
                self.IN_PROGRESS,
                self.IN_REVIEW,
                self.REJECTED,
                self.AUTHORIZED,
                self.REFERENCE,
                self.NATIVE,
            }:
                return "Not Yet Approved"
            if self.value in {self.APPROVED}:
                return "Approved"
            # Every status not listed above falls into the last category.
            return "No Longer Approved"

    class USAGE(StrEnum):
        """Usage options.

        Usage governs what use cases a document may be used for. It is a notion
        derived from the ISO 19650 'suitability' idea, but used in combination
        with the NIRMA status codes. It allows a document to be approved for
        e.g. a conceptual design stage without letting it inadvertently be
        released for bid or manufacture.  Releasing organizations can update the
        suitability as needed.

        See https://ukbimframework.org/wp-content/uploads/2020/09/Guidance-Part-C_Facilitating-the-common-data-environment-workflow-and-technical-solutions_Edition-1.pdf
        """

        FOR_INFORMATION = "FOR INFORMATION"
        """A document revision that may be used for information only, not for
        any contractual purpose."""

        FOR_STAGE_APPROVAL = "FOR STAGE APPROVAL"
        """A document revision that is considered complete for the contractual stage in
        which it was created.  For example, in a Preliminary Design phase, this
        usage would indicate that it is at the expected usage level for
        preliminary design. Most design-phase documents that are not yet ready
        for bid or construction will be marked for this usage."""

        FOR_BID = "FOR BID"
        """A document revision that is ready to be sent to external parties for bid.
        During the bid process, changes may be expected based on vendor feedback."""

        FOR_CONSTRUCTION = "FOR CONSTRUCTION"
        """A document revision that is ready to be sent to the field for manufacture,
        fabrication, construction. An approved document with this usage implies
        that all the quality, regulatory, and design aspects are in place, and
        that work can proceed. However, what is constructed is not yet
        authorized for operation."""

        FOR_OPERATION = "FOR OPERATION"
        """A document revision that can be used to operate the business and/or plant.
        Procedures of this usage may be used to do work or operate equipment."""

        AS_BUILT = "AS BUILT"
        """A document revision that is an as-built record of construction or manufacture.
        Documents of this usage may be used to operate the plant."""

    class RETENTION(StrEnum):
        """Retention plan options.

        Retention plans define how long the document or record is to be
        kept before it is destroyed.

        .. note:: May want this to actually be a timedelta

        """

        LIFETIME = "LIFETIME"
        """Lifetime of the plant."""

    # use_attribute_docstrings allows us to just use docstrings and get
    # the same info in both the JSON Schema and also the Sphinx render
    model_config = ConfigDict(use_attribute_docstrings=True)

    number: str
    """The identification number meeting the document numbering rules"""

    title: str = Field(
        schema_extra={
            "examples": ["CNSG Development and Status 1966-1977"],
        },
    )
    """Descriptive title explaining the contents"""

    revision: str = Field(
        schema_extra={
            "examples": ["0", "1", "1a", "A"],
        },
    )
    """Revision code"""

    originating_organization_id: _PK_TYPE | None = Field(
        foreign_key="organization.id",
        description="The organization that owns or issued this document",
        default=None,
    )
    # This allows you to do `my_document.originating_organization` in Python.
    # Optional because the foreign key above is nullable.
    originating_organization: Optional["Organization"] = Relationship()

    originator_number: str | None = None
    """The originating organization's document number (if originated externally)."""

    originator_revision: str | None = None
    """The originating organization's revision code (if originated externally)."""

    type_id: _PK_TYPE = Field(
        foreign_key="informationtype.id",
        description="The ID of the InformationType",
    )
    # type: "InformationType" = Relationship()

    contributors: list[User] = Relationship(
        back_populates="contributed",
        link_model=DocumentUserLink,
        sa_relationship_kwargs={
            "order_by": "DocumentUserLink.position",
            "lazy": "selectin",
        },
    )
    """Holds all relationships with users but does not show up in JSON Schema"""

    @computed_field
    @property
    def authors(self) -> list[dict[str, Any]]:
        """List of author info for the UI."""
        # User has no ``name`` attribute, so the display name is built from the
        # given and family names. The return annotation matches the dicts that
        # are actually returned.
        # NOTE(review): every contributor is listed here, regardless of role.
        return [
            {"id": person.id, "name": f"{person.given_name} {person.family_name}"}
            for person in self.contributors
        ]

    @computed_field
    @property
    def reviewers(self) -> list[dict[str, Any]]:
        """List of reviewer info for the UI."""
        # NOTE(review): User declares no ``role`` field in this module; the
        # only role data visible here is DocumentUserLink.role_note, which this
        # relationship does not expose. getattr() keeps serialization from
        # raising AttributeError, but this list is expected to stay empty until
        # the link data is made available -- needs a follow-up.
        return [
            {"id": person.id, "name": f"{person.given_name} {person.family_name}"}
            for person in self.contributors
            if getattr(person, "role", None) == "reviewer"
        ]

    # revision_reviewers: list[RevisionReviewerLink] = Relationship(
    #    back_populates="reviewed",
    #    link_model=RevisionReviewerLink,
    #    sa_relationship_kwargs={
    #        "order_by": "RevisionReviewerLink.position",
    #        "cascade": "all, delete-orphan",
    #    },
    # )
    # """The reviewer(s), if any."""

    # revision_approvers: list[RevisionApproverLink] = Relationship(
    #    back_populates="approved",
    #    link_model=RevisionApproverLink,
    #    sa_relationship_kwargs={
    #        "order_by": "RevisionApproverLink.position",
    #        "cascade": "all, delete-orphan",
    #    },
    # )
    # """The approver(s), if any."""

    revision_comment: str | None = None
    """Explanation of what changed in this revision"""

    status: STATUS = STATUS.RESERVED
    usage: USAGE = USAGE.FOR_INFORMATION
    retention_plan: RETENTION = RETENTION.LIFETIME
    restriction_codes: str = Field(
        description="Markings for export control, legal, etc.", default=""
    )

    actual_reviewed_date: datetime | None = None
    actual_approved_date: datetime | None = None

    # filenames may be empty at first, i.e. for RESERVED docs
    filenames: list[str] = Field(
        description="Filenames of files attached to this Document. Main file should be the first.",
        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    file_notes: list[str] = Field(
        description="Short description of each file represented in filenames.",
        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    checksums: list[str] = Field(
        description="SHA-256 checksum of each file for data integrity",
        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    """Checksums are used to verify long-term data integrity against tampering
    and data degradation. While BLAKE3 checksums are faster, SHA-256 is more standard
    and built-in at this point. In the future, switching to BLAKE3 may make sense for
    easier periodic re-verification of large data libraries."""

    physical_location: str | None = Field(
        description="Location of a media (only valid when not stored as an electronic file).",
        default=None,
    )
    notes: str = Field(
        description="Additional information about the Document/Record", default=""
    )

    @computed_field
    @property
    def status_category(self) -> str:
        """The top-level status category, derived from Document Status"""
        return self.status.category

    @model_validator(mode="after")
    def cant_have_electronic_and_physical_location(self) -> "Document":
        """Reject documents that declare both a physical location and attached files.

        Returns
        -------
        Document
            The validated instance, unchanged.

        Raises
        ------
        ValueError
            If ``physical_location`` is set and ``filenames`` is non-empty.
        """
        has_physical_location = self.physical_location is not None
        # filenames defaults to an empty list, so test for content rather than
        # for None; an ``is not None`` test is true for every document and
        # would reject any document that has a physical location.
        has_file = bool(self.filenames)

        if has_physical_location and has_file:
            raise ValueError(
                "Cannot provide both physical_location and filename(s). They are mutually exclusive."
            )

        return self


class Organization(NRSKModel, table=True):
    """An organization of people: companies, departments, governments, etc."""

    name: str = Field(index=True)
    """Organization Name"""

    abbreviation: str | None = Field(default=None, index=True)
    website: str | None = None
    is_active: bool = Field(default=True)

    # allow it to be hierarchical to capture full org trees and refer to
    # divisions
    # Self-referential foreign key to this same table.
    parent_id: _PK_TYPE | None = Field(
        default=None,
        foreign_key="organization.id",
    )
    """The parent organization this org reports to"""

    # remote_side marks ``id`` as the remote column of this self-referential
    # relationship; ``child_orgs`` is its reverse side.
    parent: Optional["Organization"] = Relationship(
        back_populates="child_orgs",
        sa_relationship_kwargs={"remote_side": "Organization.id"},
    )
    child_orgs: list["Organization"] = Relationship(back_populates="parent")


class PredecessorTask(NRSKModel):
    """Link to a predecessor task."""

    class PRED_TYPE(StrEnum):  # noqa: N801
        """Predecessor relationship type."""

        FS = "FS"
        """Finish-to-start: predecessor finishes before successor starts (very common)"""
        FF = "FF"
        """Finish-to-finish: predecessor finishes before successor can finish"""
        SS = "SS"
        """Start-to-start: predecessor starts before successor starts"""
        SF = "SF"
        """Start-to-finish: predecessor starts before successor finishes (uncommon, maybe shift change)"""

    # Redeclares the ``id`` field inherited from NRSKModel as a string.
    id: str
    """ID of the predecessor task."""
    # Finish-to-start is the default relationship type.
    type: PRED_TYPE = PRED_TYPE.FS
    lag: timedelta | None = Field(
        description="Lag time. Negative timedelta implies negative lag "
        "(lead time, starts before predecessor ends)",
        default=None,
    )


class ScheduledTask(NRSKModel):
    """Scheduled task, e.g. in P6."""

    name: str
    # Redeclares the ``id`` field inherited from NRSKModel as an optional string.
    id: str | None = None
    is_milestone: bool = False
    predecessors: list[PredecessorTask] = []
    duration: timedelta | None = None
    actual_start: datetime | None = None
    actual_end: datetime | None = None
    scheduled_start: datetime | None = None
    scheduled_end: datetime | None = None

    @model_validator(mode="before")
    @classmethod
    def convert_days_to_duration(cls, data: Any) -> Any:
        """Allow input of duration_days, but convert on way in.

        When ``data`` is a dict carrying a non-None ``duration_days`` entry, a
        new dict is returned in which that entry is replaced by a ``duration``
        timedelta. The caller's dict is left untouched. Any other input is
        returned as-is.

        Raises
        ------
        ValueError
            If ``duration_days`` cannot be converted to a number of days.
        """
        if not isinstance(data, dict):
            return data

        days = data.get("duration_days")
        if days is None:
            return data

        try:
            duration = timedelta(days=float(days))
        except (TypeError, OverflowError) as err:
            # Re-raise as ValueError so bad input is reported as a validation
            # error, like the ValueError that float() raises for bad strings.
            raise ValueError(f"duration_days must be a number, got {days!r}") from err

        # Build a fresh dict instead of editing the caller's input in place.
        converted = {key: val for key, val in data.items() if key != "duration_days"}
        converted["duration"] = duration
        return converted


class ScheduleLane(BaseModel):
    """A section of a schedule."""

    name: str
    # Optional; defaults to None.
    # NOTE(review): the expected colour format is not constrained here.
    color: str | None = None
    # The tasks that belong to this lane.
    tasks: list[ScheduledTask]


# Pydantic TypeAdapter for handling a whole list of ScheduleLane objects as
# one value.
ScheduleInput = TypeAdapter(list[ScheduleLane])
"""A list of lanes, representing full schedule input."""