"""
Define the Data Dictionary.

Implementation of Data Dictionary
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. impl:: Maintain the Data Dictionary base data using Pydantic
    :id: I_DATA_DICT
    :links: R_DATA_DICT

    The data dictionary is managed using Pydantic. Pydantic allows for concise
    Python code to richly define data models and their fields. From a single
    class definition, it provides data validation, automatic rich documentation
    (via a Sphinx plugin), an integration with FastAPI for data exchange, and
    relatively easy integration with sqlalchemy for database persistence.
    Changes to the schema can be managed and controlled via the revision control
    system, and changes to a single source (the Python code) will automatically
    propagate to the rendered documentation and, potentially, the database
    (e.g. using *alembic*).

    Using SQLAlchemy as the database engine enables wide flexibility in
    underlying database technology, including PostgreSQL, MySQL, SQLite, Oracle,
    and MS SQL Server. Pydantic models allow us to validate data loaded from a
    database, directly from structured text files, or from JSON data delivered
    via the network.

Analysis of Alternatives
^^^^^^^^^^^^^^^^^^^^^^^^

SQLModel :cite:p:`SQLModel` was considered as the data layer base, but it was
determined to be less mature than pydantic and sqlalchemy, with inadequate
documentation related to field validation. It was determined to use Pydantic
directly for schema definitions.

.. _data-dict:

Data Dictionary
^^^^^^^^^^^^^^^

This is the official Data Dictionary discussed in
:ref:`the Information Management Plan `.
"""

import re
from datetime import datetime, timedelta
from enum import StrEnum
from typing import Annotated, Any, Optional
from uuid import UUID, uuid4

from pydantic import (
    AnyUrl,
    BaseModel,
    ConfigDict,
    EmailStr,
    PositiveInt,
    TypeAdapter,
    ValidationError,
    computed_field,
    field_validator,
    model_validator,
)
from sqlalchemy import text
from sqlmodel import JSON, Column, Field, Relationship, SQLModel

# _PK_TYPE = UUID
# moving away from UUID at least temporarily b/c SQLite doesn't
# really support it, which makes adding new data via DBeaver harder
_PK_TYPE = int

# One or more capital letters. (The previous pattern, "^[A-Z]$", only accepted
# a single letter and therefore rejected every real abbreviation.)
ALL_CAPS = re.compile(r"^[A-Z]+$")

# Dot-separated integers such as "1.2.3". Written without nested optional
# quantifiers so that non-matching input cannot trigger heavy backtracking.
_INT_SEQUENCE = re.compile(r"\d+(?:\.\d+)*")


class NRSKModel(SQLModel):
    """Common base model that gives every object an ``id`` primary key."""

    id: _PK_TYPE = Field(
        # default_factory=uuid4,
        description="The unique ID of this object. Used as a primary key in the database.",
        primary_key=True,
        # schema_extra={
        #     "examples": ["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
        # },
    )


class DocumentUserLink(NRSKModel, table=True):
    """Linkages between users and documents."""

    position: int = Field(default=0)
    """Integer indicating order of people"""

    role_note: str = Field(
        default="",
    )
    """Extra information about role such as 'lead' or 'section 2.4'"""

    document_id: _PK_TYPE | None = Field(
        foreign_key="document.id", primary_key=True, default=None
    )
    user_id: _PK_TYPE | None = Field(
        foreign_key="user.id", primary_key=True, default=None
    )


class User(NRSKModel, table=True):
    """A person involved in the Project."""

    given_name: str
    family_name: str
    preferred_name: str | None = None
    previous_name: str | None = None
    email: EmailStr
    joined_on: datetime | None
    deactivated_on: datetime | None
    organization: str | None
    title: str | None

    contributed: list["Document"] = Relationship(
        back_populates="contributors", link_model=DocumentUserLink
    )


def _contributor_summary(user: "User") -> dict[str, Any]:
    """Return the compact ``{"id", "name"}`` mapping used to list a contributor.

    ``User`` has no ``name`` field, so the display name is assembled from
    ``given_name`` and ``family_name``.
    """
    return {"id": user.id, "name": f"{user.given_name} {user.family_name}"}


class OpenItem(NRSKModel):
    """A named item with a status, a creation time and an optional close time."""

    name: str
    status: str
    created_on: datetime
    closed_on: datetime | None = None


class SSC(NRSKModel):
    """
    A Structure, System, or Component in the plant.

    This is a generic hierarchical object that can represent plants, units,
    buildings and their structures, systems, subsystems, components,
    subcomponents, etc.

    A physical tree of buildings/structures/rooms may have overlapping
    contents in terms of systems/components/equipment/parts
    """

    name: str
    pbs_code: str | None = Field(
        description="An integer sequence that determines the 'system number' and also the ordering in printouts",
        schema_extra={
            "examples": ["1.2.3", "20.5.11"],
        },
        default="",
    )
    """PBS code is tied closely to the structure of the PBS, obviously. If 1.2
    is a category level, that's ok, but that doesn't imply that the second level
    of PBS 2 is also a category level; it may be systems.

    Since this can change in major PBS reorganizations, it should not be used
    for cross-referencing (use ID).
    """

    abbrev: str = Field(
        description="A human-friendly abbreviation uniquely defining the system"
    )
    parent: Optional["SSC"] = None
    # NOTE(review): annotated as a list of optional strings with a None
    # default; ``list[str] | None`` may have been intended -- confirm.
    functions: list[str | None] = Field(
        description="Functions of this system", default=None
    )

    @field_validator("abbrev", mode="after")
    @classmethod
    def abbrev_must_be_all_caps(cls, v: str) -> str:
        """Require ``abbrev`` to consist solely of capital letters A-Z.

        Returns the value unchanged (an after-validator must return it,
        otherwise the field is set to ``None``).

        Raises:
            ValueError: if ``v`` contains anything other than capital letters.
        """
        if not ALL_CAPS.fullmatch(v):
            raise ValueError(f"{v} must be all CAPS")
        return v

    @field_validator("pbs_code", mode="after")
    @classmethod
    def pbs_must_be_int_sequence(cls, v: str | None) -> str | None:
        """Require a non-empty ``pbs_code`` to be dot-separated integers.

        ``None`` and the empty string (the field's default) are accepted as
        "no code assigned". Returns the value unchanged.

        Raises:
            ValueError: if ``v`` is non-empty and not of the form ``1.2.3``.
        """
        if v and not _INT_SEQUENCE.fullmatch(v):
            raise ValueError(f"{v} must be an integer sequence, like 1.2.3")
        return v


class SystemsList(BaseModel):
    """A flat list of Systems in the plant.

    Can be used e.g. to render a snapshot of the Master Systems List.

    Does not include categories like "Nuclear Island" or "Primary Systems".

    We may want another structure that represents the whole tree in a
    well-defined manner, or we may want to add a 'path' attr to systems
    that define where they live.
    """

    systems: list[SSC]


class ParamDef(NRSKModel):
    """A parameter class defining an aspect of plant design."""

    name: str = Field(
        schema_extra={"examples": ["Nominal gross power"]},
    )
    """Name of the parameter class."""

    description: str
    """Detailed description of what parameters of this type represent"""

    # NOTE(review): annotated as a list of optional strings with a None
    # default; ``list[str] | None`` may have been intended -- confirm.
    valid_units: list[str | None] = Field(
        schema_extra={"examples": ["MW", "W", "shp"]}, default=None
    )
    """List of units allowed"""


class ParamVal(NRSKModel):
    """A particular value of a Parameter, assigned to a particular SSC."""

    ssc: SSC
    pdef: ParamDef
    value: str
    units: str | None = None
    pedigree: str = Field(
        description="Indication of how well it is known (rough estimate, final design, as-built)."
    )
    source: str = Field(description="Where this version of the value came from")


class ITSystem(NRSKModel):
    """An IT system used by the project."""

    name: str
    vendor: str
    version: str | None = None
    use_cases: list[str] = Field(
        schema_extra={
            "examples": [
                [
                    "Document management",
                ]
            ],
        }
    )
    """One or more use cases this system is used for."""

    physical_location: str = Field(description="Where the system is physically located")
    url: AnyUrl | None = Field(description="Full URL to the system", default=None)
    custodian: User | None = Field(
        description="Person currently in charge of system", default=None
    )
    launched_on: datetime | None = None
    retired_on: datetime | None = None
    quality_related: bool


class InformationType(NRSKModel, table=True):
    """A type/kind/class of Information, Document, or Record."""

    model_config = ConfigDict(extra="forbid")

    name: str
    abbrev: str
    examples: list[str] | None = Field(
        default=None,
        sa_column=Column(JSON),
    )
    description: str = ""
    retention: str | None = ""
    record: bool = True
    use_cases: str = ""
    notes: str = ""
    parent_id: _PK_TYPE | None = Field(default=None, foreign_key="informationtype.id")

    # Add these two relationships for easier DB parsing in code
    parent: Optional["InformationType"] = Relationship(
        back_populates="subtypes",
        sa_relationship_kwargs={"remote_side": "InformationType.id"},
    )
    subtypes: list["InformationType"] = Relationship(back_populates="parent")


InformationTypes = TypeAdapter(list[InformationType])
"""A list of document types."""


class Document(NRSKModel, table=True):
    """
    Data dictionary entry for Documents and Records.

    Document data is designed to satisfy the needs defined in :ref:`rmdc-proc`.

    See Also
    --------
    * Some of the field definitions come from CFIHOS
      https://www.jip36-cfihos.org/wp-content/uploads/2023/08/v.1.5.1-CFIHOS-Specification-Document-1.docx
    * ISO-19650 has different Status Codes defining suitability level (for
      information, as-built)
      https://ukbimframework.org/wp-content/uploads/2020/05/ISO19650-2Edition4.pdf
    """

    class STATUS(StrEnum):
        """Document Status options."""

        # Much of the wording here comes from cloverDocumentControlRecords2010.

        # NOTE: if you add or remove a status, be sure to also update the
        # category property below AND :ref:`rmdc-doc-status`!

        ## Not Yet Approved:
        RESERVED = "RESERVED"
        """
        A Document ID has been assigned, but the document is in development or
        has not yet been started (default).
        """

        IN_PROGRESS = "IN PROGRESS"
        """One or more authors are creating or revising the document."""

        IN_REVIEW = "IN REVIEW"
        """A completed draft of the document has been submitted and is pending review."""

        REJECTED = "REJECTED"
        """A draft that was rejected by the review team and may be revised and resubmitted."""

        AUTHORIZED = "AUTHORIZED"
        """A controlled revision that has been signed but is not yet effective.
        Such documents may be used for training, etc. Documents with this status may
        be used for plant modifications in a work package, but not for normal operations."""

        REFERENCE = "REFERENCE"
        """Document is stored in EDMS for ease of access and reference, but
        there is no assertion that the information is the latest available.
        Useful for Standards, engineering handbook excerpts, vendor notices."""

        NATIVE = "NATIVE"
        """A document file that may be in EDMS in the native file format. Not
        used in the field because they (a) may require special software to view
        and (b) may not be controlled for field use (i.e. not quarantined if
        errors are discovered)."""

        ## Approved:
        APPROVED = "APPROVED"
        """A document revision that has been submitted by the releasing
        organization and that is authorized for the use case defined in
        the suitability code.

        * A drawing with this status during operation reflects the plant configuration
        * A drawing with this status before or during construction reflects that it is
          ready to be fabricated/built
        * A procedure with this status is effective.
        """

        ## No longer Approved:
        QUARANTINED = "QUARANTINED"
        """(On hold, Suspended) A document revision that was previously
        authorized and has been placed on hold, e.g. a procedure that cannot be
        performed as written or a design that is known to have pending changes."""

        SUPERSEDED = "SUPERSEDED"
        """A document that has been replaced by another document. The new
        document is to be recorded in the index."""

        REVISED = "REVISED"
        """A document that has been replaced by a subsequent revision of that document."""

        VOIDED = "VOIDED"
        """A document or revision that is no longer needed and there is no
        revision or superseding document. This would also be used for documents
        that have reached a predetermined expiration date, such as a temporary
        procedure."""

        CLOSED = "CLOSED"
        """(Archived) A document for which the work has been completed."""

        @property
        def category(self) -> str:
            """High-level status category: Not yet approved, Approved, or No Longer Approved."""
            if self.value in {
                self.RESERVED,
                self.IN_PROGRESS,
                self.IN_REVIEW,
                self.REJECTED,
                self.AUTHORIZED,
                self.REFERENCE,
                self.NATIVE,
            }:
                return "Not Yet Approved"
            if self.value in {self.APPROVED}:
                return "Approved"
            return "No Longer Approved"

    class USAGE(StrEnum):
        """Usage options.

        Usage governs what use cases a document may be used for. It is a notion
        derived from the ISO 19650 'suitability' idea, but used in combination
        with the NIRMA status codes. It allows a document to be approved for
        e.g. a conceptual design stage without letting it inadvertently be
        released for bid or manufacture. Releasing organizations can update the
        suitability as needed.

        See https://ukbimframework.org/wp-content/uploads/2020/09/Guidance-Part-C_Facilitating-the-common-data-environment-workflow-and-technical-solutions_Edition-1.pdf
        """

        FOR_INFORMATION = "FOR INFORMATION"
        """A document revision that may be used for information only, not for
        any contractual purpose."""

        FOR_STAGE_APPROVAL = "FOR STAGE APPROVAL"
        """A document revision that is considered complete for the contractual
        stage in which it was created. For example, in a Preliminary Design
        phase, this usage would indicate that it is at the expected usage level
        for preliminary design. Most design-phase documents that are not yet
        ready for bid or construction will be marked for this usage."""

        FOR_BID = "FOR BID"
        """A document revision that is ready to be sent to external parties for
        bid. During the bid process, changes may be expected based on vendor
        feedback."""

        FOR_CONSTRUCTION = "FOR CONSTRUCTION"
        """A document revision that is ready to be sent to the field for
        manufacture, fabrication, construction. An approved document with this
        usage implies that all the quality, regulatory, and design aspects are
        in place, and that work can proceed. However, what is constructed is not
        yet authorized for operation."""

        FOR_OPERATION = "FOR OPERATION"
        """A document revision that can be used to operate the business and/or
        plant. Procedures of this usage may be used to do work or operate
        equipment."""

        AS_BUILT = "AS BUILT"
        """A document revision that is an as-built record of construction or
        manufacture. Documents of this usage may be used to operate the plant."""

    class RETENTION(StrEnum):
        """Retention plan options.

        Retention plans define how long the document or record is to be
        kept before it is destroyed.

        .. note:: May want this to actually be a timedelta
        """

        LIFETIME = "LIFETIME"
        """Lifetime of the plant."""

    # use_attribute_docstrings allows us to just use docstrings and get
    # the same info in both the JSON Schema and also the Sphinx render
    model_config = ConfigDict(use_attribute_docstrings=True)

    number: str
    """The identification number meeting the document numbering rules"""

    title: str = Field(
        schema_extra={
            "examples": ["CNSG Development and Status 1966-1977"],
        },
    )
    """Descriptive title explaining the contents"""

    revision: str = Field(
        schema_extra={
            "examples": ["0", "1", "1a", "A"],
        },
    )
    """Revision code"""

    originating_organization_id: _PK_TYPE | None = Field(
        foreign_key="organization.id",
        description="The organization that owns or issued this document",
        default=None,
    )
    # This allows you to do `my_document.originating_organization` in Python
    originating_organization: "Organization" = Relationship()

    originator_number: str | None = None
    """The originating organization's document number (if originated externally)."""

    originator_revision: str | None = None
    """The originating organization's revision code (if originated externally)."""

    type_id: _PK_TYPE = Field(
        foreign_key="informationtype.id",
        description="The ID of the InformationType",
    )
    # type: "InformationType" = Relationship()

    contributors: list[User] = Relationship(
        back_populates="contributed",
        link_model=DocumentUserLink,
        sa_relationship_kwargs={
            "order_by": "DocumentUserLink.position",
            "lazy": "selectin",
        },
    )
    """Holds all relationships with users but does not show up in JSON Schema"""

    @computed_field
    @property
    def authors(self) -> list[dict[str, Any]]:
        """List of author info for the UI."""
        # Every contributor is listed: nothing visible on ``User`` separates
        # authors from other roles.
        return [_contributor_summary(a) for a in self.contributors]

    @computed_field
    @property
    def reviewers(self) -> list[dict[str, Any]]:
        """List of reviewer info for the UI."""
        # NOTE(review): ``User`` defines no ``role`` attribute, so a plain
        # ``a.role`` raised AttributeError as soon as a document had any
        # contributor. The lookup is guarded so serialization works, but it
        # yields an empty list until a role is available on the user --
        # presumably it should come from the DocumentUserLink row; confirm.
        return [
            _contributor_summary(a)
            for a in self.contributors
            if getattr(a, "role", None) == "reviewer"
        ]

    # revision_reviewers: list[RevisionReviewerLink] = Relationship(
    #     back_populates="reviewed",
    #     link_model=RevisionReviewerLink,
    #     sa_relationship_kwargs={
    #         "order_by": "RevisionReviewerLink.position",
    #         "cascade": "all, delete-orphan",
    #     },
    # )
    # """The reviewer(s), if any."""

    # revision_approvers: list[RevisionApproverLink] = Relationship(
    #     back_populates="approved",
    #     link_model=RevisionApproverLink,
    #     sa_relationship_kwargs={
    #         "order_by": "RevisionApproverLink.position",
    #         "cascade": "all, delete-orphan",
    #     },
    # )
    # """The approver(s), if any."""

    revision_comment: str | None = None
    """Explanation of what changed in this revision"""

    status: STATUS = STATUS.RESERVED
    usage: USAGE = USAGE.FOR_INFORMATION
    retention_plan: RETENTION = RETENTION.LIFETIME
    restriction_codes: str = Field(
        description="Markings for export control, legal, etc.", default=""
    )

    actual_reviewed_date: datetime | None = None
    actual_approved_date: datetime | None = None

    # filenames may be empty at first, i.e. for RESERVED docs
    filenames: list[str] = Field(
        description="Filenames of files attached to this Document. Main file should be the first.",
        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    file_notes: list[str] = Field(
        description="Short description of each file represented in filenames.",
        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    checksums: list[str] = Field(
        description="SHA-256 checksum of each file for data integrity",
        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    """Checksums are used to verify long-term data integrity against tampering
    and data degradation. While BLAKE3 checksums are faster, SHA-256 is more standard
    and built-in at this point. In the future, switching to BLAKE3 may make sense for
    easier periodic re-verification of large data libraries."""

    physical_location: str | None = Field(
        description="Location of a media (only valid when not stored as an electronic file).",
        default=None,
    )
    notes: str = Field(
        description="Additional information about the Document/Record", default=""
    )

    @computed_field
    @property
    def status_category(self) -> str:
        """The top-level status category, derived from Document Status"""
        return self.status.category

    @model_validator(mode="after")
    def cant_have_electronic_and_physical_location(self) -> "Document":
        """Reject documents that have both a physical location and attached files.

        Raises:
            ValueError: if ``physical_location`` is set and ``filenames`` is
                non-empty.
        """
        has_physical_location = self.physical_location is not None
        # ``filenames`` defaults to an empty list and is never None, so test
        # for emptiness; an ``is not None`` check would reject every document
        # that has a physical location.
        has_file = bool(self.filenames)

        if has_physical_location and has_file:
            raise ValueError(
                "Cannot provide both physical_location and filename(s). They are mutually exclusive."
            )

        return self


class Organization(NRSKModel, table=True):
    """An organization of people: companies, departments, governments, etc."""

    name: str = Field(index=True)
    """Organization Name"""

    abbreviation: str | None = Field(default=None, index=True)
    website: str | None = None
    is_active: bool = Field(default=True)

    # allow it to be hierarchical to capture full org trees and refer to
    # divisions
    parent_id: _PK_TYPE | None = Field(
        default=None,
        foreign_key="organization.id",
    )
    """The parent organization this org reports to"""

    parent: Optional["Organization"] = Relationship(
        back_populates="child_orgs",
        sa_relationship_kwargs={"remote_side": "Organization.id"},
    )
    child_orgs: list["Organization"] = Relationship(back_populates="parent")


class PredecessorTask(NRSKModel):
    """Link to a predecessor task."""

    class PRED_TYPE(StrEnum):  # noqa: N801
        """Predecessor relationship type."""

        FS = "FS"
        """Finish-to-start: predecessor finishes before successor starts (very common)"""

        FF = "FF"
        """Finish-to-finish: predecessor finishes before successor can finish"""

        SS = "SS"
        """Start-to-start: predecessor starts before successor starts"""

        SF = "SF"
        """Start-to-finish: predecessor starts before successor finishes (uncommon, maybe shift change)"""

    id: str
    """ID of the predecessor task."""

    type: PRED_TYPE = PRED_TYPE.FS
    lag: timedelta | None = Field(
        description="Lag time. Negative timedelta implies negative lag "
        "(lead time, starts before predecessor ends)",
        default=None,
    )


class ScheduledTask(NRSKModel):
    """Scheduled task, e.g. in P6."""

    name: str
    id: str | None = None
    is_milestone: bool = False
    predecessors: list[PredecessorTask] = Field(default_factory=list)
    duration: timedelta | None = None
    actual_start: datetime | None = None
    actual_end: datetime | None = None
    scheduled_start: datetime | None = None
    scheduled_end: datetime | None = None

    @model_validator(mode="before")
    @classmethod
    def convert_days_to_duration(cls, data: Any) -> Any:
        """Allow input of duration_days, but convert on way in.

        When ``data`` is a dict containing ``duration_days``, a copy is
        returned in which that key is removed and, if its value is not None,
        ``duration`` is set to the equivalent ``timedelta``. Any other input
        is returned untouched.
        """
        if isinstance(data, dict) and "duration_days" in data:
            # Work on a shallow copy so the caller's dict is not mutated.
            data = dict(data)
            days = data.pop("duration_days")
            if days is not None:
                data["duration"] = timedelta(days=float(days))
        return data


class ScheduleLane(BaseModel):
    """A section of a schedule."""

    name: str
    color: str | None = None
    tasks: list[ScheduledTask]


ScheduleInput = TypeAdapter(list[ScheduleLane])
"""A list of lanes, representing full schedule input."""