Add PostgreSQL persistence for the data in the data dictionary

Moved back to SQLModel because we do need some way
of putting this info into a database.
This commit is contained in:
Nick Touran 2026-01-05 12:04:22 -05:00
parent f7ee72a66b
commit 8c73123862
8 changed files with 355 additions and 77 deletions

View file

@ -37,20 +37,22 @@ This is the official Data Dictionary discussed in :ref:`the Information
Management Plan <info-mgmt-data-dict>`.
"""
from __future__ import annotations # allow lookahead annotation
import re
import uuid
from datetime import datetime, timedelta
from enum import StrEnum
from typing import Annotated, Any
from typing import Annotated, Any, Optional
from uuid import UUID, uuid4
# _PK_TYPE = UUID
# Type used for every primary-key / foreign-key column in this module.
# Moving away from UUID, at least temporarily, because SQLite doesn't
# really support it, which makes adding new data via DBeaver harder.
_PK_TYPE = int
from pydantic import (
AnyUrl,
BaseModel,
ConfigDict,
EmailStr,
Field,
PositiveInt,
TypeAdapter,
ValidationError,
@ -58,23 +60,45 @@ from pydantic import (
field_validator,
model_validator,
)
from sqlalchemy import text
from sqlmodel import JSON, Column, Field, Relationship, SQLModel
# NOTE(review): as written this pattern matches exactly one uppercase ASCII
# letter, not an all-caps word of arbitrary length -- confirm against the
# callers whether a `+` quantifier was intended.
ALL_CAPS = re.compile("^[A-Z]$")
UUID_PK = Annotated[
uuid.UUID,
Field(
default_factory=uuid.uuid4,
class NRSKModel(SQLModel):
id: _PK_TYPE = Field(
# default_factory=uuid4,
description="The unique ID of this object. Used as a primary key in the database.",
examples=["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
frozen=True,
),
]
primary_key=True,
# schema_extra={
# "examples": ["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
# },
)
class User(BaseModel):
class DocumentUserLink(NRSKModel, table=True):
"""Linkages between users and documents."""
# Association table: both User.contributed and Document.contributors pass
# link_model=DocumentUserLink.
# NOTE(review): NRSKModel already contributes `id` as a primary key, and both
# foreign keys below are primary keys too, so this table ends up with a
# three-column primary key. Confirm that is intended and that `id` still gets
# a value on insert (auto-increment normally applies only to single-column
# integer keys).
# Document.contributors orders its rows by this column.
position: int = Field(default=0)
"""Integer indicating order of people"""
role_note: str = Field(
default="",
)
"""Extra information about role such as 'lead' or 'section 2.4'"""
# Foreign key to document.id; part of the primary key.
document_id: _PK_TYPE | None = Field(
foreign_key="document.id", primary_key=True, default=None
)
# Foreign key to user.id; part of the primary key.
user_id: _PK_TYPE | None = Field(
foreign_key="user.id", primary_key=True, default=None
)
class User(NRSKModel, table=True):
"""A person involved in the Project."""
uuid: UUID_PK
given_name: str
family_name: str
preferred_name: str | None = None
@ -85,16 +109,19 @@ class User(BaseModel):
organization: str | None
title: str | None
contributed: list["Document"] = Relationship(
back_populates="contributors", link_model=DocumentUserLink
)
class OpenItem(BaseModel):
uuid: UUID_PK
class OpenItem(NRSKModel):
# An item with a name, a status, and creation/closure timestamps.
# NOTE(review): declared without table=True, unlike User and Document --
# confirm whether this model is meant to be persisted.
name: str
status: str
created_on: datetime
# None until the item has been closed.
closed_on: datetime | None = None
class SSC(BaseModel):
class SSC(NRSKModel):
"""
A Structure, System, or Component in the plant.
@ -106,24 +133,25 @@ class SSC(BaseModel):
contents in terms of systems/components/equipment/parts
"""
uuid: UUID_PK
name: str
pbs_code: str | None = Field(
description="An integer sequence that determines the 'system number' and also the ordering in printouts",
examples=["1.2.3", "20.5.11"],
schema_extra={
"examples": ["1.2.3", "20.5.11"],
},
default="",
)
"""PBS code is tied closely to the structure of the PBS, obviously. If 1.2
is a category level, that's ok, but that doesn't imply that the second level
of PBS 2 is also a category level; it may be systems.
Since this can change in major PBS reorganizations, it should not be used
for cross-referencing (use UUID).
for cross-referencing (use ID).
"""
abbrev: str = Field(
description="A human-friendly abbreviation uniquely defining the system"
)
parent: SSC | None = None
parent: Optional["SSC"] = None
functions: list[str | None] = Field(
description="Functions of this system", default=None
)
@ -156,25 +184,25 @@ class SystemsList(BaseModel):
systems: list[SSC]
class ParamDef(BaseModel):
class ParamDef(NRSKModel):
"""A parameter class defining an aspect of plant design."""
uuid: UUID_PK
name: str = Field(
description="Name of parameter class", examples=["Nominal gross power"]
)
description: str = Field(
description="Detailed description of what parameters of this type represent"
schema_extra={"examples": ["Nominal gross power"]},
)
"""Name of the parameter class."""
description: str
"""Detailed description of what parameters of this type represent"""
valid_units: list[str | None] = Field(
description="List of units allowed", examples=["MW", "W", "shp"], default=None
schema_extra={"examples": ["MW", "W", "shp"]}, default=None
)
"""List of units allowed"""
class ParamVal(BaseModel):
class ParamVal(NRSKModel):
"""A particular value of a Parameter, assigned to a particular SSC."""
uuid: UUID_PK
ssc: SSC
pdef: ParamDef
value: str
@ -185,21 +213,23 @@ class ParamVal(BaseModel):
source: str = Field(description="Where this version of the value came from")
class ITSystem(BaseModel):
class ITSystem(NRSKModel):
"""An IT system used by the project."""
uuid: UUID_PK
name: str
vendor: str
version: str | None = None
use_cases: list[str] = Field(
description="One or more use cases this system is used for.",
examples=[
[
"Document management",
]
],
schema_extra={
"examples": [
[
"Document management",
]
],
}
)
"""One or more use cases this system is used for."""
physical_location: str = Field(description="Where the system is physically located")
url: AnyUrl | None = Field(description="Full URL to the system", default=None)
custodian: User | None = Field(
@ -210,27 +240,36 @@ class ITSystem(BaseModel):
quality_related: bool
class InformationType(BaseModel):
class InformationType(NRSKModel, table=True):
"""A type/kind/class of Information, Document, or Record."""
model_config = ConfigDict(extra="forbid")
name: str
abbrev: str
examples: list[str] | None = None
examples: list[str] | None = Field(
default=None,
sa_column=Column(JSON),
)
description: str = ""
retention: str | None = ""
record: bool = True
use_cases: str = ""
notes: str = ""
parent: InformationType | None = None
parent_id: _PK_TYPE | None = Field(default=None, foreign_key="informationtype.id")
# Add these two relationships for easier DB parsing in code
parent: Optional["InformationType"] = Relationship(
back_populates="subtypes",
sa_relationship_kwargs={"remote_side": "InformationType.id"},
)
subtypes: list["InformationType"] = Relationship(back_populates="parent")
InformationTypes = TypeAdapter(list[InformationType])
"""A list of document types."""
class Document(BaseModel):
class Document(NRSKModel, table=True):
"""
Data dictionary entry for Documents and Records.
@ -392,34 +431,96 @@ class Document(BaseModel):
LIFETIME = "LIFETIME"
"""Lifetime of the plant."""
uuid: UUID_PK
number: str = Field(
description="The identification number meeting the document numbering rules",
)
# use_attribute_docstrings allows us to just use docstrings and get
# the same info in both the JSON Schema and also the Sphinx render
model_config = ConfigDict(use_attribute_docstrings=True)
number: str
"""The identification number meeting the document numbering rules"""
title: str = Field(
description="Descriptive title explaining the contents",
examples=["CNSG Development and Status 1966-1977"],
schema_extra={
"examples": ["CNSG Development and Status 1966-1977"],
},
)
"""Descriptive title explaining the contents"""
revision: str = Field(
description="Revision code",
examples=["0", "1", "1a", "A"],
schema_extra={
"examples": ["0", "1", "1a", "A"],
},
)
originating_organization: str
originator_number: str | None = Field(
description="The originating organization's document number (if originated externally).",
"""Revision code"""
originating_organization_id: _PK_TYPE | None = Field(
foreign_key="organization.id",
description="The organization that owns or issued this document",
default=None,
)
originator_revision: str | None = Field(
description="The originating organization's revision code (if originated externally).",
default=None,
# This allows you to do `my_document.originating_organization` in Python
originating_organization: "Organization" = Relationship()
originator_number: str | None = None
"""The originating organization's document number (if originated externally)."""
originator_revision: str | None = None
"""The originating organization's revision code (if originated externally)."""
type_id: _PK_TYPE = Field(
foreign_key="informationtype.id",
description="The ID of the InformationType",
)
type: str
revision_authors: list[str] | None
revision_reviewers: list[str] | None
revision_approvers: list[str] | None
revision_comment: str = Field(
description="Explanation of what changed in this revision", default=""
# type: "InformationType" = Relationship()
contributors: list[User] = Relationship(
back_populates="contributed",
link_model=DocumentUserLink,
sa_relationship_kwargs={
"order_by": "DocumentUserLink.position",
"lazy": "selectin",
},
)
"""Holds all relationships with users but does not show up in JSON Schema"""
@computed_field
@property
def authors(self) -> list[dict[str, Any]]:
    """List of author info for the UI."""
    # The return annotation now says "list of dicts" because that is what is
    # built below; computed_field uses the annotation for serialization and
    # for the JSON Schema, so declaring list[User] was inaccurate.
    # NOTE(review): no role filter is applied here, so every contributor is
    # returned -- compare with `reviewers`; confirm that is intended.
    # NOTE(review): `User.name` is not among the User fields visible in this
    # diff (given_name / family_name / preferred_name) -- confirm it exists.
    return [{"id": a.id, "name": a.name} for a in self.contributors]
@computed_field
@property
def reviewers(self) -> list[dict[str, Any]]:
    """List of reviewer info for the UI."""
    # The return annotation now says "list of dicts" because that is what is
    # built below; computed_field uses the annotation for serialization and
    # for the JSON Schema, so declaring list[User] was inaccurate.
    # NOTE(review): neither `role` nor `name` is among the User fields visible
    # in this diff; the only role-like field shown is
    # DocumentUserLink.role_note -- confirm where the role is meant to live.
    return [
        {"id": a.id, "name": a.name}
        for a in self.contributors
        if a.role == "reviewer"
    ]
# revision_reviewers: list[RevisionReviewerLink] = Relationship(
# back_populates="reviewed",
# link_model=RevisionReviewerLink,
# sa_relationship_kwargs={
# "order_by": "RevisionReviewerLink.position",
# "cascade": "all, delete-orphan",
# },
# )
# """The reviewer(s), if any."""
# revision_approvers: list[RevisionApproverLink] = Relationship(
# back_populates="approved",
# link_model=RevisionApproverLink,
# sa_relationship_kwargs={
# "order_by": "RevisionApproverLink.position",
# "cascade": "all, delete-orphan",
# },
# )
# """The approver(s), if any."""
revision_comment: str | None = None
"""Explanation of what changed in this revision"""
status: STATUS = STATUS.RESERVED
usage: USAGE = USAGE.FOR_INFORMATION
retention_plan: RETENTION = RETENTION.LIFETIME
@ -433,14 +534,18 @@ class Document(BaseModel):
# filenames may be empty at first, i.e. for RESERVED docs
filenames: list[str] = Field(
description="Filenames of files attached to this Document. Main file should be the first.",
default=[],
default_factory=list,
sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
)
file_notes: list[str] = Field(
description="Short description of each file represented in filenames.",
default=[],
default_factory=list,
sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
)
checksums: list[str] = Field(
description="SHA-256 checksum of each file for data integrity", default=[]
description="SHA-256 checksum of each file for data integrity",
default_factory=list,
sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
)
"""Checksums are used to verify long-term data integrity against tampering
and data degradation. While BLAKE3 checksums are faster, SHA-256 is more standard
@ -455,11 +560,6 @@ class Document(BaseModel):
description="Additional information about the Document/Record", default=""
)
@field_validator("type", mode="after")
@classmethod
def type_must_be_valid(cls, v: str) -> str:
assert v in ["CALC", "PROC"], f"{v} must be within the list of doctypes"
@computed_field
@property
def status_category(self) -> str:
@ -479,7 +579,32 @@ class Document(BaseModel):
return self
class PredecessorTask(BaseModel):
class Organization(NRSKModel, table=True):
"""An organization of people: companies, departments, governments, etc."""
# Document.originating_organization_id is a foreign key to this table's id.
# Both name and abbreviation are declared with index=True.
name: str = Field(index=True)
"""Organization Name"""
abbreviation: str | None = Field(default=None, index=True)
website: str | None = None
is_active: bool = Field(default=True)
# Allow it to be hierarchical to capture full org trees and refer to
# divisions: parent_id is a self-referential foreign key to organization.id
# and is None when no parent is recorded.
parent_id: _PK_TYPE | None = Field(
default=None,
foreign_key="organization.id",
)
"""The parent organization this org reports to"""
# Self-referential pair: `parent` is the many-to-one side (remote_side is
# Organization.id) and `child_orgs` is the matching one-to-many collection.
parent: Optional["Organization"] = Relationship(
back_populates="child_orgs",
sa_relationship_kwargs={"remote_side": "Organization.id"},
)
child_orgs: list["Organization"] = Relationship(back_populates="parent")
class PredecessorTask(NRSKModel):
"""Link to a predecessor task."""
class PRED_TYPE(StrEnum): # noqa: N801
@ -494,7 +619,6 @@ class PredecessorTask(BaseModel):
SF = "SF"
"""Start-to-finish: predecessor starts before successor finishes (uncommon, maybe shift change)"""
uuid: UUID_PK
id: str
"""ID of the predecessor task."""
type: PRED_TYPE = PRED_TYPE.FS
@ -505,10 +629,9 @@ class PredecessorTask(BaseModel):
)
class ScheduledTask(BaseModel):
class ScheduledTask(NRSKModel):
"""Scheduled task, e.g. in P6."""
uuid: UUID_PK
name: str
id: str | None = None
is_milestone: bool = False