Add postgresql persistence of data in data dict

Moved back to sqlmodel because we do need some way of putting this info into a database.
2026-01-05 12:04:22 -05:00 · 2026-01-05 12:04:22 -05:00 · 8c73123862
commit 8c73123862
parent f7ee72a66b
8 changed files with 355 additions and 77 deletions
--- a/documents/conf.py
+++ b/documents/conf.py
@ -193,6 +193,7 @@ apidoc_separate_modules = True
 autodoc_pydantic_model_show_field_summary = True
 autodoc_pydantic_model_show_validator_summary = True
 autodoc_pydantic_field_doc_policy = "both"
 autodoc_pydantic_field_docutils_summary = True
 set_type_checking_flag = True
--- a/pyproject.toml
+++ b/pyproject.toml
@ -38,6 +38,11 @@ dependencies = [
  "email-validator>=2.3.0",
  "sphinxcontrib-apidoc>=0.6.0",
  "autodoc-pydantic>=2.2.0",
  "sqlmodel>=0.0.31",
  "fastapi>=0.128.0",
  "uvicorn>=0.38.0",
  "python-dotenv>=1.2.1",
  "psycopg2>=2.9.11",
 ]
 classifiers = [
    "Programming Language :: Python :: 3",
--- a/src/nrsk/init.py
+++ b/src/nrsk/init.py
@ -0,0 +1,7 @@
 """NRSK root."""
 from pathlib import Path
 # Directory that contains this package's source files.
 PACKAGE_ROOT = Path(__file__).resolve().parent
 # Two levels above the package directory.
 PROJECT_ROOT = PACKAGE_ROOT.parent.parent
 # The "documents" tree sits directly under the project root.
 DOCS_ROOT = PROJECT_ROOT / "documents"
--- a/src/nrsk/db.py
+++ b/src/nrsk/db.py
@ -0,0 +1,18 @@
 """Database management code."""
 import os
 from dotenv import load_dotenv
 from sqlmodel import Session, SQLModel, create_engine, select
 # Load variables from a local .env file (if any) before reading the
 # environment below.
 load_dotenv()
 # Connection settings; each one is None when its variable is unset.
 POSTGRES_USER = os.getenv("POSTGRES_USER")
 POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")
 # Everything that follows the "@" in the connection URL
 # (presumably host[:port]/database -- confirm against deployment config).
 POSTGRES_PATH = os.getenv("POSTGRES_PATH")
 def get_engine():
     """Build a new engine for the configured PostgreSQL database.

     The connection URL is assembled from the module-level ``POSTGRES_USER``,
     ``POSTGRES_PASSWORD`` and ``POSTGRES_PATH`` values. The user name and
     password are expected as raw (not already percent-encoded) text.

     Returns:
         The engine returned by ``create_engine`` for that URL.
     """
     from urllib.parse import quote

     # Percent-encode the credentials: an unescaped "@", ":", "/" or "#" in a
     # password would otherwise be parsed as a URL delimiter. ``str()`` keeps
     # the previous rendering ("None") when a variable is unset.
     user = quote(str(POSTGRES_USER), safe="")
     password = quote(str(POSTGRES_PASSWORD), safe="")
     return create_engine(f"postgresql://{user}:{password}@{POSTGRES_PATH}")
--- a/src/nrsk/documents/intake.py
+++ b/src/nrsk/documents/intake.py
@ -0,0 +1,97 @@
 """Document data intake."""
 import os
 from contextlib import asynccontextmanager
 from fastapi import FastAPI, Request
 from fastapi.responses import HTMLResponse
 from sqlmodel import Session, SQLModel, select
 from nrsk.db import get_engine
 # import others to create DB?
 from nrsk.models import Document, User
 # Single engine created at import time and shared by the request handlers below.
 engine = get_engine()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan hook handed to ``FastAPI(lifespan=...)``.

    Nothing is done before the ``yield``; after it, a message is printed.
    """
    # Startup phase: nothing to initialise yet.
    yield  # the application serves requests while suspended here
    # Shutdown phase.
    print("Shutting down safely")
 # The application object; `lifespan` supplies its startup/shutdown hook.
 app = FastAPI(lifespan=lifespan)
@app.get("/schema")
def get_schema():
    """Serve the JSON Schema of ``Document`` in serialization mode.

    The entry form served at ``/`` fetches this endpoint to build its editor.
    """
    schema = Document.model_json_schema(mode="serialization")
    return schema
@app.post("/submit")
def submit_data(data: Document):
    """Validate a submitted ``Document`` and store it in the database.

    Args:
        data: The document parsed from the JSON request body.

    Returns:
        A dict with ``"status": "success"`` and the ``id`` of the stored row.
    """
    with Session(engine) as session:
        # Explicit validation pass before persisting (presumably because
        # table models are not validated on construction -- confirm).
        data = Document.model_validate(data)
        session.add(data)
        session.commit()
        # Read the id while the session is still open.
        return {"status": "success", "id": data.id}
@app.get("/documents/")
def read_documents(skip: int = 0, limit: int = 10):
    """Return one page of stored documents.

    Args:
        skip: Number of rows to skip (applied as the query offset).
        limit: Maximum number of rows to return.

    Returns:
        The list of ``Document`` rows selected by the query.
    """
    page_query = select(Document).offset(skip).limit(limit)
    with Session(engine) as session:
        documents = session.exec(page_query).all()
    return documents
@app.get("/", response_class=HTMLResponse)
def get_form():
    """Serve the HTML page containing the document entry form.

    The page fetches ``/schema`` to build a JSON-Editor form and posts the
    edited value to ``/submit``. A success message is shown only when the
    server answers with a 2xx status; HTTP errors and network failures are
    reported to the user instead.

    Returns:
        The HTML document as a string.
    """
    return """
    <!DOCTYPE html>
    <html>
    <head>
        <title>QA Entry Form</title>
        <script src="https://cdn.jsdelivr.net/npm/@json-editor/json-editor@latest/dist/jsoneditor.min.js"></script>
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css">
    </head>
    <body class="container mt-5">
        <h2>Submit QA Revision</h2>
        <div id="editor_holder"></div>
        <button id="submit" class="btn btn-primary mt-3">Save to Database</button>
        <script>
            // 1. Fetch the schema from FastAPI
            fetch('/schema').then(res => res.json()).then(schema => {
                const editor = new JSONEditor(document.getElementById('editor_holder'), {
                    schema: schema,
                    theme: 'bootstrap5',
                    iconlib: 'fontawesome5'
                });
                // 2. Handle Submission
                document.getElementById('submit').addEventListener('click', () => {
                    const errors = editor.validate();
                    if (errors.length) {
                        alert("Validation Error: " + JSON.stringify(errors));
                        return;
                    }
                    fetch('/submit', {
                        method: 'POST',
                        headers: {'Content-Type': 'application/json'},
                        body: JSON.stringify(editor.getValue())
                    }).then(response => {
                        // fetch() only rejects on network failure, so HTTP
                        // errors (e.g. 422 validation) must be checked here.
                        if (!response.ok) {
                            throw new Error("Server responded with HTTP " + response.status);
                        }
                        alert("Saved Successfully!");
                    }).catch(err => alert("Save failed: " + err.message));
                });
            });
        </script>
    </body>
    </html>
    """
--- a/src/nrsk/documents/seed_doc_db.py
+++ b/src/nrsk/documents/seed_doc_db.py
@ -0,0 +1,23 @@
 """Seed DB for documents, e.g. with doc types"""
 from sqlmodel import Session
 from nrsk import DOCS_ROOT
 from nrsk.db import get_engine
 from nrsk.documents.validate import validate_doc_types
 def seed_doc_types():
     """Insert the doc types defined in ``doc-types.yaml`` into the database.

     The YAML file under ``DOCS_ROOT / "_data"`` is validated first; every
     resulting object is then added in a single committed session.
     """
     db_engine = get_engine()
     yaml_path = DOCS_ROOT / "_data" / "doc-types.yaml"
     validated_types = validate_doc_types(yaml_path)
     with Session(db_engine) as session:
         session.add_all(validated_types)
         session.commit()
 # Allow running this module directly as a one-off seeding script.
 if __name__ == "__main__":
    seed_doc_types()
    print("seeded doc types")
--- a/src/nrsk/documents/validate.py
+++ b/src/nrsk/documents/validate.py
@ -11,9 +11,13 @@ import yaml
 from nrsk.models import InformationTypes
-def validate_doc_types(app):
+def sphinx_validate_doc_types(app) -> dict:
    """Ensure doc type data is valid."""
    fpath = pathlib.Path(app.srcdir) / "_data" / "doc-types.yaml"
    return validate_doc_types(fpath)
 def validate_doc_types(fpath: str) -> dict:
    with open(fpath) as f:
        data = yaml.safe_load(f)
-        data = InformationTypes.validate_python(data)
+        return InformationTypes.validate_python(data)
--- a/src/nrsk/models.py
+++ b/src/nrsk/models.py
@ -37,20 +37,22 @@ This is the official Data Dictionary discussed in :ref:`the Information
 Management Plan <info-mgmt-data-dict>`.
 """
 from __future__ import annotations  # allow lookahead annotation
 import re
 import uuid
 from datetime import datetime, timedelta
 from enum import StrEnum
-from typing import Annotated, Any
+from typing import Annotated, Any, Optional
 from uuid import UUID, uuid4
 # _PK_TYPE = UUID
 # moving away from UUID at least temporarily b/c SQLite doesn't
 # really support it, which makes adding new data via DBeaver harder
 _PK_TYPE = int
 from pydantic import (
    AnyUrl,
    BaseModel,
    ConfigDict,
    EmailStr,
    Field,
    PositiveInt,
    TypeAdapter,
    ValidationError,
@ -58,23 +60,45 @@ from pydantic import (
    field_validator,
    model_validator,
 )
 from sqlalchemy import text
 from sqlmodel import JSON, Column, Field, Relationship, SQLModel
 ALL_CAPS = re.compile("^[A-Z]$")
-UUID_PK = Annotated[
+
-    uuid.UUID,
+
-    Field(
+class NRSKModel(SQLModel):
-        default_factory=uuid.uuid4,
+    id: _PK_TYPE = Field(
        # default_factory=uuid4,
        description="The unique ID of this object. Used as a primary key in the database.",
-        examples=["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
+        primary_key=True,
-        frozen=True,
+        # schema_extra={
-    ),
+        #    "examples": ["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
-]
+        # },
    )
-class User(BaseModel):
+class DocumentUserLink(NRSKModel, table=True):
    """Linkages between users and documents."""
    position: int = Field(default=0)
    """Integer indicating order of people"""
    role_note: str = Field(
        default="",
    )
    """Extra information about role such as 'lead' or 'section 2.4'"""
    document_id: _PK_TYPE | None = Field(
        foreign_key="document.id", primary_key=True, default=None
    )
    user_id: _PK_TYPE | None = Field(
        foreign_key="user.id", primary_key=True, default=None
    )
 class User(NRSKModel, table=True):
    """A person involved in the Project."""
    uuid: UUID_PK
    given_name: str
    family_name: str
    preferred_name: str | None = None
@ -85,16 +109,19 @@ class User(BaseModel):
    organization: str | None
    title: str | None
    contributed: list["Document"] = Relationship(
        back_populates="contributors", link_model=DocumentUserLink
    )
-class OpenItem(BaseModel):
+
-    uuid: UUID_PK
+class OpenItem(NRSKModel):
    name: str
    status: str
    created_on: datetime
    closed_on: datetime | None = None
-class SSC(BaseModel):
+class SSC(NRSKModel):
    """
    A Structure, System, or Component in the plant.
@ -106,24 +133,25 @@ class SSC(BaseModel):
    contents in terms of systems/components/equipment/parts
    """
    uuid: UUID_PK
    name: str
    pbs_code: str | None = Field(
        description="An integer sequence that determines the 'system number' and also the ordering in printouts",
-        examples=["1.2.3", "20.5.11"],
+        schema_extra={
            "examples": ["1.2.3", "20.5.11"],
        },
        default="",
    )
    """PBS code is tied closely to the structure of the PBS, obviously. If 1.2
    is a category level, that's ok, but that doesn't imply that the second level
    of PBS 2 is also a category level; it may be systems.
    Since this can change in major PBS reorganizations, it should not be used
-    for cross-referencing (use UUID).
+    for cross-referencing (use ID).
    """
    abbrev: str = Field(
        description="A human-friendly abbreviation uniquely defining the system"
    )
-    parent: SSC | None = None
+    parent: Optional["SSC"] = None
    functions: list[str | None] = Field(
        description="Functions of this system", default=None
    )
@ -156,25 +184,25 @@ class SystemsList(BaseModel):
    systems: list[SSC]
-class ParamDef(BaseModel):
+class ParamDef(NRSKModel):
    """A parameter class defining an aspect of plant design."""
    uuid: UUID_PK
    name: str = Field(
-        description="Name of parameter class", examples=["Nominal gross power"]
+        schema_extra={"examples": ["Nominal gross power"]},
    )
    description: str = Field(
        description="Detailed description of what parameters of this type represent"
    )
    """Name of the parameter class."""
    description: str
    """Detailed description of what parameters of this type represent"""
    valid_units: list[str | None] = Field(
-        description="List of units allowed", examples=["MW", "W", "shp"], default=None
+        schema_extra={"examples": ["MW", "W", "shp"]}, default=None
    )
    """List of units allowed"""
-class ParamVal(BaseModel):
+class ParamVal(NRSKModel):
    """A particular value of a Parameter, assigned to a particular SSC."""
    uuid: UUID_PK
    ssc: SSC
    pdef: ParamDef
    value: str
@ -185,21 +213,23 @@ class ParamVal(BaseModel):
    source: str = Field(description="Where this version of the value came from")
-class ITSystem(BaseModel):
+class ITSystem(NRSKModel):
    """An IT system used by the project."""
    uuid: UUID_PK
    name: str
    vendor: str
    version: str | None = None
    use_cases: list[str] = Field(
-        description="One or more use cases this system is used for.",
+        schema_extra={
-        examples=[
+            "examples": [
-            [
+                [
-                "Document management",
+                    "Document management",
-            ]
+                ]
-        ],
+            ],
        }
    )
    """One or more use cases this system is used for."""
    physical_location: str = Field(description="Where the system is physically located")
    url: AnyUrl | None = Field(description="Full URL to the system", default=None)
    custodian: User | None = Field(
@ -210,27 +240,36 @@ class ITSystem(BaseModel):
    quality_related: bool
-class InformationType(BaseModel):
+class InformationType(NRSKModel, table=True):
    """A type/kind/class of Information, Document, or Record."""
    model_config = ConfigDict(extra="forbid")
    name: str
    abbrev: str
-    examples: list[str] | None = None
+    examples: list[str] | None = Field(
        default=None,
        sa_column=Column(JSON),
    )
    description: str = ""
    retention: str | None = ""
    record: bool = True
    use_cases: str = ""
    notes: str = ""
-    parent: InformationType | None = None
+    parent_id: _PK_TYPE | None = Field(default=None, foreign_key="informationtype.id")
    # Add these two relationships for easier DB parsing in code
    parent: Optional["InformationType"] = Relationship(
        back_populates="subtypes",
        sa_relationship_kwargs={"remote_side": "InformationType.id"},
    )
    subtypes: list["InformationType"] = Relationship(back_populates="parent")
 InformationTypes = TypeAdapter(list[InformationType])
 """A list of document types."""
-class Document(BaseModel):
+class Document(NRSKModel, table=True):
    """
    Data dictionary entry for Documents and Records.
@ -392,34 +431,96 @@ class Document(BaseModel):
        LIFETIME = "LIFETIME"
        """Lifetime of the plant."""
-    uuid: UUID_PK
+    # use_attribute_docstrings allows us to just use docstrings and get
-    number: str = Field(
+    # the same info in both the JSON Schema and also the Sphinx render
-        description="The identification number meeting the document numbering rules",
+    model_config = ConfigDict(use_attribute_docstrings=True)
-    )
+
    number: str
    """The identification number meeting the document numbering rules"""
    title: str = Field(
-        description="Descriptive title explaining the contents",
+        schema_extra={
-        examples=["CNSG Development and Status 1966-1977"],
+            "examples": ["CNSG Development and Status 1966-1977"],
        },
    )
    """Descriptive title explaining the contents"""
    revision: str = Field(
-        description="Revision code",
+        schema_extra={
-        examples=["0", "1", "1a", "A"],
+            "examples": ["0", "1", "1a", "A"],
        },
    )
-    originating_organization: str
+    """Revision code"""
-    originator_number: str | None = Field(
+
-        description="The originating organization's document number (if originated externally).",
+    originating_organization_id: _PK_TYPE | None = Field(
        foreign_key="organization.id",
        description="The organization that owns or issued this document",
        default=None,
    )
-    originator_revision: str | None = Field(
+    # This allows you to do `my_document.originating_organization` in Python
-        description="The originating organization's revision code (if originated externally).",
+    originating_organization: "Organization" = Relationship()
-        default=None,
+
    originator_number: str | None = None
    """The originating organization's document number (if originated externally)."""
    originator_revision: str | None = None
    """The originating organization's revision code (if originated externally)."""
    type_id: _PK_TYPE = Field(
        foreign_key="informationtype.id",
        description="The ID of the InformationType",
    )
-    type: str
+    # type: "InformationType" = Relationship()
-    revision_authors: list[str] | None
+
-    revision_reviewers: list[str] | None
+    contributors: list[User] = Relationship(
-    revision_approvers: list[str] | None
+        back_populates="contributed",
-    revision_comment: str = Field(
+        link_model=DocumentUserLink,
-        description="Explanation of what changed in this revision", default=""
+        sa_relationship_kwargs={
            "order_by": "DocumentUserLink.position",
            "lazy": "selectin",
        },
    )
    """Holds all relationships with users but does not show up in JSON Schema"""
    @computed_field
    @property
    def authors(self) -> list[User]:
        """List of author info for the UI."""
        return [{"id": a.id, "name": a.name} for a in self.contributors]
    @computed_field
    @property
    def reviewers(self) -> list[User]:
        """List of reviewer info for the UI."""
        return [
            {"id": a.id, "name": a.name}
            for a in self.contributors
            if a.role == "reviewer"
        ]
    # revision_reviewers: list[RevisionReviewerLink] = Relationship(
    #    back_populates="reviewed",
    #    link_model=RevisionReviewerLink,
    #    sa_relationship_kwargs={
    #        "order_by": "RevisionReviewerLink.position",
    #        "cascade": "all, delete-orphan",
    #    },
    # )
    # """The reviewer(s), if any."""
    # revision_approvers: list[RevisionApproverLink] = Relationship(
    #    back_populates="approved",
    #    link_model=RevisionApproverLink,
    #    sa_relationship_kwargs={
    #        "order_by": "RevisionApproverLink.position",
    #        "cascade": "all, delete-orphan",
    #    },
    # )
    # """The approver(s), if any."""
    revision_comment: str | None = None
    """Explanation of what changed in this revision"""
    status: STATUS = STATUS.RESERVED
    usage: USAGE = USAGE.FOR_INFORMATION
    retention_plan: RETENTION = RETENTION.LIFETIME
@ -433,14 +534,18 @@ class Document(BaseModel):
    # filenames may be empty at first, i.e. for RESERVED docs
    filenames: list[str] = Field(
        description="Filenames of files attached to this Document. Main file should be the first.",
-        default=[],
+        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    file_notes: list[str] = Field(
        description="Short description of each file represented in filenames.",
-        default=[],
+        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    checksums: list[str] = Field(
-        description="SHA-256 checksum of each file for data integrity", default=[]
+        description="SHA-256 checksum of each file for data integrity",
        default_factory=list,
        sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
    )
    """Checksums are used to verify long-term data integrity against tampering
    and data degradation. While BLAKE3 checksums are faster, SHA-256 is more standard
@ -455,11 +560,6 @@ class Document(BaseModel):
        description="Additional information about the Document/Record", default=""
    )
    @field_validator("type", mode="after")
    @classmethod
    def type_must_be_valid(cls, v: str) -> str:
        assert v in ["CALC", "PROC"], f"{v} must be within the list of doctypes"
    @computed_field
    @property
    def status_category(self) -> str:
@ -479,7 +579,32 @@ class Document(BaseModel):
        return self
-class PredecessorTask(BaseModel):
+class Organization(NRSKModel, table=True):
    """An organization of people: companies, departments, governments, etc."""
    name: str = Field(index=True)
    """Organization Name"""
    abbreviation: str | None = Field(default=None, index=True)
    website: str | None = None
    is_active: bool = Field(default=True)
    # allow it to be hierarchical to capture full org trees and refer to
    # divisions
    parent_id: _PK_TYPE | None = Field(
        default=None,
        foreign_key="organization.id",
    )
    """The parent organization this org reports to"""
    parent: Optional["Organization"] = Relationship(
        back_populates="child_orgs",
        sa_relationship_kwargs={"remote_side": "Organization.id"},
    )
    child_orgs: list["Organization"] = Relationship(back_populates="parent")
 class PredecessorTask(NRSKModel):
    """Link to a predecessor task."""
    class PRED_TYPE(StrEnum):  # noqa: N801
@ -494,7 +619,6 @@ class PredecessorTask(BaseModel):
        SF = "SF"
        """Start-to-finish: predecessor starts before successor finishes (uncommon, maybe shift change)"""
    uuid: UUID_PK
    id: str
    """ID of the predecessor task."""
    type: PRED_TYPE = PRED_TYPE.FS
@ -505,10 +629,9 @@ class PredecessorTask(BaseModel):
    )
-class ScheduledTask(BaseModel):
+class ScheduledTask(NRSKModel):
    """Scheduled task, e.g. in P6."""
    uuid: UUID_PK
    name: str
    id: str | None = None
    is_milestone: bool = False