Add PostgreSQL persistence of data in data dict

Moved back to SQLModel because we do need some way
of putting this info into a database.
This commit is contained in:
Nick Touran 2026-01-05 12:04:22 -05:00
parent f7ee72a66b
commit 8c73123862
8 changed files with 355 additions and 77 deletions

View file

@ -193,6 +193,7 @@ apidoc_separate_modules = True
autodoc_pydantic_model_show_field_summary = True autodoc_pydantic_model_show_field_summary = True
autodoc_pydantic_model_show_validator_summary = True autodoc_pydantic_model_show_validator_summary = True
autodoc_pydantic_field_doc_policy = "both" autodoc_pydantic_field_doc_policy = "both"
autodoc_pydantic_field_docutils_summary = True
set_type_checking_flag = True set_type_checking_flag = True

View file

@ -38,6 +38,11 @@ dependencies = [
"email-validator>=2.3.0", "email-validator>=2.3.0",
"sphinxcontrib-apidoc>=0.6.0", "sphinxcontrib-apidoc>=0.6.0",
"autodoc-pydantic>=2.2.0", "autodoc-pydantic>=2.2.0",
"sqlmodel>=0.0.31",
"fastapi>=0.128.0",
"uvicorn>=0.38.0",
"python-dotenv>=1.2.1",
"psycopg2>=2.9.11",
] ]
classifiers = [ classifiers = [
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",

View file

@ -0,0 +1,7 @@
"""NRSK root."""
from pathlib import Path
# Directory containing this module file, with symlinks resolved.
PACKAGE_ROOT = Path(__file__).resolve().parent
# Two directory levels above PACKAGE_ROOT.
PROJECT_ROOT = PACKAGE_ROOT.parent.parent
# The "documents" directory directly under PROJECT_ROOT.
DOCS_ROOT = PROJECT_ROOT / "documents"

18
src/nrsk/db.py Normal file
View file

@ -0,0 +1,18 @@
"""Database management code."""
import os
from dotenv import load_dotenv
from sqlmodel import Session, SQLModel, create_engine, select
# Let python-dotenv populate the process environment before it is read.
load_dotenv()

# Connection settings; each one is None when its variable is not set.
POSTGRES_USER = os.environ.get("POSTGRES_USER")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD")
POSTGRES_PATH = os.environ.get("POSTGRES_PATH")
def get_engine():
    """Create an engine for the configured PostgreSQL database.

    The connection URL is assembled from the module-level POSTGRES_USER,
    POSTGRES_PASSWORD and POSTGRES_PATH values; an unset value is
    interpolated as the text ``None``. Every call builds a new engine.
    """
    url = f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_PATH}"
    return create_engine(url)

View file

@ -0,0 +1,97 @@
"""Document data intake."""
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from sqlmodel import Session, SQLModel, select
from nrsk.db import get_engine
# import others to create DB?
from nrsk.models import Document, User
# Engine built once at import time and reused by the handlers below.
engine = get_engine()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the application's startup and shutdown hooks.

    Nothing happens at startup; a message is printed once the app stops.
    """
    # Startup phase: nothing to do yet.
    yield  # the application serves requests while suspended here
    # Shutdown phase.
    print("Shutting down safely")


app = FastAPI(lifespan=lifespan)
@app.get("/schema")
def get_schema():
    """Return the serialization-mode JSON Schema of the Document model."""
    document_schema = Document.model_json_schema(mode="serialization")
    return document_schema
@app.post("/submit")
def submit_data(data: Document):
    """Validate a submitted Document and persist it.

    Args:
        data: Document payload parsed from the JSON request body.

    Returns:
        A dict with ``status`` set to ``"success"`` and the ``id`` of the
        stored document.
    """
    # Validate explicitly before anything touches the database.
    document = Document.model_validate(data)
    with Session(engine) as session:
        session.add(document)
        session.commit()
        # commit() expires loaded attributes; reload while the session is
        # still open so the stored id can be read safely.
        session.refresh(document)
        return {"status": "success", "id": document.id}
@app.get("/documents/")
def read_documents(skip: int = 0, limit: int = 10):
    """Return one page of stored documents.

    Args:
        skip: Number of rows to skip (applied as the query offset).
        limit: Maximum number of rows to return (applied as the query limit).
    """
    page_query = select(Document).offset(skip).limit(limit)
    with Session(engine) as session:
        return session.exec(page_query).all()
@app.get("/", response_class=HTMLResponse)
def get_form():
    """Serve the HTML page hosting the JSON-editor entry form.

    The page fetches ``/schema`` to build the form and POSTs the edited
    value to ``/submit``. Success is only reported when the server answers
    with a 2xx status; any other outcome is shown as a failure.
    """
    return """
<!DOCTYPE html>
<html>
<head>
<title>QA Entry Form</title>
<script src="https://cdn.jsdelivr.net/npm/@json-editor/json-editor@latest/dist/jsoneditor.min.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css">
</head>
<body class="container mt-5">
<h2>Submit QA Revision</h2>
<div id="editor_holder"></div>
<button id="submit" class="btn btn-primary mt-3">Save to Database</button>
<script>
// 1. Fetch the schema from FastAPI
fetch('/schema').then(res => res.json()).then(schema => {
const editor = new JSONEditor(document.getElementById('editor_holder'), {
schema: schema,
theme: 'bootstrap5',
iconlib: 'fontawesome5'
});
// 2. Handle Submission
document.getElementById('submit').addEventListener('click', () => {
const errors = editor.validate();
if (errors.length) {
alert("Validation Error: " + JSON.stringify(errors));
return;
}
fetch('/submit', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify(editor.getValue())
}).then(response => {
// fetch() only rejects on network failure; HTTP errors must be checked explicitly
if (!response.ok) {
throw new Error("HTTP " + response.status);
}
alert("Saved Successfully!");
}).catch(err => alert("Save failed: " + err.message));
});
});
</script>
</body>
</html>
"""

View file

@ -0,0 +1,23 @@
"""Seed DB for documents, e.g. with doc types"""
from sqlmodel import Session
from nrsk import DOCS_ROOT
from nrsk.db import get_engine
from nrsk.documents.validate import validate_doc_types
def seed_doc_types():
    """Load the doc types from the documents data file and store them.

    Passes ``_data/doc-types.yaml`` under DOCS_ROOT to validate_doc_types,
    then adds every returned item to a database session and commits.
    """
    db_engine = get_engine()
    types_file = DOCS_ROOT / "_data" / "doc-types.yaml"
    validated_types = validate_doc_types(types_file)
    with Session(db_engine) as session:
        session.add_all(validated_types)
        session.commit()
if __name__ == "__main__":
    # Script entry point: seed the doc types, then report on stdout.
    seed_doc_types()
    print("seeded doc types")

View file

@ -11,9 +11,13 @@ import yaml
from nrsk.models import InformationTypes from nrsk.models import InformationTypes
def validate_doc_types(app): def sphinx_validate_doc_types(app) -> dict:
"""Ensure doc type data is valid.""" """Ensure doc type data is valid."""
fpath = pathlib.Path(app.srcdir) / "_data" / "doc-types.yaml" fpath = pathlib.Path(app.srcdir) / "_data" / "doc-types.yaml"
return validate_doc_types(fpath)
def validate_doc_types(fpath: str) -> dict:
with open(fpath) as f: with open(fpath) as f:
data = yaml.safe_load(f) data = yaml.safe_load(f)
data = InformationTypes.validate_python(data) return InformationTypes.validate_python(data)

View file

@ -37,20 +37,22 @@ This is the official Data Dictionary discussed in :ref:`the Information
Management Plan <info-mgmt-data-dict>`. Management Plan <info-mgmt-data-dict>`.
""" """
from __future__ import annotations # allow lookahead annotation
import re import re
import uuid
from datetime import datetime, timedelta from datetime import datetime, timedelta
from enum import StrEnum from enum import StrEnum
from typing import Annotated, Any from typing import Annotated, Any, Optional
from uuid import UUID, uuid4
# _PK_TYPE = UUID
# moving away from UUID at least temporarily b/c SQLite doesn't
# really support it, which makes adding new data via DBeaver harder
_PK_TYPE = int
from pydantic import ( from pydantic import (
AnyUrl, AnyUrl,
BaseModel, BaseModel,
ConfigDict, ConfigDict,
EmailStr, EmailStr,
Field,
PositiveInt, PositiveInt,
TypeAdapter, TypeAdapter,
ValidationError, ValidationError,
@ -58,23 +60,45 @@ from pydantic import (
field_validator, field_validator,
model_validator, model_validator,
) )
from sqlalchemy import text
from sqlmodel import JSON, Column, Field, Relationship, SQLModel
ALL_CAPS = re.compile("^[A-Z]$") ALL_CAPS = re.compile("^[A-Z]$")
UUID_PK = Annotated[
uuid.UUID,
Field( class NRSKModel(SQLModel):
default_factory=uuid.uuid4, id: _PK_TYPE = Field(
# default_factory=uuid4,
description="The unique ID of this object. Used as a primary key in the database.", description="The unique ID of this object. Used as a primary key in the database.",
examples=["3fa85f64-5717-4562-b3fc-2c963f66afa6"], primary_key=True,
frozen=True, # schema_extra={
), # "examples": ["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
] # },
)
class User(BaseModel): class DocumentUserLink(NRSKModel, table=True):
"""Linkages between users and documents."""
position: int = Field(default=0)
"""Integer indicating order of people"""
role_note: str = Field(
default="",
)
"""Extra information about role such as 'lead' or 'section 2.4'"""
document_id: _PK_TYPE | None = Field(
foreign_key="document.id", primary_key=True, default=None
)
user_id: _PK_TYPE | None = Field(
foreign_key="user.id", primary_key=True, default=None
)
class User(NRSKModel, table=True):
"""A person involved in the Project.""" """A person involved in the Project."""
uuid: UUID_PK
given_name: str given_name: str
family_name: str family_name: str
preferred_name: str | None = None preferred_name: str | None = None
@ -85,16 +109,19 @@ class User(BaseModel):
organization: str | None organization: str | None
title: str | None title: str | None
contributed: list["Document"] = Relationship(
back_populates="contributors", link_model=DocumentUserLink
)
class OpenItem(BaseModel):
uuid: UUID_PK class OpenItem(NRSKModel):
name: str name: str
status: str status: str
created_on: datetime created_on: datetime
closed_on: datetime | None = None closed_on: datetime | None = None
class SSC(BaseModel): class SSC(NRSKModel):
""" """
A Structure, System, or Component in the plant. A Structure, System, or Component in the plant.
@ -106,24 +133,25 @@ class SSC(BaseModel):
contents in terms of systems/components/equipment/parts contents in terms of systems/components/equipment/parts
""" """
uuid: UUID_PK
name: str name: str
pbs_code: str | None = Field( pbs_code: str | None = Field(
description="An integer sequence that determines the 'system number' and also the ordering in printouts", description="An integer sequence that determines the 'system number' and also the ordering in printouts",
examples=["1.2.3", "20.5.11"], schema_extra={
"examples": ["1.2.3", "20.5.11"],
},
default="", default="",
) )
"""PBS code is tied closely to the structure of the PBS, obviously. If 1.2 """PBS code is tied closely to the structure of the PBS, obviously. If 1.2
is a category level, that's ok, but that doesn't imply that the second level is a category level, that's ok, but that doesn't imply that the second level
of PBS 2 is also a category level; it may be systems. of PBS 2 is also a category level; it may be systems.
Since this can change in major PBS reorganizations, it should not be used Since this can change in major PBS reorganizations, it should not be used
for cross-referencing (use UUID). for cross-referencing (use ID).
""" """
abbrev: str = Field( abbrev: str = Field(
description="A human-friendly abbreviation uniquely defining the system" description="A human-friendly abbreviation uniquely defining the system"
) )
parent: SSC | None = None parent: Optional["SSC"] = None
functions: list[str | None] = Field( functions: list[str | None] = Field(
description="Functions of this system", default=None description="Functions of this system", default=None
) )
@ -156,25 +184,25 @@ class SystemsList(BaseModel):
systems: list[SSC] systems: list[SSC]
class ParamDef(BaseModel): class ParamDef(NRSKModel):
"""A parameter class defining an aspect of plant design.""" """A parameter class defining an aspect of plant design."""
uuid: UUID_PK
name: str = Field( name: str = Field(
description="Name of parameter class", examples=["Nominal gross power"] schema_extra={"examples": ["Nominal gross power"]},
)
description: str = Field(
description="Detailed description of what parameters of this type represent"
) )
"""Name of the parameter class."""
description: str
"""Detailed description of what parameters of this type represent"""
valid_units: list[str | None] = Field( valid_units: list[str | None] = Field(
description="List of units allowed", examples=["MW", "W", "shp"], default=None schema_extra={"examples": ["MW", "W", "shp"]}, default=None
) )
"""List of units allowed"""
class ParamVal(BaseModel): class ParamVal(NRSKModel):
"""A particular value of a Parameter, assigned to a particular SSC.""" """A particular value of a Parameter, assigned to a particular SSC."""
uuid: UUID_PK
ssc: SSC ssc: SSC
pdef: ParamDef pdef: ParamDef
value: str value: str
@ -185,21 +213,23 @@ class ParamVal(BaseModel):
source: str = Field(description="Where this version of the value came from") source: str = Field(description="Where this version of the value came from")
class ITSystem(BaseModel): class ITSystem(NRSKModel):
"""An IT system used by the project.""" """An IT system used by the project."""
uuid: UUID_PK
name: str name: str
vendor: str vendor: str
version: str | None = None version: str | None = None
use_cases: list[str] = Field( use_cases: list[str] = Field(
description="One or more use cases this system is used for.", schema_extra={
examples=[ "examples": [
[ [
"Document management", "Document management",
] ]
], ],
}
) )
"""One or more use cases this system is used for."""
physical_location: str = Field(description="Where the system is physically located") physical_location: str = Field(description="Where the system is physically located")
url: AnyUrl | None = Field(description="Full URL to the system", default=None) url: AnyUrl | None = Field(description="Full URL to the system", default=None)
custodian: User | None = Field( custodian: User | None = Field(
@ -210,27 +240,36 @@ class ITSystem(BaseModel):
quality_related: bool quality_related: bool
class InformationType(BaseModel): class InformationType(NRSKModel, table=True):
"""A type/kind/class of Information, Document, or Record.""" """A type/kind/class of Information, Document, or Record."""
model_config = ConfigDict(extra="forbid") model_config = ConfigDict(extra="forbid")
name: str name: str
abbrev: str abbrev: str
examples: list[str] | None = None examples: list[str] | None = Field(
default=None,
sa_column=Column(JSON),
)
description: str = "" description: str = ""
retention: str | None = "" retention: str | None = ""
record: bool = True record: bool = True
use_cases: str = "" use_cases: str = ""
notes: str = "" notes: str = ""
parent: InformationType | None = None parent_id: _PK_TYPE | None = Field(default=None, foreign_key="informationtype.id")
# Add these two relationships for easier DB parsing in code
parent: Optional["InformationType"] = Relationship(
back_populates="subtypes",
sa_relationship_kwargs={"remote_side": "InformationType.id"},
)
subtypes: list["InformationType"] = Relationship(back_populates="parent")
InformationTypes = TypeAdapter(list[InformationType]) InformationTypes = TypeAdapter(list[InformationType])
"""A list of document types.""" """A list of document types."""
class Document(BaseModel): class Document(NRSKModel, table=True):
""" """
Data dictionary entry for Documents and Records. Data dictionary entry for Documents and Records.
@ -392,34 +431,96 @@ class Document(BaseModel):
LIFETIME = "LIFETIME" LIFETIME = "LIFETIME"
"""Lifetime of the plant.""" """Lifetime of the plant."""
uuid: UUID_PK # use_attribute_docstrings allows us to just use docstrings and get
number: str = Field( # the same info in both the JSON Schema and also the Sphinx render
description="The identification number meeting the document numbering rules", model_config = ConfigDict(use_attribute_docstrings=True)
)
number: str
"""The identification number meeting the document numbering rules"""
title: str = Field( title: str = Field(
description="Descriptive title explaining the contents", schema_extra={
examples=["CNSG Development and Status 1966-1977"], "examples": ["CNSG Development and Status 1966-1977"],
},
) )
"""Descriptive title explaining the contents"""
revision: str = Field( revision: str = Field(
description="Revision code", schema_extra={
examples=["0", "1", "1a", "A"], "examples": ["0", "1", "1a", "A"],
},
) )
originating_organization: str """Revision code"""
originator_number: str | None = Field(
description="The originating organization's document number (if originated externally).", originating_organization_id: _PK_TYPE | None = Field(
foreign_key="organization.id",
description="The organization that owns or issued this document",
default=None, default=None,
) )
originator_revision: str | None = Field( # This allows you to do `my_document.originating_organization` in Python
description="The originating organization's revision code (if originated externally).", originating_organization: "Organization" = Relationship()
default=None,
originator_number: str | None = None
"""The originating organization's document number (if originated externally)."""
originator_revision: str | None = None
"""The originating organization's revision code (if originated externally)."""
type_id: _PK_TYPE = Field(
foreign_key="informationtype.id",
description="The ID of the InformationType",
) )
type: str # type: "InformationType" = Relationship()
revision_authors: list[str] | None
revision_reviewers: list[str] | None contributors: list[User] = Relationship(
revision_approvers: list[str] | None back_populates="contributed",
revision_comment: str = Field( link_model=DocumentUserLink,
description="Explanation of what changed in this revision", default="" sa_relationship_kwargs={
"order_by": "DocumentUserLink.position",
"lazy": "selectin",
},
) )
"""Holds all relationships with users but does not show up in JSON Schema"""
@computed_field
@property
def authors(self) -> list[dict[str, Any]]:
    """List of author info for the UI.

    Returns one ``{"id": ..., "name": ...}`` dict per entry in
    ``contributors``. The return annotation describes those dicts so the
    generated serialization schema matches what is actually returned.
    """
    # NOTE(review): no role filter is applied, so every contributor is
    # listed as an author -- confirm this is intended.
    # NOTE(review): reads ``name`` from each contributor -- confirm that
    # User exposes this attribute.
    return [{"id": a.id, "name": a.name} for a in self.contributors]
@computed_field
@property
def reviewers(self) -> list[dict[str, Any]]:
    """List of reviewer info for the UI.

    Returns one ``{"id": ..., "name": ...}`` dict for each entry in
    ``contributors`` whose ``role`` equals ``"reviewer"``. The return
    annotation describes those dicts so the generated serialization schema
    matches what is actually returned.
    """
    # NOTE(review): reads ``role`` and ``name`` from each contributor --
    # confirm that User exposes both attributes.
    return [
        {"id": a.id, "name": a.name}
        for a in self.contributors
        if a.role == "reviewer"
    ]
# revision_reviewers: list[RevisionReviewerLink] = Relationship(
# back_populates="reviewed",
# link_model=RevisionReviewerLink,
# sa_relationship_kwargs={
# "order_by": "RevisionReviewerLink.position",
# "cascade": "all, delete-orphan",
# },
# )
# """The reviewer(s), if any."""
# revision_approvers: list[RevisionApproverLink] = Relationship(
# back_populates="approved",
# link_model=RevisionApproverLink,
# sa_relationship_kwargs={
# "order_by": "RevisionApproverLink.position",
# "cascade": "all, delete-orphan",
# },
# )
# """The approver(s), if any."""
revision_comment: str | None = None
"""Explanation of what changed in this revision"""
status: STATUS = STATUS.RESERVED status: STATUS = STATUS.RESERVED
usage: USAGE = USAGE.FOR_INFORMATION usage: USAGE = USAGE.FOR_INFORMATION
retention_plan: RETENTION = RETENTION.LIFETIME retention_plan: RETENTION = RETENTION.LIFETIME
@ -433,14 +534,18 @@ class Document(BaseModel):
# filenames may be empty at first, i.e. for RESERVED docs # filenames may be empty at first, i.e. for RESERVED docs
filenames: list[str] = Field( filenames: list[str] = Field(
description="Filenames of files attached to this Document. Main file should be the first.", description="Filenames of files attached to this Document. Main file should be the first.",
default=[], default_factory=list,
sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
) )
file_notes: list[str] = Field( file_notes: list[str] = Field(
description="Short description of each file represented in filenames.", description="Short description of each file represented in filenames.",
default=[], default_factory=list,
sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
) )
checksums: list[str] = Field( checksums: list[str] = Field(
description="SHA-256 checksum of each file for data integrity", default=[] description="SHA-256 checksum of each file for data integrity",
default_factory=list,
sa_column=Column(JSON, nullable=False, server_default=text("'[]'")),
) )
"""Checksums are used to verify long-term data integrity against tampering """Checksums are used to verify long-term data integrity against tampering
and data degradation. While BLAKE3 checksums are faster, SHA-256 is more standard and data degradation. While BLAKE3 checksums are faster, SHA-256 is more standard
@ -455,11 +560,6 @@ class Document(BaseModel):
description="Additional information about the Document/Record", default="" description="Additional information about the Document/Record", default=""
) )
@field_validator("type", mode="after")
@classmethod
def type_must_be_valid(cls, v: str) -> str:
assert v in ["CALC", "PROC"], f"{v} must be within the list of doctypes"
@computed_field @computed_field
@property @property
def status_category(self) -> str: def status_category(self) -> str:
@ -479,7 +579,32 @@ class Document(BaseModel):
return self return self
class PredecessorTask(BaseModel): class Organization(NRSKModel, table=True):
"""An organization of people: companies, departments, governments, etc."""
name: str = Field(index=True)
"""Organization Name"""
abbreviation: str | None = Field(default=None, index=True)
website: str | None = None
is_active: bool = Field(default=True)
# allow it to be hierarchical to capture full org trees and refer to
# divisions
parent_id: _PK_TYPE | None = Field(
default=None,
foreign_key="organization.id",
)
"""The parent organization this org reports to"""
parent: Optional["Organization"] = Relationship(
back_populates="child_orgs",
sa_relationship_kwargs={"remote_side": "Organization.id"},
)
child_orgs: list["Organization"] = Relationship(back_populates="parent")
class PredecessorTask(NRSKModel):
"""Link to a predecessor task.""" """Link to a predecessor task."""
class PRED_TYPE(StrEnum): # noqa: N801 class PRED_TYPE(StrEnum): # noqa: N801
@ -494,7 +619,6 @@ class PredecessorTask(BaseModel):
SF = "SF" SF = "SF"
"""Start-to-finish: predecessor starts before successor finishes (uncommon, maybe shift change)""" """Start-to-finish: predecessor starts before successor finishes (uncommon, maybe shift change)"""
uuid: UUID_PK
id: str id: str
"""ID of the predecessor task.""" """ID of the predecessor task."""
type: PRED_TYPE = PRED_TYPE.FS type: PRED_TYPE = PRED_TYPE.FS
@ -505,10 +629,9 @@ class PredecessorTask(BaseModel):
) )
class ScheduledTask(BaseModel): class ScheduledTask(NRSKModel):
"""Scheduled task, e.g. in P6.""" """Scheduled task, e.g. in P6."""
uuid: UUID_PK
name: str name: str
id: str | None = None id: str | None = None
is_milestone: bool = False is_milestone: bool = False