Lots more Document data definitions

Schedule updates:

* Defined Schedule types
* Updated schedule loader to validate with pydantic
* Added ability to specify predecessor type and lead/lag

Other structural/outline stuff as well

Oh and added a unit test.
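
As a rough illustration of the new schedule input format (a sketch, not the commit's unit test; it assumes UUID_PK accepts an explicit UUID string, and the lane/task names here are made up):

import uuid
from nrsk.models import ScheduleInput

candidate = [
    {
        "name": "Licensing",
        "tasks": [
            {
                "uuid": str(uuid.uuid4()),
                "name": "Submit application",
                "id": "LIC-1",
                "duration_days": 10,  # converted to a timedelta by the model validator
            },
            {
                "uuid": str(uuid.uuid4()),
                "name": "Regulator review",
                "id": "LIC-2",
                "duration_days": 30,
                "predecessors": [
                    {
                        "uuid": str(uuid.uuid4()),
                        "id": "LIC-1",
                        "type": "SS",  # start-to-start
                        "lag": "P5D",  # 5-day lag; the model also allows negative lag (lead time)
                    }
                ],
            },
        ],
    }
]

lanes = ScheduleInput.validate_python(candidate)
assert lanes[0].tasks[1].predecessors[0].type == "SS"
assert lanes[0].tasks[1].duration.days == 30
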
Nick Touran 2025-12-19 14:15:07 -05:00
parent 36fcb5f260
commit 373dfe4c3b
16 changed files with 535 additions and 67 deletions

View file

@@ -37,20 +37,25 @@ This is the official Data Dictionary discussed in :ref:`the Information
Management Plan <info-mgmt-data-dict>`.
"""
from __future__ import annotations # allow lookahead annotation
import re
import uuid
from datetime import date, datetime
from typing import Annotated, Optional
from datetime import datetime, timedelta
from enum import StrEnum
from typing import Annotated, Any
from pydantic import (
AnyUrl,
BaseModel,
EmailStr,
Field,
FieldValidationInfo,
PositiveInt,
TypeAdapter,
ValidationError,
computed_field,
field_validator,
model_validator,
)
ALL_CAPS = re.compile("^[A-Z]+$")
@@ -61,7 +66,6 @@ UUID_PK = Annotated[
description="The unique ID of this object. Used as a primary key in the database.",
examples=["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
frozen=True,
primary_key=True,
),
]
@@ -72,10 +76,13 @@ class User(BaseModel):
uuid: UUID_PK
given_name: str
family_name: str
preferred_name: Optional[str] = None
preferred_name: str | None = None
previous_name: str | None = None
email: EmailStr
joined_on: Optional[datetime]
deactivated_on: Optional[datetime]
joined_on: datetime | None
deactivated_on: datetime | None
organization: str | None
title: str | None
class OpenItem(BaseModel):
@@ -83,7 +90,7 @@ class OpenItem(BaseModel):
name: str
status: str
created_on: datetime
closed_on: Optional[datetime] = None
closed_on: datetime | None = None
class SSC(BaseModel):
@@ -100,7 +107,7 @@ class SSC(BaseModel):
uuid: UUID_PK
name: str
pbs_code: Optional[str] = Field(
pbs_code: str | None = Field(
description="An integer sequence that determines the 'system number' and also the ordering in printouts",
examples=["1.2.3", "20.5.11"],
default="",
@@ -115,22 +122,22 @@ class SSC(BaseModel):
abbrev: str = Field(
description="A human-friendly abbreviation uniquely defining the system"
)
parent: Optional["SSC"] = None
functions: Optional[list[str]] = Field(
parent: SSC | None = None
functions: list[str] | None = Field(
description="Functions of this system", default=None
)
@field_validator("abbrev")
@field_validator("abbrev", mode="after")
@classmethod
def abbrev_must_be_all_caps(cls, v: str, info: FieldValidationInfo) -> str: # noqa: D102
assert re.match(ALL_CAPS, v), f"{info.field_name} must be all CAPS"
def abbrev_must_be_all_caps(cls, v: str) -> str: # noqa: D102
if not re.match(ALL_CAPS, v):
raise ValueError(f"{v} must be all CAPS")
return v
@field_validator("pbs_code")
@field_validator("pbs_code", mode="after")
@classmethod
def pbs_must_be_int_sequence(cls, v: str, info: FieldValidationInfo) -> str: # noqa: D102
assert not v or re.match(r"^(\d+\.?)+$", v), (
f"{info.field_name} must be an integer sequence, like 1.2.3"
)
def pbs_must_be_int_sequence(cls, v: str) -> str: # noqa: D102
if v and not re.match(r"^(\d+\.?)+$", v):
raise ValueError(f"{v} must be an integer sequence, like 1.2.3")
return v
class SystemsList(BaseModel):
@@ -158,7 +165,7 @@ class ParamDef(BaseModel):
description: str = Field(
description="Detailed description of what parameters of this type represent"
)
valid_units: Optional[list[str]] = Field(
valid_units: list[str] | None = Field(
description="List of units allowed", examples=["MW", "W", "shp"], default=None
)
@@ -166,10 +173,11 @@ class ParamDef(BaseModel):
class ParamVal(BaseModel):
"""A particular value of a Parameter, assigned to a particular SSC."""
uuid: UUID_PK
ssc: SSC
pdef: ParamDef
value: str
units: Optional[str] = None
units: str | None = None
pedigree: str = Field(
description="Indication of how well it is known (rough estimate, final design, as-built)."
)
@@ -182,7 +190,7 @@ class ITSystem(BaseModel):
uuid: UUID_PK
name: str
vendor: str
version: Optional[str] = None
version: str | None = None
use_cases: list[str] = Field(
description="One or more use cases this system is used for.",
examples=[
@@ -192,37 +200,313 @@ class ITSystem(BaseModel):
],
)
physical_location: str = Field(description="Where the system is physically located")
url: Optional[AnyUrl] = Field(description="Full URL to the system", default=None)
custodian: Optional[User] = Field(
url: AnyUrl | None = Field(description="Full URL to the system", default=None)
custodian: User | None = Field(
description="Person currently in charge of system", default=None
)
launched_on: Optional[datetime] = None
retired_on: Optional[datetime] = None
launched_on: datetime | None = None
retired_on: datetime | None = None
quality_related: bool
class Document(BaseModel):
"""
Data dictionary entry for Documents and Records.
Document data is designed to satisfy the needs defined in :ref:`rmdc-proc`.
See Also
--------
* Some of the field definitions come from CFIHOS
https://www.jip36-cfihos.org/wp-content/uploads/2023/08/v.1.5.1-CFIHOS-Specification-Document-1.docx
* ISO-19650 has different Status Codes defining suitability level (for information, as-built)
https://ukbimframework.org/wp-content/uploads/2020/05/ISO19650-2Edition4.pdf
"""
class STATUS(StrEnum):
"""Document Status options."""
# Much of the wording here comes from cloverDocumentControlRecords2010.
# NOTE: if you add or remove a status, be sure to also update the
# category property below AND :ref:`rmdc-doc-status`!
## Not Yet Approved:
RESERVED = "RESERVED"
"""
A Document ID has been assigned, but the document is in development or
has not yet been started (default).
"""
IN_PROGRESS = "IN PROGRESS"
"""One or more authors are creating or revising the document."""
IN_REVIEW = "IN REVIEW"
"""A completed draft of the document has been submitted and is pending review."""
REJECTED = "REJECTED"
"""A draft that was rejected by the review team and may be revised and resubmitted."""
AUTHORIZED = "AUTHORIZED"
"""A controlled revision that has been signed but is not yet effective.
Such documents may be used for training, etc. Documents with this status may
be used for plant modifications in a work package, but not for normal operations."""
REFERENCE = "REFERENCE"
"""Document is stored in EDMS for ease of access and reference, but
there is no assertion that the information is the latest available.
Useful for Standards, engineering handbook excerpts, vendor notices."""
NATIVE = "NATIVE"
"""A document file that may be in EDMS in the native file format. Not
used in the field because they (a) may require special software to view
and (b) may not be controlled for field use (i.e. not quarantined if
errors are discovered)."""
## Approved:
APPROVED = "APPROVED"
"""A document revision that has been submitted by the releasing
organization and that is authorized for the use case defined in
the suitability code.
* A drawing with this status during operation reflects the plant configuration
* A drawing with this status before or during construction reflects that it is
ready to be fabricated/built
* A procedure with this status is effective.
"""
## No longer Approved:
QUARANTINED = "QUARANTINED"
"""(On hold, Suspended) A document revision that was previously
authorized and has been placed on hold, e.g. a procedure that cannot be
performed as written or a design that is known to have pending changes."""
SUPERSEDED = "SUPERSEDED"
"""A document that has been replaced by another document. The new
document is to be recorded in the index."""
REVISED = "REVISED"
"""A document that has been replaced by a subsequent revision of that
document."""
VOIDED = "VOIDED"
"""A document or revision that is no longer needed and there is no
revision or superseding document. This would also be used for documents
that have reached a predetermined expiration date, such as a temporary
procedure."""
CLOSED = "CLOSED"
"""(Archived) A document for which the work has been completed."""
@property
def category(self) -> str:
"""High-level status category: Not yet approved, Approved, or No Longer Approved."""
if self.value in {
self.RESERVED,
self.IN_PROGRESS,
self.IN_REVIEW,
self.REJECTED,
self.AUTHORIZED,
self.REFERENCE,
self.NATIVE,
}:
return "Not Yet Approved"
if self.value in {self.APPROVED}:
return "Approved"
return "No Longer Approved"
class USAGE(StrEnum):
"""Usage options.
Usage governs what use cases a document may be used for. It is a notion
derived from the ISO 19650 'suitability' idea, but used in combination
with the NIRMA status codes. It allows a document to be approved for
e.g. a conceptual design stage without letting it inadvertently be
released for bid or manufacture. Releasing organizations can update the
suitability as needed.
See https://ukbimframework.org/wp-content/uploads/2020/09/Guidance-Part-C_Facilitating-the-common-data-environment-workflow-and-technical-solutions_Edition-1.pdf
"""
FOR_INFORMATION = "FOR INFORMATION"
"""A document revision that may be used for information only, not for
any contractual purpose."""
FOR_STAGE_APPROVAL = "FOR STAGE APPROVAL"
"""A document revision that is considered complete for the contractual stage in
which it was created. For example, in a Preliminary Design phase, this
usage would indicate that it is at the expected usage level for
preliminary design. Most design-phase documents that are not yet ready
for bid or construction will be marked for this usage."""
FOR_BID = "FOR BID"
"""A document revision that is ready to be sent to external parties for bid.
During the bid process, changes may be expected based on vendor feedback."""
FOR_CONSTRUCTION = "FOR CONSTRUCTION"
"""A document revision that is ready to be sent to the field for manufacture,
fabrication, construction. An approved document with this usage implies
that all the quality, regulatory, and design aspects are in place, and
that work can proceed. However, what is constructed is not yet
authorized for operation."""
FOR_OPERATION = "FOR OPERATION"
"""A document revision that can be used to operate the business and/or plant.
Procedures of this usage may be used to do work or operate equipment."""
AS_BUILT = "AS BUILT"
"""A document revision that is an as-built record of construction or manufacture.
Documents of this usage may be used to operate the plant."""
class RETENTION(StrEnum):
"""Retention plan options.
Retention plans define how long the document or record is to be
kept before it is destroyed.
"""
LIFETIME = "LIFETIME"
"""Lifetime of the plant."""
uuid: UUID_PK
number: str = Field(
description="The identification number meeting the document numbering rules",
)
title: str = Field(
description="Descriptive title explaining the contents",
examples=["CNSG Development and Status 1966-1977"],
)
"""
.. impl:: Document title
Document titles are implemented as the ``title`` field of the Document data model.
"""
revision: str = Field(
description="Revision number",
description="Revision code",
examples=["0", "1", "1a", "A"],
)
originating_organization: str
originator_number: str | None = Field(
description="The originating organization's document number (if originated externally).",
default=None,
)
originator_revision: str | None = Field(
description="The originating organization's revision code (if originated externally).",
default=None,
)
type: str
originators: list[str]
status: str
revision_authors: list[str] | None
revision_reviewers: list[str] | None
revision_approvers: list[str] | None
revision_comment: str = Field(
description="Explanation of what changed in this revision", default=""
)
status: STATUS = STATUS.RESERVED
usage: USAGE = USAGE.FOR_INFORMATION
retention_plan: RETENTION = RETENTION.LIFETIME
restriction_codes: str = Field(
description="Markings for export control, legal, etc.", default=""
)
@field_validator("type")
actual_reviewed_date: datetime | None = None
actual_approved_date: datetime | None = None
# filenames may be empty at first, i.e. for RESERVED docs
filenames: list[str] = Field(
description="Filenames of files attached to this Document. Main file should be the first.",
default=[],
)
checksums: list[str] = Field(
description="SHA-256 checksum of each file for data integrity", default=[]
)
"""Checksums are used to verify long-term data integrity against tampering
and data degradation. While BLAKE3 checksums are faster, SHA-256 is more standard
and built-in at this point. In the future, switching to BLAKE3 may make sense for
easier periodic re-verification of large data libraries."""
physical_location: str | None = Field(
description="Location of a media when not stored as an electronic file.",
default=None,
)
@field_validator("type", mode="after")
@classmethod
def type_must_be_valid(cls, v: str, info: FieldValidationInfo) -> str:
assert v in ["CALC", "PROC"], (
f"{info.field_name} must be within the list of doctypes"
)
def type_must_be_valid(cls, v: str) -> str:
if v not in ["CALC", "PROC"]:
raise ValueError(f"{v} must be within the list of doctypes")
return v
@computed_field
@property
def status_category(self) -> str:
"""The top-level status category, derived from Document Status"""
return self.status.category
@model_validator(mode="after")
def cant_have_electronic_and_physical_location(self) -> "Document": # noqa: D102
has_physical_location = self.physical_location is not None
has_file = bool(self.filenames)  # default is [], so check for non-empty rather than None
if has_physical_location and has_file:
raise ValueError(
"Cannot provide both physical_location and filename(s). They are mutually exclusive."
)
return self
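
The checksums field above stores one SHA-256 digest per attached file; a minimal sketch of producing one with the standard library (the helper name is arbitrary, not from the repo):

import hashlib
from pathlib import Path

def sha256_of(path: Path) -> str:
    """Return the SHA-256 hex digest of a file, read in 1 MiB chunks."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

# e.g. doc.checksums = [sha256_of(Path(name)) for name in doc.filenames]
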
class PredecessorTask(BaseModel):
"""Link to a predecessor task."""
class PRED_TYPE(StrEnum): # noqa: N801
"""Predecessor relationship type."""
FS = "FS"
"""Finish-to-start: predecessor finishes before successor starts (very common)"""
FF = "FF"
"""Finish-to-finish: predecessor finishes before successor can finish"""
SS = "SS"
"""Start-to-start: predecessor starts before successor starts"""
SF = "SF"
"""Start-to-finish: predecessor starts before successor finishes (uncommon, maybe shift change)"""
uuid: UUID_PK
id: str
"""ID of the predecessor task."""
type: PRED_TYPE = PRED_TYPE.FS
lag: timedelta | None = Field(
description="Lag time. Negative timedelta implies negative lag "
"(lead time, starts before predecessor ends)",
default=None,
)
class ScheduledTask(BaseModel):
"""Scheduled task, e.g. in P6."""
uuid: UUID_PK
name: str
id: str | None = None
is_milestone: bool = False
predecessors: list[PredecessorTask] = []
duration: timedelta | None = None
actual_start: datetime | None = None
actual_end: datetime | None = None
scheduled_start: datetime | None = None
scheduled_end: datetime | None = None
@model_validator(mode="before")
@classmethod
def convert_days_to_duration(cls, data: Any) -> Any:
"""Allow input of duration_days, but convert on way in."""
if isinstance(data, dict):
days = data.get("duration_days")
if days is not None:
data["duration"] = timedelta(days=float(days))
del data["duration_days"]
return data
class ScheduleLane(BaseModel):
"""A section of a schedule."""
name: str
color: str | None = None
tasks: list[ScheduledTask]
ScheduleInput = TypeAdapter(list[ScheduleLane])
"""A list of lanes, representing full schedule input."""

View file

@@ -63,4 +63,35 @@ def load_yaml_tree(root_path: str | Path) -> dict:
if parts and not current:
current.update({"name": parts[-1]})
logger.info(f"{current}")
assign_hierarchical_code(tree)
return tree
def assign_hierarchical_code(data, current_path=""):
"""
Traverses a nested dictionary and adds a 'pbs_code' key to every
dictionary at any level of nesting, containing its hierarchical path.
The dictionary is modified in place.
Args:
data (dict): The dictionary to traverse and modify.
current_path (str): The current hierarchical path string (e.g., "1.", "2.3.").
"""
if not isinstance(data, dict):
return
item_counter = 1
keys_to_process = list(data.keys())
for key in keys_to_process:
value = data[key]
# e.g., if current_path="1.", the next item's number is "1.1"
new_path = f"{current_path}{item_counter}"
if isinstance(value, dict):
value["pbs_code"] = new_path
assign_hierarchical_code(value, new_path + ".")
item_counter += 1
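
For illustration, a hypothetical two-level tree (not from the repo) and the pbs_code values it would receive:

plant = {
    "reactor": {"coolant": {}, "fuel": {}},
    "turbine": {},
}
assign_hierarchical_code(plant)
assert plant["reactor"]["pbs_code"] == "1"
assert plant["reactor"]["coolant"]["pbs_code"] == "1.1"
assert plant["reactor"]["fuel"]["pbs_code"] == "1.2"
assert plant["turbine"]["pbs_code"] == "2"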

View file

@@ -108,11 +108,12 @@ class PlantBreakdownStructureTable(Table):
def add_row(self, tbody, columns, path, value, depth):
"""Add a row to the table."""
row = nodes.row()
indent = "\u2003" * depth * 2  # em spaces for indentation
indent1 = "\u2003" * depth  # em spaces for indentation
indent2 = "\u2003" * depth * 2
cols = []
cols.append(path) # path
cols.append(value.get("name", "(noname)") * 10)
cols.append(indent1 + value.get("pbs_code", ""))
cols.append(indent2 + value.get("name", "(noname)"))
cols.append(value.get("abbrev", ""))
cols.append(value.get("desc", ""))
cols.append(value.get("tags", ""))

View file

@@ -0,0 +1,42 @@
"""Load plant PBS data."""
from collections import defaultdict
from pathlib import Path
from ruamel.yaml import YAML
def load_yaml_tree(root_path: str | Path) -> dict:
"""Load data from yaml tree."""
root_path = Path(root_path)
yaml = YAML(typ="safe")
tree = {}
for yaml_file in sorted(root_path.rglob("*.yaml")):
rel = yaml_file.relative_to(root_path).with_suffix("") # remove .yaml
parts = (
rel.parent.parts if rel.name == "index" else (*rel.parent.parts, rel.name)
)
current = tree
for part in parts[:-1]:
if part not in current:
current[part] = {}
current = current[part]
key = parts[-1]
data = yaml.load(yaml_file)
if key == "index": # treat index.yaml as folder metadata
current.update(data or {})
else:
if (
current.get(key) is not None
and isinstance(current[key], dict)
and isinstance(data, dict)
):
current[key].update(data) # merge if conflict
else:
current[key] = data
return tree
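
As a sketch of the resulting structure (hypothetical files, not from the repo):

# Given a layout like
#   pbs/reactor/index.yaml    -> {"name": "Reactor", "abbrev": "RX"}
#   pbs/reactor/coolant.yaml  -> {"name": "Coolant System", "abbrev": "CS"}
# load_yaml_tree("pbs") yields
#   {"reactor": {"coolant": {"name": "Coolant System", "abbrev": "CS"},
#                "name": "Reactor", "abbrev": "RX"}}
# i.e. each folder's index.yaml is merged into that folder's dict, and every
# other YAML file becomes a child dict keyed by its filename.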

View file

@@ -12,6 +12,7 @@ from datetime import datetime
from pathlib import Path
import jpype
import jpype.imports # required though not explicitly 'used'
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
@@ -33,6 +34,7 @@ from org.mpxj import ( # noqa: E402
Duration,
ProjectFile,
Relation,
RelationType,
TimeUnit,
)
from org.mpxj.cpm import MicrosoftScheduler, PrimaveraScheduler # noqa: E402
@@ -42,6 +44,16 @@ from org.mpxj.writer import ( # noqa:E402
UniversalProjectWriter,
)
from nrsk.models import PredecessorTask, ScheduleInput
_PT = PredecessorTask.PRED_TYPE
RELATION_MAP = {
_PT.FF: RelationType.FINISH_FINISH,
_PT.FS: RelationType.FINISH_START,
_PT.SS: RelationType.START_START,
_PT.SF: RelationType.START_FINISH,
}
def create_task(parent, name, duration):
"""Make a planned task."""
@@ -58,6 +70,7 @@ def load_from_yaml(fname: str = "schedule.yaml") -> ProjectFile:
"""Load data file in YAML format."""
with open(fname) as f:
data = yaml.safe_load(f)
data = ScheduleInput.validate_python(data)
project = ProjectFile()
@@ -67,35 +80,43 @@ def load_from_yaml(fname: str = "schedule.yaml") -> ProjectFile:
predecessors = {}
tasks_by_id = {}
for lane in data["lanes"]:
for lane in data:
summary = project.addTask()
summary.setName(lane["name"])
summary.setName(lane.name)
for task_d in lane["tasks"]:
if task_d.get("milestone"):
for task_d in lane.tasks:
if task_d.is_milestone:
task = create_task(
summary, task_d["name"], Duration.getInstance(0, TimeUnit.DAYS)
summary, task_d.name, Duration.getInstance(0, TimeUnit.DAYS)
)
else:
if not task_d.get("duration_days"):
raise ValueError(f"{task_d} needs a duration")
task = create_task(
summary,
task_d["name"],
Duration.getInstance(task_d["duration_days"], TimeUnit.DAYS),
task_d.name,
Duration.getInstance(task_d.duration.days, TimeUnit.DAYS),
)
# track predecessors by ID to build after all tasks exist
if tid := task_d.get("id"):
# track predecessors to build after all tasks exist
if tid := task_d.id:
tasks_by_id[tid] = task
for pred_id in task_d.get("predecessors", []):
for pred_data in task_d.predecessors:
pred_ids = predecessors.get(task, [])
pred_ids.append(pred_id)
pred_ids.append(pred_data)
predecessors[task] = pred_ids
for task in project.getTasks():
for pred_id in predecessors.get(task, []):
pred = tasks_by_id[pred_id]
task.addPredecessor(Relation.Builder().predecessorTask(pred))
for pred_data in predecessors.get(task, []):
pred_id = pred_data.id
pred_task = tasks_by_id[pred_id]
type = RELATION_MAP[pred_data.type]
# the lag value has already been parsed/translated by pydantic into a timedelta
if lag := pred_data.lag:
lag_days = lag.days # note that this truncates to nearest day
else:
lag_days = 0
lag = Duration.getInstance(lag_days, TimeUnit.DAYS)
task.addPredecessor(
Relation.Builder().predecessorTask(pred_task).lag(lag).type(type)
)
return project
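
The lag handling in the predecessor loop above truncates to whole days via timedelta.days; if sub-day lead/lag matters, the same block could keep fractional days instead (a sketch, assuming MPXJ's Duration.getInstance also accepts a float value):

if pred_data.lag:
    lag_days = pred_data.lag.total_seconds() / 86400.0  # preserves sign and fractions of a day
else:
    lag_days = 0.0
lag = Duration.getInstance(lag_days, TimeUnit.DAYS)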