Lots more Document data definitions

Schedule updates:

* Defined Schedule types
* Updated schedule loader to validate with pydantic
* Added ability to specify predecessor type and lead/lag

Other structural/outline stuff as well

Oh and added a unit test.
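
As a rough illustration of the new schedule input format (a sketch, not the commit's unit test; it assumes UUID_PK accepts an explicit UUID string, and the lane/task names here are made up):

import uuid
from nrsk.models import ScheduleInput

candidate = [
    {
        "name": "Licensing",
        "tasks": [
            {
                "uuid": str(uuid.uuid4()),
                "name": "Submit application",
                "id": "LIC-1",
                "duration_days": 10,  # converted to a timedelta by the model validator
            },
            {
                "uuid": str(uuid.uuid4()),
                "name": "Regulator review",
                "id": "LIC-2",
                "duration_days": 30,
                "predecessors": [
                    {
                        "uuid": str(uuid.uuid4()),
                        "id": "LIC-1",
                        "type": "SS",  # start-to-start
                        "lag": "P5D",  # 5-day lag; the model also allows negative lag (lead time)
                    }
                ],
            },
        ],
    }
]

lanes = ScheduleInput.validate_python(candidate)
assert lanes[0].tasks[1].predecessors[0].type == "SS"
assert lanes[0].tasks[1].duration.days == 30
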
Nick Touran 2025-12-19 14:15:07 -05:00
parent 36fcb5f260
commit 373dfe4c3b
16 changed files with 535 additions and 67 deletions

View file

@@ -37,20 +37,25 @@ This is the official Data Dictionary discussed in :ref:`the Information
Management Plan <info-mgmt-data-dict>`.
"""
from __future__ import annotations # allow lookahead annotation
import re
import uuid
from datetime import date, datetime
from typing import Annotated, Optional
from datetime import datetime, timedelta
from enum import StrEnum
from typing import Annotated, Any
from pydantic import (
AnyUrl,
BaseModel,
EmailStr,
Field,
FieldValidationInfo,
PositiveInt,
TypeAdapter,
ValidationError,
computed_field,
field_validator,
model_validator,
)
ALL_CAPS = re.compile("^[A-Z]+$")
@@ -61,7 +66,6 @@ UUID_PK = Annotated[
description="The unique ID of this object. Used as a primary key in the database.",
examples=["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
frozen=True,
primary_key=True,
),
]
@@ -72,10 +76,13 @@ class User(BaseModel):
uuid: UUID_PK
given_name: str
family_name: str
preferred_name: Optional[str] = None
preferred_name: str | None = None
previous_name: str | None = None
email: EmailStr
joined_on: Optional[datetime]
deactivated_on: Optional[datetime]
joined_on: datetime | None
deactivated_on: datetime | None
organization: str | None
title: str | None
class OpenItem(BaseModel):
@@ -83,7 +90,7 @@ class OpenItem(BaseModel):
name: str
status: str
created_on: datetime
closed_on: Optional[datetime] = None
closed_on: datetime | None = None
class SSC(BaseModel):
@@ -100,7 +107,7 @@ class SSC(BaseModel):
uuid: UUID_PK
name: str
pbs_code: Optional[str] = Field(
pbs_code: str | None = Field(
description="An integer sequence that determines the 'system number' and also the ordering in printouts",
examples=["1.2.3", "20.5.11"],
default="",
@@ -115,22 +122,22 @@ class SSC(BaseModel):
abbrev: str = Field(
description="A human-friendly abbreviation uniquely defining the system"
)
parent: Optional["SSC"] = None
functions: Optional[list[str]] = Field(
parent: SSC | None = None
functions: list[str] | None = Field(
description="Functions of this system", default=None
)
@field_validator("abbrev")
@field_validator("abbrev", mode="after")
@classmethod
def abbrev_must_be_all_caps(cls, v: str, info: FieldValidationInfo) -> str: # noqa: D102
assert re.match(ALL_CAPS, v), f"{info.field_name} must be all CAPS"
def abbrev_must_be_all_caps(cls, v: str) -> str: # noqa: D102
if not re.match(ALL_CAPS, v):
raise ValueError(f"{v} must be all CAPS")
return v
@field_validator("pbs_code")
@field_validator("pbs_code", mode="after")
@classmethod
def pbs_must_be_int_sequence(cls, v: str, info: FieldValidationInfo) -> str: # noqa: D102
assert not v or re.match(r"^(\d+\.?)+$", v), (
f"{info.field_name} must be an integer sequence, like 1.2.3"
)
def pbs_must_be_int_sequence(cls, v: str) -> str: # noqa: D102
if v and not re.match(r"^(\d+\.?)+$", v):
raise ValueError(f"{v} must be an integer sequence, like 1.2.3")
return v
class SystemsList(BaseModel):
@@ -158,7 +165,7 @@ class ParamDef(BaseModel):
description: str = Field(
description="Detailed description of what parameters of this type represent"
)
valid_units: Optional[list[str]] = Field(
valid_units: list[str] | None = Field(
description="List of units allowed", examples=["MW", "W", "shp"], default=None
)
@@ -166,10 +173,11 @@ class ParamDef(BaseModel):
class ParamVal(BaseModel):
"""A particular value of a Parameter, assigned to a particular SSC."""
uuid: UUID_PK
ssc: SSC
pdef: ParamDef
value: str
units: Optional[str] = None
units: str | None = None
pedigree: str = Field(
description="Indication of how well it is known (rough estimate, final design, as-built)."
)
@@ -182,7 +190,7 @@ class ITSystem(BaseModel):
uuid: UUID_PK
name: str
vendor: str
version: Optional[str] = None
version: str | None = None
use_cases: list[str] = Field(
description="One or more use cases this system is used for.",
examples=[
@@ -192,37 +200,313 @@ class ITSystem(BaseModel):
],
)
physical_location: str = Field(description="Where the system is physically located")
url: Optional[AnyUrl] = Field(description="Full URL to the system", default=None)
custodian: Optional[User] = Field(
url: AnyUrl | None = Field(description="Full URL to the system", default=None)
custodian: User | None = Field(
description="Person currently in charge of system", default=None
)
launched_on: Optional[datetime] = None
retired_on: Optional[datetime] = None
launched_on: datetime | None = None
retired_on: datetime | None = None
quality_related: bool
class Document(BaseModel):
"""
Data dictionary entry for Documents and Records.
Document data is designed to satisfy the needs defined in :ref:`rmdc-proc`.
See Also
--------
* Some of the field definitions come from CFIHOS
https://www.jip36-cfihos.org/wp-content/uploads/2023/08/v.1.5.1-CFIHOS-Specification-Document-1.docx
* ISO-19650 has different Status Codes defining suitability level (for information, as-built)
https://ukbimframework.org/wp-content/uploads/2020/05/ISO19650-2Edition4.pdf
"""
class STATUS(StrEnum):
"""Document Status options."""
# Much of the wording here comes from cloverDocumentControlRecords2010.
# NOTE: if you add or remove a status, be sure to also update the
# category property below AND :ref:`rmdc-doc-status`!
## Not Yet Approved:
RESERVED = "RESERVED"
"""
A Document ID has been assigned, but the document is in development or
has not yet been started (default).
"""
IN_PROGRESS = "IN PROGRESS"
"""One or more authors are creating or revising the document."""
IN_REVIEW = "IN REVIEW"
"""A completed draft of the document has been submitted and is pending review."""
REJECTED = "REJECTED"
"""A draft that was rejected by the review team and may be revised and resubmitted."""
AUTHORIZED = "AUTHORIZED"
"""A controlled revision that has been signed but is not yet effective.
Such documents may be used for training, etc. Documents with this status may
be used for plant modifications in a work package, but not for normal operations."""
REFERENCE = "REFERENCE"
"""Document is stored in EDMS for ease of access and reference, but
there is no assertion that the information is the latest available.
Useful for Standards, engineering handbook excerpts, vendor notices."""
NATIVE = "NATIVE"
"""A document file that may be in EDMS in the native file format. Not
used in the field because they (a) may require special software to view
and (b) may not be controlled for field use (i.e. not quarantined if
errors are discovered)."""
## Approved:
APPROVED = "APPROVED"
"""A document revision that has been submitted by the releasing
organization and that is authorized for the use case defined in
the suitability code.
* A drawing with this status during operation reflects the plant configuration
* A drawing with this status before or during construction reflects that it is
ready to be fabricated/built
* A procedure with this status is effective.
"""
## No longer Approved:
QUARANTINED = "QUARANTINED"
"""(On hold, Suspended) A document revision that was previously
authorized and has been placed on hold, e.g. a procedure that cannot be
performed as written or a design that is known to have pending changes."""
SUPERSEDED = "SUPERSEDED"
"""A document that has been replaced by another document. The new
document is to be recorded in the index."""
REVISED = "REVISED"
"""A document that has been replaced by a subsequent revision of that
document."""
VOIDED = "VOIDED"
"""A document or revision that is no longer needed and there is no
revision or superseding document. This would also be used for documents
that have reached a predetermined expiration date, such as a temporary
procedure."""
CLOSED = "CLOSED"
"""(Archived) A document for which the work has been completed."""
@property
def category(self) -> str:
"""High-level status category: Not yet approved, Approved, or No Longer Approved."""
if self.value in {
self.RESERVED,
self.IN_PROGRESS,
self.IN_REVIEW,
self.REJECTED,
self.AUTHORIZED,
self.REFERENCE,
self.NATIVE,
}:
return "Not Yet Approved"
if self.value in {self.APPROVED}:
return "Approved"
return "No Longer Approved"
class USAGE(StrEnum):
"""Usage options.
Usage governs what use cases a document may be used for. It is a notion
derived from the ISO 19650 'suitability' idea, but used in combination
with the NIRMA status codes. It allows a document to be approved for
e.g. a conceptual design stage without letting it inadvertently be
released for bid or manufacture. Releasing organizations can update the
suitability as needed.
See https://ukbimframework.org/wp-content/uploads/2020/09/Guidance-Part-C_Facilitating-the-common-data-environment-workflow-and-technical-solutions_Edition-1.pdf
"""
FOR_INFORMATION = "FOR INFORMATION"
"""A document revision that may be used for information only, not for
any contractual purpose."""
FOR_STAGE_APPROVAL = "FOR STAGE APPROVAL"
"""A document revision that is considered complete for the contractual stage in
which it was created. For example, in a Preliminary Design phase, this
usage would indicate that it is at the expected usage level for
preliminary design. Most design-phase documents that are not yet ready
for bid or construction will be marked for this usage."""
FOR_BID = "FOR BID"
"""A document revision that is ready to be sent to external parties for bid.
During the bid process, changes may be expected based on vendor feedback."""
FOR_CONSTRUCTION = "FOR CONSTRUCTION"
"""A document revision that is ready to be sent to the field for manufacture,
fabrication, construction. An approved document with this usage implies
that all the quality, regulatory, and design aspects are in place, and
that work can proceed. However, what is constructed is not yet
authorized for operation."""
FOR_OPERATION = "FOR OPERATION"
"""A document revision that can be used to operate the business and/or plant.
Procedures of this usage may be used to do work or operate equipment."""
AS_BUILT = "AS BUILT"
"""A document revision that is an as-built record of construction or manufacture.
Documents of this usage may be used to operate the plant."""
class RETENTION(StrEnum):
"""Retention plan options.
Retention plans define how long the document or record is to be
kept before it is destroyed.
"""
LIFETIME = "LIFETIME"
"""Lifetime of the plant."""
uuid: UUID_PK
number: str = Field(
description="The identification number meeting the document numbering rules",
)
title: str = Field(
description="Descriptive title explaining the contents",
examples=["CNSG Development and Status 1966-1977"],
)
"""
.. impl:: Document title
Document titles are implemented as the ``title`` field of the Document data model.
"""
revision: str = Field(
description="Revision number",
description="Revision code",
examples=["0", "1", "1a", "A"],
)
originating_organization: str
originator_number: str | None = Field(
description="The originating organization's document number (if originated externally).",
default=None,
)
originator_revision: str | None = Field(
description="The originating organization's revision code (if originated externally).",
default=None,
)
type: str
originators: list[str]
status: str
revision_authors: list[str] | None
revision_reviewers: list[str] | None
revision_approvers: list[str] | None
revision_comment: str = Field(
description="Explanation of what changed in this revision", default=""
)
status: STATUS = STATUS.RESERVED
usage: USAGE = USAGE.FOR_INFORMATION
retention_plan: RETENTION = RETENTION.LIFETIME
restriction_codes: str = Field(
description="Markings for export control, legal, etc.", default=""
)
@field_validator("type")
actual_reviewed_date: datetime | None = None
actual_approved_date: datetime | None = None
# filenames may be empty at first, i.e. for RESERVED docs
filenames: list[str] = Field(
description="Filenames of files attached to this Document. Main file should be the first.",
default=[],
)
checksums: list[str] = Field(
description="SHA-256 checksum of each file for data integrity", default=[]
)
"""Checksums are used to verify long-term data integrity against tampering
and data degradation. While BLAKE3 checksums are faster, SHA-256 is more standard
and built-in at this point. In the future, switching to BLAKE3 may make sense for
easier periodic re-verification of large data libraries."""
physical_location: str | None = Field(
description="Location of a media when not stored as an electronic file.",
default=None,
)
@field_validator("type", mode="after")
@classmethod
def type_must_be_valid(cls, v: str, info: FieldValidationInfo) -> str:
assert v in ["CALC", "PROC"], (
f"{info.field_name} must be within the list of doctypes"
)
def type_must_be_valid(cls, v: str) -> str:
if v not in ["CALC", "PROC"]:
raise ValueError(f"{v} must be within the list of doctypes")
return v
@computed_field
@property
def status_category(self) -> str:
"""The top-level status category, derived from Document Status"""
return self.status.category
@model_validator(mode="after")
def cant_have_electronic_and_physical_location(self) -> "Document": # noqa: D102
has_physical_location = self.physical_location is not None
has_file = bool(self.filenames)  # default is [], so check for non-empty rather than None
if has_physical_location and has_file:
raise ValueError(
"Cannot provide both physical_location and filename(s). They are mutually exclusive."
)
return self
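
The checksums field above stores one SHA-256 digest per attached file; a minimal sketch of producing one with the standard library (the helper name is arbitrary, not from the repo):

import hashlib
from pathlib import Path

def sha256_of(path: Path) -> str:
    """Return the SHA-256 hex digest of a file, read in 1 MiB chunks."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

# e.g. doc.checksums = [sha256_of(Path(name)) for name in doc.filenames]
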
class PredecessorTask(BaseModel):
"""Link to a predecessor task."""
class PRED_TYPE(StrEnum): # noqa: N801
"""Predecessor relationship type."""
FS = "FS"
"""Finish-to-start: predecessor finishes before successor starts (very common)"""
FF = "FF"
"""Finish-to-finish: predecessor finishes before successor can finish"""
SS = "SS"
"""Start-to-start: predecessor starts before successor starts"""
SF = "SF"
"""Start-to-finish: predecessor starts before successor finishes (uncommon, maybe shift change)"""
uuid: UUID_PK
id: str
"""ID of the predecessor task."""
type: PRED_TYPE = PRED_TYPE.FS
lag: timedelta | None = Field(
description="Lag time. Negative timedelta implies negative lag "
"(lead time, starts before predecessor ends)",
default=None,
)
class ScheduledTask(BaseModel):
"""Scheduled task, e.g. in P6."""
uuid: UUID_PK
name: str
id: str | None = None
is_milestone: bool = False
predecessors: list[PredecessorTask] = []
duration: timedelta | None = None
actual_start: datetime | None = None
actual_end: datetime | None = None
scheduled_start: datetime | None = None
scheduled_end: datetime | None = None
@model_validator(mode="before")
@classmethod
def convert_days_to_duration(cls, data: Any) -> Any:
"""Allow input of duration_days, but convert on way in."""
if isinstance(data, dict):
days = data.get("duration_days")
if days is not None:
data["duration"] = timedelta(days=float(days))
del data["duration_days"]
return data
class ScheduleLane(BaseModel):
"""A section of a schedule."""
name: str
color: str | None = None
tasks: list[ScheduledTask]
ScheduleInput = TypeAdapter(list[ScheduleLane])
"""A list of lanes, representing full schedule input."""

View file

@@ -63,4 +63,35 @@ def load_yaml_tree(root_path: str | Path) -> dict:
if parts and not current:
current.update({"name": parts[-1]})
logger.info(f"{current}")
assign_hierarchical_code(tree)
return tree
def assign_hierarchical_code(data, current_path=""):
"""
Traverses a nested dictionary and adds a 'pbs_code' key to every
dictionary at any level of nesting, containing its hierarchical path.
The dictionary is modified in place.
Args:
data (dict): The dictionary to traverse and modify.
current_path (str): The current hierarchical path string (e.g., "1.", "2.3.").
"""
if not isinstance(data, dict):
return
item_counter = 1
keys_to_process = list(data.keys())
for key in keys_to_process:
value = data[key]
# e.g., if current_path="1.", the next item's number is "1.1"
new_path = f"{current_path}{item_counter}"
if isinstance(value, dict):
value["pbs_code"] = new_path
assign_hierarchical_code(value, new_path + ".")
item_counter += 1
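
For illustration, a hypothetical two-level tree (not from the repo) and the pbs_code values it would receive:

plant = {
    "reactor": {"coolant": {}, "fuel": {}},
    "turbine": {},
}
assign_hierarchical_code(plant)
assert plant["reactor"]["pbs_code"] == "1"
assert plant["reactor"]["coolant"]["pbs_code"] == "1.1"
assert plant["reactor"]["fuel"]["pbs_code"] == "1.2"
assert plant["turbine"]["pbs_code"] == "2"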

View file

@@ -108,11 +108,12 @@ class PlantBreakdownStructureTable(Table):
def add_row(self, tbody, columns, path, value, depth):
"""Add a row to the table."""
row = nodes.row()
indent = "\u2003" * depth * 2  # em spaces for indentation
indent1 = "\u2003" * depth  # em spaces for indentation
indent2 = "\u2003" * depth * 2
cols = []
cols.append(path) # path
cols.append(value.get("name", "(noname)") * 10)
cols.append(indent1 + value.get("pbs_code", ""))
cols.append(indent2 + value.get("name", "(noname)"))
cols.append(value.get("abbrev", ""))
cols.append(value.get("desc", ""))
cols.append(value.get("tags", ""))

View file

@@ -0,0 +1,42 @@
"""Load plant PBS data."""
from collections import defaultdict
from pathlib import Path
from ruamel.yaml import YAML
def load_yaml_tree(root_path: str | Path) -> dict:
"""Load data from yaml tree."""
root_path = Path(root_path)
yaml = YAML(typ="safe")
tree = {}
for yaml_file in sorted(root_path.rglob("*.yaml")):
rel = yaml_file.relative_to(root_path).with_suffix("") # remove .yaml
parts = (
rel.parent.parts if rel.name == "index" else (*rel.parent.parts, rel.name)
)
current = tree
for part in parts[:-1]:
if part not in current:
current[part] = {}
current = current[part]
key = parts[-1]
data = yaml.load(yaml_file)
if key == "index": # treat index.yaml as folder metadata
current.update(data or {})
else:
if (
current.get(key) is not None
and isinstance(current[key], dict)
and isinstance(data, dict)
):
current[key].update(data) # merge if conflict
else:
current[key] = data
return tree
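
As a sketch of the resulting structure (hypothetical files, not from the repo):

# Given a layout like
#   pbs/reactor/index.yaml    -> {"name": "Reactor", "abbrev": "RX"}
#   pbs/reactor/coolant.yaml  -> {"name": "Coolant System", "abbrev": "CS"}
# load_yaml_tree("pbs") yields
#   {"reactor": {"coolant": {"name": "Coolant System", "abbrev": "CS"},
#                "name": "Reactor", "abbrev": "RX"}}
# i.e. each folder's index.yaml is merged into that folder's dict, and every
# other YAML file becomes a child dict keyed by its filename.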

View file

@@ -12,6 +12,7 @@ from datetime import datetime
from pathlib import Path
import jpype
import jpype.imports # required though not explicitly 'used'
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
@@ -33,6 +34,7 @@ from org.mpxj import ( # noqa: E402
Duration,
ProjectFile,
Relation,
RelationType,
TimeUnit,
)
from org.mpxj.cpm import MicrosoftScheduler, PrimaveraScheduler # noqa: E402
@@ -42,6 +44,16 @@ from org.mpxj.writer import ( # noqa:E402
UniversalProjectWriter,
)
from nrsk.models import PredecessorTask, ScheduleInput
_PT = PredecessorTask.PRED_TYPE
RELATION_MAP = {
_PT.FF: RelationType.FINISH_FINISH,
_PT.FS: RelationType.FINISH_START,
_PT.SS: RelationType.START_START,
_PT.SF: RelationType.START_FINISH,
}
def create_task(parent, name, duration):
"""Make a planned task."""
@@ -58,6 +70,7 @@ def load_from_yaml(fname: str = "schedule.yaml") -> ProjectFile:
"""Load data file in YAML format."""
with open(fname) as f:
data = yaml.safe_load(f)
data = ScheduleInput.validate_python(data)
project = ProjectFile()
@@ -67,35 +80,43 @@ def load_from_yaml(fname: str = "schedule.yaml") -> ProjectFile:
predecessors = {}
tasks_by_id = {}
for lane in data["lanes"]:
for lane in data:
summary = project.addTask()
summary.setName(lane["name"])
summary.setName(lane.name)
for task_d in lane["tasks"]:
if task_d.get("milestone"):
for task_d in lane.tasks:
if task_d.is_milestone:
task = create_task(
summary, task_d["name"], Duration.getInstance(0, TimeUnit.DAYS)
summary, task_d.name, Duration.getInstance(0, TimeUnit.DAYS)
)
else:
if not task_d.get("duration_days"):
raise ValueError(f"{task_d} needs a duration")
task = create_task(
summary,
task_d["name"],
Duration.getInstance(task_d["duration_days"], TimeUnit.DAYS),
task_d.name,
Duration.getInstance(task_d.duration.days, TimeUnit.DAYS),
)
# track predecessors by ID to build after all tasks exist
if tid := task_d.get("id"):
# track predecessors to build after all tasks exist
if tid := task_d.id:
tasks_by_id[tid] = task
for pred_id in task_d.get("predecessors", []):
for pred_data in task_d.predecessors:
pred_ids = predecessors.get(task, [])
pred_ids.append(pred_id)
pred_ids.append(pred_data)
predecessors[task] = pred_ids
for task in project.getTasks():
for pred_id in predecessors.get(task, []):
pred = tasks_by_id[pred_id]
task.addPredecessor(Relation.Builder().predecessorTask(pred))
for pred_data in predecessors.get(task, []):
pred_id = pred_data.id
pred_task = tasks_by_id[pred_id]
type = RELATION_MAP[pred_data.type]
# the lag value has already been parsed/translated by pydantic into a timedelta
if lag := pred_data.lag:
lag_days = lag.days # note that this truncates to nearest day
else:
lag_days = 0
lag = Duration.getInstance(lag_days, TimeUnit.DAYS)
task.addPredecessor(
Relation.Builder().predecessorTask(pred_task).lag(lag).type(type)
)
return project
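
The lag handling in the predecessor loop above truncates to whole days via timedelta.days; if sub-day lead/lag matters, the same block could keep fractional days instead (a sketch, assuming MPXJ's Duration.getInstance also accepts a float value):

if pred_data.lag:
    lag_days = pred_data.lag.total_seconds() / 86400.0  # preserves sign and fractions of a day
else:
    lag_days = 0.0
lag = Duration.getInstance(lag_days, TimeUnit.DAYS)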