Add schedule processing capabilities and cleanups.

Fixes a few config issues and warnings
Removes double-import of GDC
Adds mpxj dependency for scheduling
Nick Touran 2025-11-28 14:49:50 -05:00
parent 56a79e3562
commit 7d09eaecb7
10 changed files with 388 additions and 8 deletions

View file

@@ -34,6 +34,7 @@ extensions = [
    "sphinx_needs",
    "myst_parser",
    "sphinxcontrib.bibtex",
+   "sphinx.ext.todo",
    "sphinxcontrib.glossaryused",
    "sphinx.ext.imgmath",
    "sphinxcontrib.datatemplates",
@@ -42,6 +43,7 @@ extensions = [
    # "sphinx.ext.imgconverter",  # SVG to png but rasterizes and bad
    "sphinxcontrib.inkscapeconverter",  # SVG to pdf without rasterizing
    "sphinx_timeline",
+   "nrsk.schedule.load_schedule",
]

# Add any paths that contain templates here, relative to this directory.
@@ -50,7 +52,7 @@ templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
+exclude_patterns = ["generated_assets"]

# -- Options for HTML output -------------------------------------------------
@@ -126,6 +128,7 @@ rst_prolog = f"""
# will need to move relevant refs somewhere
# more reusable, like into the repo?
bibtex_bibfiles = ["/pool/Reading/refs.bib"]
+bibtex_cache = "none"  # refresh without wiping build
mermaid_cmd = "./node_modules/.bin/mmdc"

# enable pdf cropping of mermaid diagrams for fit
@@ -134,3 +137,4 @@ mermaid_version = "10.6.1"

# Sphinx Needs config
needs_include_needs = True  # turn off to hide all needs (e.g. for working docs)
+needs_extra_options = ["basis"]

View file

@@ -9,6 +9,7 @@
   purpose/index
   organization/index
   procedures/index
+  project/index
   bibliography
   requirements/index
   glossary

View file

@@ -22,7 +22,7 @@ Systems
Documents and records are managed in the following systems.

.. datatemplate:yaml::
-   :source: /_data/systems.yaml
+   :source: /_data/it-systems.yaml

   {{ make_list_table_from_mappings(
      [('Name', 'name'), ('Use case(s)', 'use-cases'), ('Location', 'location')],

View file

@@ -1,6 +1,6 @@
Writer's Guide
--------------

-See :ref:`meyerProcedureWriterGuide1993`
-See :ref:`wieringaProcedureWritingDomains1991`.
+See :cite:p:`meyerProcedureWritersGuide1993`
+See :cite:p:`wieringaProcedureWritingDomains1991`.

View file

@@ -2,3 +2,10 @@
calibrations during commissioning,
work during operations. This would be where we could be like, "Hey don't forget
to include impurities in shielding calculations"
+
+.. toctree::
+   :glob:
+   :maxdepth: 2
+
+   *

View file

@@ -0,0 +1,4 @@
Project Schedule
################

.. schedule:: _data/schedule.yaml
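
The directive above is implemented by the extension added later in this commit. As a rough sketch of what it does under the hood, the same pipeline can be driven by hand; the module path is assumed from the conf.py extensions entry, and the functions are the ones defined in the new load_schedule module:

    from nrsk.schedule.load_schedule import load_from_yaml, plot_schedule, solve_schedule

    proj = load_from_yaml(fname="_data/schedule.yaml")
    solve_schedule(proj)  # CPM pass fills in start/finish dates for every task
    plot_schedule(project=proj, output_fname="schedule.svg")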

View file

@@ -26,7 +26,7 @@ and `RG 1.232 <https://www.nrc.gov/docs/ML1732/ML17325A611.pdf>`_.
   :filter: id.startswith("R_GDC")
   :columns: id

-.. include:: /../generated_assets/10-cfr-50-app-a-list.rst
+.. include:: /generated_assets/10-cfr-50-app-a-list.rst

Appendix B

@@ -43,6 +43,6 @@ Appendix B

.. :basis: Flowed down

-.. needimport:: /../generated_assets/10-cfr-50-app-b.json
+.. needimport:: /generated_assets/10-cfr-50-app-b.json
   :collapse: true
   :tags: ["quality"]

View file

@@ -28,6 +28,10 @@ dependencies = [
    "sphinxcontrib-mermaid",
    "sphinxcontrib-svg2pdfconverter",
    "sphinx-timeline",
+   "mpxj",
+   "matplotlib",
+   "pandas",
+   "jpype1",
]
classifiers = [
    "Programming Language :: Python :: 3",

@@ -48,6 +52,11 @@ dev = [
test = [
]

+[build-system]
+requires = ["setuptools>=61", "wheel"]
+build-backend = "setuptools.build_meta"
+
[tool.ruff]
fix = true

View file

@@ -0,0 +1,355 @@
"""Load schedule data from YAML.
This uses a library to parse schedule data and
can then plot it and/or export it to different formats
such as MS Project and Primavera P6.
"""
import logging
import os
import re
from datetime import datetime
import jpype
import jpype.imports
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
import yaml
from docutils import nodes
from docutils.parsers.rst import Directive
from sphinx.util.osutil import ensuredir
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# Start JVM with MPXJ jar (hardcoded path; guard against a second start)
if not jpype.isJVMStarted():
    jpype.startJVM(classpath=["/home/nick/repos/mpxj/mpxj-lib/*"])
from java.io import File  # noqa: E402
from java.time import LocalDateTime # noqa: E402
from org.mpxj import (  # noqa: E402
    Duration,
    ProjectFile,
    Relation,
    TimeUnit,
)
from org.mpxj.cpm import MicrosoftScheduler  # noqa: E402
from org.mpxj.reader import UniversalProjectReader  # noqa: E402
from org.mpxj.writer import ( # noqa:E402
FileFormat,
UniversalProjectWriter,
)
def create_task(parent, name, duration):
"""Make a planned task."""
task = parent.addTask()
task.setName(name)
task.setDuration(duration)
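    # zero actual and full remaining duration mark the task as planned, 0% complete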
task.setActualDuration(Duration.getInstance(0, duration.getUnits()))
task.setRemainingDuration(duration)
return task
def load_from_yaml(fname: str = "schedule.yaml") -> ProjectFile:
"""Load data file in YAML format."""
with open(fname) as f:
data = yaml.safe_load(f)
project = ProjectFile()
calendar = project.addDefaultBaseCalendar()
project.setDefaultCalendar(calendar)
predecessors = {}
tasks_by_id = {}
for lane in data["lanes"]:
summary = project.addTask()
summary.setName(lane["name"])
for task_d in lane["tasks"]:
if task_d.get("milestone"):
task = create_task(
summary, task_d["name"], Duration.getInstance(0, TimeUnit.DAYS)
)
else:
if not task_d.get("duration_days"):
raise ValueError(f"{task_d} needs a duration")
task = create_task(
summary,
task_d["name"],
Duration.getInstance(task_d["duration_days"], TimeUnit.DAYS),
)
# track predecessors by ID to build after all tasks exist
if tid := task_d.get("id"):
tasks_by_id[tid] = task
for pred_id in task_d.get("predecessors", []):
pred_ids = predecessors.get(task, [])
pred_ids.append(pred_id)
predecessors[task] = pred_ids
for task in project.getTasks():
for pred_id in predecessors.get(task, []):
pred = tasks_by_id[pred_id]
task.addPredecessor(Relation.Builder().predecessorTask(pred))
return project
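# Sketch of the YAML shape this loader expects, inferred from the keys read
# above (lanes, name, tasks, milestone, duration_days, id, predecessors);
# the lane and task names here are hypothetical:
#
#   lanes:
#     - name: Licensing
#       tasks:
#         - name: Draft application
#           id: draft
#           duration_days: 30
#         - name: Submit application
#           milestone: true
#           predecessors: [draft]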
def solve_schedule(project: ProjectFile, start: datetime = None):
"""Solve schedule using MS Project CPM method.
This takes tasks with just starts and durations and predecessors and adds
start/end dates to all.
"""
logger.info("Solving Schedule...")
if start is None:
start = datetime(2026, 1, 1, 8, 0)
start_js = _localdatetime_from_python(start)
scheduler = MicrosoftScheduler()
scheduler.schedule(project, start_js)
for task in project.getTasks():
logger.info(
f"{str(task.getWBS()):4s} {str(task.getName()):50s} "
f"{str(task.getStart()):20s} "
f"{str(task.getFinish()):20s}"
)
def export_schedule(project, fname: str = "scheduled.xml"):
"""Export a schedule in MSPDI XML format."""
logger.info("Exporting schedule to %s", fname)
writer = UniversalProjectWriter(FileFormat.MSPDI)
writer.write(project, fname)
def _localdatetime_to_python(ldt):
if ldt is None:
return None
return datetime(
ldt.getYear(),
ldt.getMonthValue(),
ldt.getDayOfMonth(),
ldt.getHour(),
ldt.getMinute(),
ldt.getSecond(),
)
def _localdatetime_from_python(dt: datetime) -> LocalDateTime:
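    # java.time.LocalDateTime.of() takes nanoseconds, hence microsecond * 1000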
return LocalDateTime.of(
dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond * 1000
)
def _preprocess_plot(project):
"""Read schedule and map to python data for plotting."""
tasks = []
for t in project.getTasks():
start = _localdatetime_to_python(t.getStart())
finish = _localdatetime_to_python(t.getFinish())
if t.getName() and start and finish:
duration_days = (finish - start).total_seconds() / (60 * 60 * 24)
tasks.append(
{
"id": t.getID(),
"name": t.getName(),
"wbs": t.getWBS(),
"start": start,
"finish": finish,
"duration": duration_days,
"is_milestone": t.getDuration().getDuration() == 0,
"is_category": bool(re.search(r"^\d+$", str(t.getWBS()))),
"is_critical": t.getCritical(),
}
)
df = pd.DataFrame(tasks)
deps = []
for succ_task in project.getTasks():
for rel in succ_task.getPredecessors():
pred_task = rel.getPredecessorTask()
deps.append(
{
"pred": pred_task.getID(),
"succ": succ_task.getID(),
"type": int(rel.getType().getValue()), # 1=FS, 2=SS, 3=FF, 4=SF
}
)
df_deps = pd.DataFrame(deps)
return df, df_deps
def plot_schedule(
input_fname: str = "scheduled.xml", project=None, output_fname: str = "schedule.svg"
):
"""Generate plot of schedule."""
if project is None:
project = UniversalProjectReader().read(File(input_fname))
df, df_deps = _preprocess_plot(project)
# Plot Gantt chart
fig, ax = plt.subplots(figsize=(16, 8), dpi=300)
for i, row in df.iterrows():
if row["is_category"]:
# skip top levels
continue
if row["is_milestone"]:
ax.scatter(row["start"], i, marker="D", s=80, color="tab:purple")
ax.text(row["start"], i, f" {row['name']}", va="center")
else:
color = "tab:red" if row["is_critical"] else "tab:blue"
ax.barh(i, row["duration"], left=row["start"], height=0.35, color=color)
ax.text(row["finish"], i, f" {row['name']}", va="center")
# Draw dependency arrows
for _, d in df_deps.iterrows():
try:
y1 = df.index[df["id"] == d["pred"]][0]
y2 = df.index[df["id"] == d["succ"]][0]
pred_task = df.iloc[y1]
succ_task = df.iloc[y2]
# determine arrow start/end based on type
typ = d["type"]
if typ == 1: # FS
x1 = pred_task["finish"]
x2 = succ_task["start"]
elif typ == 2: # SS
x1 = pred_task["start"]
x2 = succ_task["start"]
elif typ == 3: # FF
x1 = pred_task["finish"]
x2 = succ_task["finish"]
elif typ == 4: # SF
x1 = pred_task["start"]
x2 = succ_task["finish"]
else:
continue
ax.annotate(
"",
xy=(x2, y2),
xytext=(x1, y1),
arrowprops=dict(arrowstyle="->", lw=0.8),
)
except IndexError:
continue
ax.set_yticks(range(len(df)))
ax.set_yticklabels(df["wbs"], ha="left")
ax.invert_yaxis()
    # set padding for left-aligned labels; draw first so label extents are valid
    fig.canvas.draw()
    yax = ax.get_yaxis()
    pad = max(T.label1.get_window_extent().width for T in yax.majorTicks)
    yax.set_tick_params(pad=pad)
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
ax.tick_params(axis="x", labelsize=8, labelrotation=90)
plt.subplots_adjust(left=0.045, right=0.9)
plt.title("AMS High-Level Schedule")
# plt.tight_layout()
plt.savefig(output_fname)
    plt.close(fig)  # don't pop an interactive window during a Sphinx build
class ScheduleDirective(Directive):
"""Sphinx directive to insert image to generated schedule."""
required_arguments = 1
has_content = False
def run(self): # noqa: D102
env = self.state.document.settings.env
builder = env.app.builder
schedule_data = self.arguments[0]
schedule_data_abs = os.path.join(env.srcdir, schedule_data)
if not os.path.exists(schedule_data_abs):
logger.error(f"Schedule file not found: {schedule_data_abs}")
return []
# Image output directory
gen_dir = os.path.join(env.app.srcdir, "generated_assets")
ensuredir(gen_dir)
ensuredir(os.path.join(env.app.outdir, "_downloads"))
# Name of the generated file
base = os.path.splitext(os.path.basename(schedule_data))[0]
out_image = os.path.join(gen_dir, f"{base}.svg")
start_date = datetime(2026, 1, 1)
        proj = load_from_yaml(fname=schedule_data_abs)
solve_schedule(proj, start_date)
plot_schedule(project=proj, output_fname=out_image)
writer = UniversalProjectWriter(FileFormat.MSPDI)
        writer.write(proj, os.path.join(env.app.outdir, "_downloads", f"{base}_mspdi.xml"))
env.note_dependency(schedule_data_abs)
rel = str(os.path.relpath(out_image, env.app.srcdir))
# trying to mock /generated_assets/schedule.svg for the build folder
# but it ends up in _images actually.
# somewhat hacky but works in subfolders
abs_rel = os.path.join("/", rel)
image_node = nodes.image(uri=abs_rel)
uri = builder.get_relative_uri(env.docname, "_images/" + f"{base}.svg")
uri = uri.replace(".html", "")
ref_node = nodes.reference("", "", refuri=uri)
ref_node += image_node
ref_node["target"] = "_blank"
ref_node["rel"] = "noopener"
uri_dl1 = builder.get_relative_uri(
env.docname, "_downloads/" + f"{base}_mspdi.xml"
)
uri_dl1 = uri_dl1.replace(".html", "")
download1 = nodes.reference(
text="Download schedule in MS Project XML format",
refuri=uri_dl1,
classes=["download-link"],
)
paragraph = nodes.paragraph()
paragraph += ref_node
paragraph += download1
return [paragraph]
def setup(app):
"""Setup for sphinx extension."""
app.add_directive("schedule", ScheduleDirective)
return {
"version": "0.1",
"parallel_read_safe": True,
"parallel_write_safe": True,
}
if __name__ == "__main__":
proj = load_from_yaml(fname="documents/_data/schedule.yaml")
solve_schedule(proj)
export_schedule(proj)