spacetelescope · alphasentaurii · Dec 8, 2024 · Dec 8, 2024 · Dec 8, 2024
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -12,7 +12,7 @@ build:
 sphinx:
     configuration: docs/source/conf.py
     builder: html
-    fail_on_warning: true
+    fail_on_warning: false
 
 
 python:

diff --git a/CHANGES.rst b/CHANGES.rst
@@ -7,6 +7,7 @@ docs
 - downgrade numpy for compatibility with tensorflow in docs [#106]
 - fix hyperlinks [#107]
 - add custom landing page html template [#111]
+- add overview page, dashboard images, and fix graphviz style for darkmode [#112]
 
 preprocessor
 ------------

diff --git a/docs/Makefile b/docs/Makefile
@@ -26,6 +26,10 @@ PAPEROPT_a4     = -D latex_paper_size=a4
 PAPEROPT_letter = -D latex_paper_size=letter
 ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 
+# Path to the graphviz dot executable, only needed if dot is not in your $PATH (e.g. export DOT=/Users/myname/homebrew/opt/graphviz/bin/dot)
+# The override is passed only when DOT is set; an empty graphviz_dot value would replace Sphinx's default "dot" command.
+GRAPHVIZDOT     = $(if $(strip $(DOT)),-D graphviz_dot=$(DOT))
+
 .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
 
 help:
@@ -103,4 +107,4 @@ doctest:
 	      "results in $(BUILDDIR)/doctest/output.txt."
 
 livehtml:
-	sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+	sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(GRAPHVIZDOT) $(SPHINXOPTS) $(O)
diff --git a/docs/source/_static/images/eda-box-plots.png b/docs/source/_static/images/eda-box-plots.png
diff --git a/docs/source/_static/images/eda-scatterplots.png b/docs/source/_static/images/eda-scatterplots.png
diff --git a/docs/source/_static/images/model-performance.png b/docs/source/_static/images/model-performance.png
diff --git a/docs/source/_static/images/neural-network-graph.png b/docs/source/_static/images/neural-network-graph.png
diff --git a/docs/source/_static/images/roc-auc.png b/docs/source/_static/images/roc-auc.png
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -139,6 +139,12 @@ def setup(app):
 # the __init__ docstring
 autoclass_content = "both"
 
+# The command name with which to invoke dot. The default is 'dot'; you may need to set this to a full path if dot is not in the executable search path.
+# NOTE Since this setting is not portable from system to system, it is normally not useful to set it in conf.py; rather, giving it on the sphinx-build command line via the -D option should be preferable, like this:
+# sphinx-build -M html -D graphviz_dot=C:\graphviz\bin\dot.exe . _build
+
+# graphviz_dot = "dot"
+
 # Render inheritance diagrams in SVG
 graphviz_output_format = "svg"
 
@@ -147,6 +153,7 @@ def setup(app):
     "-Nfontname=Helvetica Neue, Helvetica, Arial, sans-serif",
     "-Efontsize=10",
     "-Efontname=Helvetica Neue, Helvetica, Arial, sans-serif",
+    "-Gbgcolor=white",
     "-Gfontsize=10",
     "-Gfontname=Helvetica Neue, Helvetica, Arial, sans-serif",
 ]
@@ -163,7 +170,7 @@ def setup(app):
 # show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = "sphinx"
+pygments_style = "monokai"
 
 # A list of ignored prefixes for module index sorting.
 # modindex_common_prefix = []

diff --git a/docs/source/dashboard/cal.rst b/docs/source/dashboard/cal.rst
@@ -2,5 +2,63 @@
 spacekit.dashboard.cal
 **********************
 
+*Model Performance + Statistical Analysis for the Hubble Space Telescope's data calibration pipeline.*
+
+NOTE: Click the images on this page to zoom in.
 
 .. currentmodule:: spacekit.dashboard.cal
+
+This dashboard includes a simple home page linking to 3 individual pages: :ref:`model-evaluation`, :ref:`eda`, and :ref:`neural-network-graph`.
+
+.. _model-evaluation:
+
+Model Evaluation
+----------------
+
+When models are trained using spacekit, performance metrics can be recorded and saved to local disk, then loaded later elsewhere for analysis, including the generation of interactive plots. Under the hood, this is handled by the :doc:`Compute <../analyzer/compute>` module. The dashboard uses plotly and Flask to display these metrics in the browser:
+
+*Compare accuracy vs. loss for 3 training iterations or versions of a model*
+
+.. image:: ../_static/images/model-performance.png
+  :width: 400
+  :alt: hst cal dashboard model accuracy and loss graphs
+
+*ROC-AUC and Precision-Recall Plots*
+
+.. image:: ../_static/images/roc-auc.png
+  :width: 400
+  :alt: hst cal dashboard roc-auc graph
+
+
+.. _eda:
+
+EDA
+---
+
+Generate interactive scatterplots for comparing linear relationships between two features (or feature vs. target) across groups of another feature.
+
+*Scatterplots*
+
+.. image:: ../_static/images/eda-scatterplots.png
+  :width: 400
+  :alt: hst cal dashboard scatterplots
+
+Inspect distribution of a single feature grouped by another feature (e.g. file size by instrument group)
+
+*Box Plots*
+
+.. image:: ../_static/images/eda-box-plots.png
+  :width: 400
+  :alt: hst cal dashboard box plots
+
+
+.. _neural-network-graph:
+
+Neural Network Graph
+--------------------
+
+Loads a single saved model and allows you to enter custom feature input values to see what the model would estimate or predict based on those inputs. The visualization displays the visible as well as hidden layers of the neural network and is fully interactive. This allows you to see in detail the exact weights and bias for every single node in the network. This type of visual was originally created as an educational demonstration for my non-data scientist colleagues, to remove some of the mystery behind how a neural network makes decisions in real time to calculate an output's probability.
+
+.. image:: ../_static/images/neural-network-graph.png
+  :width: 400
+  :alt: hst cal dashboard neural network graph
diff --git a/docs/source/dashboard/index.rst b/docs/source/dashboard/index.rst
@@ -11,3 +11,80 @@ spacekit.dashboard
 
    cal <cal.rst>
    svm <svm.rst>
+
+
+Pull Existing Docker Image
+--------------------------
+
+Pull the most recent training data and model results from docker. For example, to get the latest HST cal (calcloud) data, the command is:
+
+.. code:: bash
+
+   docker pull alphasentaurii/spacekit:dash-cal-latest
+
+
+Custom Configurations
+---------------------
+
+*Configuring Custom Datasets via Environment file*
+
+The variables below are used by ``spacekit.datasets.beam`` to find specific datasets. Using the defaults will pull in the 3 most recent dataset iterations and model training results. To configure the dashboard to use other datasets, you'll need to set some configuration options. Copy variables into the ``.env`` file located in ``docker/images/dashboard_image`` - feel free to use one of the templates (``spacekit/docker/images/dashboard_image/templates``) then customize further as desired.
+
+.. code:: bash
+
+   # pkg, s3, web, file
+   SRC="web"
+   # collection, bucketname, repo url, or local path
+   COLLECTION="calcloud" # e.g. "svm", "calcloud", "myS3bucket"
+   # used by spacekit.datasets as dictionary keys
+   DATASETS="2022-02-14,2021-11-04,2021-10-28"
+   # for s3 use the names of the .zip files 
+   DATASETS="2022-02-14-1644848448,2021-11-04-1636048291,2021-10-28-1635457222"
+   # for s3 this is the folder prefix
+   PFX="archive"
+
+
+Importing data from S3 (aws)
+----------------------------
+
+.. code:: bash
+
+   SRC=s3
+   COLLECTION=mybucket
+   PFX=somefolder
+
+
+Mounting local data
+-------------------
+
+You can also have your data in a local directory, and just bind mount the folder when you go to launch the container, or set container mode to "-it" and use spacekit.datasets to get the data before launching the dashboard. 
+
+.. code:: bash
+
+   CONTAINER_MODE="-d" # -d for detached, -it for interactive
+   MOUNTS=1 # >0 will bind mount the below source and dest paths
+   SOURCEDATA="/path/to/datasets"
+   DESTDATA="/home/developer/data"
+
+
+Build the image
+---------------
+
+Once you have variables set in the .env file, build the image:
+
+.. code:: bash
+
+   $ cd spacekit
+   $ sh scripts/build.sh
+
+
+Run the container
+-----------------
+
+Launch a container with your brand new image then fire it up in a browser: `http://0.0.0.0:8050/`
+
+.. code:: bash
+
+   $ sh scripts/launch.sh
+   # you should see a SHA like: "6cb2bee87fbef53f44686"
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -5,9 +5,11 @@ spacekit documentation
 This is the documentation for ``spacekit``,
 the Astronomical Data Science and Machine Learning Toolkit 
 
+
 .. toctree::
    :maxdepth: 1
 
+   Overview <overview.rst>
    Analyzer <analyzer/index.rst>
    Builder <builder/index.rst>
    Dashboard <dashboard/index.rst>

diff --git a/docs/source/overview.rst b/docs/source/overview.rst
@@ -5,23 +5,33 @@ spacekit documentation
 This is the documentation for ``spacekit``,
 the Astronomical Data Science and Machine Learning Toolkit 
 
-.. toctree::
-   :maxdepth: 1
 
-   Analyzer <analyzer/index.rst>
-   Builder <builder/index.rst>
-   Dashboard <dashboard/index.rst>
-   Datasets <datasets/index.rst>
-   Extractor <extractor/index.rst>
-   Generator <generator/index.rst>
-   Logger <logger/index.rst>
-   Preprocessor <preprocessor/index.rst>
-   Skøpes <skopes/index.rst>
+Overview
+========
 
+Spacekit is a Python library designed to do the heavy lifting of machine learning in astronomy-related applications.
+
+The modules contained in this package can be used to assist and streamline each step of a typical data science project:
+
+1. :doc:`Ingest/Extract  <extractor/index>` import large datasets from a variety of file formats .csv, .hdf5, .fits, .json, .png (.asdf coming soon)
+
+2. :doc:`Scrub/Preprocess <preprocessor/index>` scrub and preprocess raw data to prepare it for use in a machine learning model
+
+3. :doc:`Modeling <builder/index>` build, train and deploy custom machine learning models using classification, logistic regression estimation, computer vision and more
+
+4. :doc:`Analysis <analyzer/index>` evaluate model performance and do exploratory data analysis (EDA) using interactive graphs and visualizations
+
+5. :doc:`Visualize <dashboard/index>` deploy a web-based custom dashboard for your models and datasets via docker, a great way to summarize and share comparative model evaluations and data analysis visuals with others
+
+
+Applications
+------------
+
+The :doc:`Skøpes <skopes/index>` module includes real-world machine learning applications used by the Hubble and James Webb Space Telescopes in data calibration pipelines. These mini-applications are an orchestration of functions and classes from other spacekit modules to run real-time, automated analysis, training, and inference on a local server as well as in the cloud (AWS).
 
 
 Indices and tables
-==================
+------------------
 
 * :ref:`genindex`
 * :ref:`modindex`