diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 68bfdd7..4664e25 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -12,7 +12,7 @@ build: sphinx: configuration: docs/source/conf.py builder: html - fail_on_warning: true + fail_on_warning: false python: diff --git a/CHANGES.rst b/CHANGES.rst index c8abbb4..b756730 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,6 +7,7 @@ docs - downgrade numpy for compatibility with tensorflow in docs [#106] - fix hyperlinks [#107] - add custom landing page html template [#111] +- add overview page, dashboard images, and fix graphviz style for darkmode [#112] preprocessor ------------ diff --git a/docs/Makefile b/docs/Makefile index 8bdd209..3e3a070 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -26,6 +26,10 @@ PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# Specify path to graphviz dot executable if not in your $PATH +# You can set this in your env: export DOT=/Users/myname/homebrew/opt/graphviz/bin/dot +GRAPHVIZDOT = -D graphviz_dot=$(DOT) + .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest help: @@ -103,4 +107,4 @@ doctest: "results in $(BUILDDIR)/doctest/output.txt." 
livehtml: - sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(GRAPHVIZDOT) $(SPHINXOPTS) $(O) diff --git a/docs/source/_static/images/eda-box-plots.png b/docs/source/_static/images/eda-box-plots.png new file mode 100644 index 0000000..90ee1c3 Binary files /dev/null and b/docs/source/_static/images/eda-box-plots.png differ diff --git a/docs/source/_static/images/eda-scatterplots.png b/docs/source/_static/images/eda-scatterplots.png new file mode 100644 index 0000000..1eb0ead Binary files /dev/null and b/docs/source/_static/images/eda-scatterplots.png differ diff --git a/docs/source/_static/images/model-performance.png b/docs/source/_static/images/model-performance.png new file mode 100644 index 0000000..d3f66d0 Binary files /dev/null and b/docs/source/_static/images/model-performance.png differ diff --git a/docs/source/_static/images/neural-network-graph.png b/docs/source/_static/images/neural-network-graph.png new file mode 100644 index 0000000..578a947 Binary files /dev/null and b/docs/source/_static/images/neural-network-graph.png differ diff --git a/docs/source/_static/images/roc-auc.png b/docs/source/_static/images/roc-auc.png new file mode 100644 index 0000000..7c3e790 Binary files /dev/null and b/docs/source/_static/images/roc-auc.png differ diff --git a/docs/source/conf.py b/docs/source/conf.py index 7660482..679a0cb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -139,6 +139,12 @@ def setup(app): # the __init__ docstring autoclass_content = "both" +# The command name with which to invoke dot. The default is 'dot'; you may need to set this to a full path if dot is not in the executable search path. 
+# NOTE Since this setting is not portable from system to system, it is normally not useful to set it in conf.py; rather, giving it on the sphinx-build command line via the -D option should be preferable, like this: +# sphinx-build -M html -D graphviz_dot=C:\graphviz\bin\dot.exe . _build + +# graphviz_dot = "dot" + # Render inheritance diagrams in SVG graphviz_output_format = "svg" @@ -147,6 +153,7 @@ def setup(app): "-Nfontname=Helvetica Neue, Helvetica, Arial, sans-serif", "-Efontsize=10", "-Efontname=Helvetica Neue, Helvetica, Arial, sans-serif", + "-Gbgcolor=white", "-Gfontsize=10", "-Gfontname=Helvetica Neue, Helvetica, Arial, sans-serif", ] @@ -163,7 +170,7 @@ def setup(app): # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" +pygments_style = "monokai" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] diff --git a/docs/source/dashboard/cal.rst b/docs/source/dashboard/cal.rst index cd36c0b..3c9ba25 100644 --- a/docs/source/dashboard/cal.rst +++ b/docs/source/dashboard/cal.rst @@ -2,5 +2,63 @@ spacekit.dashboard.cal ********************** +*Model Performance + Statistical Analysis for the Hubble Space Telescope's data calibration pipeline.* + +NOTE: Click the images on this page to zoom in. .. currentmodule:: spacekit.dashboard.cal + +This dashboard includes a simple home page linking to 3 individual pages: :ref:`model-evaluation`, :ref:`eda`, and :ref:`neural-network-graph`. + +.. _model-evaluation: + +Model Evaluation +---------------- + +When models are trained using spacekit, performance metrics can be recorded and saved to local disk, then loaded later elsewhere for analysis, including the generation of interactive plots. Under the hood, this is handled by the :doc:`Compute <../analyzer/compute>` module. The dashboard uses plotly and Flask to display these metrics in the browser: + +*Compare accuracy vs. loss for 3 training iterations or versions of a model* + +.. 
image:: ../_static/images/model-performance.png + :width: 400 + :alt: hst cal dashboard model accuracy and loss graphs + +*ROC-AUC and Precision-Recall Plots* + +.. image:: ../_static/images/roc-auc.png + :width: 400 + :alt: hst cal dashboard roc-auc graph + + +.. _eda: + +EDA +--- + +Generate interactive scatterplots for comparing linear relationships between two features (or feature vs. target) across groups of another feature. + +*Scatterplots* + +.. image:: ../_static/images/eda-scatterplots.png + :width: 400 + :alt: hst cal dashboard scatterplots + +Inspect distribution of a single feature grouped by another feature (e.g. file size by instrument group) + +*Box Plots* + +.. image:: ../_static/images/eda-box-plots.png + :width: 400 + :alt: hst cal dashboard box plots + + +.. _neural-network-graph: + +Neural Network Graph +-------------------- + +Loads a single saved model and allows you to enter custom feature input values to see what the model would estimate or predict based on those inputs. The visualization displays the visible as well as hidden layers of the neural network and is fully interactive. This allows you to see in detail the exact weights and bias for every single node in the network. The inspiration for creating this type of visual was originally intended for an educational demonstration for my non-data scientist colleagues in order to remove some of the mystery behind how a neural network makes decisions in real-time to calculate an output's probability. + +.. image:: ../_static/images/neural-network-graph.png + :width: 400 + :alt: hst cal dashboard interactive neural network graph diff --git a/docs/source/dashboard/index.rst b/docs/source/dashboard/index.rst index 8ebee05..c3f8605 100644 --- a/docs/source/dashboard/index.rst +++ b/docs/source/dashboard/index.rst @@ -11,3 +11,80 @@ spacekit.dashboard cal svm + + +Pull Existing Docker Image +-------------------------- + +Pull the most recent training data and model results from docker. 
For example, to get the latest HST cal (calcloud) data, the command is: + +.. code:: bash + + docker pull alphasentaurii/spacekit:dash-cal-latest + + +Custom Configurations +--------------------- + +*Configuring Custom Datasets via Environment file* + +The variables below are used by ``spacekit.datasets.beam`` to find specific datasets. Using the defaults will pull in the 3 most recent dataset iterations and model training results. To configure the dashboard to use other datasets, you'll need to set some configuration options. Copy variables into the `.env` file located in `docker/images/dashboard_image` - feel free to use one of the templates (`spacekit/docker/images/dashboard_image/templates`) then customize further as desired. + +.. code:: bash + + # pkg, s3, web, file + SRC="web" + # collection, bucketname, repo url, or local path + COLLECTION="calcloud" # e.g. "svm", "calcloud", "myS3bucket" + # used by spacekit.datasets as dictionary keys + DATASETS="2022-02-14,2021-11-04,2021-10-28" + # for s3 use the names of the .zip files + DATASETS="2022-02-14-1644848448,2021-11-04-1636048291,2021-10-28-1635457222" + # for s3 this is the folder prefix + PFX="archive" + + +Importing data from S3 (aws) +---------------------------- + +.. code:: bash + + SRC=s3 + COLLECTION=mybucket + PFX=somefolder + + +Mounting local data +------------------- + +You can also have your data in a local directory, and just bind mount the folder when you go to launch the container, or set container mode to "-it" and use spacekit.datasets to get the data before launching the dashboard. + +.. code:: bash + + CONTAINER_MODE="-d" # -d for detached, -it for interactive + MOUNTS=1 # >0 will bind mount the below source and dest paths + SOURCEDATA="/path/to/datasets" + DESTDATA="/home/developer/data" + + +Build the image +--------------- + +Once you have variables set in the .env file, build the image: + +.. 
code:: bash + + $ cd spacekit + $ sh scripts/build.sh + + +Run the container +----------------- + +Launch a container with your brand new image then fire it up in a browser: `http://0.0.0.0:8050/` + +.. code:: bash + + $ sh scripts/launch.sh + # you should see a SHA like: "6cb2bee87fbef53f44686" + diff --git a/docs/source/index.rst b/docs/source/index.rst index 6dc9b2b..7f1de4b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,9 +5,11 @@ spacekit documentation This is the documentation for ``spacekit``, the Astronomical Data Science and Machine Learning Toolkit + .. toctree:: :maxdepth: 1 + Overview Analyzer Builder Dashboard diff --git a/docs/source/overview.rst b/docs/source/overview.rst index 6dc9b2b..f1e9b32 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -5,23 +5,33 @@ spacekit documentation This is the documentation for ``spacekit``, the Astronomical Data Science and Machine Learning Toolkit -.. toctree:: - :maxdepth: 1 - Analyzer - Builder - Dashboard - Datasets - Extractor - Generator - Logger - Preprocessor - Skøpes +Overview +======== +Spacekit is a python library designed to do the heavy lifting of machine learning in astronomy-related applications using . + +The modules contained in this package can be used to assist and streamline each step of a typical data science project: + +1. :doc:`Ingest/Extract ` import large datasets from a variety of file formats .csv, .hdf5, .fits, .json, .png (.asdf coming soon) + +2. :doc:`Scrub/Preprocess ` scrub and preprocess raw data to prepare it for use in a machine learning model + +3. :doc:`Modeling ` build, train and deploy custom machine learning models using classification, logistic regression estimation, computer vision and more + +4. :doc:`Analysis ` evaluate model performance and do exploratory data analysis (EDA) using interactive graphs and visualizations + +5. 
:doc:`Visualize ` deploy a web-based custom dashboard for your models and datasets via docker, a great way to summarize and share comparative model evaluations and data analysis visuals with others + + +Applications +------------ + +The :doc:`Skøpes ` module includes real-world machine learning applications used by the Hubble and James Webb Space Telescopes in data calibration pipelines. These mini-applications are an orchestration of functions and classes from other spacekit modules to run real-time, automated analysis, training, and inference on a local server as well as in the cloud (AWS). Indices and tables -================== +------------------ * :ref:`genindex` * :ref:`modindex`