-
Notifications
You must be signed in to change notification settings - Fork 66
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #411 from westford14/westford14/codespell-nb
- Loading branch information
Showing
20 changed files
with
183 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# Copyright 2024 The PyMC Labs Developers | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
""" | ||
This is a simple script that converts the jupyter notebooks into markdown | ||
for easier (and cleaner) parsing for the codespell check. Whitelisted words | ||
are maintained within this directory in the `codespell-whitelist.txt`. For | ||
more information on this pre-commit hook please visit the github homepage | ||
for the project: https://github.com/codespell-project/codespell. | ||
""" | ||
|
||
import argparse | ||
import os | ||
from glob import glob | ||
|
||
import nbformat | ||
from nbconvert import MarkdownExporter | ||
|
||
|
||
def _unique_markdown_name(stem: str, taken: set) -> str:
    """
    Pick a markdown file name for ``stem`` that is not yet in ``taken``.

    :param stem:
        str notebook file name without its extension
    :param taken:
        set of file names already produced during this run; the chosen
        name is added to it
    :return:
        ``<stem>.md`` when free, otherwise ``<stem>_<n>.md`` with the
        smallest positive ``n`` that is still free
    """
    candidate = f"{stem}.md"
    counter = 0
    while candidate in taken:
        counter += 1
        candidate = f"{stem}_{counter}.md"
    taken.add(candidate)
    return candidate


def notebook_to_markdown(pattern: str, output_dir: str) -> None:
    """
    Utility to convert jupyter notebook to markdown files.

    Every notebook matched by ``pattern`` is read as nbformat version 4,
    exported to markdown and written to ``output_dir`` as ``<name>.md``.
    The output directory is flat, so notebooks that share a file name in
    different source directories would map to the same output file; the
    second and later ones get a numeric suffix (``<name>_1.md``, ...)
    instead of silently overwriting the first.

    :param pattern:
        str that is a glob appropriate pattern to search
    :param output_dir:
        str directory to save the markdown files to; it is created only
        when at least one notebook matches
    """
    written = set()

    # Sort the matches so that the suffix given to same-named notebooks does
    # not depend on the order in which the file system lists them.
    for nb_path in sorted(glob(pattern, recursive=True)):
        with open(nb_path, "r", encoding="utf-8") as nb_file:
            nb = nbformat.read(nb_file, as_version=4)

        markdown_exporter = MarkdownExporter()
        body, _ = markdown_exporter.from_notebook_node(nb)

        os.makedirs(output_dir, exist_ok=True)

        stem = os.path.splitext(os.path.basename(nb_path))[0]
        output_file = os.path.join(
            output_dir, _unique_markdown_name(stem, written)
        )

        with open(output_file, "w", encoding="utf-8") as md_file:
            md_file.write(body)
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: both options are optional and fall back to
    # the defaults declared here.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-p",
        "--pattern",
        default="docs/**/*.ipynb",
        help="the glob appropriate pattern to search for jupyter notebooks",
    )
    cli.add_argument(
        "-t",
        "--tempdir",
        default="tmp_markdown",
        help="temporary directory to save the converted notebooks",
    )
    options = cli.parse_args()
    notebook_to_markdown(options.pattern, options.tempdir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"print(f\"{os.__file}__\")\n", | ||
"\n", | ||
"# Speling mistake." | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Copyright 2024 The PyMC Labs Developers | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Notebook to markdown tests.""" | ||
|
||
import os | ||
from tempfile import TemporaryDirectory | ||
|
||
import pytest | ||
from notebook_to_markdown import notebook_to_markdown | ||
|
||
|
||
@pytest.fixture
def data_dir() -> str:
    """Return the path of the ``test_data`` directory next to this test file."""
    here = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(here, "test_data")
|
||
|
||
def test_notebook_to_markdown_empty_pattern(data_dir: str) -> None:
    """A pattern that matches no file must leave the output directory empty."""
    with TemporaryDirectory() as out_dir:
        notebook_to_markdown(f"{data_dir}/*.missing", out_dir)
        produced = os.listdir(out_dir)
        assert not produced
|
||
|
||
def test_notebook_to_markdown(data_dir: str) -> None:
    """The single notebook in the test data is converted to one markdown file."""
    with TemporaryDirectory() as out_dir:
        notebook_to_markdown(f"{data_dir}/*.ipynb", out_dir)
        produced = os.listdir(out_dir)
        assert len(produced) == 1
        assert "test_notebook.md" in produced
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.