From bdab382e05f2e1121a157cb6673df8e955e2a6c4 Mon Sep 17 00:00:00 2001
From: Alex Wermer-Colan
Date: Tue, 23 Jan 2024 15:03:50 -0500
Subject: [PATCH] Update _index.md

---
 content/Data/_index.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/content/Data/_index.md b/content/Data/_index.md
index 5b534bb..406a585 100644
--- a/content/Data/_index.md
+++ b/content/Data/_index.md
@@ -35,11 +35,11 @@ from datasets import load_dataset
 dataset = load_dataset("SF-Corpus/EF_Chapters_and_Chunks")
 ```
 
-For more information about working with HuggingFace datasets, review their reference guide: https://huggingface.co/docs/datasets/v1.1.1/loading_datasets.html
+For more information about working with HuggingFace datasets, review their [reference guide](https://huggingface.co/docs/datasets/v1.1.1/loading_datasets.html).
 
-An extended discussion on extracted features can be found on the Scholars' Studio blog: https://sites.temple.edu/tudsc/2019/07/18/curating-copyrighted-corpora-an-r-script-for-extracting-and-structuring-textual-features/
+An extended discussion of extracted features can be found on the Scholars' Studio blog, in Jeff Antsen's ["Curating Copyrighted Corpora: An R Script for Extracting and Structuring Textual Features"](https://sites.temple.edu/tudsc/2019/07/18/curating-copyrighted-corpora-an-r-script-for-extracting-and-structuring-textual-features/).
 
-This project made use of multiple Python and R pipelines to extract features from the science fiction collection. These pipelines are available as both Jupyter Notebooks and Google Colab Notebooks in this Github repository: https://github.com/SF-Nexus/extracted-features/tree/main/notebooks. Below, the process for crafting each extracted features dataset is discussed in more detail.
+This project made use of multiple Python and R pipelines to extract features from the science fiction collection. These pipelines are available as both Jupyter Notebooks and Google Colab Notebooks in this [GitHub repository](https://github.com/SF-Nexus/extracted-features/tree/main/notebooks). Below, the process for crafting each extracted-features dataset is discussed in more detail.
 
 ## Pipeline 1: Text Sectioning and Disaggregation
 *Full Code Available on SF Nexus Github:*