From 0eb3bab6607120f98a621cc1ffe97c30e4f1b6e3 Mon Sep 17 00:00:00 2001
From: Martijn van der Klis <martijnvanderklis@yoast.com>
Date: Tue, 12 Nov 2024 16:01:18 +0100
Subject: [PATCH 1/2] Converts the sentence length assessment to use the HTML
 Parser

---
 .../helpers/sentence/sentencesLengthSpec.js   |  57 +++++---
 .../researches/countSentencesFromTextSpec.js  | 138 ++++++++++++------
 .../SentenceLengthInTextAssessmentSpec.js     | 129 ++++++++++++----
 .../checkForTooLongSentencesSpec.js           |  50 -------
 .../helpers/sentence/sentencesLength.js       |  35 +++--
 .../helpers/word/getAllWordsFromTree.js       |  27 +++-
 .../researches/countSentencesFromText.js      |  12 +-
 .../SentenceLengthInTextAssessment.js         |  51 +++----
 .../assessments/checkForTooLongSentences.js   |  13 --
 9 files changed, 290 insertions(+), 222 deletions(-)
 delete mode 100644 packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js
 delete mode 100644 packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js
diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js
index 0b50b8e7b7b..5495caa60f8 100644
--- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js
@@ -1,41 +1,50 @@
 import sentencesLength from "../../../../src/languageProcessing/helpers/sentence/sentencesLength";
+import getSentencesFromTree from "../../../../src/languageProcessing/helpers/sentence/getSentencesFromTree";
 import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/Researcher";
 import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher";
 import Paper from "../../../../src/values/Paper";
+import buildTree from "../../../specHelpers/parse/buildTree";
 
 describe( "A test to count sentence lengths.", function() {
 	it( "should not return a length for an empty sentence", function() {
-		const sentences = [ "", "A sentence" ];
-		const mockResearcher = new EnglishResearcher( new Paper( "" ) );
+		const mockPaper = new Paper( "<p></p><p>A sentence</p>" );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
 
-		const lengths = sentencesLength( sentences, mockResearcher );
+		const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher );
 
-		expect( lengths ).toEqual( [
-			{ sentence: "A sentence", sentenceLength: 2 },
-		] );
+		expect( sentenceLengths.length ).toEqual( 1 );
+		expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 2 );
+		expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "A sentence" );
 	} );
 
 	it( "should return the sentences and their length (the HTML tags should not be counted if present)", function() {
-		const sentences = [ "A <strong>good</strong> text", "this is a <span style='color: blue;'> textstring </span>" ];
-		const mockResearcher = new EnglishResearcher( new Paper( "" ) );
-
-		const lengths = sentencesLength( sentences, mockResearcher );
-
-		expect( lengths ).toEqual( [
-			{ sentence: "A <strong>good</strong> text", sentenceLength: 3 },
-			{ sentence: "this is a <span style='color: blue;'> textstring </span>", sentenceLength: 4 },
-		] );
+		const mockPaper = new Paper( "<p>A <strong>good</strong> text</p>" +
+			"<p>this is a <span style='color: blue;'>string</span></p>" );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher );
+
+		expect( sentenceLengths.length ).toEqual( 2 );
+		expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 3 );
+		expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "A good text" );
+		expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 4 );
+		expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "this is a string" );
 	} );
 
 	it( "should return the sentences and their length for Japanese (so counting characters)", function() {
-		const sentences = [ "自然おのずから存在しているもの", "歩くさわやかな森 <span style='color: red;'> 自然 </span>" ];
-		const mockJapaneseResearcher = new JapaneseResearcher( new Paper( "" ) );
-
-		const lengths = sentencesLength( sentences, mockJapaneseResearcher );
-
-		expect( lengths ).toEqual( [
-			{ sentence: "自然おのずから存在しているもの", sentenceLength: 15 },
-			{ sentence: "歩くさわやかな森 <span style='color: red;'> 自然 </span>", sentenceLength: 10 },
-		] );
+		const mockPaper = new Paper( "<p>自然おのずから存在しているもの</p>" +
+			"<p>歩くさわやかな森 <span style='color: red;'> 自然 </span></p>" );
+		const mockJapaneseResearcher = new JapaneseResearcher( mockPaper );
+		buildTree( mockPaper, mockJapaneseResearcher );
+
+		const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockJapaneseResearcher );
+
+		expect( sentenceLengths.length ).toEqual( 2 );
+		expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 15 );
+		expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "自然おのずから存在しているもの" );
+		expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 10 );
+		expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "歩くさわやかな森  自然 " );
 	} );
 } );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js b/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js
index 79fca670a80..0b65b6c2ae5 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js
@@ -1,68 +1,122 @@
-/* eslint-disable capitalized-comments, spaced-comment */
 import getSentences from "../../../src/languageProcessing/researches/countSentencesFromText.js";
 import Paper from "../../../src/values/Paper";
 import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher";
+import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Researcher";
+import buildTree from "../../specHelpers/parse/buildTree";
 
 describe( "counts words in sentences from text", function() {
-	let paper;
-
 	it( "returns sentences with question mark", function() {
-		paper = new Paper( "Hello. How are you? Bye" );
-		expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 1 );
-		expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 3 );
-		expect( getSentences( paper, new EnglishResearcher() )[ 2 ].sentenceLength ).toBe( 1 );
+		const mockPaper = new Paper( "Hello. How are you? Bye" );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 1 );
+		expect( sentences[ 1 ].sentenceLength ).toBe( 3 );
+		expect( sentences[ 2 ].sentenceLength ).toBe( 1 );
 	} );
 	it( "returns sentences with exclamation mark", function() {
-		paper = new Paper( "Hello. How are you! Bye" );
-		expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 1 );
-		expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 3 );
-		expect( getSentences( paper, new EnglishResearcher() )[ 2 ].sentenceLength ).toBe( 1 );
+		const mockPaper = new Paper( "Hello. How are you! Bye" );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 1 );
+		expect( sentences[ 1 ].sentenceLength ).toBe( 3 );
+		expect( sentences[ 2 ].sentenceLength ).toBe( 1 );
 	} );
 	it( "returns sentences with many spaces", function() {
-		paper = new Paper( "Hello.        How are you! Bye" );
-		expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 1 );
-		expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 3 );
-		expect( getSentences( paper, new EnglishResearcher() )[ 2 ].sentenceLength ).toBe( 1 );
+		const mockPaper = new Paper( "Hello.        How are you! Bye" );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 1 );
+		expect( sentences[ 1 ].sentenceLength ).toBe( 3 );
+		expect( sentences[ 2 ].sentenceLength ).toBe( 1 );
 	} );
 	it( "returns sentences with html-tags, should only count words", function() {
-		paper = new Paper( "This is a text <img src='image.jpg' alt='a bunch of words in an alt-tag' />" );
-		expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 4 );
+		const mockPaper = new Paper( "This is a text <img src='https://example.com/image.jpg' alt='a bunch of words in an alt-tag' />" );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 4 );
 	} );
 	it( "returns sentences with html-tags, should only count words", function() {
-		paper = new Paper( "This is a text <img src='http://domain.com/image.jpg' alt='a bunch of words in an alt-tag' />. Another sentence." );
-		expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 4 );
-		expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 2 );
+		const mockPaper = new Paper( "This is a text <img src='https://example.com/image.jpg' alt='a bunch of words in an alt-tag' />. Another sentence." );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 4 );
+		expect( sentences[ 1 ].sentenceLength ).toBe( 2 );
 	} );
 	it( "should not count sentences inside elements we want to exclude from the analysis", function() {
-		paper = new Paper( "This is a text. <code>With some code.</code>. Another sentence." );
-		expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 4 );
-		expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 2 );
+		const mockPaper = new Paper( "This is a text. <code>With some code.</code>. Another sentence." );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 4 );
+		expect( sentences[ 1 ].sentenceLength ).toBe( 2 );
 	} );
-	/*it( "returns sentences with question mark in Japanese", function() {
-		paper = new Paper( "雨が降っている。 いつ終わるの？ さようなら" );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 8 );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 7 );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 2 ].sentenceLength ).toBe( 5 );
+	it( "returns sentences with question mark in Japanese", function() {
+		const mockPaper = new Paper( "雨が降っている。 いつ終わるの？ さようなら" );
+		const mockResearcher = new JapaneseResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 8 );
+		expect( sentences[ 1 ].sentenceLength ).toBe( 7 );
+		expect( sentences[ 2 ].sentenceLength ).toBe( 5 );
 	} );
 	it( "returns sentences with exclamation mark", function() {
-		paper = new Paper( "雨が降っている. いつ終わるの！さようなら" );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 8 );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 7 );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 2 ].sentenceLength ).toBe( 5 );
+		const mockPaper = new Paper( "雨が降っている. いつ終わるの！さようなら" );
+		const mockResearcher = new JapaneseResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 8 );
+		expect( sentences[ 1 ].sentenceLength ).toBe( 7 );
+		expect( sentences[ 2 ].sentenceLength ).toBe( 5 );
 	} );
 	it( "returns sentences with many spaces", function() {
-		paper = new Paper( "雨が降っている。        いつ終わるの？ さようなら" );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 8 );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 7 );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 2 ].sentenceLength ).toBe( 5 );
+		const mockPaper = new Paper( "雨が降っている。        いつ終わるの？ さようなら" );
+		const mockResearcher = new JapaneseResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 8 );
+		expect( sentences[ 1 ].sentenceLength ).toBe( 7 );
+		expect( sentences[ 2 ].sentenceLength ).toBe( 5 );
 	} );
 	it( "returns sentences with html-tags, should count characters in Japanese", function() {
-		paper = new Paper( "いつ終わるの <img src='image.jpg' alt='自分を大事にして下さい' />" );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 6 );
+		const mockPaper = new Paper( "いつ終わるの <img src='image.jpg' alt='自分を大事にして下さい' />" );
+		const mockResearcher = new JapaneseResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 6 );
 	} );
 	it( "returns sentences with html-tags, should count characters in Japanese", function() {
-		paper = new Paper( "いつ終わるの <img src='http://domain.com/image.jpg' alt='自分を大事にして下さい' />. 春がやってきます。" );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 7 );
-		expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 9 );
-	} );*/
+		const mockPaper = new Paper( "いつ終わるの <img src='http://domain.com/image.jpg' alt='自分を大事にして下さい' />. 春がやってきます。" );
+		const mockResearcher = new JapaneseResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentences = getSentences( mockPaper, mockResearcher );
+
+		expect( sentences[ 0 ].sentenceLength ).toBe( 7 );
+		expect( sentences[ 1 ].sentenceLength ).toBe( 9 );
+	} );
 } );
diff --git a/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js
index 38b6d5ea83a..ad1cb7b091f 100644
--- a/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js
+++ b/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js
@@ -5,6 +5,7 @@ import Paper from "../../../../src/values/Paper.js";
 import Mark from "../../../../src/values/Mark.js";
 import addMark from "../../../../src/markers/addMark";
 import Factory from "../../../../src/helpers/factory.js";
+import buildTree from "../../../specHelpers/parse/buildTree";
 
 import DefaultResearcher from "../../../../src/languageProcessing/languages/_default/Researcher";
 import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher";
@@ -12,19 +13,21 @@ import PolishResearcher from "../../../../src/languageProcessing/languages/pl/Re
 import RussianResearcher from "../../../../src/languageProcessing/languages/ru/Researcher";
 import ItalianResearcher from "../../../../src/languageProcessing/languages/it/Researcher";
 import TurkishResearcher from "../../../../src/languageProcessing/languages/tr/Researcher";
+import japaneseConfig from "../../../../src/languageProcessing/languages/ja/config/sentenceLength";
 
 const shortSentenceDefault = "Word ".repeat( 18 ) + "word. ";
 const longSentenceDefault = "Word ".repeat( 20 ) + "word. ";
 const shortSentence15WordsLimit = "Word ".repeat( 13 ) + "word. ";
 const longSentence15WordsLimit = "Word ".repeat( 15 ) + "word. ";
 
-import japaneseConfig from "../../../../src/languageProcessing/languages/ja/config/sentenceLength";
-
 // eslint-disable-next-line max-statements
 describe( "An assessment for sentence length", function() {
 	it( "returns the score for all short sentences using the default config", function() {
 		const mockPaper = new Paper( shortSentenceDefault );
-		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) );
+		const mockResearcher = new DefaultResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 9 );
@@ -34,7 +37,10 @@ describe( "An assessment for sentence length", function() {
 
 	it( "returns the score for 50% long sentences using the default config", function() {
 		const mockPaper = new Paper( shortSentenceDefault + longSentenceDefault );
-		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) );
+		const mockResearcher = new DefaultResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 3 );
@@ -46,7 +52,10 @@ describe( "An assessment for sentence length", function() {
 
 	it( "returns the score for 100% long sentences using the default config", function() {
 		const mockPaper = new Paper( longSentenceDefault );
-		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) );
+		const mockResearcher = new DefaultResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 3 );
@@ -58,7 +67,10 @@ describe( "An assessment for sentence length", function() {
 
 	it( "returns the score for 25% long sentences using the default config", function() {
 		const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault + shortSentenceDefault + shortSentenceDefault );
-		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) );
+		const mockResearcher = new DefaultResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 9 );
@@ -68,7 +80,10 @@ describe( "An assessment for sentence length", function() {
 
 	it( "returns the score for 30% long sentences using the default config", function() {
 		const mockPaper = new Paper( longSentenceDefault.repeat( 3 ) + shortSentenceDefault.repeat( 7 ) );
-		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) );
+		const mockResearcher = new DefaultResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 6 );
@@ -80,7 +95,10 @@ describe( "An assessment for sentence length", function() {
 
 	it( "returns the score for 100% long sentences in a language that overrides the default recommended length config", function() {
 		const mockPaper = new Paper( longSentence15WordsLimit );
-		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new RussianResearcher( mockPaper ) );
+		const mockResearcher = new RussianResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 3 );
@@ -90,8 +108,15 @@ describe( "An assessment for sentence length", function() {
 		expect( assessment.hasMarks() ).toBe( true );
 		expect( new SentenceLengthInTextAssessment().getMarks( mockPaper, new RussianResearcher( mockPaper ) ) ).toEqual( [
 			new Mark( {
-				original: "Word Word Word Word Word Word Word Word Word Word Word Word Word Word Word word.",
-				marked: addMark( "Word Word Word Word Word Word Word Word Word Word Word Word Word Word Word word." ),
+				position: {
+					startOffset: 0,
+					endOffset: 81,
+					startOffsetBlock: 0,
+					endOffsetBlock: 81,
+					clientId: "",
+					attributeId: "",
+					isFirstSection: false,
+				},
 			} ),
 		] );
 	} );
@@ -173,7 +198,10 @@ describe( "An assessment for sentence length", function() {
 	it( "returns the score for 20% long sentences in a language that overrides the default config" +
 		" for maximum allowed percentage of long sentences", function() {
 		const mockPaper = new Paper( longSentenceDefault.repeat( 4 ) + shortSentenceDefault.repeat( 16 ) );
-		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new PolishResearcher( mockPaper ) );
+		const mockResearcher = new PolishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 6 );
@@ -186,7 +214,10 @@ describe( "An assessment for sentence length", function() {
 	it( "returns the score for 25% long sentences in a language that overrides the default config for both recommended " +
 		"maximum sentence length, and the maximum allowed percentage of long sentences", function() {
 		const mockPaper = new Paper( longSentence15WordsLimit + shortSentence15WordsLimit.repeat( 3 ) );
-		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new TurkishResearcher( mockPaper ) );
+		const mockResearcher = new TurkishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 6 );
@@ -237,9 +268,12 @@ describe( "An assessment for sentence length", function() {
 
 	it( "returns the score for 100% long sentences in a language that should count sentence length in characters (Japanese)", function() {
 		const mockPaper = new Paper( "" );
-		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, Factory.buildMockResearcher( [
+		const mockResearcher = Factory.buildMockResearcher( [
 			{ sentence: "", sentenceLength: 41 },
-		], false, false, japaneseConfig ) );
+		], false, false, japaneseConfig );
+		buildTree( mockPaper, mockResearcher );
+
+		const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 3 );
@@ -398,10 +432,13 @@ describe( "An assessment for sentence length for cornerstone content", function(
 
 	it( "returns the score for 25% long sentences in a language that overrides the default cornerstone configuration", function() {
 		const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) );
+		const mockResearcher = new PolishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
 		const assessment = new SentenceLengthInTextAssessment( {
 			slightlyTooMany: 20,
 			farTooMany: 25,
-		}, true ).getResult( mockPaper, new PolishResearcher( mockPaper ) );
+		}, true ).getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 3 );
@@ -428,10 +465,13 @@ describe( "An assessment for sentence length for cornerstone content", function(
 
 	it( "returns the score for 25% long sentences using the default cornerstone configuration", function() {
 		const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) );
+		const mockResearcher = new DefaultResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
 		const assessment = new SentenceLengthInTextAssessment( {
 			slightlyTooMany: 20,
 			farTooMany: 25,
-		}, true ).getResult( mockPaper, new DefaultResearcher( mockPaper ) );
+		}, true ).getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 6 );
@@ -445,10 +485,13 @@ describe( "An assessment for sentence length for cornerstone content", function(
 describe( "An assessment for sentence length for product pages", function() {
 	it( "returns the score for 100% short sentences in English using the product page configuration", function() {
 		const mockPaper = new Paper( shortSentenceDefault );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
 		const assessment = new SentenceLengthInTextAssessment( {
 			slightlyTooMany: 20,
 			farTooMany: 25,
-		}, false, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) );
+		}, false, true ).getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 9 );
@@ -458,10 +501,13 @@ describe( "An assessment for sentence length for product pages", function() {
 
 	it( "returns the score for 100% long sentences in English using the product page configuration", function() {
 		const mockPaper = new Paper( longSentenceDefault );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
 		const assessment = new SentenceLengthInTextAssessment( {
 			slightlyTooMany: 20,
 			farTooMany: 25,
-		}, false, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) );
+		}, false, true ).getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 3 );
@@ -473,10 +519,13 @@ describe( "An assessment for sentence length for product pages", function() {
 
 	it( "returns the score for 25% long sentences in English using the product page configuration", function() {
 		const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
 		const assessment = new SentenceLengthInTextAssessment( {
 			slightlyTooMany: 20,
 			farTooMany: 25,
-		}, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) );
+		}, true ).getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 6 );
@@ -488,10 +537,13 @@ describe( "An assessment for sentence length for product pages", function() {
 
 	it( "returns the score for 100% short sentences in English using the cornerstone product page configuration", function() {
 		const mockPaper = new Paper( shortSentenceDefault );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
 		const assessment = new SentenceLengthInTextAssessment( {
 			slightlyTooMany: 15,
 			farTooMany: 20,
-		}, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) );
+		}, true, true ).getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 9 );
@@ -501,10 +553,13 @@ describe( "An assessment for sentence length for product pages", function() {
 
 	it( "returns the score for 100% long sentences in English using the cornerstone product page configuration", function() {
 		const mockPaper = new Paper( longSentenceDefault );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
 		const assessment = new SentenceLengthInTextAssessment( {
 			slightlyTooMany: 15,
 			farTooMany: 20,
-		}, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) );
+		}, true, true ).getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 3 );
@@ -516,10 +571,13 @@ describe( "An assessment for sentence length for product pages", function() {
 
 	it( "returns the score for 20% long sentences in English using the cornerstone product page configuration", function() {
 		const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 4 ) );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
 		const assessment = new SentenceLengthInTextAssessment( {
 			slightlyTooMany: 15,
 			farTooMany: 20,
-		}, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) );
+		}, true, true ).getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 6 );
@@ -531,10 +589,13 @@ describe( "An assessment for sentence length for product pages", function() {
 
 	it( "returns the score for 25% long sentences in English using the cornerstone product page configuration", function() {
 		const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
 		const assessment = new SentenceLengthInTextAssessment( {
 			slightlyTooMany: 15,
 			farTooMany: 20,
-		}, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) );
+		}, true, true ).getResult( mockPaper, mockResearcher );
 
 		expect( assessment.hasScore() ).toBe( true );
 		expect( assessment.getScore() ).toEqual( 3 );
@@ -547,15 +608,23 @@ describe( "An assessment for sentence length for product pages", function() {
 
 describe( "A test for marking too long sentences", function() {
 	it( "returns markers for too long sentences", function() {
-		const paper = new Paper( "This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think?" );
-		const sentenceLengthInText = Factory.buildMockResearcher( [ { sentence: "This is a too long sentence, because it has over twenty" +
-				" words, and that is hard too read, don't you think?", sentenceLength: 21 } ] );
+		const mockPaper = new Paper( "This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think?" );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
 		const expected = [
-			new Mark( { original: "This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think?",
-				marked: "<yoastmark class='yoast-text-mark'>This is a too long sentence, because it has over twenty words, and that is hard too" +
-					" read, don't you think?</yoastmark>" } ),
+			new Mark( {
+				position: {
+					startOffset: 0,
+					endOffset: 106,
+					startOffsetBlock: 0,
+					endOffsetBlock: 106,
+					attributeId: "",
+					clientId: "",
+					isFirstSection: false,
+				} } ),
 		];
-		expect( new SentenceLengthInTextAssessment().getMarks( paper, sentenceLengthInText ) ).toEqual( expected );
+		expect( new SentenceLengthInTextAssessment().getMarks( mockPaper, mockResearcher ) ).toEqual( expected );
 	} );
 
 	it( "returns no markers if no sentences are too long", function() {
diff --git a/packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js b/packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js
deleted file mode 100644
index e6bb9eff4b1..00000000000
--- a/packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js
+++ /dev/null
@@ -1,50 +0,0 @@
-import checkForTooLongSentences from "../../../../src/scoring/helpers/assessments/checkForTooLongSentences.js";
-
-describe( "Checks if sentences are too long", function() {
-	it( "Returns no sentences, none are too long", function() {
-		var sentences =
-			[
-				{ sentence: "", sentenceLength: 32 },
-				{ sentence: "", sentenceLength: 64 },
-				{ sentence: "", sentenceLength: 128 },
-			];
-		var recommendedValue = 256;
-		expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( [ ] );
-	} );
-	it( "Returns all sentences, all are too long", function() {
-		var sentences =
-			[
-				{ sentence: "", sentenceLength: 32 },
-				{ sentence: "", sentenceLength: 64 },
-				{ sentence: "", sentenceLength: 128 },
-			];
-		var recommendedValue = 16;
-		expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( sentences );
-	} );
-	it( "Returns 2 sentences that exceed the recommended value", function() {
-		var sentences =
-			[
-				{ sentence: "", sentenceLength: 32 },
-				{ sentence: "", sentenceLength: 64 },
-				{ sentence: "", sentenceLength: 128 },
-				{ sentence: "", sentenceLength: 256 },
-			];
-		var recommendedValue = 96;
-		var expectedOutput =
-			[
-				{ sentence: "", sentenceLength: 128 },
-				{ sentence: "", sentenceLength: 256 },
-			];
-		expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( expectedOutput );
-	} );
-	it( "Returns no sentences, since they are the exact allowed length.", function() {
-		var sentences =
-			[
-				{ sentence: "", sentenceLength: 64 },
-				{ sentence: "", sentenceLength: 64 },
-				{ sentence: "", sentenceLength: 64 },
-			];
-		var recommendedValue = 64;
-		expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( [ ] );
-	} );
-} );
diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js
index 382d94e5411..e6a33d33bd8 100644
--- a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js
+++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js
@@ -1,31 +1,30 @@
-import wordCount from "../word/countWords.js";
-import { forEach } from "lodash";
-import { stripFullTags as stripHTMLTags } from "../sanitize/stripHTMLTags.js";
+import { getWordsFromTokens } from "../word/getAllWordsFromTree";
+
+/**
+ * @typedef {Object} SentenceLength
+ * @property {Sentence} sentence The sentence.
+ * @property {number} sentenceLength The length of the sentence.
+ */
 
 /**
  * Returns an array with the length of each sentence.
  *
- * @param {Array} sentences Array with sentences from text.
+ * @param {Sentence[]} sentences Array with sentences from text.
  * @param {Researcher} 	researcher 	The researcher to use for analysis.
  *
- * @returns {Array} Array with the length of each sentence.
+ * @returns {SentenceLength[]} Array with the length of each sentence.
  */
 export default function( sentences, researcher ) {
 	const sentencesWordCount = [];
-	forEach( sentences, function( sentence ) {
-		// For counting words we want to omit the HTMLtags.
-		const strippedSentence = stripHTMLTags( sentence );
-		// A helper to count characters for languages that don't count number of words for text length.
-		const countCharacters = researcher.getHelper( "customCountLength" );
-		const length = countCharacters ? countCharacters( strippedSentence ) : wordCount( strippedSentence );
-		if ( length <= 0 ) {
-			return;
+	sentences.forEach( sentence => {
+		const customLengthHelper = researcher.getHelper( "customCountLength" );
+		const length = customLengthHelper ? customLengthHelper( sentence.text ) : getWordsFromTokens( sentence.tokens ).length;
+		if ( length > 0 ) {
+			sentencesWordCount.push( {
+				sentence: sentence,
+				sentenceLength: length,
+			} );
 		}
-
-		sentencesWordCount.push( {
-			sentence: sentence,
-			sentenceLength: length,
-		} );
 	} );
 	return sentencesWordCount;
 }
diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js
index 7266af95299..b0f3429a1e2 100644
--- a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js
+++ b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js
@@ -2,22 +2,33 @@ import getSentencesFromTree from "../sentence/getSentencesFromTree";
 import { flatMap } from "lodash";
 import removePunctuation from "../sanitize/removePunctuation";
 
+/**
+ * Gets the words from the tokens.
+ *
+ * @param {Token[]} tokens The tokens to get the words from.
+ *
+ * @returns {string[]} Array of words retrieved from the tokens.
+ */
+export function getWordsFromTokens( tokens ) {
+	// Retrieve all texts from the tokens.
+	let words = tokens.map( token => token.text );
+	// Remove punctuation and spaces.
+	words = words.map( token => removePunctuation( token ) );
+	// Filter out empty tokens.
+	return words.filter( word => word.trim() !== "" );
+}
+
 /**
  * Gets the words from the tree, i.e. from the paragraph and heading nodes.
  * These two node types are the nodes that should contain words for the analysis.
  *
  * @param {Paper} paper The paper to get the tree and words from.
  *
- * @returns {String[]} Array of words retrieved from the tree.
+ * @returns {string[]} Array of words retrieved from the tree.
  */
 export default function( paper ) {
 	const sentences = getSentencesFromTree( paper );
 	// Get all the tokens from each sentence.
-	const tokens = sentences.map( sentence => sentence.tokens );
-	let words = flatMap( tokens ).map( token => token.text );
-	// Remove punctuation and spaces.
-	words = words.map( token => removePunctuation( token ) );
-
-	// Filter out empty tokens.
-	return words.filter( word => word.trim() !== "" );
+	const tokens = flatMap( sentences.map( sentence => sentence.tokens ) );
+	return getWordsFromTokens( tokens );
 }
diff --git a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js
index 25932359050..78f87ee31ee 100644
--- a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js
+++ b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js
@@ -1,19 +1,13 @@
-import getSentences from "../helpers/sentence/getSentences";
 import sentencesLength from "../helpers/sentence/sentencesLength.js";
-import removeHtmlBlocks from "../helpers/html/htmlParser";
-import { filterShortcodesFromHTML } from "../helpers";
+import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree";
 
 /**
  * Count sentences in the text.
  * @param {Paper} paper The Paper object to get text from.
  * @param {Researcher} 	researcher 	The researcher to use for analysis.
- * @returns {Array} The sentences from the text.
+ * @returns {SentenceLength[]} The sentences from the text.
  */
 export default function( paper, researcher ) {
-	const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" );
-	let text = paper.getText();
-	text = removeHtmlBlocks( text );
-	text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
-	const sentences = getSentences( text, memoizedTokenizer );
+	const sentences = getSentencesFromTree( paper );
 	return sentencesLength( sentences, researcher );
 }
diff --git a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js
index 155bbce76eb..9c08b32f382 100644
--- a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js
+++ b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js
@@ -1,13 +1,10 @@
 import { __, sprintf } from "@wordpress/i18n";
-import { map, merge } from "lodash";
+import { merge } from "lodash";
 
 import Assessment from "../assessment";
-import getTooLongSentences from "../../helpers/assessments/checkForTooLongSentences";
 import formatNumber from "../../../helpers/formatNumber";
 import { inRangeEndInclusive as inRange } from "../../helpers/assessments/inRange";
-import addMark from "../../../markers/addMark";
-import { createAnchorOpeningTag } from "../../../helpers/shortlinker";
-import { stripIncompleteTags as stripTags } from "../../../languageProcessing/helpers/sanitize/stripHTMLTags";
+import { createAnchorOpeningTag } from "../../../helpers";
 import AssessmentResult from "../../../values/AssessmentResult";
 import Mark from "../../../values/Mark";
 
@@ -70,7 +67,7 @@ class SentenceLengthInTextAssessment extends Assessment {
 
 		assessmentResult.setScore( score );
 		assessmentResult.setText( this.translateScore( score, percentage ) );
-		assessmentResult.setHasMarks( ( percentage > 0 ) );
+		assessmentResult.setHasMarks( percentage > 0 );
 
 		return assessmentResult;
 	}
@@ -99,13 +96,22 @@ class SentenceLengthInTextAssessment extends Assessment {
 		if ( researcher.getConfig( "sentenceLength" ) ) {
 			this._config = this.getLanguageSpecificConfig( researcher );
 		}
-		const sentenceObjects = this.getTooLongSentences( sentenceCount );
+		const tooLongSentences = this.getTooLongSentences( sentenceCount );
 
-		return map( sentenceObjects, function( sentenceObject ) {
-			const sentence = stripTags( sentenceObject.sentence );
+		return tooLongSentences.map( tooLongSentence => {
+			const sentence = tooLongSentence.sentence;
+			const startOffset = sentence.sourceCodeRange.startOffset;
+			const endOffset = sentence.sourceCodeRange.endOffset;
 			return new Mark( {
-				original: sentence,
-				marked: addMark( sentence ),
+				position: {
+					startOffset,
+					endOffset,
+					startOffsetBlock: startOffset - ( sentence.parentStartOffset || 0 ),
+					endOffsetBlock: endOffset - ( sentence.parentStartOffset || 0 ),
+					clientId: sentence.parentClientId || "",
+					attributeId: sentence.parentAttributeId || "",
+					isFirstSection: sentence.isParentFirstSectionOfBlock || false,
+				},
 			} );
 		} );
 	}
@@ -179,14 +185,14 @@ class SentenceLengthInTextAssessment extends Assessment {
 	/**
 	 * Calculates the percentage of sentences that are too long.
 	 *
-	 * @param {Array} sentences The sentences to calculate the percentage for.
+	 * @param {SentenceLength[]} sentences The sentences to calculate the percentage for.
 	 * @returns {number} The calculates percentage of too long sentences.
 	 */
 	calculatePercentage( sentences ) {
 		let percentage = 0;
 
 		if ( sentences.length !== 0 ) {
-			const tooLongTotal = this.countTooLongSentences( sentences );
+			const tooLongTotal = this.getTooLongSentences( sentences ).length;
 
 			percentage = formatNumber( ( tooLongTotal / sentences.length ) * 100 );
 		}
@@ -222,23 +228,12 @@ class SentenceLengthInTextAssessment extends Assessment {
 	}
 
 	/**
-	 * Gets the sentences that are qualified as being too long.
-	 *
-	 * @param {array} sentences The sentences to filter through.
-	 * @returns {array} Array with all the sentences considered to be too long.
+	 * Returns the sentences that are qualified as being too long.
+	 * @param {SentenceLength[]} sentences The sentences to filter.
+	 * @returns {SentenceLength[]} Array with all the sentences considered to be too long.
 	 */
 	getTooLongSentences( sentences ) {
-		return getTooLongSentences( sentences, this._config.recommendedLength );
-	}
-
-	/**
-	 * Get the total amount of sentences that are qualified as being too long.
-	 *
-	 * @param {Array} sentences The sentences to filter through.
-	 * @returns {Number} The amount of sentences that are considered too long.
-	 */
-	countTooLongSentences( sentences ) {
-		return this.getTooLongSentences( sentences ).length;
+		return sentences.filter( sentence => sentence.sentenceLength > this._config.recommendedLength );
 	}
 }
 
diff --git a/packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js b/packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js
deleted file mode 100644
index 088ba70fe39..00000000000
--- a/packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js
+++ /dev/null
@@ -1,13 +0,0 @@
-import { filter } from "lodash";
-
-/**
- * Checks for too long sentences.
- * @param {array} sentences The array with objects containing sentences and their lengths.
- * @param {number} recommendedValue The recommended maximum length of sentence.
- * @returns {array} The array with objects containing too long sentences and their lengths.
- */
-export default function( sentences, recommendedValue ) {
-	return filter( sentences, function( sentence ) {
-		return sentence.sentenceLength > recommendedValue;
-	} );
-}

From 684baabd9e7f577bb7c518b6c221c51c9cd8b242 Mon Sep 17 00:00:00 2001
From: Martijn van der Klis <martijnvanderklis@yoast.com>
Date: Wed, 13 Nov 2024 12:59:38 +0100
Subject: [PATCH 2/2] Adds a temporary output script and (a currently failing)
 unit test

---
 .../spec/fullTextTests/runFullTextTests.js    | 29 +++++++++++++++++++
 .../helpers/sentence/sentencesLengthSpec.js   | 14 +++++++++
 2 files changed, 43 insertions(+)

diff --git a/packages/yoastseo/spec/fullTextTests/runFullTextTests.js b/packages/yoastseo/spec/fullTextTests/runFullTextTests.js
index f43f128c4ce..5bb6cf426dc 100644
--- a/packages/yoastseo/spec/fullTextTests/runFullTextTests.js
+++ b/packages/yoastseo/spec/fullTextTests/runFullTextTests.js
@@ -43,6 +43,7 @@ import { getLanguagesWithWordComplexity } from "../../src/helpers";
 
 // Import test papers.
 import testPapers from "./testTexts";
+import fs from "fs";
 
 testPapers.forEach( function( testPaper ) {
 	// eslint-disable-next-line max-statements
@@ -65,6 +66,34 @@ testPapers.forEach( function( testPaper ) {
 
 		buildTree( paper, researcher );
 
+		/**
+		 * Writes the given contents to the given filename in the temporary directory tmp
+		 * @param {string} filename The name of the file.
+		 * @param {string} content The content of the file.
+		 * @returns {void}
+		 */
+		const writeToTempFile = ( filename, content ) => {
+			// Creates a temporary directory in the current working directory to store the data, if it not yet exists.
+			// (i.e., packages/yoastseo/tmp/ if this function is called from packages/yoastseo/)
+			const dir = "tmp/";
+			if ( ! fs.existsSync( dir ) ) {
+				fs.mkdirSync( dir );
+			}
+
+			// Writes the data to this temporary directory
+			fs.writeFileSync( dir + filename, content );
+		};
+
+		// Collects the results and the header into list of ;-separated rows
+		const sentences = researcher.getResearch( "countSentencesFromText" );
+		const resultLines = sentences.map( sentence => sentence.sentence.trimStart().split( " " )[ 0 ] + ";" + sentence.sentenceLength );
+
+		// Set doExport to true to write the results to a temporary file.
+		const doExport = true;
+		if ( doExport ) {
+			writeToTempFile( testPaper.name + ".csv", resultLines.join( "\n" ) );
+		}
+
 		const expectedResults = testPaper.expectedResults;
 
 		/**
diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js
index 5495caa60f8..aa8761cf6df 100644
--- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js
@@ -33,6 +33,20 @@ describe( "A test to count sentence lengths.", function() {
 		expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "this is a string" );
 	} );
 
+	it( "should return the correct length for sentences containing hyphens", function() {
+		const mockPaper = new Paper(
+			"<p>My know-it-all mother-in-law made a state-of-the-art U-turn.</p>" +
+			"<p>Her ex-husband found that low-key amazing.</p>" );
+		const mockResearcher = new EnglishResearcher( mockPaper );
+		buildTree( mockPaper, mockResearcher );
+
+		const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher );
+
+		expect( sentenceLengths.length ).toEqual( 2 );
+		expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 7 );
+		expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 6 );
+	} );
+
 	it( "should return the sentences and their length for Japanese (so counting characters)", function() {
 		const mockPaper = new Paper( "<p>自然おのずから存在しているもの</p>" +
 			"<p>歩くさわやかな森 <span style='color: red;'> 自然 </span></p>" );