From 0eb3bab6607120f98a621cc1ffe97c30e4f1b6e3 Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Tue, 12 Nov 2024 16:01:18 +0100 Subject: [PATCH 1/2] Converts the sentence length assessment to use the HTML Parser --- .../helpers/sentence/sentencesLengthSpec.js | 57 +++++--- .../researches/countSentencesFromTextSpec.js | 138 ++++++++++++------ .../SentenceLengthInTextAssessmentSpec.js | 129 ++++++++++++---- .../checkForTooLongSentencesSpec.js | 50 ------- .../helpers/sentence/sentencesLength.js | 35 +++-- .../helpers/word/getAllWordsFromTree.js | 27 +++- .../researches/countSentencesFromText.js | 12 +- .../SentenceLengthInTextAssessment.js | 51 +++---- .../assessments/checkForTooLongSentences.js | 13 -- 9 files changed, 290 insertions(+), 222 deletions(-) delete mode 100644 packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js delete mode 100644 packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js index 0b50b8e7b7b..5495caa60f8 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js @@ -1,41 +1,50 @@ import sentencesLength from "../../../../src/languageProcessing/helpers/sentence/sentencesLength"; +import getSentencesFromTree from "../../../../src/languageProcessing/helpers/sentence/getSentencesFromTree"; import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/Researcher"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; import Paper from "../../../../src/values/Paper"; +import buildTree from "../../../specHelpers/parse/buildTree"; describe( "A test to count sentence lengths.", function() { it( "should not return a length for an empty sentence", function() { - const sentences = [ "", "A sentence" ]; - const mockResearcher = new EnglishResearcher( new Paper( "" ) ); + const mockPaper = new Paper( "

A sentence

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); - const lengths = sentencesLength( sentences, mockResearcher ); + const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher ); - expect( lengths ).toEqual( [ - { sentence: "A sentence", sentenceLength: 2 }, - ] ); + expect( sentenceLengths.length ).toEqual( 1 ); + expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 2 ); + expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "A sentence" ); } ); it( "should return the sentences and their length (the HTML tags should not be counted if present)", function() { - const sentences = [ "A good text", "this is a textstring " ]; - const mockResearcher = new EnglishResearcher( new Paper( "" ) ); - - const lengths = sentencesLength( sentences, mockResearcher ); - - expect( lengths ).toEqual( [ - { sentence: "A good text", sentenceLength: 3 }, - { sentence: "this is a textstring ", sentenceLength: 4 }, - ] ); + const mockPaper = new Paper( "

A good text

" + + "

this is a string

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher ); + + expect( sentenceLengths.length ).toEqual( 2 ); + expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 3 ); + expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "A good text" ); + expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 4 ); + expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "this is a string" ); } ); it( "should return the sentences and their length for Japanese (so counting characters)", function() { - const sentences = [ "自然おのずから存在しているもの", "歩くさわやかな森 自然 " ]; - const mockJapaneseResearcher = new JapaneseResearcher( new Paper( "" ) ); - - const lengths = sentencesLength( sentences, mockJapaneseResearcher ); - - expect( lengths ).toEqual( [ - { sentence: "自然おのずから存在しているもの", sentenceLength: 15 }, - { sentence: "歩くさわやかな森 自然 ", sentenceLength: 10 }, - ] ); + const mockPaper = new Paper( "

自然おのずから存在しているもの

" + + "

歩くさわやかな森 自然

" ); + const mockJapaneseResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockJapaneseResearcher ); + + const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockJapaneseResearcher ); + + expect( sentenceLengths.length ).toEqual( 2 ); + expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 15 ); + expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "自然おのずから存在しているもの" ); + expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 10 ); + expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "歩くさわやかな森 自然 " ); } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js b/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js index 79fca670a80..0b65b6c2ae5 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js @@ -1,68 +1,122 @@ -/* eslint-disable capitalized-comments, spaced-comment */ import getSentences from "../../../src/languageProcessing/researches/countSentencesFromText.js"; import Paper from "../../../src/values/Paper"; import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher"; +import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Researcher"; +import buildTree from "../../specHelpers/parse/buildTree"; describe( "counts words in sentences from text", function() { - let paper; - it( "returns sentences with question mark", function() { - paper = new Paper( "Hello. How are you? Bye" ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 1 ); - expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 3 ); - expect( getSentences( paper, new EnglishResearcher() )[ 2 ].sentenceLength ).toBe( 1 ); + const mockPaper = new Paper( "Hello. How are you? Bye" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 1 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 3 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 1 ); } ); it( "returns sentences with exclamation mark", function() { - paper = new Paper( "Hello. How are you! Bye" ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 1 ); - expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 3 ); - expect( getSentences( paper, new EnglishResearcher() )[ 2 ].sentenceLength ).toBe( 1 ); + const mockPaper = new Paper( "Hello. How are you! Bye" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 1 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 3 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 1 ); } ); it( "returns sentences with many spaces", function() { - paper = new Paper( "Hello. How are you! Bye" ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 1 ); - expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 3 ); - expect( getSentences( paper, new EnglishResearcher() )[ 2 ].sentenceLength ).toBe( 1 ); + const mockPaper = new Paper( "Hello. How are you! Bye" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 1 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 3 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 1 ); } ); it( "returns sentences with html-tags, should only count words", function() { - paper = new Paper( "This is a text a bunch of words in an alt-tag" ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 4 ); + const mockPaper = new Paper( "This is a text a bunch of words in an alt-tag" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 4 ); } ); it( "returns sentences with html-tags, should only count words", function() { - paper = new Paper( "This is a text a bunch of words in an alt-tag. Another sentence." ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 4 ); - expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 2 ); + const mockPaper = new Paper( "This is a text a bunch of words in an alt-tag. Another sentence." ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 4 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 2 ); } ); it( "should not count sentences inside elements we want to exclude from the analysis", function() { - paper = new Paper( "This is a text. With some code.. Another sentence." ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 4 ); - expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 2 ); + const mockPaper = new Paper( "This is a text. With some code.. Another sentence." ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 4 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 2 ); } ); - /*it( "returns sentences with question mark in Japanese", function() { - paper = new Paper( "雨が降っている。 いつ終わるの? さようなら" ); - expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 8 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 7 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 2 ].sentenceLength ).toBe( 5 ); + it( "returns sentences with question mark in Japanese", function() { + const mockPaper = new Paper( "雨が降っている。 いつ終わるの? さようなら" ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 8 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 7 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 5 ); } ); it( "returns sentences with exclamation mark", function() { - paper = new Paper( "雨が降っている. いつ終わるの!さようなら" ); - expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 8 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 7 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 2 ].sentenceLength ).toBe( 5 ); + const mockPaper = new Paper( "雨が降っている. いつ終わるの!さようなら" ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 8 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 7 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 5 ); } ); it( "returns sentences with many spaces", function() { - paper = new Paper( "雨が降っている。 いつ終わるの? さようなら" ); - expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 8 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 7 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 2 ].sentenceLength ).toBe( 5 ); + const mockPaper = new Paper( "雨が降っている。 いつ終わるの? さようなら" ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 8 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 7 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 5 ); } ); it( "returns sentences with html-tags, should count characters in Japanese", function() { - paper = new Paper( "いつ終わるの 自分を大事にして下さい" ); - expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 6 ); + const mockPaper = new Paper( "いつ終わるの 自分を大事にして下さい" ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 6 ); } ); it( "returns sentences with html-tags, should count characters in Japanese", function() { - paper = new Paper( "いつ終わるの 自分を大事にして下さい. 春がやってきます。" ); - expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 7 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 9 ); - } );*/ + const mockPaper = new Paper( "いつ終わるの 自分を大事にして下さい. 春がやってきます。" ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 7 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 9 ); + } ); } ); diff --git a/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js index 38b6d5ea83a..ad1cb7b091f 100644 --- a/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js @@ -5,6 +5,7 @@ import Paper from "../../../../src/values/Paper.js"; import Mark from "../../../../src/values/Mark.js"; import addMark from "../../../../src/markers/addMark"; import Factory from "../../../../src/helpers/factory.js"; +import buildTree from "../../../specHelpers/parse/buildTree"; import DefaultResearcher from "../../../../src/languageProcessing/languages/_default/Researcher"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; @@ -12,19 +13,21 @@ import PolishResearcher from "../../../../src/languageProcessing/languages/pl/Re import RussianResearcher from "../../../../src/languageProcessing/languages/ru/Researcher"; import ItalianResearcher from "../../../../src/languageProcessing/languages/it/Researcher"; import TurkishResearcher from "../../../../src/languageProcessing/languages/tr/Researcher"; +import japaneseConfig from "../../../../src/languageProcessing/languages/ja/config/sentenceLength"; const shortSentenceDefault = "Word ".repeat( 18 ) + "word. "; const longSentenceDefault = "Word ".repeat( 20 ) + "word. "; const shortSentence15WordsLimit = "Word ".repeat( 13 ) + "word. "; const longSentence15WordsLimit = "Word ".repeat( 15 ) + "word. "; -import japaneseConfig from "../../../../src/languageProcessing/languages/ja/config/sentenceLength"; - // eslint-disable-next-line max-statements describe( "An assessment for sentence length", function() { it( "returns the score for all short sentences using the default config", function() { const mockPaper = new Paper( shortSentenceDefault ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 9 ); @@ -34,7 +37,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 50% long sentences using the default config", function() { const mockPaper = new Paper( shortSentenceDefault + longSentenceDefault ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -46,7 +52,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 100% long sentences using the default config", function() { const mockPaper = new Paper( longSentenceDefault ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -58,7 +67,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 25% long sentences using the default config", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault + shortSentenceDefault + shortSentenceDefault ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 9 ); @@ -68,7 +80,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 30% long sentences using the default config", function() { const mockPaper = new Paper( longSentenceDefault.repeat( 3 ) + shortSentenceDefault.repeat( 7 ) ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -80,7 +95,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 100% long sentences in a language that overrides the default recommended length config", function() { const mockPaper = new Paper( longSentence15WordsLimit ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new RussianResearcher( mockPaper ) ); + const mockResearcher = new RussianResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -90,8 +108,15 @@ describe( "An assessment for sentence length", function() { expect( assessment.hasMarks() ).toBe( true ); expect( new SentenceLengthInTextAssessment().getMarks( mockPaper, new RussianResearcher( mockPaper ) ) ).toEqual( [ new Mark( { - original: "Word Word Word Word Word Word Word Word Word Word Word Word Word Word Word word.", - marked: addMark( "Word Word Word Word Word Word Word Word Word Word Word Word Word Word Word word." ), + position: { + startOffset: 0, + endOffset: 81, + startOffsetBlock: 0, + endOffsetBlock: 81, + clientId: "", + attributeId: "", + isFirstSection: false, + }, } ), ] ); } ); @@ -173,7 +198,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 20% long sentences in a language that overrides the default config" + " for maximum allowed percentage of long sentences", function() { const mockPaper = new Paper( longSentenceDefault.repeat( 4 ) + shortSentenceDefault.repeat( 16 ) ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new PolishResearcher( mockPaper ) ); + const mockResearcher = new PolishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -186,7 +214,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 25% long sentences in a language that overrides the default config for both recommended " + "maximum sentence length, and the maximum allowed percentage of long sentences", function() { const mockPaper = new Paper( longSentence15WordsLimit + shortSentence15WordsLimit.repeat( 3 ) ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new TurkishResearcher( mockPaper ) ); + const mockResearcher = new TurkishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -237,9 +268,12 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 100% long sentences in a language that should count sentence length in characters (Japanese)", function() { const mockPaper = new Paper( "" ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, Factory.buildMockResearcher( [ + const mockResearcher = Factory.buildMockResearcher( [ { sentence: "", sentenceLength: 41 }, - ], false, false, japaneseConfig ) ); + ], false, false, japaneseConfig ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -398,10 +432,13 @@ describe( "An assessment for sentence length for cornerstone content", function( it( "returns the score for 25% long sentences in a language that overrides the default cornerstone configuration", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) ); + const mockResearcher = new PolishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 20, farTooMany: 25, - }, true ).getResult( mockPaper, new PolishResearcher( mockPaper ) ); + }, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -428,10 +465,13 @@ describe( "An assessment for sentence length for cornerstone content", function( it( "returns the score for 25% long sentences using the default cornerstone configuration", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 20, farTooMany: 25, - }, true ).getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + }, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -445,10 +485,13 @@ describe( "An assessment for sentence length for cornerstone content", function( describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 100% short sentences in English using the product page configuration", function() { const mockPaper = new Paper( shortSentenceDefault ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 20, farTooMany: 25, - }, false, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, false, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 9 ); @@ -458,10 +501,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 100% long sentences in English using the product page configuration", function() { const mockPaper = new Paper( longSentenceDefault ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 20, farTooMany: 25, - }, false, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, false, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -473,10 +519,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 25% long sentences in English using the product page configuration", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 20, farTooMany: 25, - }, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -488,10 +537,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 100% short sentences in English using the cornerstone product page configuration", function() { const mockPaper = new Paper( shortSentenceDefault ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 15, farTooMany: 20, - }, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, true, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 9 ); @@ -501,10 +553,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 100% long sentences in English using the cornerstone product page configuration", function() { const mockPaper = new Paper( longSentenceDefault ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 15, farTooMany: 20, - }, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, true, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -516,10 +571,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 20% long sentences in English using the cornerstone product page configuration", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 4 ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 15, farTooMany: 20, - }, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, true, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -531,10 +589,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 25% long sentences in English using the cornerstone product page configuration", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 15, farTooMany: 20, - }, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, true, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -547,15 +608,23 @@ describe( "An assessment for sentence length for product pages", function() { describe( "A test for marking too long sentences", function() { it( "returns markers for too long sentences", function() { - const paper = new Paper( "This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think?" ); - const sentenceLengthInText = Factory.buildMockResearcher( [ { sentence: "This is a too long sentence, because it has over twenty" + - " words, and that is hard too read, don't you think?", sentenceLength: 21 } ] ); + const mockPaper = new Paper( "This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think?" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const expected = [ - new Mark( { original: "This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think?", - marked: "This is a too long sentence, because it has over twenty words, and that is hard too" + - " read, don't you think?" } ), + new Mark( { + position: { + startOffset: 0, + endOffset: 106, + startOffsetBlock: 0, + endOffsetBlock: 106, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ]; - expect( new SentenceLengthInTextAssessment().getMarks( paper, sentenceLengthInText ) ).toEqual( expected ); + expect( new SentenceLengthInTextAssessment().getMarks( mockPaper, mockResearcher ) ).toEqual( expected ); } ); it( "returns no markers if no sentences are too long", function() { diff --git a/packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js b/packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js deleted file mode 100644 index e6bb9eff4b1..00000000000 --- a/packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js +++ /dev/null @@ -1,50 +0,0 @@ -import checkForTooLongSentences from "../../../../src/scoring/helpers/assessments/checkForTooLongSentences.js"; - -describe( "Checks if sentences are too long", function() { - it( "Returns no sentences, none are too long", function() { - var sentences = - [ - { sentence: "", sentenceLength: 32 }, - { sentence: "", sentenceLength: 64 }, - { sentence: "", sentenceLength: 128 }, - ]; - var recommendedValue = 256; - expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( [ ] ); - } ); - it( "Returns all sentences, all are too long", function() { - var sentences = - [ - { sentence: "", sentenceLength: 32 }, - { sentence: "", sentenceLength: 64 }, - { sentence: "", sentenceLength: 128 }, - ]; - var recommendedValue = 16; - expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( sentences ); - } ); - it( "Returns 2 sentences that exceed the recommended value", function() { - var sentences = - [ - { sentence: "", sentenceLength: 32 }, - { sentence: "", sentenceLength: 64 }, - { sentence: "", sentenceLength: 128 }, - { sentence: "", sentenceLength: 256 }, - ]; - var recommendedValue = 96; - var expectedOutput = - [ - { sentence: "", sentenceLength: 128 }, - { sentence: "", sentenceLength: 256 }, - ]; - expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( expectedOutput ); - } ); - it( "Returns no sentences, since they are the exact allowed length.", function() { - var sentences = - [ - { sentence: "", sentenceLength: 64 }, - { sentence: "", sentenceLength: 64 }, - { sentence: "", sentenceLength: 64 }, - ]; - var recommendedValue = 64; - expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( [ ] ); - } ); -} ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js index 382d94e5411..e6a33d33bd8 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js @@ -1,31 +1,30 @@ -import wordCount from "../word/countWords.js"; -import { forEach } from "lodash"; -import { stripFullTags as stripHTMLTags } from "../sanitize/stripHTMLTags.js"; +import { getWordsFromTokens } from "../word/getAllWordsFromTree"; + +/** + * @typedef {Object} SentenceLength + * @property {Sentence} sentence The sentence. + * @property {number} sentenceLength The length of the sentence. + */ /** * Returns an array with the length of each sentence. * - * @param {Array} sentences Array with sentences from text. + * @param {Sentence[]} sentences Array with sentences from text. * @param {Researcher} researcher The researcher to use for analysis. * - * @returns {Array} Array with the length of each sentence. + * @returns {SentenceLength[]} Array with the length of each sentence. */ export default function( sentences, researcher ) { const sentencesWordCount = []; - forEach( sentences, function( sentence ) { - // For counting words we want to omit the HTMLtags. - const strippedSentence = stripHTMLTags( sentence ); - // A helper to count characters for languages that don't count number of words for text length. - const countCharacters = researcher.getHelper( "customCountLength" ); - const length = countCharacters ? countCharacters( strippedSentence ) : wordCount( strippedSentence ); - if ( length <= 0 ) { - return; + sentences.forEach( sentence => { + const customLengthHelper = researcher.getHelper( "customCountLength" ); + const length = customLengthHelper ? customLengthHelper( sentence.text ) : getWordsFromTokens( sentence.tokens ).length; + if ( length > 0 ) { + sentencesWordCount.push( { + sentence: sentence, + sentenceLength: length, + } ); } - - sentencesWordCount.push( { - sentence: sentence, - sentenceLength: length, - } ); } ); return sentencesWordCount; } diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js index 7266af95299..b0f3429a1e2 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js @@ -2,22 +2,33 @@ import getSentencesFromTree from "../sentence/getSentencesFromTree"; import { flatMap } from "lodash"; import removePunctuation from "../sanitize/removePunctuation"; +/** + * Gets the words from the tokens. + * + * @param {Token[]} tokens The tokens to get the words from. + * + * @returns {string[]} Array of words retrieved from the tokens. + */ +export function getWordsFromTokens( tokens ) { + // Retrieve all texts from the tokens. + let words = tokens.map( token => token.text ); + // Remove punctuation and spaces. + words = words.map( token => removePunctuation( token ) ); + // Filter out empty tokens. + return words.filter( word => word.trim() !== "" ); +} + /** * Gets the words from the tree, i.e. from the paragraph and heading nodes. * These two node types are the nodes that should contain words for the analysis. * * @param {Paper} paper The paper to get the tree and words from. * - * @returns {String[]} Array of words retrieved from the tree. + * @returns {string[]} Array of words retrieved from the tree. */ export default function( paper ) { const sentences = getSentencesFromTree( paper ); // Get all the tokens from each sentence. - const tokens = sentences.map( sentence => sentence.tokens ); - let words = flatMap( tokens ).map( token => token.text ); - // Remove punctuation and spaces. - words = words.map( token => removePunctuation( token ) ); - - // Filter out empty tokens. - return words.filter( word => word.trim() !== "" ); + const tokens = flatMap( sentences.map( sentence => sentence.tokens ) ); + return getWordsFromTokens( tokens ); } diff --git a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js index 25932359050..78f87ee31ee 100644 --- a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js +++ b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js @@ -1,19 +1,13 @@ -import getSentences from "../helpers/sentence/getSentences"; import sentencesLength from "../helpers/sentence/sentencesLength.js"; -import removeHtmlBlocks from "../helpers/html/htmlParser"; -import { filterShortcodesFromHTML } from "../helpers"; +import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; /** * Count sentences in the text. * @param {Paper} paper The Paper object to get text from. * @param {Researcher} researcher The researcher to use for analysis. - * @returns {Array} The sentences from the text. + * @returns {SentenceLength[]} The sentences from the text. */ export default function( paper, researcher ) { - const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); - let text = paper.getText(); - text = removeHtmlBlocks( text ); - text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); - const sentences = getSentences( text, memoizedTokenizer ); + const sentences = getSentencesFromTree( paper ); return sentencesLength( sentences, researcher ); } diff --git a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js index 155bbce76eb..9c08b32f382 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js @@ -1,13 +1,10 @@ import { __, sprintf } from "@wordpress/i18n"; -import { map, merge } from "lodash"; +import { merge } from "lodash"; import Assessment from "../assessment"; -import getTooLongSentences from "../../helpers/assessments/checkForTooLongSentences"; import formatNumber from "../../../helpers/formatNumber"; import { inRangeEndInclusive as inRange } from "../../helpers/assessments/inRange"; -import addMark from "../../../markers/addMark"; -import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; -import { stripIncompleteTags as stripTags } from "../../../languageProcessing/helpers/sanitize/stripHTMLTags"; +import { createAnchorOpeningTag } from "../../../helpers"; import AssessmentResult from "../../../values/AssessmentResult"; import Mark from "../../../values/Mark"; @@ -70,7 +67,7 @@ class SentenceLengthInTextAssessment extends Assessment { assessmentResult.setScore( score ); assessmentResult.setText( this.translateScore( score, percentage ) ); - assessmentResult.setHasMarks( ( percentage > 0 ) ); + assessmentResult.setHasMarks( percentage > 0 ); return assessmentResult; } @@ -99,13 +96,22 @@ class SentenceLengthInTextAssessment extends Assessment { if ( researcher.getConfig( "sentenceLength" ) ) { this._config = this.getLanguageSpecificConfig( researcher ); } - const sentenceObjects = this.getTooLongSentences( sentenceCount ); + const tooLongSentences = this.getTooLongSentences( sentenceCount ); - return map( sentenceObjects, function( sentenceObject ) { - const sentence = stripTags( sentenceObject.sentence ); + return tooLongSentences.map( tooLongSentence => { + const sentence = tooLongSentence.sentence; + const startOffset = sentence.sourceCodeRange.startOffset; + const endOffset = sentence.sourceCodeRange.endOffset; return new Mark( { - original: sentence, - marked: addMark( sentence ), + position: { + startOffset, + endOffset, + startOffsetBlock: startOffset - ( sentence.parentStartOffset || 0 ), + endOffsetBlock: endOffset - ( sentence.parentStartOffset || 0 ), + clientId: sentence.parentClientId || "", + attributeId: sentence.parentAttributeId || "", + isFirstSection: sentence.isParentFirstSectionOfBlock || false, + }, } ); } ); } @@ -179,14 +185,14 @@ class SentenceLengthInTextAssessment extends Assessment { /** * Calculates the percentage of sentences that are too long. * - * @param {Array} sentences The sentences to calculate the percentage for. + * @param {SentenceLength[]} sentences The sentences to calculate the percentage for. * @returns {number} The calculates percentage of too long sentences. */ calculatePercentage( sentences ) { let percentage = 0; if ( sentences.length !== 0 ) { - const tooLongTotal = this.countTooLongSentences( sentences ); + const tooLongTotal = this.getTooLongSentences( sentences ).length; percentage = formatNumber( ( tooLongTotal / sentences.length ) * 100 ); } @@ -222,23 +228,12 @@ class SentenceLengthInTextAssessment extends Assessment { } /** - * Gets the sentences that are qualified as being too long. - * - * @param {array} sentences The sentences to filter through. - * @returns {array} Array with all the sentences considered to be too long. + * Returns the sentences that are qualified as being too long. + * @param {SentenceLength[]} sentences The sentences to filter. + * @returns {SentenceLength[]} Array with all the sentences considered to be too long. */ getTooLongSentences( sentences ) { - return getTooLongSentences( sentences, this._config.recommendedLength ); - } - - /** - * Get the total amount of sentences that are qualified as being too long. - * - * @param {Array} sentences The sentences to filter through. - * @returns {Number} The amount of sentences that are considered too long. - */ - countTooLongSentences( sentences ) { - return this.getTooLongSentences( sentences ).length; + return sentences.filter( sentence => sentence.sentenceLength > this._config.recommendedLength ); } } diff --git a/packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js b/packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js deleted file mode 100644 index 088ba70fe39..00000000000 --- a/packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js +++ /dev/null @@ -1,13 +0,0 @@ -import { filter } from "lodash"; - -/** - * Checks for too long sentences. - * @param {array} sentences The array with objects containing sentences and their lengths. - * @param {number} recommendedValue The recommended maximum length of sentence. - * @returns {array} The array with objects containing too long sentences and their lengths. - */ -export default function( sentences, recommendedValue ) { - return filter( sentences, function( sentence ) { - return sentence.sentenceLength > recommendedValue; - } ); -} From 684baabd9e7f577bb7c518b6c221c51c9cd8b242 Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Wed, 13 Nov 2024 12:59:38 +0100 Subject: [PATCH 2/2] Adds a temporary output script and (a currently failing) unit test --- .../spec/fullTextTests/runFullTextTests.js | 29 +++++++++++++++++++ .../helpers/sentence/sentencesLengthSpec.js | 14 +++++++++ 2 files changed, 43 insertions(+) diff --git a/packages/yoastseo/spec/fullTextTests/runFullTextTests.js b/packages/yoastseo/spec/fullTextTests/runFullTextTests.js index f43f128c4ce..5bb6cf426dc 100644 --- a/packages/yoastseo/spec/fullTextTests/runFullTextTests.js +++ b/packages/yoastseo/spec/fullTextTests/runFullTextTests.js @@ -43,6 +43,7 @@ import { getLanguagesWithWordComplexity } from "../../src/helpers"; // Import test papers. import testPapers from "./testTexts"; +import fs from "fs"; testPapers.forEach( function( testPaper ) { // eslint-disable-next-line max-statements @@ -65,6 +66,34 @@ testPapers.forEach( function( testPaper ) { buildTree( paper, researcher ); + /** + * Writes the given contents to the given filename in the temporary directory tmp + * @param {string} filename The name of the file. + * @param {string} content The content of the file. + * @returns {void} + */ + const writeToTempFile = ( filename, content ) => { + // Creates a temporary directory in the current working directory to store the data, if it not yet exists. + // (i.e., packages/yoastseo/tmp/ if this function is called from packages/yoastseo/) + const dir = "tmp/"; + if ( ! fs.existsSync( dir ) ) { + fs.mkdirSync( dir ); + } + + // Writes the data to this temporary directory + fs.writeFileSync( dir + filename, content ); + }; + + // Collects the results and the header into list of ;-separated rows + const sentences = researcher.getResearch( "countSentencesFromText" ); + const resultLines = sentences.map( sentence => sentence.sentence.trimStart().split( " " )[ 0 ] + ";" + sentence.sentenceLength ); + + // Set doExport to true to write the results to a temporary file. + const doExport = true; + if ( doExport ) { + writeToTempFile( testPaper.name + ".csv", resultLines.join( "\n" ) ); + } + const expectedResults = testPaper.expectedResults; /** diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js index 5495caa60f8..aa8761cf6df 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js @@ -33,6 +33,20 @@ describe( "A test to count sentence lengths.", function() { expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "this is a string" ); } ); + it( "should return the correct length for sentences containing hyphens", function() { + const mockPaper = new Paper( + "

My know-it-all mother-in-law made a state-of-the-art U-turn.

" + + "

Her ex-husband found that low-key amazing.

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher ); + + expect( sentenceLengths.length ).toEqual( 2 ); + expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 7 ); + expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 6 ); + } ); + it( "should return the sentences and their length for Japanese (so counting characters)", function() { const mockPaper = new Paper( "

自然おのずから存在しているもの

" + "

歩くさわやかな森 自然

" );