From 0eb3bab6607120f98a621cc1ffe97c30e4f1b6e3 Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Tue, 12 Nov 2024 16:01:18 +0100 Subject: [PATCH 01/17] Converts the sentence length assessment to use the HTML Parser --- .../helpers/sentence/sentencesLengthSpec.js | 57 +++++--- .../researches/countSentencesFromTextSpec.js | 138 ++++++++++++------ .../SentenceLengthInTextAssessmentSpec.js | 129 ++++++++++++---- .../checkForTooLongSentencesSpec.js | 50 ------- .../helpers/sentence/sentencesLength.js | 35 +++-- .../helpers/word/getAllWordsFromTree.js | 27 +++- .../researches/countSentencesFromText.js | 12 +- .../SentenceLengthInTextAssessment.js | 51 +++---- .../assessments/checkForTooLongSentences.js | 13 -- 9 files changed, 290 insertions(+), 222 deletions(-) delete mode 100644 packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js delete mode 100644 packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js index 0b50b8e7b7b..5495caa60f8 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js @@ -1,41 +1,50 @@ import sentencesLength from "../../../../src/languageProcessing/helpers/sentence/sentencesLength"; +import getSentencesFromTree from "../../../../src/languageProcessing/helpers/sentence/getSentencesFromTree"; import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/Researcher"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; import Paper from "../../../../src/values/Paper"; +import buildTree from "../../../specHelpers/parse/buildTree"; describe( "A test to count sentence lengths.", function() { it( "should not return a length for an empty 
sentence", function() { - const sentences = [ "", "A sentence" ]; - const mockResearcher = new EnglishResearcher( new Paper( "" ) ); + const mockPaper = new Paper( "

A sentence

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); - const lengths = sentencesLength( sentences, mockResearcher ); + const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher ); - expect( lengths ).toEqual( [ - { sentence: "A sentence", sentenceLength: 2 }, - ] ); + expect( sentenceLengths.length ).toEqual( 1 ); + expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 2 ); + expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "A sentence" ); } ); it( "should return the sentences and their length (the HTML tags should not be counted if present)", function() { - const sentences = [ "A good text", "this is a textstring " ]; - const mockResearcher = new EnglishResearcher( new Paper( "" ) ); - - const lengths = sentencesLength( sentences, mockResearcher ); - - expect( lengths ).toEqual( [ - { sentence: "A good text", sentenceLength: 3 }, - { sentence: "this is a textstring ", sentenceLength: 4 }, - ] ); + const mockPaper = new Paper( "

A good text

" + + "

this is a string

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher ); + + expect( sentenceLengths.length ).toEqual( 2 ); + expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 3 ); + expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "A good text" ); + expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 4 ); + expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "this is a string" ); } ); it( "should return the sentences and their length for Japanese (so counting characters)", function() { - const sentences = [ "自然おのずから存在しているもの", "歩くさわやかな森 自然 " ]; - const mockJapaneseResearcher = new JapaneseResearcher( new Paper( "" ) ); - - const lengths = sentencesLength( sentences, mockJapaneseResearcher ); - - expect( lengths ).toEqual( [ - { sentence: "自然おのずから存在しているもの", sentenceLength: 15 }, - { sentence: "歩くさわやかな森 自然 ", sentenceLength: 10 }, - ] ); + const mockPaper = new Paper( "

自然おのずから存在しているもの

" + + "

歩くさわやかな森 自然

" ); + const mockJapaneseResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockJapaneseResearcher ); + + const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockJapaneseResearcher ); + + expect( sentenceLengths.length ).toEqual( 2 ); + expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 15 ); + expect( sentenceLengths[ 0 ].sentence.text ).toEqual( "自然おのずから存在しているもの" ); + expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 10 ); + expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "歩くさわやかな森 自然 " ); } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js b/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js index 79fca670a80..0b65b6c2ae5 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/countSentencesFromTextSpec.js @@ -1,68 +1,122 @@ -/* eslint-disable capitalized-comments, spaced-comment */ import getSentences from "../../../src/languageProcessing/researches/countSentencesFromText.js"; import Paper from "../../../src/values/Paper"; import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher"; +import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Researcher"; +import buildTree from "../../specHelpers/parse/buildTree"; describe( "counts words in sentences from text", function() { - let paper; - it( "returns sentences with question mark", function() { - paper = new Paper( "Hello. How are you? Bye" ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 1 ); - expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 3 ); - expect( getSentences( paper, new EnglishResearcher() )[ 2 ].sentenceLength ).toBe( 1 ); + const mockPaper = new Paper( "Hello. How are you? 
Bye" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 1 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 3 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 1 ); } ); it( "returns sentences with exclamation mark", function() { - paper = new Paper( "Hello. How are you! Bye" ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 1 ); - expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 3 ); - expect( getSentences( paper, new EnglishResearcher() )[ 2 ].sentenceLength ).toBe( 1 ); + const mockPaper = new Paper( "Hello. How are you! Bye" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 1 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 3 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 1 ); } ); it( "returns sentences with many spaces", function() { - paper = new Paper( "Hello. How are you! Bye" ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 1 ); - expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 3 ); - expect( getSentences( paper, new EnglishResearcher() )[ 2 ].sentenceLength ).toBe( 1 ); + const mockPaper = new Paper( "Hello. How are you! 
Bye" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 1 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 3 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 1 ); } ); it( "returns sentences with html-tags, should only count words", function() { - paper = new Paper( "This is a text a bunch of words in an alt-tag" ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 4 ); + const mockPaper = new Paper( "This is a text a bunch of words in an alt-tag" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 4 ); } ); it( "returns sentences with html-tags, should only count words", function() { - paper = new Paper( "This is a text a bunch of words in an alt-tag. Another sentence." ); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 4 ); - expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 2 ); + const mockPaper = new Paper( "This is a text a bunch of words in an alt-tag. Another sentence." ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 4 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 2 ); } ); it( "should not count sentences inside elements we want to exclude from the analysis", function() { - paper = new Paper( "This is a text. With some code.. Another sentence." 
); - expect( getSentences( paper, new EnglishResearcher() )[ 0 ].sentenceLength ).toBe( 4 ); - expect( getSentences( paper, new EnglishResearcher() )[ 1 ].sentenceLength ).toBe( 2 ); + const mockPaper = new Paper( "This is a text. With some code.. Another sentence." ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 4 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 2 ); } ); - /*it( "returns sentences with question mark in Japanese", function() { - paper = new Paper( "雨が降っている。 いつ終わるの? さようなら" ); - expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 8 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 7 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 2 ].sentenceLength ).toBe( 5 ); + it( "returns sentences with question mark in Japanese", function() { + const mockPaper = new Paper( "雨が降っている。 いつ終わるの? さようなら" ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 8 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 7 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 5 ); } ); it( "returns sentences with exclamation mark", function() { - paper = new Paper( "雨が降っている. いつ終わるの!さようなら" ); - expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 8 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 7 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 2 ].sentenceLength ).toBe( 5 ); + const mockPaper = new Paper( "雨が降っている. 
いつ終わるの!さようなら" ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 8 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 7 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 5 ); } ); it( "returns sentences with many spaces", function() { - paper = new Paper( "雨が降っている。 いつ終わるの? さようなら" ); - expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 8 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 7 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 2 ].sentenceLength ).toBe( 5 ); + const mockPaper = new Paper( "雨が降っている。 いつ終わるの? さようなら" ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 8 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 7 ); + expect( sentences[ 2 ].sentenceLength ).toBe( 5 ); } ); it( "returns sentences with html-tags, should count characters in Japanese", function() { - paper = new Paper( "いつ終わるの 自分を大事にして下さい" ); - expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 6 ); + const mockPaper = new Paper( "いつ終わるの 自分を大事にして下さい" ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 6 ); } ); it( "returns sentences with html-tags, should count characters in Japanese", function() { - paper = new Paper( "いつ終わるの 自分を大事にして下さい. 
春がやってきます。" ); - expect( getSentences( paper, new JapaneseResearcher() )[ 0 ].sentenceLength ).toBe( 7 ); - expect( getSentences( paper, new JapaneseResearcher() )[ 1 ].sentenceLength ).toBe( 9 ); - } );*/ + const mockPaper = new Paper( "いつ終わるの 自分を大事にして下さい. 春がやってきます。" ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentences = getSentences( mockPaper, mockResearcher ); + + expect( sentences[ 0 ].sentenceLength ).toBe( 7 ); + expect( sentences[ 1 ].sentenceLength ).toBe( 9 ); + } ); } ); diff --git a/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js index 38b6d5ea83a..ad1cb7b091f 100644 --- a/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js @@ -5,6 +5,7 @@ import Paper from "../../../../src/values/Paper.js"; import Mark from "../../../../src/values/Mark.js"; import addMark from "../../../../src/markers/addMark"; import Factory from "../../../../src/helpers/factory.js"; +import buildTree from "../../../specHelpers/parse/buildTree"; import DefaultResearcher from "../../../../src/languageProcessing/languages/_default/Researcher"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; @@ -12,19 +13,21 @@ import PolishResearcher from "../../../../src/languageProcessing/languages/pl/Re import RussianResearcher from "../../../../src/languageProcessing/languages/ru/Researcher"; import ItalianResearcher from "../../../../src/languageProcessing/languages/it/Researcher"; import TurkishResearcher from "../../../../src/languageProcessing/languages/tr/Researcher"; +import japaneseConfig from "../../../../src/languageProcessing/languages/ja/config/sentenceLength"; const shortSentenceDefault = "Word 
".repeat( 18 ) + "word. "; const longSentenceDefault = "Word ".repeat( 20 ) + "word. "; const shortSentence15WordsLimit = "Word ".repeat( 13 ) + "word. "; const longSentence15WordsLimit = "Word ".repeat( 15 ) + "word. "; -import japaneseConfig from "../../../../src/languageProcessing/languages/ja/config/sentenceLength"; - // eslint-disable-next-line max-statements describe( "An assessment for sentence length", function() { it( "returns the score for all short sentences using the default config", function() { const mockPaper = new Paper( shortSentenceDefault ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 9 ); @@ -34,7 +37,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 50% long sentences using the default config", function() { const mockPaper = new Paper( shortSentenceDefault + longSentenceDefault ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -46,7 +52,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 100% long sentences using the default config", function() { const mockPaper = new Paper( longSentenceDefault ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + const mockResearcher = new 
DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -58,7 +67,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 25% long sentences using the default config", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault + shortSentenceDefault + shortSentenceDefault ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 9 ); @@ -68,7 +80,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 30% long sentences using the default config", function() { const mockPaper = new Paper( longSentenceDefault.repeat( 3 ) + shortSentenceDefault.repeat( 7 ) ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -80,7 +95,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 100% long sentences in a language that overrides the default recommended length config", function() { const mockPaper = new Paper( longSentence15WordsLimit ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new 
RussianResearcher( mockPaper ) ); + const mockResearcher = new RussianResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -90,8 +108,15 @@ describe( "An assessment for sentence length", function() { expect( assessment.hasMarks() ).toBe( true ); expect( new SentenceLengthInTextAssessment().getMarks( mockPaper, new RussianResearcher( mockPaper ) ) ).toEqual( [ new Mark( { - original: "Word Word Word Word Word Word Word Word Word Word Word Word Word Word Word word.", - marked: addMark( "Word Word Word Word Word Word Word Word Word Word Word Word Word Word Word word." ), + position: { + startOffset: 0, + endOffset: 81, + startOffsetBlock: 0, + endOffsetBlock: 81, + clientId: "", + attributeId: "", + isFirstSection: false, + }, } ), ] ); } ); @@ -173,7 +198,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 20% long sentences in a language that overrides the default config" + " for maximum allowed percentage of long sentences", function() { const mockPaper = new Paper( longSentenceDefault.repeat( 4 ) + shortSentenceDefault.repeat( 16 ) ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new PolishResearcher( mockPaper ) ); + const mockResearcher = new PolishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -186,7 +214,10 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 25% long sentences in a language that overrides the default config for both recommended " + "maximum sentence length, and the maximum allowed percentage of long sentences", 
function() { const mockPaper = new Paper( longSentence15WordsLimit + shortSentence15WordsLimit.repeat( 3 ) ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, new TurkishResearcher( mockPaper ) ); + const mockResearcher = new TurkishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -237,9 +268,12 @@ describe( "An assessment for sentence length", function() { it( "returns the score for 100% long sentences in a language that should count sentence length in characters (Japanese)", function() { const mockPaper = new Paper( "" ); - const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, Factory.buildMockResearcher( [ + const mockResearcher = Factory.buildMockResearcher( [ { sentence: "", sentenceLength: 41 }, - ], false, false, japaneseConfig ) ); + ], false, false, japaneseConfig ); + buildTree( mockPaper, mockResearcher ); + + const assessment = new SentenceLengthInTextAssessment().getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -398,10 +432,13 @@ describe( "An assessment for sentence length for cornerstone content", function( it( "returns the score for 25% long sentences in a language that overrides the default cornerstone configuration", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) ); + const mockResearcher = new PolishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 20, farTooMany: 25, - }, true ).getResult( mockPaper, new PolishResearcher( mockPaper ) ); + }, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( 
assessment.getScore() ).toEqual( 3 ); @@ -428,10 +465,13 @@ describe( "An assessment for sentence length for cornerstone content", function( it( "returns the score for 25% long sentences using the default cornerstone configuration", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 20, farTooMany: 25, - }, true ).getResult( mockPaper, new DefaultResearcher( mockPaper ) ); + }, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -445,10 +485,13 @@ describe( "An assessment for sentence length for cornerstone content", function( describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 100% short sentences in English using the product page configuration", function() { const mockPaper = new Paper( shortSentenceDefault ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 20, farTooMany: 25, - }, false, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, false, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 9 ); @@ -458,10 +501,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 100% long sentences in English using the product page configuration", function() { const mockPaper = new Paper( longSentenceDefault ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 20, farTooMany: 25, - }, false, true 
).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, false, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -473,10 +519,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 25% long sentences in English using the product page configuration", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 20, farTooMany: 25, - }, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -488,10 +537,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 100% short sentences in English using the cornerstone product page configuration", function() { const mockPaper = new Paper( shortSentenceDefault ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 15, farTooMany: 20, - }, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, true, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 9 ); @@ -501,10 +553,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 100% long sentences in English using the cornerstone product page configuration", function() { const mockPaper = new Paper( longSentenceDefault ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, 
mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 15, farTooMany: 20, - }, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, true, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -516,10 +571,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 20% long sentences in English using the cornerstone product page configuration", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 4 ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 15, farTooMany: 20, - }, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, true, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 6 ); @@ -531,10 +589,13 @@ describe( "An assessment for sentence length for product pages", function() { it( "returns the score for 25% long sentences in English using the cornerstone product page configuration", function() { const mockPaper = new Paper( longSentenceDefault + shortSentenceDefault.repeat( 3 ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = new SentenceLengthInTextAssessment( { slightlyTooMany: 15, farTooMany: 20, - }, true, true ).getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + }, true, true ).getResult( mockPaper, mockResearcher ); expect( assessment.hasScore() ).toBe( true ); expect( assessment.getScore() ).toEqual( 3 ); @@ -547,15 +608,23 @@ describe( "An assessment for sentence length for product pages", function() { describe( "A test for marking too long sentences", function() { it( "returns markers 
for too long sentences", function() { - const paper = new Paper( "This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think?" ); - const sentenceLengthInText = Factory.buildMockResearcher( [ { sentence: "This is a too long sentence, because it has over twenty" + - " words, and that is hard too read, don't you think?", sentenceLength: 21 } ] ); + const mockPaper = new Paper( "This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think?" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const expected = [ - new Mark( { original: "This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think?", - marked: "This is a too long sentence, because it has over twenty words, and that is hard too" + - " read, don't you think?" } ), + new Mark( { + position: { + startOffset: 0, + endOffset: 106, + startOffsetBlock: 0, + endOffsetBlock: 106, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ]; - expect( new SentenceLengthInTextAssessment().getMarks( paper, sentenceLengthInText ) ).toEqual( expected ); + expect( new SentenceLengthInTextAssessment().getMarks( mockPaper, mockResearcher ) ).toEqual( expected ); } ); it( "returns no markers if no sentences are too long", function() { diff --git a/packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js b/packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js deleted file mode 100644 index e6bb9eff4b1..00000000000 --- a/packages/yoastseo/spec/scoring/helpers/assessments/checkForTooLongSentencesSpec.js +++ /dev/null @@ -1,50 +0,0 @@ -import checkForTooLongSentences from "../../../../src/scoring/helpers/assessments/checkForTooLongSentences.js"; - -describe( "Checks if sentences are too long", function() { - it( "Returns no sentences, none are too long", function() { - var 
sentences = - [ - { sentence: "", sentenceLength: 32 }, - { sentence: "", sentenceLength: 64 }, - { sentence: "", sentenceLength: 128 }, - ]; - var recommendedValue = 256; - expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( [ ] ); - } ); - it( "Returns all sentences, all are too long", function() { - var sentences = - [ - { sentence: "", sentenceLength: 32 }, - { sentence: "", sentenceLength: 64 }, - { sentence: "", sentenceLength: 128 }, - ]; - var recommendedValue = 16; - expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( sentences ); - } ); - it( "Returns 2 sentences that exceed the recommended value", function() { - var sentences = - [ - { sentence: "", sentenceLength: 32 }, - { sentence: "", sentenceLength: 64 }, - { sentence: "", sentenceLength: 128 }, - { sentence: "", sentenceLength: 256 }, - ]; - var recommendedValue = 96; - var expectedOutput = - [ - { sentence: "", sentenceLength: 128 }, - { sentence: "", sentenceLength: 256 }, - ]; - expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( expectedOutput ); - } ); - it( "Returns no sentences, since they are the exact allowed length.", function() { - var sentences = - [ - { sentence: "", sentenceLength: 64 }, - { sentence: "", sentenceLength: 64 }, - { sentence: "", sentenceLength: 64 }, - ]; - var recommendedValue = 64; - expect( checkForTooLongSentences( sentences, recommendedValue ) ).toEqual( [ ] ); - } ); -} ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js index 382d94e5411..e6a33d33bd8 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js @@ -1,31 +1,30 @@ -import wordCount from "../word/countWords.js"; -import { forEach } from "lodash"; -import { stripFullTags as stripHTMLTags } from 
"../sanitize/stripHTMLTags.js"; +import { getWordsFromTokens } from "../word/getAllWordsFromTree"; + +/** + * @typedef {Object} SentenceLength + * @property {Sentence} sentence The sentence. + * @property {number} sentenceLength The length of the sentence. + */ /** * Returns an array with the length of each sentence. * - * @param {Array} sentences Array with sentences from text. + * @param {Sentence[]} sentences Array with sentences from text. * @param {Researcher} researcher The researcher to use for analysis. * - * @returns {Array} Array with the length of each sentence. + * @returns {SentenceLength[]} Array with the length of each sentence. */ export default function( sentences, researcher ) { const sentencesWordCount = []; - forEach( sentences, function( sentence ) { - // For counting words we want to omit the HTMLtags. - const strippedSentence = stripHTMLTags( sentence ); - // A helper to count characters for languages that don't count number of words for text length. - const countCharacters = researcher.getHelper( "customCountLength" ); - const length = countCharacters ? countCharacters( strippedSentence ) : wordCount( strippedSentence ); - if ( length <= 0 ) { - return; + sentences.forEach( sentence => { + const customLengthHelper = researcher.getHelper( "customCountLength" ); + const length = customLengthHelper ? 
customLengthHelper( sentence.text ) : getWordsFromTokens( sentence.tokens ).length; + if ( length > 0 ) { + sentencesWordCount.push( { + sentence: sentence, + sentenceLength: length, + } ); } - - sentencesWordCount.push( { - sentence: sentence, - sentenceLength: length, - } ); } ); return sentencesWordCount; } diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js index 7266af95299..b0f3429a1e2 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js @@ -2,22 +2,33 @@ import getSentencesFromTree from "../sentence/getSentencesFromTree"; import { flatMap } from "lodash"; import removePunctuation from "../sanitize/removePunctuation"; +/** + * Gets the words from the tokens. + * + * @param {Token[]} tokens The tokens to get the words from. + * + * @returns {string[]} Array of words retrieved from the tokens. + */ +export function getWordsFromTokens( tokens ) { + // Retrieve all texts from the tokens. + let words = tokens.map( token => token.text ); + // Remove punctuation and spaces. + words = words.map( token => removePunctuation( token ) ); + // Filter out empty tokens. + return words.filter( word => word.trim() !== "" ); +} + /** * Gets the words from the tree, i.e. from the paragraph and heading nodes. * These two node types are the nodes that should contain words for the analysis. * * @param {Paper} paper The paper to get the tree and words from. * - * @returns {String[]} Array of words retrieved from the tree. + * @returns {string[]} Array of words retrieved from the tree. */ export default function( paper ) { const sentences = getSentencesFromTree( paper ); // Get all the tokens from each sentence. 
- const tokens = sentences.map( sentence => sentence.tokens ); - let words = flatMap( tokens ).map( token => token.text ); - // Remove punctuation and spaces. - words = words.map( token => removePunctuation( token ) ); - - // Filter out empty tokens. - return words.filter( word => word.trim() !== "" ); + const tokens = flatMap( sentences.map( sentence => sentence.tokens ) ); + return getWordsFromTokens( tokens ); } diff --git a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js index 25932359050..78f87ee31ee 100644 --- a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js +++ b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js @@ -1,19 +1,13 @@ -import getSentences from "../helpers/sentence/getSentences"; import sentencesLength from "../helpers/sentence/sentencesLength.js"; -import removeHtmlBlocks from "../helpers/html/htmlParser"; -import { filterShortcodesFromHTML } from "../helpers"; +import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; /** * Count sentences in the text. * @param {Paper} paper The Paper object to get text from. * @param {Researcher} researcher The researcher to use for analysis. - * @returns {Array} The sentences from the text. + * @returns {SentenceLength[]} The sentences from the text. 
*/ export default function( paper, researcher ) { - const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); - let text = paper.getText(); - text = removeHtmlBlocks( text ); - text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); - const sentences = getSentences( text, memoizedTokenizer ); + const sentences = getSentencesFromTree( paper ); return sentencesLength( sentences, researcher ); } diff --git a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js index 155bbce76eb..9c08b32f382 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js @@ -1,13 +1,10 @@ import { __, sprintf } from "@wordpress/i18n"; -import { map, merge } from "lodash"; +import { merge } from "lodash"; import Assessment from "../assessment"; -import getTooLongSentences from "../../helpers/assessments/checkForTooLongSentences"; import formatNumber from "../../../helpers/formatNumber"; import { inRangeEndInclusive as inRange } from "../../helpers/assessments/inRange"; -import addMark from "../../../markers/addMark"; -import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; -import { stripIncompleteTags as stripTags } from "../../../languageProcessing/helpers/sanitize/stripHTMLTags"; +import { createAnchorOpeningTag } from "../../../helpers"; import AssessmentResult from "../../../values/AssessmentResult"; import Mark from "../../../values/Mark"; @@ -70,7 +67,7 @@ class SentenceLengthInTextAssessment extends Assessment { assessmentResult.setScore( score ); assessmentResult.setText( this.translateScore( score, percentage ) ); - assessmentResult.setHasMarks( ( percentage > 0 ) ); + assessmentResult.setHasMarks( percentage > 0 ); return assessmentResult; } @@ -99,13 +96,22 @@ 
class SentenceLengthInTextAssessment extends Assessment { if ( researcher.getConfig( "sentenceLength" ) ) { this._config = this.getLanguageSpecificConfig( researcher ); } - const sentenceObjects = this.getTooLongSentences( sentenceCount ); + const tooLongSentences = this.getTooLongSentences( sentenceCount ); - return map( sentenceObjects, function( sentenceObject ) { - const sentence = stripTags( sentenceObject.sentence ); + return tooLongSentences.map( tooLongSentence => { + const sentence = tooLongSentence.sentence; + const startOffset = sentence.sourceCodeRange.startOffset; + const endOffset = sentence.sourceCodeRange.endOffset; return new Mark( { - original: sentence, - marked: addMark( sentence ), + position: { + startOffset, + endOffset, + startOffsetBlock: startOffset - ( sentence.parentStartOffset || 0 ), + endOffsetBlock: endOffset - ( sentence.parentStartOffset || 0 ), + clientId: sentence.parentClientId || "", + attributeId: sentence.parentAttributeId || "", + isFirstSection: sentence.isParentFirstSectionOfBlock || false, + }, } ); } ); } @@ -179,14 +185,14 @@ class SentenceLengthInTextAssessment extends Assessment { /** * Calculates the percentage of sentences that are too long. * - * @param {Array} sentences The sentences to calculate the percentage for. + * @param {SentenceLength[]} sentences The sentences to calculate the percentage for. * @returns {number} The calculates percentage of too long sentences. */ calculatePercentage( sentences ) { let percentage = 0; if ( sentences.length !== 0 ) { - const tooLongTotal = this.countTooLongSentences( sentences ); + const tooLongTotal = this.getTooLongSentences( sentences ).length; percentage = formatNumber( ( tooLongTotal / sentences.length ) * 100 ); } @@ -222,23 +228,12 @@ class SentenceLengthInTextAssessment extends Assessment { } /** - * Gets the sentences that are qualified as being too long. - * - * @param {array} sentences The sentences to filter through. 
- * @returns {array} Array with all the sentences considered to be too long. + * Returns the sentences that are qualified as being too long. + * @param {SentenceLength[]} sentences The sentences to filter. + * @returns {SentenceLength[]} Array with all the sentences considered to be too long. */ getTooLongSentences( sentences ) { - return getTooLongSentences( sentences, this._config.recommendedLength ); - } - - /** - * Get the total amount of sentences that are qualified as being too long. - * - * @param {Array} sentences The sentences to filter through. - * @returns {Number} The amount of sentences that are considered too long. - */ - countTooLongSentences( sentences ) { - return this.getTooLongSentences( sentences ).length; + return sentences.filter( sentence => sentence.sentenceLength > this._config.recommendedLength ); } } diff --git a/packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js b/packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js deleted file mode 100644 index 088ba70fe39..00000000000 --- a/packages/yoastseo/src/scoring/helpers/assessments/checkForTooLongSentences.js +++ /dev/null @@ -1,13 +0,0 @@ -import { filter } from "lodash"; - -/** - * Checks for too long sentences. - * @param {array} sentences The array with objects containing sentences and their lengths. - * @param {number} recommendedValue The recommended maximum length of sentence. - * @returns {array} The array with objects containing too long sentences and their lengths. 
- */ -export default function( sentences, recommendedValue ) { - return filter( sentences, function( sentence ) { - return sentence.sentenceLength > recommendedValue; - } ); -} From 684baabd9e7f577bb7c518b6c221c51c9cd8b242 Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Wed, 13 Nov 2024 12:59:38 +0100 Subject: [PATCH 02/17] Adds a temporary output script and (a currently failing) unit test --- .../spec/fullTextTests/runFullTextTests.js | 29 +++++++++++++++++++ .../helpers/sentence/sentencesLengthSpec.js | 14 +++++++++ 2 files changed, 43 insertions(+) diff --git a/packages/yoastseo/spec/fullTextTests/runFullTextTests.js b/packages/yoastseo/spec/fullTextTests/runFullTextTests.js index f43f128c4ce..5bb6cf426dc 100644 --- a/packages/yoastseo/spec/fullTextTests/runFullTextTests.js +++ b/packages/yoastseo/spec/fullTextTests/runFullTextTests.js @@ -43,6 +43,7 @@ import { getLanguagesWithWordComplexity } from "../../src/helpers"; // Import test papers. import testPapers from "./testTexts"; +import fs from "fs"; testPapers.forEach( function( testPaper ) { // eslint-disable-next-line max-statements @@ -65,6 +66,34 @@ testPapers.forEach( function( testPaper ) { buildTree( paper, researcher ); + /** + * Writes the given contents to the given filename in the temporary directory tmp + * @param {string} filename The name of the file. + * @param {string} content The content of the file. + * @returns {void} + */ + const writeToTempFile = ( filename, content ) => { + // Creates a temporary directory in the current working directory to store the data, if it does not exist yet. + // (i.e., packages/yoastseo/tmp/ if this function is called from packages/yoastseo/) + const dir = "tmp/"; + if ( ! 
fs.existsSync( dir ) ) { + fs.mkdirSync( dir ); + } + + // Writes the data to this temporary directory + fs.writeFileSync( dir + filename, content ); + }; + + // Collects the results into a list of ;-separated rows + const sentences = researcher.getResearch( "countSentencesFromText" ); + const resultLines = sentences.map( sentence => sentence.sentence.trimStart().split( " " )[ 0 ] + ";" + sentence.sentenceLength ); + + // Set doExport to true to write the results to a temporary file. + const doExport = true; + if ( doExport ) { + writeToTempFile( testPaper.name + ".csv", resultLines.join( "\n" ) ); + } + const expectedResults = testPaper.expectedResults; /** diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js index 5495caa60f8..aa8761cf6df 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js @@ -33,6 +33,20 @@ describe( "A test to count sentence lengths.", function() { expect( sentenceLengths[ 1 ].sentence.text ).toEqual( "this is a string" ); } ); + it( "should return the correct length for sentences containing hyphens", function() { + const mockPaper = new Paper( + "

My know-it-all mother-in-law made a state-of-the-art U-turn.

" + + "

Her ex-husband found that low-key amazing.

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher ); + + expect( sentenceLengths.length ).toEqual( 2 ); + expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 7 ); + expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 6 ); + } ); + it( "should return the sentences and their length for Japanese (so counting characters)", function() { const mockPaper = new Paper( "

自然おのずから存在しているもの

" + "

歩くさわやかな森 自然

" ); From e338b83c2588d333a9d0f1ba284152a196d4e123 Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Thu, 14 Nov 2024 09:00:21 +0100 Subject: [PATCH 03/17] First pass on converting the paragraph length assessment to use the HTML Parser --- .../helpers/html/matchParagraphsSpec.js | 42 ----- .../languages/ar/ResearcherSpec.js | 7 +- .../researches/getParagraphLengthSpec.js | 172 ++++++++++++++---- .../ParagraphTooLongAssessmentSpec.js | 93 +++++----- .../helpers/html/matchParagraphs.js | 62 ------- .../researches/getParagraphLength.js | 52 +++--- .../readability/ParagraphTooLongAssessment.js | 137 ++++++-------- 7 files changed, 272 insertions(+), 293 deletions(-) delete mode 100644 packages/yoastseo/spec/languageProcessing/helpers/html/matchParagraphsSpec.js delete mode 100644 packages/yoastseo/src/languageProcessing/helpers/html/matchParagraphs.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/html/matchParagraphsSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/html/matchParagraphsSpec.js deleted file mode 100644 index 52b14a1b13d..00000000000 --- a/packages/yoastseo/spec/languageProcessing/helpers/html/matchParagraphsSpec.js +++ /dev/null @@ -1,42 +0,0 @@ -import matchParagraphs from "../../../../src/languageProcessing/helpers/html/matchParagraphs.js"; - -describe( "Matches paragraphs in a text", function() { - it( "returns an array of paragraphs in

tags", function() { - const text = "

This is a text in p-tags

This is more text in p-tags

"; - expect( matchParagraphs( text ) ).toContain( "This is a text in p-tags" ); - } ); - - it( "returns an array of paragraphs from double linebreaks", function() { - const text = "This is a text\n\nwith double linebreaks"; - expect( matchParagraphs( text ) ).toContain( "This is a text" ); - expect( matchParagraphs( text ) ).toContain( "with double linebreaks" ); - } ); - - it( "returns the complete text if no paragraphs or linebreaks are found", function() { - const text = "This is a text without any paragraphs"; - expect( matchParagraphs( text ) ).toContain( "This is a text without any paragraphs" ); - } ); - - it( "returns empty string if there is no text", function() { - const text = ""; - expect( matchParagraphs( text ) ).toContain( "" ); - } ); - - it( "splits on headings", function() { - const text = "A piece of text

More piece of text

Another piece of text."; - const expected = [ "A piece of text", "Another piece of text." ]; - - const actual = matchParagraphs( text ); - - expect( actual ).toEqual( expected ); - } ); - - it( "should see
tags as paragraphs", function() { - const text = "A piece of text
More piece of text
Another piece of text."; - const expected = [ "A piece of text", "
More piece of text
", "Another piece of text." ]; - - const actual = matchParagraphs( text ); - - expect( actual ).toEqual( expected ); - } ); -} ); diff --git a/packages/yoastseo/spec/languageProcessing/languages/ar/ResearcherSpec.js b/packages/yoastseo/spec/languageProcessing/languages/ar/ResearcherSpec.js index ada24ba85ff..32310196451 100644 --- a/packages/yoastseo/spec/languageProcessing/languages/ar/ResearcherSpec.js +++ b/packages/yoastseo/spec/languageProcessing/languages/ar/ResearcherSpec.js @@ -1,6 +1,7 @@ import Researcher from "../../../../src/languageProcessing/languages/ar/Researcher.js"; import Paper from "../../../../src/values/Paper.js"; import getMorphologyData from "../../../specHelpers/getMorphologyData"; +import buildTree from "../../../specHelpers/parse/buildTree"; import functionWords from "../../../../src/languageProcessing/languages/ar/config/functionWords"; import transitionWords from "../../../../src/languageProcessing/languages/ar/config/transitionWords"; import firstWordExceptions from "../../../../src/languageProcessing/languages/ar/config/firstWordExceptions"; @@ -9,10 +10,12 @@ import twoPartTransitionWords from "../../../../src/languageProcessing/languages const morphologyDataAR = getMorphologyData( "ar" ); describe( "a test for Arabic Researcher", function() { - const researcher = new Researcher( new Paper( "This is another paper!" ) ); + const paper = new Paper( "This is another paper!" 
); + const researcher = new Researcher( paper ); + buildTree( paper, researcher ); it( "checks if the Arabic Researcher still inherit the Abstract Researcher", function() { - expect( researcher.getResearch( "getParagraphLength" ) ).toEqual( [ { text: "This is another paper!", countLength: 4 } ] ); + expect( researcher.getResearch( "getParagraphLength" )[ 0 ].paragraphLength ).toEqual( 4 ); } ); it( "returns false if the default research is deleted in Arabic Researcher", function() { diff --git a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js index 858b85c4259..af6d54b607d 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js @@ -2,109 +2,204 @@ import getParagraphLength from "../../../src/languageProcessing/researches/getPa import Paper from "../../../src/values/Paper.js"; import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Researcher.js"; import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher.js"; +import buildTree from "../../specHelpers/parse/buildTree"; describe( "a test for getting paragraph length", function() { it( "returns the paragraph length of a paragraph between p tags", function() { const mockPaper = new Paper( "

Lorem ipsum

" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 2 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 2 ); } ); it( "returns the paragraph length of a paragraph in Japanese between p tags", function() { const mockPaper = new Paper( "

これに対し日本国有鉄道

" ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 11 ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 11 ); } ); it( "returns the paragraph length of two paragraphs divided by double linebreaks and ends with a double linebreak", function() { const mockPaper = new Paper( "Lorem \n\n ipsum two \n\n" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 1 ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 1 ].countLength ).toBe( 2 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 1 ); + expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 2 ); } ); it( "returns the paragraph length of two paragraphs in Japanese divided by double linebreaks and ends with a double linebreak", function() { const mockPaper = new Paper( "1964年 \n\n (昭和39年) \n\n" ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 5 ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 1 ].countLength ).toBe( 7 ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 ); + expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 7 ); } ); it( "returns the paragraph length of two paragraphs divided by double linebreaks that don't end with a double linebreak", function() { const mockPaper = new Paper( "Lorem \n\n ipsum two" ); - expect( getParagraphLength( mockPaper, new 
EnglishResearcher() )[ 0 ].countLength ).toBe( 1 ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 1 ].countLength ).toBe( 2 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 1 ); + expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 2 ); } ); it( "returns the paragraph length of two paragraphs in Japanese divided by double linebreaks that don't end with a double linebreak", function() { const mockPaper = new Paper( "1964年 \n\n (昭和39年)" ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 5 ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 1 ].countLength ).toBe( 7 ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 ); + expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 7 ); } ); it( "returns the paragraph length of a paragraph without tags or double linebreaks", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 5 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 ); } ); it( "returns the paragraph length of a paragraph in Japanese without tags or double linebreaks", function() { const mockPaper = new Paper( "東京オリンピック開会直前の1964年(昭和39年)10月1日に開業した。" ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 36 ); + const mockResearcher = new JapaneseResearcher( 
mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 36 ); } ); it( "returns the paragraph length of 2 paragraphs, both between p tags", function() { const mockPaper = new Paper( "

Lorem ipsum

dolor sit amet

" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 2 ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 1 ].countLength ).toBe( 3 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 2 ); + expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 3 ); } ); it( "returns the paragraph length of 2 paragraphs in Japanese, both between p tags", function() { const mockPaper = new Paper( "

東京オリンピック開会直前の1964年

(昭和39年)10月1日に開業した。

" ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 18 ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 1 ].countLength ).toBe( 18 ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 18 ); + expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 18 ); } ); it( "returns the paragraph length of 2 paragraphs, both between p tags, divided by double linebreaks", function() { const mockPaper = new Paper( "

Lorem ipsum

\n\n

dolor sit amet

" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 2 ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 1 ].countLength ).toBe( 3 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 2 ); + expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 3 ); } ); it( "returns the paragraph length of 2 paragraphs in Japanese, both between p tags, divided by double linebreaks", function() { const mockPaper = new Paper( "

東京オリンピック開会直前の1964年

\n\n

(昭和39年)10月1日に開業した。

" ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 18 ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 1 ].countLength ).toBe( 18 ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 18 ); + expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 18 ); } ); it( "returns the paragraph length, with empty paragraphs", function() { const mockPaper = new Paper( "

test

more text

" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 2 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths.length ).toBe( 2 ); } ); it( "returns the paragraph length, ignoring text inside an element we want to exclude from the analysis", function() { const mockPaper = new Paper( "

test ignore me

" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths.length ).toBe( 1 ); } ); it( "returns the paragraph length, ignoring shortcodes", function() { const mockPaper = new Paper( "

test [shortcode]

", { shortcodes: [ "shortcode" ] } ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths.length ).toBe( 1 ); } ); it( "returns the paragraph length of paragraph without p tags or double linebreaks, but with h2 tags", function() { const mockPaper = new Paper( "

Lorem ipsum dolor sit amet

" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 5 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 ); } ); it( "returns the paragraph length of paragraph in Japanese without p tags or double linebreaks, but with h2 tags", function() { const mockPaper = new Paper( "

(昭和39年)10月1日に開業した。

" ); - expect( getParagraphLength( mockPaper, new JapaneseResearcher() )[ 0 ].countLength ).toBe( 18 ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 18 ); } ); xit( "returns the paragraph length of an empty paragraph with p tags", function() { const mockPaper = new Paper( "

" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() ).countLength ).not.toContain( 0 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths.paragraphLength ).not.toContain( 0 ); } ); xit( "returns the paragraph length of an empty paragraph without p tags or double line breaks", function() { const mockPaper = new Paper( "" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher() ).countLength ).not.toContain( 0 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths.paragraphLength ).not.toContain( 0 ); } ); } ); @@ -112,17 +207,32 @@ describe( "a test for getting paragraph length of a text with image(s)", () => { it( "should not count a paragraph containing only an image", function() { // The paper contains 3 paragraphs: 2 paragraphs with text and one paragraph with only an image. const mockPaper = new Paper( "

test

more text

" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher( mockPaper ) ).length ).toBe( 2 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths.length ).toBe( 2 ); } ); it( "should return 0 for paragraphs count when all paragraphs only contain images", function() { const mockPaper = new Paper( "

" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher( mockPaper ) ).length ).toBe( 0 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths.length ).toBe( 0 ); } ); it( "should not include the image in the paragraph length calculation", function() { const mockPaper = new Paper( "

test

test

" ); - expect( getParagraphLength( mockPaper, new EnglishResearcher( mockPaper ) ).length ).toBe( 2 ); - expect( getParagraphLength( mockPaper, new EnglishResearcher( mockPaper ) )[ 0 ].countLength ).toBe( 1 ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); + + expect( paragraphLengths.length ).toBe( 2 ); + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 1 ); } ); } ); diff --git a/packages/yoastseo/spec/scoring/assessments/readability/ParagraphTooLongAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/ParagraphTooLongAssessmentSpec.js index 3b8144c45c8..753bca7a923 100644 --- a/packages/yoastseo/spec/scoring/assessments/readability/ParagraphTooLongAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/readability/ParagraphTooLongAssessmentSpec.js @@ -5,6 +5,7 @@ import Factory from "../../../../src/helpers/factory.js"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/Researcher"; import paragraphLengthJapanese from "../../../../src/languageProcessing/languages/ja/config/paragraphLength"; +import buildTree from "../../../specHelpers/parse/buildTree"; const paragraphTooLongAssessment = new ParagraphTooLongAssessment(); const shortTextJapanese = "は".repeat( 300 ); @@ -14,14 +15,14 @@ const veryLongTextJapanese = "は".repeat( 410 ); describe( "An assessment for scoring too long paragraphs.", function() { const paper = new Paper( "" ); it( "should score 1 paragraph with ok length", function() { - const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { countLength: 60, text: "" } ] ) ); + const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { paragraphLength: 60 } ] ) ); expect( 
assessment.getScore() ).toBe( 9 ); expect( assessment.getText() ).toBe( "Paragraph length: None of the paragraphs" + " are too long. Great job!" ); expect( assessment.hasMarks() ).toBe( false ); } ); it( "should score 1 slightly too long paragraph", function() { - const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { countLength: 160, text: "" } ] ) ); + const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { paragraphLength: 160 } ] ) ); expect( assessment.getScore() ).toBe( 6 ); expect( assessment.getText() ).toBe( "Paragraph length: 1 of the paragraphs" + " contains more than the recommended maximum of 150 words." + @@ -29,7 +30,7 @@ describe( "An assessment for scoring too long paragraphs.", function() { expect( assessment.hasMarks() ).toBe( true ); } ); it( "should score 1 extremely long paragraph", function() { - const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { countLength: 6000, text: "" } ] ) ); + const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { paragraphLength: 6000 } ] ) ); expect( assessment.getScore() ).toBe( 3 ); expect( assessment.getText() ).toBe( "Paragraph length: 1 of the paragraphs" + " contains more than the recommended maximum of 150 words." 
+ @@ -37,16 +38,16 @@ describe( "An assessment for scoring too long paragraphs.", function() { expect( assessment.hasMarks() ).toBe( true ); } ); it( "should score 3 paragraphs with ok length", function() { - const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { countLength: 60, text: "" }, - { countLength: 71, text: "" }, { countLength: 83, text: "" } ] ) ); + const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { paragraphLength: 60 }, + { paragraphLength: 71 }, { paragraphLength: 83 } ] ) ); expect( assessment.getScore() ).toBe( 9 ); expect( assessment.getText() ).toBe( "Paragraph length: None of the paragraphs" + " are too long. Great job!" ); expect( assessment.hasMarks() ).toBe( false ); } ); it( "should score 3 paragraphs, one of which is too long", function() { - const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { countLength: 60, text: "" }, - { countLength: 71, text: "" }, { countLength: 183, text: "" } ] ) ); + const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { paragraphLength: 60 }, + { paragraphLength: 71 }, { paragraphLength: 183 } ] ) ); expect( assessment.getScore() ).toBe( 6 ); expect( assessment.getText() ).toBe( "Paragraph length: 1 of the paragraphs" + " contains more than the recommended maximum of 150 words." 
+ @@ -54,8 +55,8 @@ describe( "An assessment for scoring too long paragraphs.", function() { expect( assessment.hasMarks() ).toBe( true ); } ); it( "should score 3 paragraphs, two of which are too long", function() { - const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { countLength: 60, text: "" }, - { countLength: 191, text: "" }, { countLength: 183, text: "" } ] ) ); + const assessment = paragraphTooLongAssessment.getResult( paper, Factory.buildMockResearcher( [ { paragraphLength: 60 }, + { paragraphLength: 191 }, { paragraphLength: 183 } ] ) ); expect( assessment.getScore() ).toBe( 6 ); expect( assessment.getText() ).toBe( "Paragraph length: 2 of the paragraphs" + " contain more than the recommended maximum of 150 words." + @@ -73,8 +74,11 @@ describe( "An assessment for scoring too long paragraphs in Japanese in which ch "in the calculation instead of word length", function() { it( "should score 1 slightly too long paragraph", function() { const paper = new Paper( longTextJapanese ); + const japaneseResearcher = new JapaneseResearcher( paper ); + buildTree( paper, japaneseResearcher ); + + const assessment = paragraphTooLongAssessment.getResult( paper, japaneseResearcher ); - const assessment = paragraphTooLongAssessment.getResult( paper, new JapaneseResearcher( paper ) ); expect( assessment.getScore() ).toBe( 6 ); expect( assessment.getText() ).toBe( "Paragraph length: 1 of the paragraphs" + " contains more than the recommended maximum of 300 characters." 
+ @@ -83,8 +87,11 @@ describe( "An assessment for scoring too long paragraphs in Japanese in which ch } ); it( "should score 1 too long paragraph", function() { const paper = new Paper( veryLongTextJapanese ); + const japaneseResearcher = new JapaneseResearcher( paper ); + buildTree( paper, japaneseResearcher ); + + const assessment = paragraphTooLongAssessment.getResult( paper, japaneseResearcher ); - const assessment = paragraphTooLongAssessment.getResult( paper, new JapaneseResearcher( paper ) ); expect( assessment.getScore() ).toBe( 3 ); expect( assessment.getText() ).toBe( "Paragraph length: 1 of the paragraphs" + " contains more than the recommended maximum of 300 characters." + @@ -93,7 +100,11 @@ describe( "An assessment for scoring too long paragraphs in Japanese in which ch } ); it( "should score 2 slightly too long paragraphs", function() { const paper = new Paper( `${shortTextJapanese}

${longTextJapanese}

${longTextJapanese}

` ); - const assessment = paragraphTooLongAssessment.getResult( paper, new JapaneseResearcher( paper ) ); + const japaneseResearcher = new JapaneseResearcher( paper ); + buildTree( paper, japaneseResearcher ); + + const assessment = paragraphTooLongAssessment.getResult( paper, japaneseResearcher ); + expect( assessment.getScore() ).toBe( 6 ); expect( assessment.getText() ).toBe( "Paragraph length: 2 of the paragraphs" + " contain more than the recommended maximum of 300 characters." + @@ -310,8 +321,8 @@ describe( "A test for marking the sentences", function() { } ); it( "should return no markers when no paragraph is too long", function() { const paper = new Paper( "This is a very interesting paper." ); - const paragraphTooLong = Factory.buildMockResearcher( [ { countLength: 60, text: "" }, { countLength: 11, text: "" }, - { countLength: 13, text: "" } ] ); + const paragraphTooLong = Factory.buildMockResearcher( [ { paragraphLength: 60 }, { paragraphLength: 11 }, + { paragraphLength: 13 } ] ); const expected = []; expect( paragraphTooLongAssessment.getMarks( paper, paragraphTooLong ) ).toEqual( expected ); } ); @@ -327,9 +338,9 @@ describe( "test for paragraph too long assessment when is used in product page a }, }; const result = new ParagraphTooLongAssessment( config ).getResult( paper, Factory.buildMockResearcher( [ - { countLength: 60, text: "" }, - { countLength: 11, text: "" }, - { countLength: 13, text: "" }, + { paragraphLength: 60 }, + { paragraphLength: 11 }, + { paragraphLength: 13 }, ] ) ); expect( result.getScore() ).toEqual( 9 ); expect( result.getText() ).toEqual( "Paragraph length: " + @@ -344,9 +355,9 @@ describe( "test for paragraph too long assessment when is used in product page a }, }; const result = new ParagraphTooLongAssessment( config ).getResult( paper, Factory.buildMockResearcher( [ - { countLength: 110, text: "" }, - { countLength: 150, text: "" }, - { countLength: 150, text: "" }, + { paragraphLength: 110 }, + { paragraphLength: 150 }, 
+ { paragraphLength: 150 }, ] ) ); expect( result.getScore() ).toEqual( 3 ); expect( result.getText() ).toEqual( "Paragraph length: 3 of the paragraphs contain" + @@ -361,9 +372,9 @@ describe( "test for paragraph too long assessment when is used in product page a }, }; const result = new ParagraphTooLongAssessment( config ).getResult( paper, Factory.buildMockResearcher( [ - { countLength: 90, text: "" }, - { countLength: 75, text: "" }, - { countLength: 80, text: "" }, + { paragraphLength: 90 }, + { paragraphLength: 75 }, + { paragraphLength: 80 }, ] ) ); expect( result.getScore() ).toEqual( 6 ); expect( result.getText() ).toEqual( "Paragraph length: 3 of the paragraphs contain " + @@ -377,9 +388,9 @@ describe( "test for paragraph too long assessment for languages that have langua it( "should assess a paper with paragraphs that contain less than 300 characters (green bullet)", function() { const paper = new Paper( "" ); const mockResearcher = Factory.buildMockResearcher( [ - { countLength: 200, text: "" }, - { countLength: 260, text: "" }, - { countLength: 100, text: "" }, + { paragraphLength: 200 }, + { paragraphLength: 260 }, + { paragraphLength: 100 }, ], false, false, @@ -393,9 +404,9 @@ describe( "test for paragraph too long assessment for languages that have langua it( "should assess a paper with two paragraphs that contain more than 400 characters (red bullet)", function() { const paper = new Paper( "" ); const mockResearcher = Factory.buildMockResearcher( [ - { countLength: 400, text: "" }, - { countLength: 300, text: "" }, - { countLength: 500, text: "" }, + { paragraphLength: 400 }, + { paragraphLength: 300 }, + { paragraphLength: 500 }, ], false, false, @@ -409,9 +420,9 @@ describe( "test for paragraph too long assessment for languages that have langua it( "should assess a paper with paragraphs that contain 300-400 characters (orange bullet)", function() { const paper = new Paper( "" ); const mockResearcher = Factory.buildMockResearcher( [ - { 
countLength: 350, text: "" }, - { countLength: 300, text: "" }, - { countLength: 390, text: "" }, + { paragraphLength: 350 }, + { paragraphLength: 300 }, + { paragraphLength: 390 }, ], false, false, @@ -427,9 +438,9 @@ describe( "test for paragraph too long assessment for languages that have langua it( "should assess a paper with paragraphs that contain less than 140 characters (green bullet)", function() { const paper = new Paper( "" ); const mockResearcher = Factory.buildMockResearcher( [ - { countLength: 100, text: "" }, - { countLength: 120, text: "" }, - { countLength: 90, text: "" }, + { paragraphLength: 100 }, + { paragraphLength: 120 }, + { paragraphLength: 90 }, ], false, false, @@ -443,9 +454,9 @@ describe( "test for paragraph too long assessment for languages that have langua it( "should assess a paper with three paragraphs that contain more than 200 characters (red bullet)", function() { const paper = new Paper( "" ); const mockResearcher = Factory.buildMockResearcher( [ - { countLength: 400, text: "" }, - { countLength: 300, text: "" }, - { countLength: 500, text: "" }, + { paragraphLength: 400 }, + { paragraphLength: 300 }, + { paragraphLength: 500 }, ], false, false, @@ -459,9 +470,9 @@ describe( "test for paragraph too long assessment for languages that have langua it( "should assess a paper with all paragraphs that contain 140-200 characters (orange bullet)", function() { const paper = new Paper( "" ); const mockResearcher = Factory.buildMockResearcher( [ - { countLength: 150, text: "" }, - { countLength: 170, text: "" }, - { countLength: 200, text: "" }, + { paragraphLength: 150 }, + { paragraphLength: 170 }, + { paragraphLength: 200 }, ], false, false, diff --git a/packages/yoastseo/src/languageProcessing/helpers/html/matchParagraphs.js b/packages/yoastseo/src/languageProcessing/helpers/html/matchParagraphs.js deleted file mode 100644 index 08c7ef8ac39..00000000000 --- a/packages/yoastseo/src/languageProcessing/helpers/html/matchParagraphs.js +++ 
/dev/null @@ -1,62 +0,0 @@ -import { filter, flatMap, map } from "lodash"; - -import { getBlocks } from "./html"; - -/** - * Matches the paragraphs in

-tags and returns the text in them. - * - * @param {string} text The text to match paragraph in. - * - * @returns {array} An array containing all paragraphs texts. - */ -const getParagraphsInTags = function( text ) { - let paragraphs = []; - // Matches everything between the

and

tags. - const regex = /]+)?>(.*?)<\/p>/ig; - let match; - - while ( ( match = regex.exec( text ) ) !== null ) { - paragraphs.push( match ); - } - - // Returns only the text from within the paragraph tags. - paragraphs = map( paragraphs, function( paragraph ) { - return paragraph[ 1 ]; - } ); - - return paragraphs.filter( paragraph => paragraph.length > 0 ); -}; - -/** - * Returns an array with all paragraphs from the text. - * - * @param {string} text The text to match paragraph in. - * - * @returns {Array} The array containing all paragraphs from the text. - */ -export default function( text ) { - let paragraphs = getParagraphsInTags( text ); - - if ( paragraphs.length > 0 ) { - return paragraphs; - } - - // If no

tags found, split on double linebreaks. - let blocks = getBlocks( text ); - - blocks = filter( blocks, function( block ) { - // Match explicit paragraph tags, or if a block has no HTML tags. - return 0 !== block.indexOf( " 0 ) { - return paragraphs; - } - - // If no paragraphs are found, return an array containing the entire text. - return [ text ]; -} diff --git a/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js b/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js index 353d71a5648..b018bbeb1ea 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js +++ b/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js @@ -1,10 +1,10 @@ -import { imageRegex } from "../helpers/image/imageInText"; -import sanitizeLineBreakTag from "../helpers/sanitize/sanitizeLineBreakTag"; -import countWords from "../helpers/word/countWords.js"; -import matchParagraphs from "../helpers/html/matchParagraphs.js"; -import { filter } from "lodash"; -import removeHtmlBlocks from "../helpers/html/htmlParser"; -import { filterShortcodesFromHTML } from "../helpers"; +import { getWordsFromTokens } from "../helpers/word/getAllWordsFromTree"; + +/** + * @typedef {Object} ParagraphLength + * @property {Paragraph} paragraph The paragraph. + * @property {number} paragraphLength The length of the paragraph. + */ /** * Gets all paragraphs and their word counts or character counts from the text. @@ -12,33 +12,23 @@ import { filterShortcodesFromHTML } from "../helpers"; * @param {Paper} paper The paper object to get the text from. * @param {Researcher} researcher The researcher to use for analysis. * - * @returns {Array} The array containing an object with the paragraph word or character count and paragraph text. + * @returns {ParagraphLength[]} The array containing an object with the paragraph word or character count and paragraph text. 
*/ export default function( paper, researcher ) { - let text = paper.getText(); - text = removeHtmlBlocks( text ); - text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); - - // Remove images from text before retrieving the paragraphs. - // This step is done here so that applying highlight in captions is possible for ParagraphTooLongAssessment. - text = text.replace( imageRegex, "" ); + const paragraphs = paper.getTree().findAll( node => node.name === "p" ); + const paragraphLengths = []; - // Replace line break tags containing attribute(s) with paragraph tag. - text = sanitizeLineBreakTag( text ); - const paragraphs = matchParagraphs( text ); - const paragraphsLength = []; - - // An optional custom helper to count length to use instead of countWords. - const customCountLength = researcher.getHelper( "customCountLength" ); - - paragraphs.map( function( paragraph ) { - paragraphsLength.push( { - countLength: customCountLength ? customCountLength( paragraph ) : countWords( paragraph ), - text: paragraph, - } ); + paragraphs.forEach( paragraph => { + const customLengthHelper = researcher.getHelper( "customCountLength" ); + const tokens = paragraph.sentences.map( sentence => sentence.tokens ).flat(); + const length = customLengthHelper ? 
customLengthHelper( paragraph.innerText() ) : getWordsFromTokens( tokens ).length; + if ( length > 0 ) { + paragraphLengths.push( { + paragraph: paragraph, + paragraphLength: length, + } ); + } } ); - return filter( paragraphsLength, function( paragraphLength ) { - return ( paragraphLength.countLength > 0 ); - } ); + return paragraphLengths; } diff --git a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js index 8db546553ed..e2d2f14f1a2 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js @@ -1,24 +1,23 @@ import { __, _n, sprintf } from "@wordpress/i18n"; -import { filter, map, merge } from "lodash"; +import { map, merge } from "lodash"; + import { stripBlockTagsAtStartEnd as stripHTMLTags } from "../../../languageProcessing/helpers/sanitize/stripHTMLTags"; import marker from "../../../markers/addMark"; -import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; +import { createAnchorOpeningTag } from "../../../helpers"; import { inRangeEndInclusive as inRange } from "../../helpers/assessments/inRange"; import AssessmentResult from "../../../values/AssessmentResult"; import Mark from "../../../values/Mark"; import Assessment from "../assessment"; /** - * Represents the assessment that will look if the text has too long paragraphs. + * Represents the assessment that will look if the Paper contains paragraphs that are considered too long. */ export default class ParagraphTooLongAssessment extends Assessment { /** * Sets the identifier and the config. - * + * @constructor * @param {object} config The configuration to use. * @param {boolean} isProduct Whether product configuration should be used. 
- * - * @returns {void} */ constructor( config = {}, isProduct = false ) { super(); @@ -41,16 +40,13 @@ export default class ParagraphTooLongAssessment extends Assessment { /** * Returns an array containing only the paragraphs longer than the recommended length. * - * @param {array} paragraphsLength The array containing the lengths of individual paragraphs. - * @param {object} config The config to use. + * @param {ParagraphLength[]} paragraphsLength The array containing the lengths of individual paragraphs. + * @param {object} config The config to use. * - * @returns {array} The number of too long paragraphs. + * @returns {ParagraphLength[]} An array containing too long paragraphs. */ getTooLongParagraphs( paragraphsLength, config ) { - const recommendedLength = config.parameters.recommendedLength; - return filter( paragraphsLength, function( paragraph ) { - return paragraph.countLength > recommendedLength; - } ); + return paragraphsLength.filter( paragraph => paragraph.paragraphLength > config.parameters.recommendedLength ); } /** @@ -79,21 +75,21 @@ export default class ParagraphTooLongAssessment extends Assessment { /** * Returns the scores and text for the ParagraphTooLongAssessment. * - * @param {array} paragraphsLength The array containing the lengths of individual paragraphs. - * @param {array} tooLongParagraphs The number of too long paragraphs. - * @param {object} config The config to use. + * @param {ParagraphLength[]} paragraphsLength The array containing the lengths of individual paragraphs. + * @param {object} config The config to use. * - * @returns {{score: number, text: string }} The assessmentResult. + * @returns {AssessmentResult} The assessmentResult. 
*/ - calculateResult( paragraphsLength, tooLongParagraphs, config ) { - let score; + calculateResult( paragraphsLength, config ) { + const assessmentResult = new AssessmentResult(); if ( paragraphsLength.length === 0 ) { - return {}; + return assessmentResult; } - const longestParagraphLength = paragraphsLength[ 0 ].countLength; - + paragraphsLength = paragraphsLength.sort( ( a, b ) => b.paragraphLength - a.paragraphLength ); + const longestParagraphLength = paragraphsLength[ 0 ].paragraphLength; + let score; if ( longestParagraphLength <= config.parameters.recommendedLength ) { // Green indicator. score = 9; @@ -109,67 +105,51 @@ export default class ParagraphTooLongAssessment extends Assessment { score = 3; } + assessmentResult.setScore( score ); + if ( score >= 7 ) { - return { - score: score, - hasMarks: false, - - text: sprintf( - /* translators: %1$s expands to a link on yoast.com, %2$s expands to the anchor end tag */ - __( - "%1$sParagraph length%2$s: None of the paragraphs are too long. Great job!", - "wordpress-seo" - ), - config.urlTitle, - "" + assessmentResult.setHasMarks( false ); + assessmentResult.setText( sprintf( + /* translators: %1$s expands to a link on yoast.com, %2$s expands to the anchor end tag */ + __( + "%1$sParagraph length%2$s: None of the paragraphs are too long. 
Great job!", + "wordpress-seo" ), - }; + config.urlTitle, + "" + ) ); + return assessmentResult; } - return { - score: score, - hasMarks: true, - text: sprintf( - /* translators: %1$s and %5$s expand to a link on yoast.com, %2$s expands to the anchor end tag, + + const tooLongParagraphs = this.getTooLongParagraphs( paragraphsLength, config ); + assessmentResult.setHasMarks( true ); + assessmentResult.setText( sprintf( + /* translators: %1$s and %5$s expand to a link on yoast.com, %2$s expands to the anchor end tag, %3$d expands to the number of paragraphs over the recommended word / character limit, %4$d expands to the word / character limit, %6$s expands to the word 'words' or 'characters'. */ - _n( - "%1$sParagraph length%2$s: %3$d of the paragraphs contains more than the recommended maximum of %4$d %6$s. %5$sShorten your paragraphs%2$s!", - "%1$sParagraph length%2$s: %3$d of the paragraphs contain more than the recommended maximum of %4$d %6$s. %5$sShorten your paragraphs%2$s!", - tooLongParagraphs.length, - "wordpress-seo" - ), - config.urlTitle, - "", + _n( + "%1$sParagraph length%2$s: %3$d of the paragraphs contains more than the recommended maximum of %4$d %6$s. %5$sShorten your paragraphs%2$s!", + "%1$sParagraph length%2$s: %3$d of the paragraphs contain more than the recommended maximum of %4$d %6$s. %5$sShorten your paragraphs%2$s!", tooLongParagraphs.length, - config.parameters.recommendedLength, - config.urlCallToAction, - this._config.countTextIn + "wordpress-seo" ), - }; - } - - /** - * Sort the paragraphs based on word count. - * - * @param {Array} paragraphs The array with paragraphs. - * - * @returns {Array} The array sorted on word counts. 
- */ - sortParagraphs( paragraphs ) { - return paragraphs.sort( - function( a, b ) { - return b.countLength - a.countLength; - } - ); + config.urlTitle, + "", + tooLongParagraphs.length, + config.parameters.recommendedLength, + config.urlCallToAction, + this._config.countTextIn + ) ); + return assessmentResult; } /** * Creates a marker for the paragraphs. * - * @param {object} paper The paper to use for the assessment. - * @param {Researcher} researcher The researcher used for calling research. + * @param {Paper} paper The paper to use for the assessment. + * @param {Researcher} researcher The researcher used for calling research. * - * @returns {Array} An array with marked paragraphs. + * @returns {Mark[]} An array with marked paragraphs. */ getMarks( paper, researcher ) { const paragraphsLength = researcher.getResearch( "getParagraphLength" ); @@ -190,27 +170,16 @@ export default class ParagraphTooLongAssessment extends Assessment { * @param {Paper} paper The paper to use for the assessment. * @param {Researcher} researcher The researcher used for calling research. * - * @returns {object} The assessment result. + * @returns {AssessmentResult} The assessment result. 
*/ getResult( paper, researcher ) { - let paragraphsLength = researcher.getResearch( "getParagraphLength" ); + const paragraphsLength = researcher.getResearch( "getParagraphLength" ); const countTextInCharacters = researcher.getConfig( "countCharacters" ); if ( countTextInCharacters ) { this._config.countTextIn = __( "characters", "wordpress-seo" ); } - paragraphsLength = this.sortParagraphs( paragraphsLength ); - const config = this.getConfig( researcher ); - - const tooLongParagraphs = this.getTooLongParagraphs( paragraphsLength, config ); - const paragraphLengthResult = this.calculateResult( paragraphsLength, tooLongParagraphs, config ); - const assessmentResult = new AssessmentResult(); - - assessmentResult.setScore( paragraphLengthResult.score ); - assessmentResult.setText( paragraphLengthResult.text ); - assessmentResult.setHasMarks( paragraphLengthResult.hasMarks ); - - return assessmentResult; + return this.calculateResult( paragraphsLength, this.getConfig( researcher ) ); } /** From 151132ed58d6564b47f99c2763cf554607468ecf Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 27 Nov 2024 12:51:26 +0100 Subject: [PATCH 04/17] Enable AI button for SentenceLengthInTextAssessment.js and ParagraphTooLongAssessment.js --- .../assessments/readability/ParagraphTooLongAssessment.js | 4 +++- .../assessments/readability/SentenceLengthInTextAssessment.js | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js index e2d2f14f1a2..989c8735370 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js @@ -106,7 +106,9 @@ export default class ParagraphTooLongAssessment extends Assessment { } assessmentResult.setScore( score ); - + if ( score < 9 ) { + 
assessmentResult.setHasAIFixes( true ); + } if ( score >= 7 ) { assessmentResult.setHasMarks( false ); assessmentResult.setText( sprintf( diff --git a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js index 9c08b32f382..882b73a383b 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js @@ -64,6 +64,9 @@ class SentenceLengthInTextAssessment extends Assessment { const score = this.calculateScore( percentage ); const assessmentResult = new AssessmentResult(); + if ( score < 9 ) { + assessmentResult.setHasAIFixes( true ); + } assessmentResult.setScore( score ); assessmentResult.setText( this.translateScore( score, percentage ) ); From a30b72d56d3162058363ebecbcbb28645a525414 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 27 Nov 2024 14:09:36 +0100 Subject: [PATCH 05/17] Return assessmentResult --- .../readability/ParagraphTooLongAssessment.js | 53 +++++++++++-------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js index 76cc962f354..5e5f0b946fd 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js @@ -73,22 +73,15 @@ export default class ParagraphTooLongAssessment extends Assessment { } /** - * Returns the scores and text for the ParagraphTooLongAssessment. - * - * @param {ParagraphLength[]} paragraphsLength The array containing the lengths of individual paragraphs. + * Returns the score for the ParagraphTooLongAssessment. 
+ * @param {array} paragraphsLength The array containing the lengths of individual paragraphs. * @param {object} config The config to use. - * - * @returns {AssessmentResult} The assessmentResult. + * @returns {number} The score. */ - calculateResult( paragraphsLength, config ) { - const assessmentResult = new AssessmentResult(); - - if ( paragraphsLength.length === 0 ) { - return assessmentResult; - } + getScore( paragraphsLength, config ) { + const sortedParagraphsLength = paragraphsLength.sort( ( a, b ) => b.paragraphLength - a.paragraphLength ); - paragraphsLength = paragraphsLength.sort( ( a, b ) => b.paragraphLength - a.paragraphLength ); - const longestParagraphLength = paragraphsLength[ 0 ].paragraphLength; + const longestParagraphLength = sortedParagraphsLength[ 0 ].paragraphLength; let score; if ( longestParagraphLength <= config.parameters.recommendedLength ) { // Green indicator. @@ -104,11 +97,30 @@ export default class ParagraphTooLongAssessment extends Assessment { // Red indicator. score = 3; } + return score; + } - assessmentResult.setScore( score ); - if ( score < 9 ) { - assessmentResult.setHasAIFixes( true ); + /** + * Returns the scores and text for the ParagraphTooLongAssessment. + * + * @param {ParagraphLength[]} paragraphsLength The array containing the lengths of individual paragraphs. + * @param {object} config The config to use. + * + * @returns {AssessmentResult} The assessmentResult. 
+ */ + calculateResult( paragraphsLength, config ) { + const tooLongParagraphs = this.getTooLongParagraphs( paragraphsLength, config ); + + const assessmentResult = new AssessmentResult(); + + if ( paragraphsLength.length === 0 ) { + return assessmentResult; } + + const score = this.getScore( paragraphsLength, config ); + + assessmentResult.setScore( score ); + if ( score >= 7 ) { assessmentResult.setHasMarks( false ); assessmentResult.setText( sprintf( @@ -154,12 +166,11 @@ export default class ParagraphTooLongAssessment extends Assessment { config.parameters.recommendedLength, config.urlCallToAction ); + assessmentResult.setHasMarks( true ); + assessmentResult.setText( config.countCharacters ? characterFeedback : wordFeedback ); + assessmentResult.setHasAIFixes( true ); - return { - score: score, - hasMarks: true, - text: config.countCharacters ? characterFeedback : wordFeedback, - }; + return assessmentResult; } /** From 4b9349cbb92fec1a8e08964c9324706235ed35da Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 27 Nov 2024 14:59:00 +0100 Subject: [PATCH 06/17] Adapt getMarks for ParagraphTooLongAssessment.js --- .../readability/ParagraphTooLongAssessment.js | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js index 5e5f0b946fd..e7d9cbd813a 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js @@ -74,7 +74,7 @@ export default class ParagraphTooLongAssessment extends Assessment { /** * Returns the score for the ParagraphTooLongAssessment. - * @param {array} paragraphsLength The array containing the lengths of individual paragraphs. 
+ * @param {ParagraphLength[]} paragraphsLength The array containing the lengths of individual paragraphs. * @param {object} config The config to use. * @returns {number} The score. */ @@ -199,14 +199,19 @@ export default class ParagraphTooLongAssessment extends Assessment { getMarks( paper, researcher ) { const paragraphsLength = researcher.getResearch( "getParagraphLength" ); const tooLongParagraphs = this.getTooLongParagraphs( paragraphsLength, this.getConfig( researcher ) ); - return map( tooLongParagraphs, function( paragraph ) { - const paragraphText = stripHTMLTags( paragraph.text ); - const marked = marker( paragraphText ); - return new Mark( { - original: paragraphText, - marked: marked, - } ); - } ); + return tooLongParagraphs.flatMap( ( { paragraph } ) => + paragraph.sentences.map( sentence => new Mark( { + position: { + startOffset: sentence.sourceCodeRange.startOffset, + endOffset: sentence.sourceCodeRange.endOffset, + startOffsetBlock: sentence.sourceCodeRange.startOffset - ( sentence.parentStartOffset || 0 ), + endOffsetBlock: sentence.sourceCodeRange.endOffset - ( sentence.parentStartOffset || 0 ), + clientId: sentence.parentClientId || "", + attributeId: sentence.parentAttributeId || "", + isFirstSection: sentence.isParentFirstSectionOfBlock || false, + }, + } ) ) + ); } /** From 500e2cb415188cf78043f2cc64e0f12c6db3c343 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 11 Dec 2024 14:32:30 +0100 Subject: [PATCH 07/17] Fix highlighting for paragraph length assessment --- .../readability/ParagraphTooLongAssessment.js | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js index e7d9cbd813a..ae325e23fbd 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +++ 
b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js @@ -1,8 +1,6 @@ import { __, _n, sprintf } from "@wordpress/i18n"; -import { map, merge } from "lodash"; +import { merge } from "lodash"; -import { stripBlockTagsAtStartEnd as stripHTMLTags } from "../../../languageProcessing/helpers/sanitize/stripHTMLTags"; -import marker from "../../../markers/addMark"; import { createAnchorOpeningTag } from "../../../helpers"; import { inRangeEndInclusive as inRange } from "../../helpers/assessments/inRange"; import AssessmentResult from "../../../values/AssessmentResult"; @@ -199,19 +197,19 @@ export default class ParagraphTooLongAssessment extends Assessment { getMarks( paper, researcher ) { const paragraphsLength = researcher.getResearch( "getParagraphLength" ); const tooLongParagraphs = this.getTooLongParagraphs( paragraphsLength, this.getConfig( researcher ) ); - return tooLongParagraphs.flatMap( ( { paragraph } ) => - paragraph.sentences.map( sentence => new Mark( { + return tooLongParagraphs.flatMap( ( { paragraph } ) => { + return new Mark( { position: { - startOffset: sentence.sourceCodeRange.startOffset, - endOffset: sentence.sourceCodeRange.endOffset, - startOffsetBlock: sentence.sourceCodeRange.startOffset - ( sentence.parentStartOffset || 0 ), - endOffsetBlock: sentence.sourceCodeRange.endOffset - ( sentence.parentStartOffset || 0 ), - clientId: sentence.parentClientId || "", - attributeId: sentence.parentAttributeId || "", - isFirstSection: sentence.isParentFirstSectionOfBlock || false, + startOffset: paragraph.sourceCodeLocation.startTag.endOffset, + endOffset: paragraph.sourceCodeLocation.endTag.startOffset, + startOffsetBlock: 0, + endOffsetBlock: paragraph.sourceCodeLocation.endOffset - paragraph.sourceCodeLocation.startOffset, + clientId: paragraph.clientId || "", + attributeId: paragraph.parentAttributeId || "", + isFirstSection: paragraph.isParentFirstSectionOfBlock || false, }, - } ) ) - ); + } ); + } ); } /** From 
928996843200c185469fb09b2ee05bdb60268cf7 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 11 Dec 2024 14:34:08 +0100 Subject: [PATCH 08/17] Improve JSDoc --- .../languageProcessing/researches/countSentencesFromText.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js index 78f87ee31ee..59ce61e0489 100644 --- a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js +++ b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js @@ -2,9 +2,11 @@ import sentencesLength from "../helpers/sentence/sentencesLength.js"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; /** - * Count sentences in the text. + * Gets the sentences from the text and calculates the length of each sentence. + * * @param {Paper} paper The Paper object to get text from. * @param {Researcher} researcher The researcher to use for analysis. + * * @returns {SentenceLength[]} The sentences from the text. 
*/ export default function( paper, researcher ) { From 65ec1ae0a4f4718624c3ee67e40b265fe2e96bcd Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 16 Dec 2024 09:48:59 +0100 Subject: [PATCH 09/17] Remove unused import --- .../readability/SentenceLengthInTextAssessmentSpec.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js index 5316e9ef154..a898e48e475 100644 --- a/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js @@ -3,7 +3,6 @@ import SentenceLengthInTextAssessment from "../../../../src/scoring/assessments/ import Paper from "../../../../src/values/Paper.js"; import Mark from "../../../../src/values/Mark.js"; -import addMark from "../../../../src/markers/addMark"; import Factory from "../../../../src/helpers/factory.js"; import buildTree from "../../../specHelpers/parse/buildTree"; From fd3ed61de2936aee2988dbd1e7a9811efba4d426 Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Mon, 16 Dec 2024 13:37:50 +0100 Subject: [PATCH 10/17] Adds processing of hyphens --- .../spec/fullTextTests/runFullTextTests.js | 29 --- .../researches/getParagraphLengthSpec.js | 4 +- .../ParagraphTooLongAssessmentSpec.js | 171 +++++++++--------- .../helpers/sentence/sentencesLength.js | 2 +- .../helpers/word/getAllWordsFromTree.js | 24 ++- .../researches/getParagraphLength.js | 2 +- .../readability/ParagraphTooLongAssessment.js | 7 +- 7 files changed, 112 insertions(+), 127 deletions(-) diff --git a/packages/yoastseo/spec/fullTextTests/runFullTextTests.js b/packages/yoastseo/spec/fullTextTests/runFullTextTests.js index 5bb6cf426dc..f43f128c4ce 100644 --- a/packages/yoastseo/spec/fullTextTests/runFullTextTests.js +++ 
b/packages/yoastseo/spec/fullTextTests/runFullTextTests.js @@ -43,7 +43,6 @@ import { getLanguagesWithWordComplexity } from "../../src/helpers"; // Import test papers. import testPapers from "./testTexts"; -import fs from "fs"; testPapers.forEach( function( testPaper ) { // eslint-disable-next-line max-statements @@ -66,34 +65,6 @@ testPapers.forEach( function( testPaper ) { buildTree( paper, researcher ); - /** - * Writes the given contents to the given filename in the temporary directory tmp - * @param {string} filename The name of the file. - * @param {string} content The content of the file. - * @returns {void} - */ - const writeToTempFile = ( filename, content ) => { - // Creates a temporary directory in the current working directory to store the data, if it not yet exists. - // (i.e., packages/yoastseo/tmp/ if this function is called from packages/yoastseo/) - const dir = "tmp/"; - if ( ! fs.existsSync( dir ) ) { - fs.mkdirSync( dir ); - } - - // Writes the data to this temporary directory - fs.writeFileSync( dir + filename, content ); - }; - - // Collects the results and the header into list of ;-separated rows - const sentences = researcher.getResearch( "countSentencesFromText" ); - const resultLines = sentences.map( sentence => sentence.sentence.trimStart().split( " " )[ 0 ] + ";" + sentence.sentenceLength ); - - // Set doExport to true to write the results to a temporary file. 
- const doExport = true; - if ( doExport ) { - writeToTempFile( testPaper.name + ".csv", resultLines.join( "\n" ) ); - } - const expectedResults = testPaper.expectedResults; /** diff --git a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js index af6d54b607d..34c1cf2a93b 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js @@ -6,13 +6,13 @@ import buildTree from "../../specHelpers/parse/buildTree"; describe( "a test for getting paragraph length", function() { it( "returns the paragraph length of a paragraph between p tags", function() { - const mockPaper = new Paper( "

Lorem ipsum

" ); + const mockPaper = new Paper( "

Lorem ipsum, hyphens all-over-the-place

" ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); - expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 2 ); + expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 4 ); } ); it( "returns the paragraph length of a paragraph in Japanese between p tags", function() { diff --git a/packages/yoastseo/spec/scoring/assessments/readability/ParagraphTooLongAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/ParagraphTooLongAssessmentSpec.js index bf5a89b675b..5cf0fea9d5f 100644 --- a/packages/yoastseo/spec/scoring/assessments/readability/ParagraphTooLongAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/readability/ParagraphTooLongAssessmentSpec.js @@ -138,8 +138,7 @@ describe( "Applicability of the assessment.", function() { describe( "A test for marking the sentences", function() { it( "should return markers for a default text", function() { - const assessment = new ParagraphTooLongAssessment(); - const paper = new Paper( "You just got Yoast SEO, and you are ready to get the most out of it! Or, have you already been using it for a " + + const paper = new Paper( "

You just got Yoast SEO, and you are ready to get the most out of it! Or, have you already been using it for a " + "while and know your way around it? Either way, you should use some essential Yoast SEO settings to let the plugin work at its best. " + "That’s why we’ve created a Yoast SEO configuration workout! In five steps, we guide you in setting up Yoast SEO the right way! Read " + "on to learn more about each step. If you just installed Yoast SEO, you’ll find a link to the workout at the top of your general " + @@ -147,35 +146,27 @@ describe( "A test for marking the sentences", function() { "screen, where you can find and access the Configuration workout by clicking the Start workout button. If you’ve finished the workout " + "but want to revise some steps, you can find it in the Workouts menu item of the SEO menu (see bottom of image). Now, let’s go through " + "the workout’s steps. Choose Organization if you have a professional or a company website. For example, if we were filling in the data " + - "for yoast.com, we would choose Organization. You’ll then need to add the Organization’s name, logo, and tagline." ); - const paragraphTooLong = new EnglishResearcher( paper ); + "for yoast.com, we would choose Organization. You’ll then need to add the Organization’s name, logo, and tagline.

" ); + const englishResearcher = new EnglishResearcher( paper ); + buildTree( paper, englishResearcher ); + const expected = [ - new Mark( { original: "You just got Yoast SEO, and you are ready to get the most out of it! Or, have you already been using it for a " + - "while and know your way around it? Either way, you should use some essential Yoast SEO settings to let the plugin work at its " + - "best. That’s why we’ve created a Yoast SEO configuration workout! In five steps, we guide you in setting up Yoast SEO the " + - "right way! Read on to learn more about each step. If you just installed Yoast SEO, you’ll find a link to the workout at the " + - "top of your general WordPress Dashboard, as well as on top of all the screens of the Yoast SEO settings (see image). The link" + - " takes you to the Workouts screen, where you can find and access the Configuration workout by clicking the Start workout " + - "button. If you’ve finished the workout but want to revise some steps, you can find it in the Workouts menu item of the SEO " + - "menu (see bottom of image). Now, let’s go through the workout’s steps. Choose Organization if you have a professional or a " + - "company website. For example, if we were filling in the data for yoast.com, we would choose Organization. You’ll then need to" + - " add the Organization’s name, logo, and tagline.", marked: "You just got Yoast SEO, and " + - "you are ready to get the most out of it! Or, have you already been using it for a while " + - "and know your way around it? Either way, you should use some essential Yoast SEO settings to let the plugin work at its best. " + - "That’s why we’ve created a Yoast SEO configuration workout! In five steps, we guide you in setting up Yoast SEO the right way!" + - " Read on to learn more about each step. 
If you just installed Yoast SEO, you’ll find a link to the workout at the top of your" + - " general WordPress Dashboard, as well as on top of all the screens of the Yoast SEO settings (see image). The link takes you " + - "to the Workouts screen, where you can find and access the Configuration workout by clicking the Start workout button. If " + - "you’ve finished the workout but want to revise some steps, you can find it in the Workouts menu item of the SEO menu (see " + - "bottom of image). Now, let’s go through the workout’s steps. Choose Organization if you have a professional or a company " + - "website. For example, if we were filling in the data for yoast.com, we would choose Organization. You’ll then need to add " + - "the Organization’s name, logo, and tagline." } ), + new Mark( { + position: { + attributeId: "", + clientId: "", + startOffset: 3, + startOffsetBlock: 0, + endOffset: 1150, + endOffsetBlock: 1154, + isFirstSection: false, + }, + } ), ]; - expect( assessment.getMarks( paper, paragraphTooLong ) ).toEqual( expected ); + expect( new ParagraphTooLongAssessment().getMarks( paper, englishResearcher ) ).toEqual( expected ); } ); it( "should return markers for a default text where the text contains
tags with attribute," + " where
has a closing tag", function() { - const assessment = new ParagraphTooLongAssessment(); const paper = new Paper( "

Lorem ipsum dolor sit amet, in magna dolor voluptaria vel, duis aliquid perfecto ius ea, ad pri " + "corpora petentium torquatos. Eu vidit rationibus vel. No vis partem nominavi neglegentur. Omnis dicat everti ut eam, " + "eos ne atqui facer antiopam. Et pri vivendo sensibus perpetua, aperiam epicurei menandri an vix, sea prima accumsan " + @@ -193,34 +184,26 @@ describe( "A test for marking the sentences", function() { "epicuri perfecto eam ne. Vis summo delicatissimi in, cu porro facete phaedrum nam.

" + "

Utamur discere phaedrum eu nam. Ne quidam placerat per, qui inani diceret cu, " + "at nec quot petentium efficiendi. Sea te persius vulputate. Docendi temporibus et quo. Ad duo appareat lobortis definitionem.!

" ); - const paragraphTooLong = new EnglishResearcher( paper ); + const englishResearcher = new EnglishResearcher( paper ); + buildTree( paper, englishResearcher ); + const expected = [ new Mark( { - original: "Lorem ipsum dolor sit amet, in magna dolor voluptaria vel, duis aliquid perfecto ius ea, " + - "ad pri corpora petentium torquatos. Eu vidit rationibus vel. No vis partem nominavi neglegentur. Omnis dicat everti " + - "ut eam, eos ne atqui facer antiopam. Et pri vivendo sensibus perpetua, aperiam epicurei menandri an vix, sea prima " + - "accumsan signiferumque ad. Nisl commune ei est, ut eum stet cibo, duo malis veniam ut. Cu est veritus adolescens " + - "vituperatoribus, at eam movet perfecto. Magna consequat at cum. Vel ad fabellas accusata, vel ea erat lorem mediocritatem, " + - "dissentiunt liberavisse per ex. Duo putant vituperata eu, sit at tota etiam deseruisse. Sed in autem mucius. Errem omnium " + - "facilis mea an. Eu usu eripuit dissentiet, duo ei perfecto argumentum. Diceret forensibus cu has, quo alia nihil et, " + - "convenire adversarium efficiantur per id. His mazim virtute ornatus ei, has id vocibus docendi percipitur. " + - "Wisi nusquam pri no, putent menandri ad duo. Nullam nostrum salutandi eum id, per agam exerci an.", - marked: "Lorem ipsum dolor sit amet, in magna dolor voluptaria vel, " + - "duis aliquid perfecto ius ea, " + - "ad pri corpora petentium torquatos. Eu vidit rationibus vel. No vis partem nominavi neglegentur. Omnis dicat everti " + - "ut eam, eos ne atqui facer antiopam. Et pri vivendo sensibus perpetua, aperiam epicurei menandri an vix, sea prima " + - "accumsan signiferumque ad. Nisl commune ei est, ut eum stet cibo, duo malis veniam ut. Cu est veritus adolescens " + - "vituperatoribus, at eam movet perfecto. Magna consequat at cum. Vel ad fabellas accusata, vel ea erat lorem mediocritatem, " + - "dissentiunt liberavisse per ex. Duo putant vituperata eu, sit at tota etiam deseruisse. Sed in autem mucius. 
Errem omnium " + - "facilis mea an. Eu usu eripuit dissentiet, duo ei perfecto argumentum. Diceret forensibus cu has, quo alia nihil et, " + - "convenire adversarium efficiantur per id. His mazim virtute ornatus ei, has id vocibus docendi percipitur. " + - "Wisi nusquam pri no, putent menandri ad duo. Nullam nostrum salutandi eum id, per agam exerci an." } ), + position: { + attributeId: "", + clientId: "", + startOffset: 3, + startOffsetBlock: 0, + endOffset: 1002, + endOffsetBlock: 999, + isFirstSection: false, + }, + } ), ]; - expect( assessment.getMarks( paper, paragraphTooLong ) ).toEqual( expected ); + expect( new ParagraphTooLongAssessment().getMarks( paper, englishResearcher ) ).toEqual( expected ); } ); it( "should return markers for a default text where the text contains
tags with attribute," + " where
doesn't have a closing tag", function() { - const assessment = new ParagraphTooLongAssessment(); const paper = new Paper( "

Lorem ipsum dolor sit amet, in magna dolor voluptaria vel, duis aliquid perfecto ius ea, ad pri " + "corpora petentium torquatos. Eu vidit rationibus vel. No vis partem nominavi neglegentur. Omnis dicat everti ut eam, " + "eos ne atqui facer antiopam. Et pri vivendo sensibus perpetua, aperiam epicurei menandri an vix, sea prima accumsan " + @@ -238,30 +221,23 @@ describe( "A test for marking the sentences", function() { "epicuri perfecto eam ne. Vis summo delicatissimi in, cu porro facete phaedrum nam.
" + "
Utamur discere phaedrum eu nam. Ne quidam placerat per, qui inani diceret cu, " + "at nec quot petentium efficiendi. Sea te persius vulputate. Docendi temporibus et quo. Ad duo appareat lobortis definitionem.!

" ); - const paragraphTooLong = new EnglishResearcher( paper ); + const englishResearcher = new EnglishResearcher( paper ); + buildTree( paper, englishResearcher ); + const expected = [ new Mark( { - original: "Lorem ipsum dolor sit amet, in magna dolor voluptaria vel, duis aliquid perfecto ius ea, " + - "ad pri corpora petentium torquatos. Eu vidit rationibus vel. No vis partem nominavi neglegentur. Omnis dicat everti " + - "ut eam, eos ne atqui facer antiopam. Et pri vivendo sensibus perpetua, aperiam epicurei menandri an vix, sea prima " + - "accumsan signiferumque ad. Nisl commune ei est, ut eum stet cibo, duo malis veniam ut. Cu est veritus adolescens " + - "vituperatoribus, at eam movet perfecto. Magna consequat at cum. Vel ad fabellas accusata, vel ea erat lorem mediocritatem, " + - "dissentiunt liberavisse per ex. Duo putant vituperata eu, sit at tota etiam deseruisse. Sed in autem mucius. Errem omnium " + - "facilis mea an. Eu usu eripuit dissentiet, duo ei perfecto argumentum. Diceret forensibus cu has, quo alia nihil et, " + - "convenire adversarium efficiantur per id. His mazim virtute ornatus ei, has id vocibus docendi percipitur. " + - "Wisi nusquam pri no, putent menandri ad duo. Nullam nostrum salutandi eum id, per agam exerci an.", - marked: "Lorem ipsum dolor sit amet, in magna dolor voluptaria vel, " + - "duis aliquid perfecto ius ea, " + - "ad pri corpora petentium torquatos. Eu vidit rationibus vel. No vis partem nominavi neglegentur. Omnis dicat everti " + - "ut eam, eos ne atqui facer antiopam. Et pri vivendo sensibus perpetua, aperiam epicurei menandri an vix, sea prima " + - "accumsan signiferumque ad. Nisl commune ei est, ut eum stet cibo, duo malis veniam ut. Cu est veritus adolescens " + - "vituperatoribus, at eam movet perfecto. Magna consequat at cum. Vel ad fabellas accusata, vel ea erat lorem mediocritatem, " + - "dissentiunt liberavisse per ex. Duo putant vituperata eu, sit at tota etiam deseruisse. Sed in autem mucius. 
Errem omnium " + - "facilis mea an. Eu usu eripuit dissentiet, duo ei perfecto argumentum. Diceret forensibus cu has, quo alia nihil et, " + - "convenire adversarium efficiantur per id. His mazim virtute ornatus ei, has id vocibus docendi percipitur. " + - "Wisi nusquam pri no, putent menandri ad duo. Nullam nostrum salutandi eum id, per agam exerci an." } ), + position: { + attributeId: "", + clientId: "", + startOffset: 3, + startOffsetBlock: 0, + endOffset: 1002, + endOffsetBlock: 999, + isFirstSection: false, + }, + } ), ]; - expect( assessment.getMarks( paper, paragraphTooLong ) ).toEqual( expected ); + expect( new ParagraphTooLongAssessment().getMarks( paper, englishResearcher ) ).toEqual( expected ); } ); it( "should return markers for a long paragraph inside image caption", function() { const longText = " A study was carried out to determine the effect of dietary probiotic L on production performance," + @@ -282,20 +258,29 @@ describe( "A test for marking the sentences", function() { "beneficial conditions for nutrients’ utilisation, inhibit pathogenic bacteria in the host. Utilising probiotics " + "in animal nutrition provides not only economic and health benefits they produce also safe foods. Blood haemoglobin, " + "total protein and total cholesterol concentrations were not significantly affected by the probiotic."; - const assessment = new ParagraphTooLongAssessment(); const paper = new Paper( "

A short text.

" + "

Not a rabbit" + longText + "

\n" + "

" ); - const paragraphTooLong = new EnglishResearcher( paper ); + const englishResearcher = new EnglishResearcher( paper ); + buildTree( paper, englishResearcher ); + const expected = [ new Mark( { - original: longText, - marked: `${ longText }` } ), + position: { + attributeId: "", + clientId: "", + startOffset: 23, + startOffsetBlock: 0, + endOffset: 2147, + endOffsetBlock: 2124, + isFirstSection: false, + }, + } ), ]; - expect( assessment.getMarks( paper, paragraphTooLong ) ).toEqual( expected ); + expect( new ParagraphTooLongAssessment().getMarks( paper, englishResearcher ) ).toEqual( expected ); } ); it( "should return markers for a text in Japanese", function() { const paper = new Paper( "接続詞は、文と文との中間に位置しています。前文と後文との間にあって、両者の関係を示している言葉です。学校文法では、接続詞は文の成分" + @@ -303,28 +288,34 @@ describe( "A test for marking the sentences", function() { "としており、芳賀やすしは接続詞を承前副詞と並立連体詞とに二分しています。時枝文法では「辞」として扱っています。つまり、接続詞は前文を受けて、後文の文末まで" + "係っていく副詞のような働きをしているということです。独立語として中立的に結びつけている言葉ではありません。このように接続詞は前文の内容を後文へと持ち込んで、" + "どんな関係になっているかを示し、後文の文末まで係っていく、そうした副詞と似た働きをしています。後文への修飾語的性格を持っています。" ); - const paragraphTooLong = new JapaneseResearcher( paper ); + + const japaneseResearcher = new JapaneseResearcher( paper ); + buildTree( paper, japaneseResearcher ); + + const assessment = new ParagraphTooLongAssessment(); + const expected = [ - new Mark( { original: "接続詞は、文と文との中間に位置しています。前文と後文との間にあって、両者の関係を示している言葉です。学校文法では、接続詞は文の成分" + - "としては独立語として扱われておりますが、独立語でないとする文法学説もあります。松下文法では一品詞としないで副詞に含め、山田文法では副詞の一類として接続副詞" + - "としており、芳賀やすしは接続詞を承前副詞と並立連体詞とに二分しています。時枝文法では「辞」として扱っています。つまり、接続詞は前文を受けて、後文の文末まで" + - "係っていく副詞のような働きをしているということです。独立語として中立的に結びつけている言葉ではありません。このように接続詞は前文の内容を後文へと持ち込んで、" + - "どんな関係になっているかを示し、後文の文末まで係っていく、そうした副詞と似た働きをしています。後文への修飾語的性格を持っています。", marked: - "接続詞は、文と文との中間に位置しています。前文と後文との間にあって、両者の関係を示している言葉です。" + - "学校文法では、接続詞は文の成分としては独立語として扱われておりますが、独立語でないとする文法学説もあります。松下文法では一品詞としないで副詞に含め、" + - "山田文法では副詞の一類として接続副詞としており、芳賀やすしは接続詞を承前副詞と並立連体詞とに二分しています。時枝文法では「辞」として扱っています。" + - 
"つまり、接続詞は前文を受けて、後文の文末まで係っていく副詞のような働きをしているということです。独立語として中立的に結びつけている言葉ではありません。" + - "このように接続詞は前文の内容を後文へと持ち込んで、どんな関係になっているかを示し、後文の文末まで係っていく、そうした副詞と似た働きをしています。" + - "後文への修飾語的性格を持っています。" } ), + new Mark( { + position: { + attributeId: "", + clientId: "", + startOffset: 0, + startOffsetBlock: 0, + endOffset: 362, + endOffsetBlock: 362, + isFirstSection: false, + }, + } ), ]; - expect( paragraphTooLongAssessment.getMarks( paper, paragraphTooLong ) ).toEqual( expected ); + + expect( assessment.getMarks( paper, japaneseResearcher ) ).toEqual( expected ); } ); it( "should return no markers when no paragraph is too long", function() { const paper = new Paper( "This is a very interesting paper." ); - const paragraphTooLong = Factory.buildMockResearcher( [ { paragraphLength: 60 }, { paragraphLength: 11 }, - { paragraphLength: 13 } ] ); + const mockResearcher = Factory.buildMockResearcher( + [ { paragraphLength: 60 }, { paragraphLength: 11 }, { paragraphLength: 13 } ] ); const expected = []; - expect( paragraphTooLongAssessment.getMarks( paper, paragraphTooLong ) ).toEqual( expected ); + expect( paragraphTooLongAssessment.getMarks( paper, mockResearcher ) ).toEqual( expected ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js index e6a33d33bd8..043bb793eb9 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js @@ -18,7 +18,7 @@ export default function( sentences, researcher ) { const sentencesWordCount = []; sentences.forEach( sentence => { const customLengthHelper = researcher.getHelper( "customCountLength" ); - const length = customLengthHelper ? customLengthHelper( sentence.text ) : getWordsFromTokens( sentence.tokens ).length; + const length = customLengthHelper ? 
customLengthHelper( sentence.text ) : getWordsFromTokens( sentence.tokens, false ).length; if ( length > 0 ) { sentencesWordCount.push( { sentence: sentence, diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js index b0f3429a1e2..79c73e00bb0 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js @@ -2,16 +2,38 @@ import getSentencesFromTree from "../sentence/getSentencesFromTree"; import { flatMap } from "lodash"; import removePunctuation from "../sanitize/removePunctuation"; +/** + * Merges words surrounding a separator into one word. + * + * @param {string[]} words The array of words to split and merge. + * @param {string} separator The separator to split on. + * + * @returns {void} This function mutates the `words` array through splicing. + */ +function mergeBy( words, separator ) { + while ( words.indexOf( separator ) !== -1 ) { + const currentSeparator = words.indexOf( separator ); + const wordBefore = words[ currentSeparator - 1 ]; + const wordAfter = words[ currentSeparator + 1 ]; + words.splice( currentSeparator - 1, 3, wordBefore + separator + wordAfter ); + } +} + /** * Gets the words from the tokens. * * @param {Token[]} tokens The tokens to get the words from. + * @param {boolean} splitOnHyphens Whether to split words on hyphens. * * @returns {string[]} Array of words retrieved from the tokens. */ -export function getWordsFromTokens( tokens ) { +export function getWordsFromTokens( tokens, splitOnHyphens = true ) { // Retrieve all texts from the tokens. let words = tokens.map( token => token.text ); + // Combine words separated by a hyphen, if needed. + if ( ! splitOnHyphens ) { + mergeBy( words, "-" ); + } // Remove punctuation and spaces. words = words.map( token => removePunctuation( token ) ); // Filter out empty tokens. 
diff --git a/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js b/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js index b018bbeb1ea..3a27f0f1c4c 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js +++ b/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js @@ -21,7 +21,7 @@ export default function( paper, researcher ) { paragraphs.forEach( paragraph => { const customLengthHelper = researcher.getHelper( "customCountLength" ); const tokens = paragraph.sentences.map( sentence => sentence.tokens ).flat(); - const length = customLengthHelper ? customLengthHelper( paragraph.innerText() ) : getWordsFromTokens( tokens ).length; + const length = customLengthHelper ? customLengthHelper( paragraph.innerText() ) : getWordsFromTokens( tokens, false ).length; if ( length > 0 ) { paragraphLengths.push( { paragraph: paragraph, diff --git a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js index ae325e23fbd..9ae3e7c1e26 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js @@ -198,12 +198,13 @@ export default class ParagraphTooLongAssessment extends Assessment { const paragraphsLength = researcher.getResearch( "getParagraphLength" ); const tooLongParagraphs = this.getTooLongParagraphs( paragraphsLength, this.getConfig( researcher ) ); return tooLongParagraphs.flatMap( ( { paragraph } ) => { + const scl = paragraph.sourceCodeLocation; return new Mark( { position: { - startOffset: paragraph.sourceCodeLocation.startTag.endOffset, - endOffset: paragraph.sourceCodeLocation.endTag.startOffset, + startOffset: scl.startTag ? scl.startTag.endOffset : scl.startOffset, + endOffset: scl.endTag ? 
scl.endTag.startOffset : scl.endOffset, startOffsetBlock: 0, - endOffsetBlock: paragraph.sourceCodeLocation.endOffset - paragraph.sourceCodeLocation.startOffset, + endOffsetBlock: scl.endOffset - scl.startOffset, clientId: paragraph.clientId || "", attributeId: paragraph.parentAttributeId || "", isFirstSection: paragraph.isParentFirstSectionOfBlock || false, From 95ece64373e5c436acbd094862092872e6aca3ba Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 18 Dec 2024 17:58:28 +0100 Subject: [PATCH 11/17] Adapt fullTexttest --- .../fullTextTests/testTexts/el/greekPaper.html | 9 --------- .../fullTextTests/testTexts/el/greekPaper.js | 16 ++++++++-------- .../en/englishPaperForPerformanceTest.js | 4 ++-- .../es/spanishPaperForPerformanceTest.js | 4 ++-- .../fullTextTests/testTexts/fa/farsiPaper.js | 4 ++-- .../fullTextTests/testTexts/fr/frenchPaper.js | 6 +++--- .../fullTextTests/testTexts/he/hebrewPaper.js | 2 +- .../fullTextTests/testTexts/ja/japanesePaper.js | 2 +- .../fullTextTests/testTexts/pl/polishPaper.js | 2 +- .../pl/polishPaperForPerformanceTest.js | 4 ++-- .../fullTextTests/testTexts/en/englishPaper1.js | 2 +- .../fullTextTests/testTexts/en/englishPaper3.js | 5 ++--- .../fullTextTests/testTexts/en/englishPaper1.js | 4 ++-- .../fullTextTests/testTexts/en/englishPaper2.js | 2 +- 14 files changed, 28 insertions(+), 38 deletions(-) diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.html b/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.html index 139daa513cf..92e1305cb95 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.html +++ b/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.html @@ -1,10 +1,3 @@ - - - - - Ελληνική γλώσσα - Βικιπαίδεια - -

Η φωνολογία, η μορφολογία, η σύνταξη και το λεξιλόγιο της γλώσσας δείχνουν τόσο συντηρητικά όσο και καινοτόμα στοιχεία σε ολόκληρη την ιστορική πορεία της γλώσσας από την αρχαία έως τη σύγχρονη περίοδο. Η διαίρεση σε συμβατικές περιόδους είναι σχετικά αυθαίρετη, ειδικά επειδή σε όλες τις περιόδους ύπαρξης της η αρχαία ελληνική έχει απολαύσει υψηλό κύρος και οι εγγράμματοι άνθρωποι χρησιμοποιούσαν πολλά δάνεια από τα αρχαία ελληνικά.

Φωνολογία[Επεξεργασία | επεξεργασία κώδικα]

@@ -173,7 +166,5 @@

φοινικικού αλφαβήτου, με την καινοτομία της υιοθέτησης ορισμένων νέων γραμμάτων για την γραφή των φωνηέντων. Η παραλλαγή του αλφαβήτου που χρησιμοποιείται σήμερα είναι ουσιαστικά η ύστερη Ιωνική παραλλαγή, η οποία εισήχθη για την γραφή της αττικής διαλέκτου το 403 π.Χ. Στην κλασική ελληνική, όπως και στην κλασική λατινική, υπήρχαν μόνο κεφαλαία γράμματα. Τα πεζά ελληνικά γράμματα αναπτύχθηκαν πολύ αργότερα από τους μεσαιωνικούς γραμματείς για να επιτρέψουν ένα ταχύτερο, πιο βολικό τρόπο γραφής με τη χρήση μελανιού και πένας.

Το ελληνικό αλφάβητο αποτελείται από 24 γράμματα, το καθένα με κεφαλαία και πεζά γράμματα. Το σίγμα έχει μια πρόσθετη πεζή μορφή (ς) που χρησιμοποιείται στο τέλος μιας λέξης:

- - diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.js index 486cf4eb010..7d45dda1316 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.js @@ -66,7 +66,7 @@ const expectedResults = { textLength: { isApplicable: true, score: 9, - resultText: "Text length: The text contains 2913 words. Good job!", + resultText: "Text length: The text contains 2910 words. Good job!", }, externalLinks: { isApplicable: true, @@ -117,25 +117,25 @@ const expectedResults = { }, textParagraphTooLong: { isApplicable: true, - score: 9, - resultText: "Paragraph length: None of the paragraphs are too long. Great job!", + score: 3, + resultText: "Paragraph length: 3 of the paragraphs contain more than the recommended maximum number of words (150). Shorten your paragraphs!", }, textSentenceLength: { isApplicable: true, - score: 6, - resultText: "Sentence length: 27.8% of the sentences contain more than 20 words, " + + score: 3, + resultText: "Sentence length: 30.9% of the sentences contain more than 20 words, " + "which is more than the recommended maximum of 25%. Try to shorten the sentences.", }, textTransitionWords: { isApplicable: true, - score: 3, - resultText: "Transition words: Only 19.6% of the sentences contain" + + score: 6, + resultText: "Transition words: Only 20.2% of the sentences contain" + " transition words, which is not enough. Use more of them.", }, passiveVoice: { isApplicable: true, score: 3, - resultText: "Passive voice: 25.8% of the sentences contain passive voice, " + + resultText: "Passive voice: 26.6% of the sentences contain passive voice, " + "which is more than the recommended maximum of 10%. 
" + "Try to use their active counterparts.", }, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/en/englishPaperForPerformanceTest.js b/packages/yoastseo/spec/fullTextTests/testTexts/en/englishPaperForPerformanceTest.js index ba97a5f9981..cbb4e745316 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/en/englishPaperForPerformanceTest.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/en/englishPaperForPerformanceTest.js @@ -115,8 +115,8 @@ const expectedResults = { }, textParagraphTooLong: { isApplicable: true, - score: 9, - resultText: "Paragraph length: None of the paragraphs are too long. Great job!", + score: 6, + resultText: "Paragraph length: 1 of the paragraphs contains more than the recommended maximum number of words (150). Shorten your paragraphs!", }, textSentenceLength: { isApplicable: true, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/es/spanishPaperForPerformanceTest.js b/packages/yoastseo/spec/fullTextTests/testTexts/es/spanishPaperForPerformanceTest.js index 270298eacb7..b07847b0a1a 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/es/spanishPaperForPerformanceTest.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/es/spanishPaperForPerformanceTest.js @@ -115,8 +115,8 @@ const expectedResults = { }, textParagraphTooLong: { isApplicable: true, - score: 9, - resultText: "Paragraph length: None of the paragraphs are too long. Great job!", + score: 6, + resultText: "Paragraph length: 1 of the paragraphs contains more than the recommended maximum number of words (150). 
Shorten your paragraphs!", }, textSentenceLength: { isApplicable: true, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/fa/farsiPaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/fa/farsiPaper.js index 169852a049b..30d58eaf27f 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/fa/farsiPaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/fa/farsiPaper.js @@ -118,8 +118,8 @@ const expectedResults = { }, textParagraphTooLong: { isApplicable: true, - score: 9, - resultText: "Paragraph length: None of the paragraphs are too long. Great job!", + score: 3, + resultText: "Paragraph length: 3 of the paragraphs contain more than the recommended maximum number of words (150). Shorten your paragraphs!", }, textSentenceLength: { isApplicable: true, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/fr/frenchPaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/fr/frenchPaper.js index f2664d24be1..a10baeafcde 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/fr/frenchPaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/fr/frenchPaper.js @@ -109,13 +109,13 @@ const expectedResults = { }, textParagraphTooLong: { isApplicable: true, - score: 9, - resultText: "Paragraph length: None of the paragraphs are too long. Great job!", + score: 3, + resultText: "Paragraph length: 2 of the paragraphs contain more than the recommended maximum number of words (150). Shorten your paragraphs!", }, textSentenceLength: { isApplicable: true, score: 3, - resultText: "Sentence length: 40.8% of the sentences contain more" + + resultText: "Sentence length: 39.6% of the sentences contain more" + " than 20 words, which is more than the recommended maximum of 25%. 
" + "Try to shorten the sentences.", }, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/he/hebrewPaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/he/hebrewPaper.js index f0cd81304d5..2142bb1ecf4 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/he/hebrewPaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/he/hebrewPaper.js @@ -120,7 +120,7 @@ const expectedResults = { textSentenceLength: { isApplicable: true, score: 3, - resultText: "Sentence length: 66.7% of the sentences contain more than 15 words," + + resultText: "Sentence length: 78.9% of the sentences contain more than 15 words," + " which is more than the recommended maximum of 25%. Try to shorten the sentences.", }, textTransitionWords: { diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js index e05870fbc59..598fb8f8ba2 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js @@ -124,7 +124,7 @@ const expectedResults = { textSentenceLength: { isApplicable: true, score: 3, - resultText: "Sentence length: 50.8% of the sentences contain more than 40 characters, " + + resultText: "Sentence length: 51.6% of the sentences contain more than 40 characters, " + "which is more than the recommended maximum of 25%. 
Try to shorten the sentences.", }, textTransitionWords: { diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaper.js index ab2b043c3ec..01f36eb955f 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaper.js @@ -109,7 +109,7 @@ const expectedResults = { textSentenceLength: { isApplicable: true, score: 3, - resultText: "Sentence length: 45.8% of the sentences contain more than 20 words, which is more than the recommended maximum of 15%. Try to shorten the sentences.", + resultText: "Sentence length: 46.6% of the sentences contain more than 20 words, which is more than the recommended maximum of 15%. Try to shorten the sentences.", }, textTransitionWords: { isApplicable: true, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaperForPerformanceTest.js b/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaperForPerformanceTest.js index 05b51c1d37d..69e500b29b0 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaperForPerformanceTest.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaperForPerformanceTest.js @@ -109,8 +109,8 @@ const expectedResults = { }, textParagraphTooLong: { isApplicable: true, - score: 9, - resultText: "Paragraph length: None of the paragraphs are too long. Great job!", + score: 6, + resultText: "Paragraph length: 1 of the paragraphs contains more than the recommended maximum number of words (150). 
Shorten your paragraphs!", }, textSentenceLength: { isApplicable: true, diff --git a/packages/yoastseo/spec/scoring/assessors/collectionPages/fullTextTests/testTexts/en/englishPaper1.js b/packages/yoastseo/spec/scoring/assessors/collectionPages/fullTextTests/testTexts/en/englishPaper1.js index 6d2ad1c18ac..778a3072204 100644 --- a/packages/yoastseo/spec/scoring/assessors/collectionPages/fullTextTests/testTexts/en/englishPaper1.js +++ b/packages/yoastseo/spec/scoring/assessors/collectionPages/fullTextTests/testTexts/en/englishPaper1.js @@ -93,7 +93,7 @@ const expectedResults = { textSentenceLength: { isApplicable: true, score: 3, - resultText: "Sentence length: 50% of the sentences contain more " + + resultText: "Sentence length: 45.5% of the sentences contain more " + "than 20 words, which is more than the recommended maximum of 25%. " + "Try to shorten the sentences.", }, diff --git a/packages/yoastseo/spec/scoring/assessors/collectionPages/fullTextTests/testTexts/en/englishPaper3.js b/packages/yoastseo/spec/scoring/assessors/collectionPages/fullTextTests/testTexts/en/englishPaper3.js index f58a8c5e863..0dac860a15c 100644 --- a/packages/yoastseo/spec/scoring/assessors/collectionPages/fullTextTests/testTexts/en/englishPaper3.js +++ b/packages/yoastseo/spec/scoring/assessors/collectionPages/fullTextTests/testTexts/en/englishPaper3.js @@ -95,9 +95,8 @@ const expectedResults = { }, textSentenceLength: { isApplicable: true, - score: 3, - - resultText: "Sentence length: 32% of the sentences contain more than 20 words, " + + score: 6, + resultText: "Sentence length: 26.9% of the sentences contain more than 20 words, " + "which is more than the recommended maximum of 25%. 
Try to shorten the sentences.", }, textTransitionWords: { diff --git a/packages/yoastseo/spec/scoring/assessors/productPages/fullTextTests/testTexts/en/englishPaper1.js b/packages/yoastseo/spec/scoring/assessors/productPages/fullTextTests/testTexts/en/englishPaper1.js index d3a68a271df..671c8cd0a10 100644 --- a/packages/yoastseo/spec/scoring/assessors/productPages/fullTextTests/testTexts/en/englishPaper1.js +++ b/packages/yoastseo/spec/scoring/assessors/productPages/fullTextTests/testTexts/en/englishPaper1.js @@ -136,8 +136,8 @@ const expectedResults = { }, textSentenceLength: { isApplicable: true, - score: 9, - resultText: "Sentence length: Great!", + score: 3, + resultText: "Sentence length: 28.6% of the sentences contain more than 20 words, which is more than the recommended maximum of 20%. Try to shorten the sentences.", }, textTransitionWords: { isApplicable: true, diff --git a/packages/yoastseo/spec/scoring/assessors/productPages/fullTextTests/testTexts/en/englishPaper2.js b/packages/yoastseo/spec/scoring/assessors/productPages/fullTextTests/testTexts/en/englishPaper2.js index c31f33b97c8..3d34ce72ae3 100644 --- a/packages/yoastseo/spec/scoring/assessors/productPages/fullTextTests/testTexts/en/englishPaper2.js +++ b/packages/yoastseo/spec/scoring/assessors/productPages/fullTextTests/testTexts/en/englishPaper2.js @@ -149,7 +149,7 @@ const expectedResults = { textSentenceLength: { isApplicable: true, score: 3, - resultText: "Sentence length: 50% of the sentences contain more than 20 words, " + + resultText: "Sentence length: 46.7% of the sentences contain more than 20 words, " + "which is more than the recommended maximum of 20%. 
" + "Try to shorten the sentences.", }, From e8e1c16e9f80d594ce079e1a41f713c8b2e2a3eb Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 18 Dec 2024 17:59:18 +0100 Subject: [PATCH 12/17] Adjust unit tests --- .../researches/getParagraphLengthSpec.js | 60 +++---------------- 1 file changed, 9 insertions(+), 51 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js index 34c1cf2a93b..034c84e12dd 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js @@ -26,47 +26,15 @@ describe( "a test for getting paragraph length", function() { } ); it( "returns the paragraph length of two paragraphs divided by double linebreaks and ends with a double linebreak", function() { + // After using the HTML parser, we don't split paragraphs on double linebreaks, so this should be one paragraph. 
const mockPaper = new Paper( "Lorem \n\n ipsum two \n\n" ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); - expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 1 ); - expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 2 ); - } ); - - it( "returns the paragraph length of two paragraphs in Japanese divided by double linebreaks and ends with a double linebreak", function() { - const mockPaper = new Paper( "1964年 \n\n (昭和39年) \n\n" ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - buildTree( mockPaper, mockResearcher ); - - const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); - - expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 ); - expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 7 ); - } ); - - it( "returns the paragraph length of two paragraphs divided by double linebreaks that don't end with a double linebreak", function() { - const mockPaper = new Paper( "Lorem \n\n ipsum two" ); - const mockResearcher = new EnglishResearcher( mockPaper ); - buildTree( mockPaper, mockResearcher ); - - const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); - - expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 1 ); - expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 2 ); - } ); - - it( "returns the paragraph length of two paragraphs in Japanese divided by double linebreaks that don't end with a double linebreak", function() { - const mockPaper = new Paper( "1964年 \n\n (昭和39年)" ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - buildTree( mockPaper, mockResearcher ); - - const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); - - expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 ); - expect( paragraphLengths[ 1 ].paragraphLength ).toBe( 7 ); + expect( paragraphLengths.length ).toBe( 1 ); + expect( paragraphLengths[ 0 ].paragraphLength 
).toBe( 3 ); } ); it( "returns the paragraph length of a paragraph without tags or double linebreaks", function() { @@ -162,44 +130,34 @@ describe( "a test for getting paragraph length", function() { expect( paragraphLengths.length ).toBe( 1 ); } ); - it( "returns the paragraph length of paragraph without p tags or double linebreaks, but with h2 tags", function() { + it( "should not recognize heading as paragraph", function() { const mockPaper = new Paper( "

Lorem ipsum dolor sit amet

" ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); - expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 5 ); - } ); - - it( "returns the paragraph length of paragraph in Japanese without p tags or double linebreaks, but with h2 tags", function() { - const mockPaper = new Paper( "

(昭和39年)10月1日に開業した。

" ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - buildTree( mockPaper, mockResearcher ); - - const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); - - expect( paragraphLengths[ 0 ].paragraphLength ).toBe( 18 ); + expect( paragraphLengths.length ).toBe( 0 ); } ); - xit( "returns the paragraph length of an empty paragraph with p tags", function() { + it( "should not count an empty paragraph", function() { const mockPaper = new Paper( "

" ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); - expect( paragraphLengths.paragraphLength ).not.toContain( 0 ); + expect( paragraphLengths.length ).toBe( 0 ); } ); - xit( "returns the paragraph length of an empty paragraph without p tags or double line breaks", function() { + it( "should not count an empty paragraph without p tags or double line breaks", function() { const mockPaper = new Paper( "" ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); const paragraphLengths = getParagraphLength( mockPaper, mockResearcher ); - expect( paragraphLengths.paragraphLength ).not.toContain( 0 ); + expect( paragraphLengths.length ).toBe( 0 ); } ); } ); From 322b22133ec84439903a330db13f7b968ce69327 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 20 Dec 2024 11:46:42 +0100 Subject: [PATCH 13/17] Fix typos --- .../assessments/readability/ParagraphTooLongAssessment.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js index 9ae3e7c1e26..6f08493aa3b 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/ParagraphTooLongAssessment.js @@ -206,8 +206,8 @@ export default class ParagraphTooLongAssessment extends Assessment { startOffsetBlock: 0, endOffsetBlock: scl.endOffset - scl.startOffset, clientId: paragraph.clientId || "", - attributeId: paragraph.parentAttributeId || "", - isFirstSection: paragraph.isParentFirstSectionOfBlock || false, + attributeId: paragraph.attributeId || "", + isFirstSection: paragraph.isFirstSection || false, }, } ); } ); From d09f012bbb23f450ea65fef57c31f3b5a09c6934 Mon Sep 17 00:00:00 
2001 From: aidamarfuaty Date: Thu, 2 Jan 2025 11:33:46 +0100 Subject: [PATCH 14/17] Adjust the start and end offset to exclude leading and trailing spaces. Also add unit tests --- .../SentenceLengthInTextAssessmentSpec.js | 55 +++++++++++++++++-- .../SentenceLengthInTextAssessment.js | 14 ++++- 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js index a898e48e475..c5949da6352 100644 --- a/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/readability/SentenceLengthInTextAssessmentSpec.js @@ -109,9 +109,9 @@ describe( "An assessment for sentence length", function() { new Mark( { position: { startOffset: 0, - endOffset: 81, + endOffset: 80, startOffsetBlock: 0, - endOffsetBlock: 81, + endOffsetBlock: 80, clientId: "", attributeId: "", isFirstSection: false, @@ -626,16 +626,59 @@ describe( "A test for marking too long sentences", function() { expect( new SentenceLengthInTextAssessment().getMarks( mockPaper, mockResearcher ) ).toEqual( expected ); } ); + it( "adjusts the startOffset when the long sentence starts with a space", () => { + const mockPaper = new Paper( " This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think?" 
); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const expected = [ + new Mark( { + position: { + startOffset: 1, + endOffset: 107, + startOffsetBlock: 1, + endOffsetBlock: 107, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + ]; + expect( new SentenceLengthInTextAssessment().getMarks( mockPaper, mockResearcher ) ).toEqual( expected ); + } ); + + it( "adjusts the endOffset when the long sentence ends with a space", () => { + const mockPaper = new Paper( "This is a too long sentence, because it has over twenty words, and that is hard too read, don't you think? " ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const expected = [ + new Mark( { + position: { + startOffset: 0, + endOffset: 106, + startOffsetBlock: 0, + endOffsetBlock: 106, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + ]; + expect( new SentenceLengthInTextAssessment().getMarks( mockPaper, mockResearcher ) ).toEqual( expected ); + } ); + it( "returns no markers if no sentences are too long", function() { const paper = new Paper( "This is a short sentence." 
); - const sentenceLengthInText = Factory.buildMockResearcher( [ { sentence: "This is a short sentence.", sentenceLength: 5 } ] ); + // const sentenceLengthInText = Factory.buildMockResearcher( [ { sentence: "This is a short sentence.", sentenceLength: 5 } ] ); + const mockResearcher = new EnglishResearcher( paper ); + buildTree( paper, mockResearcher ); + const expected = []; - expect( new SentenceLengthInTextAssessment().getMarks( paper, sentenceLengthInText ) ).toEqual( expected ); + expect( new SentenceLengthInTextAssessment().getMarks( paper, mockResearcher ) ).toEqual( expected ); } ); } ); -describe( "A test for marking too long sentences", function() { - it( "calculatePercentage returns nothing if there are no sentences", function() { +describe( "A test for calculatePercentage", function() { + it( "returns nothing if there are no sentences", function() { expect( new SentenceLengthInTextAssessment().calculatePercentage( [] ) ).toEqual( 0 ); } ); } ); diff --git a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js index 322df18bc36..9c98c281644 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js @@ -100,8 +100,18 @@ class SentenceLengthInTextAssessment extends Assessment { return tooLongSentences.map( tooLongSentence => { const sentence = tooLongSentence.sentence; - const startOffset = sentence.sourceCodeRange.startOffset; - const endOffset = sentence.sourceCodeRange.endOffset; + const { text } = sentence; + + let startOffset = sentence.sourceCodeRange.startOffset; + let endOffset = sentence.sourceCodeRange.endOffset; + // Adjust the start and end offset to exclude leading and trailing spaces. 
+ if ( text.startsWith( " " ) ) { + startOffset += 1; + } + if ( text.endsWith( " " ) ) { + endOffset -= 1; + } + return new Mark( { position: { startOffset, From f4865ddc19ce233c429c9af38498b4c1748f45d6 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 2 Jan 2025 15:30:44 +0100 Subject: [PATCH 15/17] also output first and last tokens of a sentence --- .../helpers/sentence/sentencesLengthSpec.js | 17 +++++++++++++++++ .../helpers/sentence/sentencesLength.js | 19 ++++++++++++------- .../SentenceLengthInTextAssessment.js | 16 ++++------------ 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js index aa8761cf6df..d3ed5253dc4 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/sentencesLengthSpec.js @@ -47,6 +47,23 @@ describe( "A test to count sentence lengths.", function() { expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 6 ); } ); + it( "should return the correct length for sentences containing leading and trailing spaces including the first and last token that is not spaces", function() { + const mockPaper = new Paper( + "

The first sentence.

The second sentence.

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + + const sentenceLengths = sentencesLength( getSentencesFromTree( mockPaper ), mockResearcher ); + + expect( sentenceLengths.length ).toEqual( 2 ); + expect( sentenceLengths[ 0 ].sentenceLength ).toEqual( 3 ); + expect( sentenceLengths[ 0 ].firstToken ).toEqual( { sourceCodeRange: { endOffset: 7, startOffset: 4 }, text: "The" } ); + expect( sentenceLengths[ 0 ].lastToken ).toEqual( { sourceCodeRange: { endOffset: 23, startOffset: 22 }, text: "." } ); + expect( sentenceLengths[ 1 ].sentenceLength ).toEqual( 3 ); + expect( sentenceLengths[ 1 ].firstToken ).toEqual( { sourceCodeRange: { endOffset: 33, startOffset: 30 }, text: "The" } ); + expect( sentenceLengths[ 1 ].lastToken ).toEqual( { sourceCodeRange: { endOffset: 50, startOffset: 49 }, text: "." } ); + } ); + it( "should return the sentences and their length for Japanese (so counting characters)", function() { const mockPaper = new Paper( "

自然おのずから存在しているもの

" + "

歩くさわやかな森 自然

" ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js index 043bb793eb9..c68dcf7d749 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js @@ -15,16 +15,21 @@ import { getWordsFromTokens } from "../word/getAllWordsFromTree"; * @returns {SentenceLength[]} Array with the length of each sentence. */ export default function( sentences, researcher ) { - const sentencesWordCount = []; - sentences.forEach( sentence => { - const customLengthHelper = researcher.getHelper( "customCountLength" ); + const customLengthHelper = researcher.getHelper( "customCountLength" ); + + return sentences.map( sentence => { const length = customLengthHelper ? customLengthHelper( sentence.text ) : getWordsFromTokens( sentence.tokens, false ).length; + if ( length > 0 ) { - sentencesWordCount.push( { + const firstToken = sentence.tokens.find( ( { text } ) => text !== " " ); + const lastToken = sentence.tokens.slice().reverse().find( ( { text } ) => text !== " " ); + + return { sentence: sentence, sentenceLength: length, - } ); + firstToken: firstToken ? firstToken : null, + lastToken: lastToken ? 
lastToken : null, + }; } - } ); - return sentencesWordCount; + } ).filter( Boolean ); } diff --git a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js index 9c98c281644..16146c8cd9e 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js @@ -99,18 +99,10 @@ class SentenceLengthInTextAssessment extends Assessment { const tooLongSentences = this.getTooLongSentences( sentenceCount ); return tooLongSentences.map( tooLongSentence => { - const sentence = tooLongSentence.sentence; - const { text } = sentence; - - let startOffset = sentence.sourceCodeRange.startOffset; - let endOffset = sentence.sourceCodeRange.endOffset; - // Adjust the start and end offset to exclude leading and trailing spaces. - if ( text.startsWith( " " ) ) { - startOffset += 1; - } - if ( text.endsWith( " " ) ) { - endOffset -= 1; - } + const { sentence, firstToken, lastToken } = tooLongSentence; + + const startOffset = firstToken.sourceCodeRange.startOffset; + const endOffset = lastToken.sourceCodeRange.endOffset; return new Mark( { position: { From 3543d6e40f858147b7bea4775632d14645142e26 Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Fri, 3 Jan 2025 15:30:10 +0100 Subject: [PATCH 16/17] Fixes the translator comments --- .../assessments/readability/SentenceLengthInTextAssessment.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js index 16146c8cd9e..cac01ad8807 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +++ 
b/packages/yoastseo/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js @@ -168,7 +168,7 @@ class SentenceLengthInTextAssessment extends Assessment { const wordFeedback = sprintf( /* translators: %1$s and %6$s expand to links on yoast.com, %2$s expands to the anchor end tag, - %3$d expands to percentage of sentences, %4$s expands to the recommended maximum sentence length, + %3$s expands to percentage of sentences, %4$d expands to the recommended maximum sentence length, %5$s expands to the recommended maximum percentage. */ _n( "%1$sSentence length%2$s: %3$s of the sentences contain more than %4$d word, which is more than the recommended maximum of %5$s. %6$sTry to shorten the sentences%2$s.", @@ -186,7 +186,7 @@ class SentenceLengthInTextAssessment extends Assessment { const characterFeedback = sprintf( /* translators: %1$s and %6$s expand to links on yoast.com, %2$s expands to the anchor end tag, - %3$d expands to percentage of sentences, %4$s expands to the recommended maximum sentence length, + %3$s expands to percentage of sentences, %4$d expands to the recommended maximum sentence length, %5$s expands to the recommended maximum percentage. */ _n( "%1$sSentence length%2$s: %3$s of the sentences contain more than %4$d character, which is more than the recommended maximum of %5$s. 
%6$sTry to shorten the sentences%2$s.", From 182a2512bd3f0339c9ec83486e379d07684b09e8 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 3 Jan 2025 15:43:11 +0100 Subject: [PATCH 17/17] simplify code --- .../languageProcessing/helpers/sentence/sentencesLength.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js index c68dcf7d749..befa1152ca8 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/sentencesLength.js @@ -18,11 +18,12 @@ export default function( sentences, researcher ) { const customLengthHelper = researcher.getHelper( "customCountLength" ); return sentences.map( sentence => { - const length = customLengthHelper ? customLengthHelper( sentence.text ) : getWordsFromTokens( sentence.tokens, false ).length; + const { tokens } = sentence; + const length = customLengthHelper ? customLengthHelper( sentence.text ) : getWordsFromTokens( tokens, false ).length; if ( length > 0 ) { - const firstToken = sentence.tokens.find( ( { text } ) => text !== " " ); - const lastToken = sentence.tokens.slice().reverse().find( ( { text } ) => text !== " " ); + const firstToken = tokens.find( ( { text } ) => text !== " " ); + const lastToken = tokens.slice().reverse().find( ( { text } ) => text !== " " ); return { sentence: sentence,