Skip to content

Commit

Permalink
introduce --removeWarnings option that tries to automatically remove …
Browse files Browse the repository at this point in the history
…warnings about reporting story if found on Amazon
  • Loading branch information
Aivean committed Jan 19, 2024
1 parent 7b9ab3b commit d05a0b0
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/main/scala/com/aivean/royalroad/Args.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ class Args(args: Seq[String]) extends ScallopConf(args) {
default = Some(false)
)

// Long-form-only boolean switch (no single-letter alias), disabled unless given.
// The option name is taken from this val's identifier.
val removeWarnings = opt[Boolean](
  default = Some(false),
  noshort = true,
  descr = "Remove warnings about reporting story if found on Amazon"
)

val titleQuery = opt[String](
descr = "CSS selector for chapter title (text of the found element is used)",
default = Some("title")
Expand Down
13 changes: 13 additions & 0 deletions src/main/scala/com/aivean/royalroad/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,19 @@ object Main extends App {
}
}

// When --removeWarnings is set, drop every paragraph that the fuzzy matcher
// classifies as a "report this story" warning before the chapter is written out.
if (cliArgs.removeWarnings()) {
  chapterContent
    .select("p")
    .filter(paragraph => Utils.WarningFuzzyMatcher(paragraph.text))
    .foreach {
      case warning: JsoupElement =>
        println(s"removing warning: ${warning.text}")
        // detach the node from the underlying Jsoup document
        warning.underlying.remove()
      case _ =>
        // elements that are not Jsoup-backed are left untouched
        ()
    }
}

// write chapter content to file
printWriter.write(chapterContent.outerHtml)

Expand Down
41 changes: 41 additions & 0 deletions src/main/scala/com/aivean/royalroad/Utils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,47 @@ import scala.util.{Failure, Success, Try}
* Misc helper functions
*/
object Utils {

/**
 * Heuristic detector for short "this story was stolen, please report it" notices.
 *
 * A text is scored by summing the weights of every keyword it contains
 * (case-insensitive substring match); it is classified as a warning when it is
 * short enough and its score exceeds [[threshold]].
 */
object WarningFuzzyMatcher {

  /**
   * Lower-case keyword (or phrase) mapped to the weight it contributes when present.
   * Each entry is tested independently, so overlapping entries such as
   * "report" / "please report" both count when the longer phrase occurs.
   */
  val keywords: Map[String, Double] = Map(
    "amazon" -> 1.0,
    "stolen" -> 0.8,
    "theft" -> 0.8,
    "report" -> 0.7,
    "please report" -> 0.61,
    "without permission" -> 0.6,
    "without consent" -> 0.6,
    "unauthorized" -> 0.6,
    "pilfered" -> 0.5,
    "purloined" -> 0.5,
    "appropriated" -> 0.5,
    "royal road" -> 0.9,
    "story" -> 0.4,
    "narrative" -> 0.4,
    "content" -> 0.4,
    "novel" -> 0.4,
    "tale" -> 0.4,
    "infringement" -> 0.5,
    "violation" -> 0.5,
    "not rightfully" -> 0.5,
    "taken without" -> 0.7,
    "misappropriated" -> 0.6,
    "sightings" -> 0.4,
    "encounter" -> 0.3
  )

  /** A text must score strictly above this value to be considered a warning. */
  val threshold: Double = 2.5

  /** Texts of this length or longer are never treated as warnings. */
  private val MaxWarningLength = 200

  /**
   * Sums the weights of all keywords contained in `s`.
   *
   * @param s text to score
   * @return total weight of the matched keywords; 0.0 when none match
   */
  def scoreString(s: String): Double = {
    // Lower-case once, and with a fixed locale: the default-locale variant maps
    // 'I' to dotless 'ı' under e.g. Turkish, which would hide keywords containing 'i'.
    val normalized = s.toLowerCase(java.util.Locale.ROOT)
    keywords.foldLeft(0.0) {
      case (score, (keyword, value)) =>
        if (normalized.contains(keyword)) score + value else score
    }
  }

  /**
   * @param warning candidate paragraph text
   * @return true when the text is shorter than 200 characters and scores above [[threshold]]
   */
  def apply(warning: String): Boolean =
    warning.length < MaxWarningLength && scoreString(warning) > threshold
}

def parsingError(name: String, value: String, url: String): Nothing = {
throw new IllegalStateException(
s""" Can't find $name using css query: `$value`
Expand Down
30 changes: 30 additions & 0 deletions src/test/scala/com/aivean/royalroad/UtilsTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,34 @@ class UtilsTest extends FunSuite {
assert(dataUri ==
new URI("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAIAAAACDbGyAAAAEUlEQVR4nGP8z4ACmBgo4wMAUJEBCfBOwRcAAAAASUVORK5CYII="))
}


// Sample "report this story" notices; the test below expects
// Utils.WarningFuzzyMatcher to accept every one of them.
val reportWarnings = List(
"A case of theft: this story is not rightfully on Amazon; if you spot it, report the violation.",
"The narrative has been stolen; if detected on Amazon, report the infringement.",
"If you come across this story on Amazon, it's taken without permission from the author. Report it.",
"Stolen from its original source, this story is not meant to be on Amazon; report any sightings.",
"If you spot this story on Amazon, know that it has been stolen. Report the violation.",
"If you come across this story on Amazon, be aware that it has been stolen from Royal Road. Please report it.",
"This story has been stolen from Royal Road. If you read it on Amazon, please report it.",
"If you encounter this story on Amazon, note that it's taken without permission from the author. Report it.",
"This tale has been pilfered from Royal Road. If found on Amazon, kindly file a report.",
"Unauthorized use: this story is on Amazon without permission from the author. Report any sightings.",
"Stolen from Royal Road, this story should be reported if encountered on Amazon.",
"This narrative has been purloined without the author's approval. Report any appearances on Amazon.",
"The author's content has been appropriated; report any instances of this story on Amazon.",
"The narrative has been taken without permission. Report any sightings.",
"Royal Road's content has been misappropriated; report any instances of this story if found elsewhere.",
"The story has been taken without consent; if you see it on Amazon, report the incident.",
"Stolen novel; please report.",
"Stolen content alert: this content belongs on Royal Road. Report any occurrences."
)

// Every sample notice must be classified as a warning by the fuzzy matcher.
test("amazonRegex") {
  reportWarnings.foreach { w =>
    val score = Utils.WarningFuzzyMatcher.scoreString(w)
    assert(
      Utils.WarningFuzzyMatcher(w),
      s"Warning: $w should be detected as a warning\n" +
        s"score: $score, threshold: ${Utils.WarningFuzzyMatcher.threshold}"
    )
  }
}

// Guard against false positives: ordinary prose, a sentence with a few
// low-weight keywords, and text over the length limit must all be rejected.
test("WarningFuzzyMatcher rejects non-warning text") {
  val nonWarnings = List(
    "The old house stood at the end of the street, quiet and empty.",
    "She found the novel on Amazon and loved the story.",
    // high-scoring but far longer than the matcher's length limit
    reportWarnings.mkString(" ")
  )
  nonWarnings.foreach { text =>
    val score = Utils.WarningFuzzyMatcher.scoreString(text)
    assert(
      !Utils.WarningFuzzyMatcher(text),
      s"Text: $text should not be detected as a warning\n" +
        s"score: $score, threshold: ${Utils.WarningFuzzyMatcher.threshold}, length: ${text.length}"
    )
  }
}

}

0 comments on commit d05a0b0

Please sign in to comment.