Skip to content

Commit

Permalink
scrap additional stuff from titles
Browse files Browse the repository at this point in the history
  • Loading branch information
kubegu committed Jun 6, 2024
1 parent 2b8d33e commit dd58ebb
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
19 changes: 15 additions & 4 deletions library-le/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"io"
"log"
"net/http"
"regexp"
"strings"

"github.com/PuerkitoBio/goquery"
Expand Down Expand Up @@ -37,10 +38,10 @@ func (libClient Client) FindMovies(title string) []domain.Media {
resultTitles := parseMediaSearch(searchResponse.Body)

movies := make([]domain.Media, 0)
//TODO: Parallel Ergbnislinks folgen und Details sammeln
for _, resultTitle := range resultTitles {
movies = append(movies, resultTitle.loadMediaCopies(libClient.session)...)
}
// Follow the result links in parallel and collect details about branch and availability
return movies
}

Expand All @@ -57,6 +58,8 @@ func (result searchResult) loadMediaCopies(libSession webOpacSession) []domain.M
return parseMediaCopiesPage(result.title, mediaResponse.Body)
}

// Go through the search overview page and create a result object for each title found.
// Each result links to the details of every available copy of the media.
func parseMediaSearch(searchResponse io.Reader) []searchResult {
doc, docErr := goquery.NewDocumentFromReader(searchResponse)
if docErr != nil {
Expand All @@ -65,13 +68,15 @@ func parseMediaSearch(searchResponse io.Reader) []searchResult {
}
titles := make([]searchResult, 0)
doc.Find(resultItemSelector).Each(func(i int, resultItem *goquery.Selection) {
title := resultItem.Find(titleSelector).Text()
title := clearTitle(resultItem.Find(titleSelector).Text())
resultUrl, _ := resultItem.Find(titleSelector).Attr("href")
titles = append(titles, searchResult{title: title, resultUrl: resultUrl})
})
return titles
}

// The media copies page is a list of library branches which have a specific copy of a title.
// It holds information about the availability of the media.
func parseMediaCopiesPage(title string, page io.Reader) []domain.Media {
doc, docErr := goquery.NewDocumentFromReader(page)
if docErr != nil {
Expand All @@ -82,14 +87,20 @@ func parseMediaCopiesPage(title string, page io.Reader) []domain.Media {

doc.Find(copiesSelector).Each(func(i int, copy *goquery.Selection) {
branch := copy.Find("div.col-12.col-md-4.my-md-2 > b").Text()
status := isMovieAvailable(copy)
status := isMediaAvailable(copy)
movies = append(movies, domain.Media{Title: title, Branch: branch, IsAvailable: status})
})

return movies
}

func isMovieAvailable(copy *goquery.Selection) bool {
// bracketedInfo matches a single square-bracketed annotation such as
// "[Blu-ray]". It is compiled once at package scope so that clearTitle does
// not recompile the pattern on every call.
var bracketedInfo = regexp.MustCompile(`\[[^\]]*\]`)

// clearTitle removes additional media information given in square brackets
// (for example "[Blu-ray]") from title and trims leading and trailing
// whitespace from the result.
//
// Each bracketed segment is removed on its own, so title text standing between
// two bracketed segments is preserved instead of being swallowed by one greedy
// match from the first "[" to the last "]".
func clearTitle(title string) string {
	return strings.TrimSpace(bracketedInfo.ReplaceAllString(title, ""))
}

func isMediaAvailable(copy *goquery.Selection) bool {
rentalStateLink := copy.Find("div:nth-child(5) > div > a")
//Link indicates a rented state (can reserve a copy)
if rentalStateLink.Length() != 0 {
Expand Down
6 changes: 5 additions & 1 deletion library-le/search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func TestParseSearchResultMovies(t *testing.T) {
Equal(t, "Der Clou", results[1].title)
Equal(t, "/webOPACClient/singleHit.do?methodToCall=showHit&curPos=2&identifier=-1_FT_613132921", results[1].resultUrl)

Equal(t, "Der Clou [Blu-ray]", results[2].title)
Equal(t, "Der Clou", results[2].title)
Equal(t, "/webOPACClient/singleHit.do?methodToCall=showHit&curPos=3&identifier=-1_FT_613132921", results[2].resultUrl)

}
Expand All @@ -67,5 +67,9 @@ func TestParseSearchResultGames(t *testing.T) {

Equal(t, "Monster Hunter - Stories 2. Wings of Ruin", results[2].title)
Equal(t, "/webOPACClient/singleHit.do?methodToCall=showHit&curPos=3&identifier=-1_FT_256756711", results[2].resultUrl)
}

// TestClearTitle verifies that clearTitle strips a trailing square-bracketed
// media annotation and returns only the plain title.
func TestClearTitle(t *testing.T) {
	cases := []struct {
		raw  string
		want string
	}{
		{raw: "Terminator [Bildtonträger]", want: "Terminator"},
		{raw: "Mad Max - Fury Road [blu-ray]", want: "Mad Max - Fury Road"},
	}
	for _, c := range cases {
		Equal(t, c.want, clearTitle(c.raw))
	}
}

0 comments on commit dd58ebb

Please sign in to comment.