diff --git a/scrapers/AdultEmpire.yml b/scrapers/AdultEmpire.yml
index e5807433e..39ab09139 100644
--- a/scrapers/AdultEmpire.yml
+++ b/scrapers/AdultEmpire.yml
@@ -21,7 +21,14 @@ sceneByQueryFragment:
   action: scrapeXPath
   queryURL: "{url}"
   scraper: sceneScraper
-
+# Performer pages on either listed domain are parsed by performerScraper.
+performerByURL:
+  -
+    action: scrapeXPath
+    url:
+      - adultdvdempire.com
+      - adultempire.com
+    scraper: performerScraper
 
 xPathScrapers:
   sceneSearch:
@@ -102,4 +109,48 @@ xPathScrapers:
               - regex: ^
                 with: "https://www.adultdvdempire.com"
       URL: //meta[@name='og:url']/@content
-# Last Updated October 09, 2023
+  # Extracts performer fields from a performer profile page.
+  performerScraper:
+    common:
+      # Shared prefix: the <ul> of labelled entries inside the #profileModal element.
+      $infoPiece: //*[@id="profileModal"]/div/div/div[2]/div[1]/ul
+    performer:
+      Name: //*[@id="content"]/section/div/div[2]/h1/text()
+      Birthdate:
+        selector: $infoPiece/li[contains(text(), 'Born:')]/text()
+        postProcess:
+          - replace:
+              - regex: Born:\s+(.*)
+                with: $1
+      Height:
+        selector: $infoPiece/li[contains(text(), 'Height:')]/text()
+        postProcess:
+          - replace:
+              - regex: Height:\s+(.*)
+                with: $1
+          - feetToCm: true
+      # Two alternatives: an <img> wrapped in the first link, or a bare <img>.
+      Image: //*[@id="content"]/section/div/div[1]/a[1]/img/@src|//*[@id="content"]/section/div/div[1]/img/@src
+      Country:
+        selector: $infoPiece/li[contains(text(), 'From:')]/text()
+        postProcess:
+          - replace:
+              - regex: From:\s+(.*)
+                with: $1
+      Measurements:
+        selector: $infoPiece/li[contains(text(), 'Measurements:')]/text()
+        postProcess:
+          - replace:
+              # Keep only the three captured groups, joined with "-".
+              - regex: Measurements:\s+(\d\d\w*)\D+(\d+)\D+(\d+).*
+                with: $1-$2-$3
+      Aliases:
+        selector: //*[@id="content"]/section/div/div[2]/div[contains(text(), "Alias:")]
+        concat: ", "
+        postProcess:
+          - replace:
+              - regex: "Alias: (.*)"
+                with: $1
+      Details: //*[@id="content"]/section/div/div[5]/aside/text()
+      URL: //link[@rel='canonical']/@href
+# Last Updated December 07, 2023