Skip to content

Commit

Permalink
testing cinerama using scrapeops
Browse files: browse the repository at this point in the history
  • Loading branch information
ckuijjer committed Dec 17, 2023
1 parent 2e8cce6 commit 644443c
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 50 deletions.
3 changes: 3 additions & 0 deletions cloud/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ OMDB_API_KEY=
GOOGLE_CUSTOM_SEARCH_API_KEY=
GOOGLE_CUSTOM_SEARCH_ID=
DYNAMODB_MOVIE_METADATA=
PRIVATE_BUCKET=
PUBLIC_BUCKET=
SCRAPEOPS_API_KEY=
53 changes: 28 additions & 25 deletions cloud/playground.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,30 +132,33 @@ const movieMetadataPlayground = async () => {
}

const getUsingGot = async () => {
const json = await got(
'https://kinepolisweb-programmation.kinepolis.com/api/Programmation/NL/NL/WWW/Cinema/Cinerama',
{
headers: {
authority: 'kinepolisweb-programmation.kinepolis.com',
// accept: 'application/json, text/javascript, */*; q=0.01',
accept:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'accept-language': 'en-US,en;q=0.9',
'if-modified-since': 'Wed, 04 Oct 2023 19:44:34 GMT',
'if-none-match': '"14021a8ddd8adf9db8db447b7f94cc59:1696448674.360531"',
'sec-ch-ua': '"Chromium";v="117", "Not;A=Brand";v="8"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"macOS"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'cross-site',
Referer: 'https://cineramabios.nl/',
'Referrer-Policy': 'strict-origin-when-cross-origin',
'user-agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
},
const url =
'https://kinepolisweb-programmation.kinepolis.com/api/Programmation/NL/NL/WWW/Cinema/Cinerama'
const scrapeOpsProxyUrl = `https://proxy.scrapeops.io/v1/?api_key=${
process.env.SCRAPEOPS_API_KEY
}&url=${encodeURIComponent(url)}`

const json = await got(scrapeOpsProxyUrl, {
headers: {
authority: 'kinepolisweb-programmation.kinepolis.com',
// accept: 'application/json, text/javascript, */*; q=0.01',
accept:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'accept-language': 'en-US,en;q=0.9',
'if-modified-since': 'Wed, 04 Oct 2023 19:44:34 GMT',
'if-none-match': '"14021a8ddd8adf9db8db447b7f94cc59:1696448674.360531"',
'sec-ch-ua': '"Chromium";v="117", "Not;A=Brand";v="8"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"macOS"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'cross-site',
Referer: 'https://cineramabios.nl/',
'Referrer-Policy': 'strict-origin-when-cross-origin',
'user-agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
},
).json()
}).json()

logger.info('result', { json })
return json
Expand Down Expand Up @@ -185,8 +188,8 @@ const playground = async ({ event, context } = {}) => {
// const results = await findMetadata('chungking express')
// const results = await findMetadata('Caché')
// await getUsingChromium()
// const result = await getUsingGot()
const result = await getLux()
const result = await getUsingGot()
// const result = await getLux()

console.log(JSON.stringify(result, null, 2))
}
Expand Down
51 changes: 26 additions & 25 deletions cloud/scrapers/cinerama.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,31 +42,32 @@ const hasEnglishSubtitles = (movie: KinepolisMovie) => {

const extractFromMainPage = async (): Promise<Screening[]> => {
try {
const programmation: KinepolisProgrammation = await got(
'https://kinepolisweb-programmation.kinepolis.com/api/Programmation/NL/NL/WWW/Cinema/Cinerama',
{
headers: {
authority: 'kinepolisweb-programmation.kinepolis.com',
// accept: 'application/json, text/javascript, */*; q=0.01',
accept:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'accept-language': 'en-US,en;q=0.9',
'if-modified-since': 'Wed, 04 Oct 2023 19:44:34 GMT',
'if-none-match':
'"14021a8ddd8adf9db8db447b7f94cc59:1696448674.360531"',
'sec-ch-ua': '"Chromium";v="117", "Not;A=Brand";v="8"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"macOS"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'cross-site',
Referer: 'https://cineramabios.nl/',
'Referrer-Policy': 'strict-origin-when-cross-origin',
'user-agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
},
},
).json()
const url =
'https://kinepolisweb-programmation.kinepolis.com/api/Programmation/NL/NL/WWW/Cinema/Cinerama'
const scrapeOpsProxyUrl = `https://proxy.scrapeops.io/v1/?api_key=${
process.env.SCRAPEOPS_API_KEY
}&url=${encodeURIComponent(url)}`

const programmation: KinepolisProgrammation = await got(scrapeOpsProxyUrl, {
// headers: {
// authority: 'kinepolisweb-programmation.kinepolis.com',
// accept:
// 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
// 'accept-language': 'en-US,en;q=0.9',
// 'if-modified-since': 'Wed, 04 Oct 2023 19:44:34 GMT',
// 'if-none-match': '"14021a8ddd8adf9db8db447b7f94cc59:1696448674.360531"',
// 'sec-ch-ua': '"Chromium";v="117", "Not;A=Brand";v="8"',
// 'sec-ch-ua-mobile': '?0',
// 'sec-ch-ua-platform': '"macOS"',
// 'sec-fetch-dest': 'empty',
// 'sec-fetch-mode': 'cors',
// 'sec-fetch-site': 'cross-site',
// Referer: 'https://cineramabios.nl/',
// 'Referrer-Policy': 'strict-origin-when-cross-origin',
// 'user-agent':
// 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
// },
}).json()

const moviesWithEnglishSubtitles =
programmation.films.filter(hasEnglishSubtitles)
Expand Down
2 changes: 2 additions & 0 deletions cloud/serverless.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ functions:
GOOGLE_CUSTOM_SEARCH_ID: ${env:GOOGLE_CUSTOM_SEARCH_ID}
GOOGLE_CUSTOM_SEARCH_API_KEY: ${env:GOOGLE_CUSTOM_SEARCH_API_KEY}
SCRAPERS: ${env:SCRAPERS, ''} # defaults to '' because SCRAPERS is the only optional env var
SCRAPEOPS_API_KEY: ${env:SCRAPEOPS_API_KEY}
layers:
- arn:aws:lambda:eu-west-1:764866452798:layer:chrome-aws-lambda:38 # https://github.com/shelfio/chrome-aws-lambda-layer

Expand All @@ -91,6 +92,7 @@ functions:
OMDB_API_KEY: ${env:OMDB_API_KEY}
GOOGLE_CUSTOM_SEARCH_ID: ${env:GOOGLE_CUSTOM_SEARCH_ID}
GOOGLE_CUSTOM_SEARCH_API_KEY: ${env:GOOGLE_CUSTOM_SEARCH_API_KEY}
SCRAPEOPS_API_KEY: ${env:SCRAPEOPS_API_KEY}

notifySlack:
handler: handler.notifySlack
Expand Down

0 comments on commit 644443c

Please sign in to comment.