-
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: switch to web scraping + fall back on tikwm for media
- Loading branch information
Showing
12 changed files
with
3,329 additions
and
116 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,63 +1,64 @@ | ||
import { TikTokAPIResponse, AwemeList } from "../types/Services"; | ||
import { WebJSONResponse, ItemStruct } from "../types/Web"; | ||
import Cookie from "../util/cookieHelper"; | ||
import cookieParser from "set-cookie-parser"; | ||
|
||
const cookie = new Cookie([]); | ||
|
||
/**
 * Resolves a TikTok short-link ID to the numeric aweme (post) ID.
 *
 * Fetches `https://vm.tiktok.com/<videoId>` and extracts the ID from the path
 * of the response's final URL (`res.url`), which is expected to look like
 * `/@<user>/video/<id>` or `/@<user>/photo/<id>`.
 *
 * @param videoId - Short-link identifier, e.g. `ZMJmVWVpL`.
 * @returns The 19-digit aweme ID found in the final URL path. The `Error`
 *   member of the return type is kept for caller compatibility; this function
 *   reports failure by throwing.
 * @throws Error with message "Could not find awemeId" when the final URL path
 *   does not contain a video/photo ID.
 */
export async function grabAwemeId(videoId: string): Promise<string | Error> {
  // e.g. https://vm.tiktok.com/ZMJmVWVpL/
  const res = await fetch("https://vm.tiktok.com/" + videoId);
  const url = new URL(res.url);

  // Group 1 is the post kind (video|photo), group 2 the 19-digit ID.
  // `\w` already covers digits and underscore, so `[\w.]` is the full user charset.
  const awemeIdPattern = /\/@[\w.]+\/(video|photo)\/(\d{19})/;
  const awemeId = url.pathname.match(awemeIdPattern)?.[2];

  if (awemeId === undefined) {
    throw new Error("Could not find awemeId");
  }
  return awemeId;
}
// https://vm.tiktok.com/ZMJmVWVpL/ | ||
const res = await fetch("https://vm.tiktok.com/" + videoId); | ||
const url = new URL(res.url); | ||
|
||
const awemeIdPattern = /\/@[\w\d_.]+\/(video|photo)\/(\d{19})/; | ||
const match = url.pathname.match(awemeIdPattern); | ||
|
||
/**
 * Looks up a single post through the TikTok mobile feed endpoint.
 *
 * Builds a request to `/aweme/v1/feed/` with a fixed set of client parameters
 * plus the requested `aweme_id`, then searches the returned `aweme_list` for
 * the entry whose `aweme_id` equals the requested one.
 *
 * @param awemeId - ID of the post to look up. The boxed `String` type is kept
 *   for backward compatibility with existing callers; the value is normalized
 *   to a primitive string before use.
 * @returns The matching entry, or an `Error` ("Could not find video info")
 *   when the response has no matching entry or no `aweme_list` at all.
 */
export async function getVideoInfo(
  awemeId: String,
): Promise<AwemeList | Error> {
  // A boxed String never compares equal with `===`, so work on the primitive.
  const id = awemeId.toString();

  const apiUrl = new URL(
    "https://api22-normal-c-alisg.tiktokv.com/aweme/v1/feed/?region=US&carrier_region=US",
  );

  // Hard-coded client parameters sent with every request.
  // NOTE(review): presumably these mimic an Android app install — confirm.
  const params: Record<string, string> = {
    aweme_id: id,
    iid: "7318518857994389254",
    device_id: "7318517321748022790",
    channel: "googleplay",
    app_name: "musical_ly",
    version_code: "300904",
    device_platform: "android",
    device_type: "ASUS_Z01QD",
    os_version: "9",
  };

  for (const [key, value] of Object.entries(params)) {
    apiUrl.searchParams.append(key, value);
  }

  const res: Response = await fetch(apiUrl.toString(), {
    headers: {
      "User-Agent":
        "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
    },
    // Cloudflare Workers request options controlling edge caching per status range.
    cf: {
      cacheEverything: true,
      cacheTtlByStatus: { "200-299": 86400, 404: 1, "500-599": 0 },
    },
  });
  const json: TikTokAPIResponse = await res.json();

  // The payload may lack `aweme_list` entirely; treat that as "not found"
  // instead of crashing with a TypeError.
  const videoInfo = json.aweme_list?.find((aweme) => aweme.aweme_id === id);

  return videoInfo ?? new Error("Could not find video info");
}
if (match) { | ||
return match[2]; | ||
} else { | ||
throw new Error("Could not find awemeId"); | ||
} | ||
} | ||
|
||
/**
 * Scrapes post metadata from the TikTok web page for a given post.
 *
 * Requests `https://www.tiktok.com/@<author>/video/<awemeId>` with the cookies
 * currently held by the module-level `cookie` helper, stores any cookies the
 * response sets, then reads the JSON embedded in the
 * `__UNIVERSAL_DATA_FOR_REHYDRATION__` script tag and returns its
 * `webapp.video-detail` item.
 *
 * @param awemeId - Numeric ID of the post.
 * @param author - Author handle used in the URL; `"i"` is substituted when it
 *   is missing or empty.
 * @returns The `itemStruct` of the post. The `Error` member of the return type
 *   is kept for caller compatibility; failures are reported by throwing.
 * @throws Error "Could not parse video info" when the embedded JSON cannot be
 *   located or parsed, or carries no item.
 * @throws Error "Could not find video data" when the page has no video detail
 *   section or reports status code 10204.
 */
export async function scrapeVideoData(
  awemeId: string,
  author?: string,
): Promise<ItemStruct | Error> {
  // NOTE(review): the console.log calls below print cookie state and response
  // headers; they look like debug output — consider removing before release.
  console.log('before', cookie.getUpdatingCookies());

  // `||` (not `??`) on purpose: an empty author must also fall back to "i".
  const res = await fetch(`https://www.tiktok.com/@${author || "i"}/video/${awemeId}`, {
    method: "GET",
    headers: {
      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
      "Cookie": cookie.getCookiesAsString(),
    },
    // Cloudflare Workers request options controlling edge caching per status range.
    cf: {
      cacheEverything: true,
      cacheTtlByStatus: { "200-299": 86400, 404: 1, "500-599": 0 },
    },
  });

  console.log('string', cookie.getCookiesAsString());
  console.log(res.headers);

  // `Headers.get` folds several Set-Cookie headers into one comma-joined
  // string (or returns null when there are none); split it back into
  // individual cookie strings before parsing.
  const setCookieHeader = res.headers.get("set-cookie");
  const cookies = cookieParser(
    setCookieHeader ? cookieParser.splitCookiesString(setCookieHeader) : [],
  );
  cookie.setCookies(cookies);

  const html = await res.text();

  let json: WebJSONResponse;
  try {
    const resJson = html
      .split('<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" type="application/json">')[1]
      .split('</script>')[0];
    json = JSON.parse(resJson);
  } catch (err) {
    console.log(err);
    throw new Error("Could not parse video info");
  }

  // Checked outside the try block so this specific error is not masked by the
  // generic parse failure.
  const detail = json["__DEFAULT_SCOPE__"]?.["webapp.video-detail"];
  if (!detail || detail.statusCode === 10204) {
    throw new Error("Could not find video data");
  }

  const videoInfo = detail["itemInfo"]?.["itemStruct"];
  if (!videoInfo) {
    throw new Error("Could not parse video info");
  }

  return videoInfo;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import { TikTokAPIResponse, AwemeList } from "../types/API"; | ||
|
||
/**
 * Looks up a single post through the TikTok mobile feed endpoint.
 *
 * Sends a request to `/aweme/v1/feed/` with a fixed set of client parameters
 * plus the requested `aweme_id`, then searches the returned `aweme_list` for
 * the entry whose `aweme_id` equals `awemeId`.
 *
 * @param awemeId - ID of the post to look up.
 * @returns The matching entry, or an `Error` ("Could not find video info")
 *   when the response has no matching entry or no `aweme_list` at all.
 */
export async function getVideoInfo(
  awemeId: string,
): Promise<AwemeList | Error> {
  const apiUrl = new URL(
    "https://api22-normal-c-alisg.tiktokv.com/aweme/v1/feed/",
  );

  // Hard-coded client parameters sent with every request.
  // NOTE(review): presumably these mimic an Android app install — confirm.
  apiUrl.search = new URLSearchParams({
    region: "US",
    carrier_region: "US",
    aweme_id: awemeId,
    iid: "7318518857994389254",
    device_id: "7318517321748022790",
    channel: "googleplay",
    app_name: "musical_ly",
    version_code: "300904",
    device_platform: "android",
    device_type: "ASUS_Z01QD",
    os_version: "9",
  }).toString();

  const res = await fetch(apiUrl.toString(), {
    headers: {
      "User-Agent":
        "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
    },
    // Cloudflare Workers request options controlling edge caching per status range.
    cf: {
      cacheEverything: true,
      cacheTtlByStatus: { "200-299": 86400, 404: 1, "500-599": 0 },
    },
  });
  const json: TikTokAPIResponse = await res.json();

  // The payload may lack `aweme_list` entirely; treat that as "not found"
  // instead of crashing with a TypeError.
  const videoInfo = json.aweme_list?.find(
    (aweme) => aweme.aweme_id === awemeId,
  );

  return videoInfo ?? new Error("Could not find video info");
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
961aad1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks to everyone on #7 for opening an issue about the API change.