forked from AakashSasikumar/YouTubeScraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathYouTubeScraper.java
122 lines (112 loc) · 4.34 KB
/
YouTubeScraper.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package youtubescraping;
import java.io.File;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import org.apache.commons.io.FileUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.ui4j.api.browser.BrowserEngine;
import com.ui4j.api.browser.BrowserFactory;
/**
 * Interactive command-line tool: asks for a song name, lists the matching YouTube
 * search results, lets the user pick one, submits the chosen video URL to
 * listentoyoutube.com through an embedded browser and saves the resulting MP3
 * under {@code C:\Downloads\}.
 */
public class YouTubeScraper {

    private static final String YOUTUBE_HOST = "https://www.youtube.com";
    private static final String SEARCH_URL = YOUTUBE_HOST + "/results?search_query=";
    private static final String CONVERTER_URL = "http://www.listentoyoutube.com/";
    private static final String DOWNLOAD_DIR = "C:\\Downloads\\";

    /*
     * Character window of the converter's result markup that holds the download
     * parameters. The original code cut it out in three steps
     * (substring(584), substring(0, 210), substring(20, 99)); 604 and 683 are the
     * same offsets expressed against the full string.
     */
    private static final int PARAMS_START = 604;
    private static final int PARAMS_END = 683;

    /** Offsets of the hash value inside the second '&'-separated parameter. */
    private static final int HASH_START = 9;
    private static final int HASH_END = 68;

    /** Number of leading characters dropped from the first parameter to get the server number. */
    private static final int SERVER_PREFIX_LENGTH = 3;

    /**
     * Program entry point.
     *
     * @param args ignored
     * @throws IOException if the search page cannot be fetched or the converter page
     *         does not contain the expected download parameters
     * @throws InterruptedException if the thread is interrupted while waiting for the
     *         converter or the download to become ready
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        int exitStatus;
        // try-with-resources so the Scanner is really closed; the original close()
        // call sat behind System.exit and never ran.
        try (Scanner in = new Scanner(System.in)) {
            exitStatus = run(in);
        }
        // The original always ended with System.exit; presumably the embedded browser
        // engine would otherwise keep the JVM alive -- TODO confirm. The status now
        // reflects success (0) or failure (1) instead of always being 0.
        System.exit(exitStatus);
    }

    /** Runs the whole search / choose / convert / download flow and returns the exit status. */
    private static int run(Scanner in) throws IOException, InterruptedException {
        System.out.println("\t\t\t\t\t\t\tYouTube Downloader");
        System.out.println("Enter the song name");
        String songName = in.nextLine();

        List<String> links = new ArrayList<>();
        List<String> titles = new ArrayList<>();
        searchVideos(songName, links, titles);
        if (links.isEmpty()) {
            System.out.println("No videos found for \"" + songName + "\"");
            return 1;
        }

        for (int j = 0; j < titles.size(); j++) {
            System.out.println((j + 1) + ": " + titles.get(j));
        }
        System.out.println("Please enter your choice");
        if (!in.hasNextInt()) {
            System.out.println("Invalid choice: a number between 1 and " + links.size() + " is required");
            return 1;
        }
        int choice = in.nextInt() - 1; // the menu is 1-based
        if (choice < 0 || choice >= links.size()) {
            System.out.println("Invalid choice: a number between 1 and " + links.size() + " is required");
            return 1;
        }

        String videoTitle = titles.get(choice);
        String resultMarkup = requestConversion(links.get(choice));
        String downloadUrl = buildDownloadUrl(resultMarkup, videoTitle);
        return download(downloadUrl, videoTitle);
    }

    /**
     * Scrapes the YouTube search page for {@code songName} and appends the URL and
     * title of every non-playlist result to the two parallel lists.
     */
    private static void searchVideos(String songName, List<String> links, List<String> titles)
            throws IOException {
        // The query is user input and must be encoded, otherwise characters such as
        // '&' or '#' change the meaning of the URL.
        String searchUrl = SEARCH_URL + URLEncoder.encode(songName, "UTF-8");
        Document website = Jsoup.connect(searchUrl).get();
        // Result titles are the anchors inside h3.yt-lockup-title.
        Elements anchors = website.select("h3.yt-lockup-title>a");
        for (Element anchor : anchors) {
            String href = anchor.attr("href");
            // Playlist results carry "list" in their URL and are not wanted.
            if (href.contains("list")) {
                continue;
            }
            links.add(YOUTUBE_HOST + href);
            titles.add(anchor.text());
        }
    }

    /**
     * Submits {@code videoUrl} to the converter site in an embedded browser, waits
     * 15 seconds and returns the string form of the page's {@code col-lg-8} divs.
     */
    private static String requestConversion(String videoUrl) throws InterruptedException {
        BrowserEngine browser = BrowserFactory.getWebKit();
        // To silence ui4j logging, enable:
        // java.util.logging.Logger.getLogger("com.ui4j").setLevel(Level.OFF);
        com.ui4j.api.browser.Page page = browser.navigate(CONVERTER_URL);
        com.ui4j.api.dom.Document form = page.getDocument();
        form.query("input[type='text']").get().setValue(videoUrl);
        form.query("input[type='submit']").get().click();
        // Fixed wait for the page to update after the submit.
        TimeUnit.SECONDS.sleep(15);
        return page.getDocument().queryAll("div[class='col-lg-8']").toString();
    }

    /**
     * Builds the final MP3 URL from the server number and hash found at fixed
     * offsets in {@code resultMarkup}, plus the URL-encoded video title.
     *
     * @throws IOException if the markup is too short or not shaped as expected, which
     *         previously surfaced as a bare index-out-of-bounds error
     */
    private static String buildDownloadUrl(String resultMarkup, String videoTitle) throws IOException {
        if (resultMarkup.length() < PARAMS_END) {
            throw new IOException("Unexpected converter response (" + resultMarkup.length()
                    + " characters); cannot locate the download parameters");
        }
        String[] params = resultMarkup.substring(PARAMS_START, PARAMS_END).split("&");
        if (params.length < 2
                || params[0].length() < SERVER_PREFIX_LENGTH
                || params[1].length() < HASH_END) {
            throw new IOException("Unexpected converter response; download parameters are malformed");
        }
        String server = params[0].substring(SERVER_PREFIX_LENGTH);
        String hash = params[1].substring(HASH_START, HASH_END).replace("%", "");
        // The download URL follows a fixed pattern made of server, hash and file name.
        return "http://srv" + server + ".listentoyoutube.com/download/" + hash + "==/"
                + URLEncoder.encode(videoTitle, "UTF-8") + ".mp3";
    }

    /**
     * Removes characters that are not allowed in Windows file names. The original
     * clean-up worked on the URL-encoded title with the regex {@code %7C+}, which also
     * swallowed any 'C' following a '|', and it left separators such as '/' and '\'
     * in place.
     */
    private static String toFileName(String videoTitle) {
        return videoTitle.replaceAll("[\\\\/:*?\"<>|]", "");
    }

    /**
     * Prints the reported file size, waits 5 seconds and copies {@code downloadUrl}
     * to the download directory.
     *
     * @return 0 when the file was downloaded, 1 when the download failed
     */
    private static int download(String downloadUrl, String videoTitle)
            throws IOException, InterruptedException {
        URL source = new URL(downloadUrl);
        File target = new File(DOWNLOAD_DIR + toFileName(videoTitle) + ".mp3");

        HttpURLConnection probe = (HttpURLConnection) source.openConnection();
        try {
            long fileSize = probe.getContentLength();
            if (fileSize >= 0) {
                System.out.println("Size : " + fileSize / 1048576f + " mb");
            } else {
                // getContentLength() returns -1 when the length is not known.
                System.out.println("Size : unknown");
            }
        } finally {
            probe.disconnect();
        }

        // Pause kept from the original; presumably gives the server time to prepare
        // the file -- TODO confirm.
        TimeUnit.SECONDS.sleep(5);
        try {
            System.out.println("Downloading...");
            FileUtils.copyURLToFile(source, target);
            System.out.println("Download Complete");
            return 0;
        } catch (IOException e) {
            System.out.println("Got an IOException: " + e.getMessage());
            System.out.println("Download Failed");
            return 1;
        } catch (RuntimeException e) {
            // Top-level boundary: report and fail instead of leaving the JVM running.
            System.out.println("Unexpected error: " + e);
            System.out.println("Download Failed");
            return 1;
        }
    }
}