Skip to content

Commit

Permalink
Merge branch 'release-1.3.5'
Browse files Browse the repository at this point in the history
  • Loading branch information
Otiel committed Nov 8, 2020
2 parents 30c6da5 + dbfc906 commit 93b37bf
Show file tree
Hide file tree
Showing 5 changed files with 492 additions and 482 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# 1.3.5

## Bug fixes

* Fixed HTML characters not decoded in metadata. [#157](https://github.com/Otiel/BandcampDownloader/issues/157)

## Improvements

* Updated the following languages thanks to [contributors](https://github.com/Otiel/BandcampDownloader/pull/160): Italian.

# 1.3.4

## Bug fixes
Expand Down
211 changes: 105 additions & 106 deletions src/BandcampDownloader/Helpers/BandcampHelper.cs
Original file line number Diff line number Diff line change
@@ -1,107 +1,106 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
using Newtonsoft.Json;

namespace BandcampDownloader {

internal static class BandcampHelper {

/// <summary>
/// Builds an <see cref="Album"/> from the HTML source code of a Bandcamp album page.
/// </summary>
/// <param name="htmlCode">The HTML source code of a Bandcamp album page.</param>
/// <returns>The album described by the page; tracks whose lyrics element exists in the page have their lyrics set.</returns>
public static Album GetAlbum(string htmlCode) {
    // Step 1: isolate the JSON blob embedded in the page
    string rawJson;
    try {
        rawJson = GetAlbumData(htmlCode);
    } catch (Exception e) {
        throw new Exception("Could not retrieve album data in HTML code.", e);
    }

    // Step 2: repair the known formatting quirks before parsing
    string json = FixJson(rawJson);

    // Step 3: parse the JSON and convert it to the domain model
    Album result;
    try {
        var serializerSettings = new JsonSerializerSettings {
            NullValueHandling = NullValueHandling.Ignore,
            MissingMemberHandling = MissingMemberHandling.Ignore
        };
        JsonAlbum parsed = JsonConvert.DeserializeObject<JsonAlbum>(json, serializerSettings);
        result = parsed.ToAlbum();
    } catch (Exception e) {
        throw new Exception("Could not deserialize JSON data.", e);
    }

    // Step 4: lyrics are not part of the JSON; they are looked up in the page by element id, one per track number
    var page = new HtmlDocument();
    page.LoadHtml(htmlCode);
    foreach (Track current in result.Tracks) {
        HtmlNode lyricsNode = page.GetElementbyId("_lyrics_" + current.Number);
        if (lyricsNode == null) {
            // No lyrics published for this track
            continue;
        }

        current.Lyrics = lyricsNode.InnerText.Trim();
    }

    return result;
}

/// <summary>
/// Retrieves the URLs of all the albums ("real" albums or track-only pages) linked from the specified Bandcamp page.
/// </summary>
/// <param name="htmlCode">The HTML source code of a Bandcamp page.</param>
/// <param name="artistPage">The root URL that is prepended to every relative album/track URL found in <paramref name="htmlCode"/>.</param>
/// <returns>The album and track URLs found in the page, without duplicates.</returns>
/// <exception cref="NoAlbumFoundException">No album or track link exists in <paramref name="htmlCode"/>.</exception>
public static List<string> GetAlbumsUrl(string htmlCode, string artistPage) {
    // Get albums ("real" albums or track-only pages) relative urls.
    // The URL is captured with [^"\n]* instead of a greedy .* so the capture stops at the quote closing the href
    // value; a greedy .* runs up to the LAST quote of the line and would swallow any attribute following the href.
    var regex = new Regex("href=\"(?<url>/(album|track)/[^\"\\n]*)\"");

    // Scan the page once and reuse the result for both the "nothing found" check and the extraction
    MatchCollection matches = regex.Matches(htmlCode);
    if (matches.Count == 0) {
        throw new NoAlbumFoundException();
    }

    // Make the URLs absolute and remove duplicates
    return matches
        .Cast<Match>()
        .Select(m => artistPage + m.Groups["url"].Value)
        .Distinct()
        .ToList();
}

/// <summary>
/// Repairs the parts of the album data that Bandcamp pages do not emit as valid JSON, so that it can be deserialized.
/// </summary>
/// <param name="albumData">The album data extracted from a Bandcamp page.</param>
/// <returns>The album data in which concatenated URLs have been merged into a single string literal.</returns>
private static string FixJson(string albumData) {
    // The trackinfo property may hold a URL written as a string concatenation, for instance:
    //     url: "http://verbalclick.bandcamp.com" + "/album/404"
    // Gluing the two captured halves back together drops the middle `" + "`.
    const string splitUrlPattern = "(?<root>url: \".+)\" \\+ \"(?<album>.+\",)";
    const string joinedUrl = "${root}${album}";

    return Regex.Replace(albumData, splitUrlPattern, joinedUrl);
}

/// <summary>
/// Extracts the JSON album data stored in the <c>data-tralbum</c> attribute of a Bandcamp page.
/// </summary>
/// <param name="htmlCode">The HTML source code of a Bandcamp album page.</param>
/// <returns>The HTML-decoded JSON object, from its opening brace to its closing brace included.</returns>
/// <exception cref="Exception">The start or the end of the album data could not be found in <paramref name="htmlCode"/>.</exception>
private static string GetAlbumData(string htmlCode) {
    string startString = "data-tralbum=\"{";
    string stopString = "}\"";

    // Ordinal search: both markers are markup tokens, not linguistic text, so culture rules must not apply
    int startIndex = htmlCode.IndexOf(startString, StringComparison.Ordinal);
    if (startIndex == -1) {
        // Could not find startString
        throw new Exception($"Could not find the following string in HTML code: {startString}");
    }

    // The last character of startString is the opening brace of the JSON object: keep it
    int jsonStart = startIndex + startString.Length - 1;

    int stopIndex = htmlCode.IndexOf(stopString, jsonStart, StringComparison.Ordinal);
    if (stopIndex == -1) {
        // Without this check a truncated page would silently produce an empty string
        throw new Exception($"Could not find the following string in HTML code: {stopString}");
    }

    // The first character of stopString is the closing brace of the JSON object: keep it (hence the + 1)
    string albumData = htmlCode.Substring(jsonStart, stopIndex - jsonStart + 1);

    // The JSON is stored in an HTML attribute, so every HTML entity must be decoded, not only &quot;
    return System.Net.WebUtility.HtmlDecode(albumData);
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
using Newtonsoft.Json;

namespace BandcampDownloader {

internal static class BandcampHelper {

/// <summary>
/// Retrieves the data on the album of the specified Bandcamp page.
/// </summary>
/// <param name="htmlCode">The HTML source code of a Bandcamp album page.</param>
/// <returns>The data on the album of the specified Bandcamp page; tracks whose lyrics element exists in the page have their lyrics set.</returns>
/// <exception cref="Exception">The album data could not be found in <paramref name="htmlCode"/>, or could not be deserialized.</exception>
public static Album GetAlbum(string htmlCode) {
    // Keep the interesting part of htmlCode only
    string albumData;
    try {
        albumData = GetAlbumData(htmlCode);
    } catch (Exception e) {
        throw new Exception("Could not retrieve album data in HTML code.", e);
    }

    // Fix some wrongly formatted JSON in source code
    albumData = FixJson(albumData);

    // Deserialize JSON
    Album album;
    try {
        var settings = new JsonSerializerSettings {
            NullValueHandling = NullValueHandling.Ignore,
            MissingMemberHandling = MissingMemberHandling.Ignore
        };
        album = JsonConvert.DeserializeObject<JsonAlbum>(albumData, settings).ToAlbum();
    } catch (Exception e) {
        throw new Exception("Could not deserialize JSON data.", e);
    }

    // Extract lyrics from album page: they are not part of the JSON and are looked up by element id
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(htmlCode);
    foreach (Track track in album.Tracks) {
        HtmlNode lyricsElement = htmlDoc.GetElementbyId("_lyrics_" + track.Number);
        if (lyricsElement != null) {
            // InnerText returns the raw text of the markup, so HTML entities must be decoded here
            // just like they are for the rest of the metadata
            track.Lyrics = WebUtility.HtmlDecode(lyricsElement.InnerText).Trim();
        }
    }

    return album;
}

/// <summary>
/// Retrieves the URLs of all the albums ("real" albums or track-only pages) linked from the specified Bandcamp page.
/// </summary>
/// <param name="htmlCode">The HTML source code of a Bandcamp page.</param>
/// <param name="artistPage">The root URL that is prepended to every relative album/track URL found in <paramref name="htmlCode"/>.</param>
/// <returns>The album and track URLs found in the page, without duplicates.</returns>
/// <exception cref="NoAlbumFoundException">No album or track link exists in <paramref name="htmlCode"/>.</exception>
public static List<string> GetAlbumsUrl(string htmlCode, string artistPage) {
    // Get albums ("real" albums or track-only pages) relative urls.
    // A negated character class is used instead of a greedy .* so that the capture ends at the quote that closes
    // the href value; .* would extend to the last quote of the line and include unrelated attributes in the URL.
    var regex = new Regex("href=\"(?<url>/(album|track)/[^\"\\n]*)\"");

    // A single scan serves both the "no album" check and the extraction
    MatchCollection matches = regex.Matches(htmlCode);
    if (matches.Count == 0) {
        throw new NoAlbumFoundException();
    }

    // Prefix each relative URL with the artist page, then remove duplicates
    List<string> albumsUrl = matches
        .Cast<Match>()
        .Select(m => artistPage + m.Groups["url"].Value)
        .Distinct()
        .ToList();
    return albumsUrl;
}

/// <summary>
/// Rewrites the pieces of album data that are not valid JSON in Bandcamp pages, so that the data can be deserialized.
/// </summary>
/// <param name="albumData">The album data extracted from a Bandcamp page.</param>
/// <returns>The album data in which concatenated URLs have been merged into a single string literal.</returns>
private static string FixJson(string albumData) {
    // The trackinfo property can contain a URL expressed as a concatenation, for instance:
    //     url: "http://verbalclick.bandcamp.com" + "/album/404"
    // "root" captures everything before the `" + "` and "album" everything after it.
    var concatenatedUrl = new Regex("(?<root>url: \".+)\" \\+ \"(?<album>.+\",)");

    // Emit the two halves back to back, which drops the `" + "` in between
    return concatenatedUrl.Replace(albumData, m => m.Groups["root"].Value + m.Groups["album"].Value);
}

/// <summary>
/// Extracts the JSON album data stored in the <c>data-tralbum</c> attribute of a Bandcamp page.
/// </summary>
/// <param name="htmlCode">The HTML source code of a Bandcamp album page.</param>
/// <returns>The HTML-decoded JSON object, from its opening brace to its closing brace included.</returns>
/// <exception cref="Exception">The start or the end of the album data could not be found in <paramref name="htmlCode"/>.</exception>
private static string GetAlbumData(string htmlCode) {
    string startString = "data-tralbum=\"{";
    string stopString = "}\"";

    // Ordinal search: both markers are markup tokens, not linguistic text, so culture rules must not apply.
    // The index is computed once and reused instead of searching the page twice.
    int startIndex = htmlCode.IndexOf(startString, StringComparison.Ordinal);
    if (startIndex == -1) {
        // Could not find startString
        throw new Exception($"Could not find the following string in HTML code: {startString}");
    }

    // The last character of startString is the opening brace of the JSON object: keep it
    int jsonStart = startIndex + startString.Length - 1;

    int stopIndex = htmlCode.IndexOf(stopString, jsonStart, StringComparison.Ordinal);
    if (stopIndex == -1) {
        // Without this check a truncated page would silently produce an empty string
        throw new Exception($"Could not find the following string in HTML code: {stopString}");
    }

    // The first character of stopString is the closing brace of the JSON object: keep it (hence the + 1)
    string albumData = htmlCode.Substring(jsonStart, stopIndex - jsonStart + 1);

    // The JSON is stored in an HTML attribute, so its HTML entities must be decoded
    return WebUtility.HtmlDecode(albumData);
}
}
}
4 changes: 2 additions & 2 deletions src/BandcampDownloader/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.3.4")]
[assembly: AssemblyFileVersion("1.3.4")]
[assembly: AssemblyVersion("1.3.5")]
[assembly: AssemblyFileVersion("1.3.5")]
[assembly: GuidAttribute("8C171C7F-9BAC-4EC0-A287-59908B48953F")]
1 change: 1 addition & 0 deletions src/BandcampDownloader/Properties/Resources.it.resx
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,7 @@ Disabilitare questa opzione per risparmiare banda/tempo.</value>
<data name="textBoxAllowedFileSizeDifference_ToolTip" xml:space="preserve">
<value>Scaricando una traccia, se esiste già un file con lo stesso nome, si comparerà alla traccia da scaricare.
Se la dimensione dei file differisce meno di questo valore (in percentuale), la traccia non sarà scaricata.
Imposta questo valore a 0 per scaricare sempre le tracce, anche se sono già presenti sul disco.

Valore consigliato = 5</value>
</data>
Expand Down
Loading

0 comments on commit 93b37bf

Please sign in to comment.