From 657e1b338bd244130fed51a425c6665ee0e7c2de Mon Sep 17 00:00:00 2001 From: Glax Date: Sat, 2 Nov 2024 17:19:17 +0100 Subject: [PATCH 1/6] resolves #141 Asuratoon connector --- Tranga/MangaConnectors/AsuraToon.cs | 208 ++++++++++++++++++ .../MangaConnectorJsonConverter.cs | 1 + Tranga/Tranga.cs | 1 + 3 files changed, 210 insertions(+) create mode 100644 Tranga/MangaConnectors/AsuraToon.cs diff --git a/Tranga/MangaConnectors/AsuraToon.cs b/Tranga/MangaConnectors/AsuraToon.cs new file mode 100644 index 0000000..d064b06 --- /dev/null +++ b/Tranga/MangaConnectors/AsuraToon.cs @@ -0,0 +1,208 @@ +using System.Net; +using System.Text.RegularExpressions; +using HtmlAgilityPack; +using Tranga.Jobs; + +namespace Tranga.MangaConnectors; + +public class AsuraToon : MangaConnector +{ + + public AsuraToon(GlobalBase clone) : base(clone, "AsuraToon", ["en"]) + { + this.downloadClient = new HttpDownloadClient(clone); + } + + public override Manga[] GetManga(string publicationTitle = "") + { + Log($"Searching Publications. Term=\"{publicationTitle}\""); + string sanitizedTitle = string.Join(' ', Regex.Matches(publicationTitle, "[A-z]*").Where(m => m.Value.Length > 0)).ToLower(); + string requestUrl = $"https://asuracomic.net/series?name={sanitizedTitle}"; + RequestResult requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + return Array.Empty(); + + if (requestResult.htmlDocument is null) + { + Log($"Failed to retrieve site"); + return Array.Empty(); + } + + Manga[] publications = ParsePublicationsFromHtml(requestResult.htmlDocument); + Log($"Retrieved {publications.Length} publications. Term=\"{publicationTitle}\""); + return publications; + } + + public override Manga? GetMangaFromId(string publicationId) + { + return GetMangaFromUrl($"https://asuracomic.net/series/{publicationId}"); + } + + public override Manga? GetMangaFromUrl(string url) + { + RequestResult requestResult = downloadClient.MakeRequest(url, RequestType.MangaInfo); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + return null; + if (requestResult.htmlDocument is null) + { + Log($"Failed to retrieve site"); + return null; + } + return ParseSinglePublicationFromHtml(requestResult.htmlDocument, url.Split('/')[^1], url); + } + + private Manga[] ParsePublicationsFromHtml(HtmlDocument document) + { + HtmlNodeCollection mangaList = document.DocumentNode.SelectNodes("//a[starts-with(@href,'series')]"); + if (mangaList.Count < 1) + return Array.Empty(); + + IEnumerable urls = mangaList.Select(a => a.GetAttributeValue("href", "")); + + List ret = new(); + foreach (string url in urls) + { + Manga? manga = GetMangaFromUrl(url); + if (manga is not null) + ret.Add((Manga)manga); + } + + return ret.ToArray(); + } + + private Manga ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl) + { + string? originalLanguage = null; + Dictionary altTitles = new(), links = new(); + + HtmlNodeCollection genreNodes = document.DocumentNode.SelectNodes("//h3[text()='Genres']/../div/button"); + string[] tags = genreNodes.Select(b => b.InnerText).ToArray(); + + HtmlNode statusNode = document.DocumentNode.SelectSingleNode("//h3[text()='Genres']/../h3[2]"); + Manga.ReleaseStatusByte releaseStatus = statusNode.InnerText.ToLower() switch + { + "ongoing" => Manga.ReleaseStatusByte.Continuing, + "hiatus" => Manga.ReleaseStatusByte.OnHiatus, + "completed" => Manga.ReleaseStatusByte.Completed, + "dropped" => Manga.ReleaseStatusByte.Cancelled, + "season end" => Manga.ReleaseStatusByte.Continuing, + "coming soon" => Manga.ReleaseStatusByte.Unreleased, + _ => Manga.ReleaseStatusByte.Unreleased + }; + + HtmlNode coverNode = + document.DocumentNode.SelectSingleNode("/html/body/div[3]/div/div/div/div[1]/div/div[1]/div[1]/div[2]/div[2]/div[1]/img"); + string coverUrl = coverNode.GetAttributeValue("src", ""); + string coverFileNameInCache = SaveCoverImageToCache(coverUrl, publicationId, RequestType.MangaCover); + + HtmlNode titleNode = + document.DocumentNode.SelectSingleNode("/html/body/div[3]/div/div/div/div[1]/div/div[1]/div[1]/div[2]/div[2]/div[2]/div[1]/span"); + string sortName = titleNode.InnerText; + + HtmlNode descriptionNode = + document.DocumentNode.SelectSingleNode("//h3[starts-with(text(),'Synopsis')]/../span"); + string description = descriptionNode.InnerText; + + HtmlNodeCollection authorNodes = document.DocumentNode.SelectNodes("//h3[text()='Author']/../h3[not(text()='Author' or text()='_')]"); + HtmlNodeCollection artistNodes = document.DocumentNode.SelectNodes("//h3[text()='Artist']/../h3[not(text()='Author' or text()='_')]"); + List authors = authorNodes.Select(a => a.InnerText).Concat(artistNodes.Select(a => a.InnerText)).ToList(); + + HtmlNode? firstChapterNode = document.DocumentNode.SelectSingleNode("//a[contains(@href, 'chapter/1')]"); + int? year = int.Parse(firstChapterNode?.InnerText ?? "2000"); + + Manga manga = new (sortName, authors, description, altTitles, tags, coverUrl, coverFileNameInCache, links, + year, originalLanguage, publicationId, releaseStatus, websiteUrl); + AddMangaToCache(manga); + return manga; + } + + public override Chapter[] GetChapters(Manga manga, string language="en") + { + Log($"Getting chapters {manga}"); + string requestUrl = $"https://asuracomic.net/series/{manga.publicationId}"; + // Leaving this in for verification if the page exists + RequestResult requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + return Array.Empty(); + + //Return Chapters ordered by Chapter-Number + List chapters = ParseChaptersFromHtml(manga, requestUrl); + Log($"Got {chapters.Count} chapters. {manga}"); + return chapters.Order().ToArray(); + } + + private List ParseChaptersFromHtml(Manga manga, string mangaUrl) + { + RequestResult result = downloadClient.MakeRequest(mangaUrl, RequestType.Default); + if ((int)result.statusCode < 200 || (int)result.statusCode >= 300 || result.htmlDocument is null) + { + Log("Failed to load site"); + return new List(); + } + + List ret = new(); + + HtmlNodeCollection chapterURLNodes = result.htmlDocument.DocumentNode.SelectNodes("//a[contains(@href, '/chapter/')]/"); + Regex infoRex = new(@"Chapter ([0-9]+)(.*)?"); + + foreach (HtmlNode chapterInfo in chapterURLNodes) + { + string chapterUrl = chapterInfo.GetAttributeValue("href", ""); + + Match match = infoRex.Match(chapterInfo.InnerText); + string chapterNumber = match.Groups[1].Value; + string? chapterName = match.Groups[2].Success ? match.Groups[2].Value : null; + string url = $"https://asuracomic.net/series/{chapterUrl}"; + ret.Add(new Chapter(manga, chapterName, null, chapterNumber, url)); + } + + return ret; + } + + public override HttpStatusCode DownloadChapter(Chapter chapter, ProgressToken? progressToken = null) + { + if (progressToken?.cancellationRequested ?? false) + { + progressToken.Cancel(); + return HttpStatusCode.RequestTimeout; + } + + Manga chapterParentManga = chapter.parentManga; + Log($"Retrieving chapter-info {chapter} {chapterParentManga}"); + string requestUrl = chapter.url; + // Leaving this in to check if the page exists + RequestResult requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + { + progressToken?.Cancel(); + return requestResult.statusCode; + } + + string[] imageUrls = ParseImageUrlsFromHtml(requestUrl); + + return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage, progressToken:progressToken); + } + + private string[] ParseImageUrlsFromHtml(string mangaUrl) + { + RequestResult requestResult = + downloadClient.MakeRequest(mangaUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + { + return Array.Empty(); + } + if (requestResult.htmlDocument is null) + { + Log($"Failed to retrieve site"); + return Array.Empty(); + } + + HtmlNodeCollection images = + requestResult.htmlDocument.DocumentNode.SelectNodes("\\img[contains(@alt, 'chapter page')]"); + + return images.Select(i => i.GetAttributeValue("src", "")).ToArray(); + } +} \ No newline at end of file diff --git a/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs b/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs index 3b3e6da..283d7e7 100644 --- a/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs +++ b/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs @@ -38,6 +38,7 @@ public class MangaConnectorJsonConverter : JsonConverter "Manga4Life" => this._connectors.First(c => c is MangaLife), "ManhuaPlus" => this._connectors.First(c => c is ManhuaPlus), "MangaHere" => this._connectors.First(c => c is MangaHere), + "AsuraToon" => this._connectors.First(c => c is AsuraToon), _ => throw new UnreachableException($"Could not find Connector with name {connectorName}") }; } diff --git a/Tranga/Tranga.cs b/Tranga/Tranga.cs index 2762a94..3209a5e 100644 --- a/Tranga/Tranga.cs +++ b/Tranga/Tranga.cs @@ -26,6 +26,7 @@ public partial class Tranga : GlobalBase new MangaLife(this), new ManhuaPlus(this), new MangaHere(this), + new AsuraToon(this), }; foreach(DirectoryInfo dir in new DirectoryInfo(Path.GetTempPath()).GetDirectories("trangatemp"))//Cleanup old temp folders dir.Delete(); From b2381be860a4a07d8e93d2ce178a402c674e0d6e Mon Sep 17 00:00:00 2001 From: Glax Date: Sat, 2 Nov 2024 17:42:26 +0100 Subject: [PATCH 2/6] #141 fix ParsePublicationsFromHtml, statusNode, titleNode, firstChapterNode fix ParseChaptersFromHtml nodeCollection of ChapterURls fix ParseImageUrlsFromHtml xPath fix Chapterparsing names --- Tranga/MangaConnectors/AsuraToon.cs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Tranga/MangaConnectors/AsuraToon.cs b/Tranga/MangaConnectors/AsuraToon.cs index d064b06..ceb2055 100644 --- a/Tranga/MangaConnectors/AsuraToon.cs +++ b/Tranga/MangaConnectors/AsuraToon.cs @@ -58,7 +58,7 @@ public class AsuraToon : MangaConnector if (mangaList.Count < 1) return Array.Empty(); - IEnumerable urls = mangaList.Select(a => a.GetAttributeValue("href", "")); + IEnumerable urls = mangaList.Select(a => $"https://asuracomic.net/{a.GetAttributeValue("href", "")}"); List ret = new(); foreach (string url in urls) @@ -79,7 +79,7 @@ public class AsuraToon : MangaConnector HtmlNodeCollection genreNodes = document.DocumentNode.SelectNodes("//h3[text()='Genres']/../div/button"); string[] tags = genreNodes.Select(b => b.InnerText).ToArray(); - HtmlNode statusNode = document.DocumentNode.SelectSingleNode("//h3[text()='Genres']/../h3[2]"); + HtmlNode statusNode = document.DocumentNode.SelectSingleNode("//h3[text()='Status']/../h3[2]"); Manga.ReleaseStatusByte releaseStatus = statusNode.InnerText.ToLower() switch { "ongoing" => Manga.ReleaseStatusByte.Continuing, @@ -92,13 +92,13 @@ public class AsuraToon : MangaConnector }; HtmlNode coverNode = - document.DocumentNode.SelectSingleNode("/html/body/div[3]/div/div/div/div[1]/div/div[1]/div[1]/div[2]/div[2]/div[1]/img"); + document.DocumentNode.SelectSingleNode("//img[@alt='poster']"); string coverUrl = coverNode.GetAttributeValue("src", ""); string coverFileNameInCache = SaveCoverImageToCache(coverUrl, publicationId, RequestType.MangaCover); HtmlNode titleNode = - document.DocumentNode.SelectSingleNode("/html/body/div[3]/div/div/div/div[1]/div/div[1]/div[1]/div[2]/div[2]/div[2]/div[1]/span"); - string sortName = titleNode.InnerText; + document.DocumentNode.SelectSingleNode("//title"); + string sortName = Regex.Match(titleNode.InnerText, @"(.*) - Asura Scans").Groups[1].Value; HtmlNode descriptionNode = document.DocumentNode.SelectSingleNode("//h3[starts-with(text(),'Synopsis')]/../span"); @@ -108,8 +108,8 @@ public class AsuraToon : MangaConnector HtmlNodeCollection artistNodes = document.DocumentNode.SelectNodes("//h3[text()='Artist']/../h3[not(text()='Author' or text()='_')]"); List authors = authorNodes.Select(a => a.InnerText).Concat(artistNodes.Select(a => a.InnerText)).ToList(); - HtmlNode? firstChapterNode = document.DocumentNode.SelectSingleNode("//a[contains(@href, 'chapter/1')]"); - int? year = int.Parse(firstChapterNode?.InnerText ?? "2000"); + HtmlNode? firstChapterNode = document.DocumentNode.SelectSingleNode("//a[contains(@href, 'chapter/1')]/../following-sibling::h3"); + int? year = int.Parse(firstChapterNode?.InnerText.Split(' ')[^1] ?? "2000"); Manga manga = new (sortName, authors, description, altTitles, tags, coverUrl, coverFileNameInCache, links, year, originalLanguage, publicationId, releaseStatus, websiteUrl); @@ -144,7 +144,7 @@ public class AsuraToon : MangaConnector List ret = new(); - HtmlNodeCollection chapterURLNodes = result.htmlDocument.DocumentNode.SelectNodes("//a[contains(@href, '/chapter/')]/"); + HtmlNodeCollection chapterURLNodes = result.htmlDocument.DocumentNode.SelectNodes("//a[contains(@href, '/chapter/')]"); Regex infoRex = new(@"Chapter ([0-9]+)(.*)?"); foreach (HtmlNode chapterInfo in chapterURLNodes) @@ -153,7 +153,7 @@ public class AsuraToon : MangaConnector Match match = infoRex.Match(chapterInfo.InnerText); string chapterNumber = match.Groups[1].Value; - string? chapterName = match.Groups[2].Success ? match.Groups[2].Value : null; + string? chapterName = match.Groups[2].Success && match.Groups[2].Length > 0 ? match.Groups[2].Value : null; string url = $"https://asuracomic.net/series/{chapterUrl}"; ret.Add(new Chapter(manga, chapterName, null, chapterNumber, url)); } @@ -201,7 +201,7 @@ public class AsuraToon : MangaConnector } HtmlNodeCollection images = - requestResult.htmlDocument.DocumentNode.SelectNodes("\\img[contains(@alt, 'chapter page')]"); + requestResult.htmlDocument.DocumentNode.SelectNodes("//img[contains(@alt, 'chapter page')]"); return images.Select(i => i.GetAttributeValue("src", "")).ToArray(); } From 47479f7a0d141d6f6a027241194e3a1f4d6954e8 Mon Sep 17 00:00:00 2001 From: Glax Date: Sat, 2 Nov 2024 17:44:23 +0100 Subject: [PATCH 3/6] Fix chaptermarkers. Don't create one if Chapter does not have an ID --- Tranga/Chapter.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tranga/Chapter.cs b/Tranga/Chapter.cs index 928591a..d2fc000 100644 --- a/Tranga/Chapter.cs +++ b/Tranga/Chapter.cs @@ -121,6 +121,8 @@ public readonly struct Chapter : IComparable public void CreateChapterMarker() { + if (this.id is null) + return; string path = Path.Join(TrangaSettings.downloadLocation, parentManga.folderName, $".{id}"); File.WriteAllText(path, GetArchiveFilePath()); File.SetAttributes(path, FileAttributes.Hidden); From b619109ea153398061ca8292ed1ffd2e66e5b81c Mon Sep 17 00:00:00 2001 From: Glax Date: Sat, 2 Nov 2024 17:48:18 +0100 Subject: [PATCH 4/6] fix #141 chapternames --- Tranga/MangaConnectors/AsuraToon.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tranga/MangaConnectors/AsuraToon.cs b/Tranga/MangaConnectors/AsuraToon.cs index ceb2055..bf3bdaa 100644 --- a/Tranga/MangaConnectors/AsuraToon.cs +++ b/Tranga/MangaConnectors/AsuraToon.cs @@ -153,7 +153,7 @@ public class AsuraToon : MangaConnector Match match = infoRex.Match(chapterInfo.InnerText); string chapterNumber = match.Groups[1].Value; - string? chapterName = match.Groups[2].Success && match.Groups[2].Length > 0 ? match.Groups[2].Value : null; + string? chapterName = match.Groups[2].Success && match.Groups[2].Length > 1 ? match.Groups[2].Value : null; string url = $"https://asuracomic.net/series/{chapterUrl}"; ret.Add(new Chapter(manga, chapterName, null, chapterNumber, url)); } From 5a8202f87279777142e67624f0d1dd6ebc0d3ece Mon Sep 17 00:00:00 2001 From: Glax Date: Mon, 11 Nov 2024 17:59:48 +0100 Subject: [PATCH 5/6] More logging --- Tranga/MangaConnectors/MangaConnector.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tranga/MangaConnectors/MangaConnector.cs b/Tranga/MangaConnectors/MangaConnector.cs index f0c8fe0..f9fbf69 100644 --- a/Tranga/MangaConnectors/MangaConnector.cs +++ b/Tranga/MangaConnectors/MangaConnector.cs @@ -281,7 +281,9 @@ public abstract class MangaConnector : GlobalBase File.SetUnixFileMode(saveArchiveFilePath, UserRead | UserWrite | UserExecute | GroupRead | GroupWrite | GroupExecute | OtherRead | OtherExecute); Directory.Delete(tempFolder, true); //Cleanup + Log("Created archive."); progressToken?.Complete(); + Log("Download complete."); return HttpStatusCode.OK; } From 204fb7614d29c86348f9d7dfadfe6c49efe1a31b Mon Sep 17 00:00:00 2001 From: Glax Date: Thu, 28 Nov 2024 21:35:29 +0100 Subject: [PATCH 6/6] Fix #281 Manganato errors when there is no chapters uploaded --- Tranga/MangaConnectors/Manganato.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tranga/MangaConnectors/Manganato.cs b/Tranga/MangaConnectors/Manganato.cs index 795fc8c..c3b337b 100644 --- a/Tranga/MangaConnectors/Manganato.cs +++ b/Tranga/MangaConnectors/Manganato.cs @@ -130,13 +130,13 @@ public class Manganato : MangaConnector string pattern = "MMM dd,yyyy HH:mm"; - HtmlNode oldestChapter = document.DocumentNode + HtmlNode? oldestChapter = document.DocumentNode .SelectNodes("//span[contains(concat(' ',normalize-space(@class),' '),' chapter-time ')]").MaxBy( node => DateTime.ParseExact(node.GetAttributeValue("title", "Dec 31 2400, 23:59"), pattern, - CultureInfo.InvariantCulture).Millisecond)!; + CultureInfo.InvariantCulture).Millisecond); - int year = DateTime.ParseExact(oldestChapter.GetAttributeValue("title", "Dec 31 2400, 23:59"), pattern, + int year = DateTime.ParseExact(oldestChapter?.GetAttributeValue("title", "Dec 31 2400, 23:59")??"Dec 31 2400, 23:59", pattern, CultureInfo.InvariantCulture).Year; Manga manga = new (sortName, authors.ToList(), description, altTitles, tags.ToArray(), posterUrl, coverFileNameInCache, links,