From bc101363319836aa9b3564c6b78c070b20f81b45 Mon Sep 17 00:00:00 2001
From: Glax
Date: Thu, 8 Aug 2024 21:00:37 +0200
Subject: [PATCH] MangaHere image download sucks, you have to iterate all over all images one by one. Have some extra traffic then, idc. https://github.com/C9Glax/tranga/issues/69

---
 Tranga/MangaConnectors/MangaConnector.cs      |   4 +-
 .../MangaConnectorJsonConverter.cs            |   2 +
 Tranga/MangaConnectors/MangaHere.cs           | 254 ++++++++++++++++++
 Tranga/Tranga.cs                              |   3 +-
 4 files changed, 260 insertions(+), 3 deletions(-)
 create mode 100644 Tranga/MangaConnectors/MangaHere.cs

diff --git a/Tranga/MangaConnectors/MangaConnector.cs b/Tranga/MangaConnectors/MangaConnector.cs
index 6dbcb45..5c71de8 100644
--- a/Tranga/MangaConnectors/MangaConnector.cs
+++ b/Tranga/MangaConnectors/MangaConnector.cs
@@ -222,8 +222,8 @@ public abstract class MangaConnector : GlobalBase
         if (progressToken?.cancellationRequested ?? false)
             return HttpStatusCode.RequestTimeout;
         Log($"Downloading Images for {saveArchiveFilePath}");
-        if(progressToken is not null)
-            progressToken.increments = imageUrls.Length;
+        if (progressToken is not null)
+            progressToken.increments += imageUrls.Length;
         //Check if Publication Directory already exists
         string directoryPath = Path.GetDirectoryName(saveArchiveFilePath)!;
         if (!Directory.Exists(directoryPath))
diff --git a/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs b/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs
index 0e3dd78..2213a48 100644
--- a/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs
+++ b/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs
@@ -40,6 +40,8 @@ public class MangaConnectorJsonConverter : JsonConverter
                 return this._connectors.First(c => c is MangaLife);
             case "ManhuaPlus":
                 return this._connectors.First(c => c is ManhuaPlus);
+            case "MangaHere":
+                return this._connectors.First(c => c is MangaHere);
         }
 
         throw new Exception();
diff --git a/Tranga/MangaConnectors/MangaHere.cs b/Tranga/MangaConnectors/MangaHere.cs
new file mode 100644
index 0000000..7b13d4d
--- /dev/null
+++ b/Tranga/MangaConnectors/MangaHere.cs
@@ -0,0 +1,254 @@
+using System.Net;
+using System.Text.RegularExpressions;
+using HtmlAgilityPack;
+using Tranga.Jobs;
+
+namespace Tranga.MangaConnectors;
+
+/// <summary>
+/// Connector for www.mangahere.cc. Every page is requested through a <see cref="ChromiumDownloadClient"/>.
+/// </summary>
+public class MangaHere : MangaConnector
+{
+    public MangaHere(GlobalBase clone) : base(clone, "MangaHere")
+    {
+        this.downloadClient = new ChromiumDownloadClient(clone);
+    }
+
+    /// <summary>Searches the site for publications matching <paramref name="publicationTitle"/>.</summary>
+    /// <returns>The publications that could be resolved; empty when the request fails or nothing is found.</returns>
+    public override Manga[] GetManga(string publicationTitle = "")
+    {
+        Log($"Searching Publications. Term=\"{publicationTitle}\"");
+        //Only runs of ASCII letters are sent to the site. "[A-z]" would also let [ \ ] ^ _ ` through.
+        string sanitizedTitle = string.Join('+', Regex.Matches(publicationTitle, "[A-Za-z]+").Select(match => match.Value)).ToLowerInvariant();
+        string requestUrl = $"https://www.mangahere.cc/search?title={sanitizedTitle}";
+        RequestResult requestResult =
+            downloadClient.MakeRequest(requestUrl, RequestType.Default);
+        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 || requestResult.htmlDocument is null)
+            return Array.Empty<Manga>();
+
+        Manga[] publications = ParsePublicationsFromHtml(requestResult.htmlDocument);
+        Log($"Retrieved {publications.Length} publications. Term=\"{publicationTitle}\"");
+        return publications;
+    }
+
+    /// <summary>Collects every manga link of a search-result page and resolves each one via <see cref="GetMangaFromUrl"/>.</summary>
+    private Manga[] ParsePublicationsFromHtml(HtmlDocument document)
+    {
+        //SelectNodes returns null instead of an empty collection when nothing matches.
+        HtmlNodeCollection? containers = document.DocumentNode.SelectNodes("//div[contains(concat(' ',normalize-space(@class),' '),' container ')]");
+        //A container with a "search-keywords" child is treated as "no results".
+        if (containers is not null && containers.Any(node => node.ChildNodes.Any(cNode => cNode.HasClass("search-keywords"))))
+            return Array.Empty<Manga>();
+
+        HtmlNodeCollection? mangaLinks = document.DocumentNode
+            .SelectNodes("//a[contains(@href, '/manga/') and not(contains(@href, '.html'))]");
+        if (mangaLinks is null)
+            return Array.Empty<Manga>();
+
+        List<string> urls = mangaLinks
+            .Select(thumb => $"https://www.mangahere.cc{thumb.GetAttributeValue("href", "")}").Distinct().ToList();
+
+        HashSet<Manga> ret = new();
+        foreach (string url in urls)
+        {
+            Manga? manga = GetMangaFromUrl(url);
+            if (manga is not null)
+                ret.Add((Manga)manga);
+        }
+
+        return ret.ToArray();
+    }
+
+    /// <summary>Resolves a publication through its detail page https://www.mangahere.cc/manga/{publicationId}.</summary>
+    public override Manga? GetMangaFromId(string publicationId)
+    {
+        return GetMangaFromUrl($"https://www.mangahere.cc/manga/{publicationId}");
+    }
+
+    /// <summary>Loads the detail page at <paramref name="url"/> and parses it into a <see cref="Manga"/>.</summary>
+    /// <returns>null when no id can be read from the url, the request fails or the page has no title.</returns>
+    public override Manga? GetMangaFromUrl(string url)
+    {
+        //NOTE(review): "A-z" also spans [ \ ] ^ _ ` ; the '_' is probably relied upon for ids, confirm before tightening.
+        Regex idRex = new (@"https:\/\/www\.mangahere\.[a-z]{0,63}\/manga\/([0-9A-z\-]+).*");
+        Match idMatch = idRex.Match(url);
+        if (!idMatch.Success)
+        {
+            Log($"Could not read a publication id from {url}");
+            return null;
+        }
+
+        RequestResult requestResult =
+            downloadClient.MakeRequest(url, RequestType.MangaInfo);
+        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 || requestResult.htmlDocument is null)
+            return null;
+
+        return ParseSinglePublicationFromHtml(requestResult.htmlDocument, idMatch.Groups[1].Value, url);
+    }
+
+    /// <summary>Builds a <see cref="Manga"/> from a detail page and adds it to the cache.</summary>
+    /// <returns>null when the page has no title node.</returns>
+    private Manga? ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl)
+    {
+        //SelectSingleNode returns null when nothing matches; checked first so nothing is cached for such a page.
+        HtmlNode? titleNode = document.DocumentNode.SelectSingleNode("//span[contains(concat(' ',normalize-space(@class),' '),' detail-info-right-title-font ')]");
+        if (titleNode is null)
+        {
+            Log($"No title found on {websiteUrl}");
+            return null;
+        }
+        string sortName = titleNode.InnerText;
+
+        string originalLanguage = "";
+        Dictionary<string, string> altTitles = new(), links = new();
+
+        //The real cover (img.detail-info-cover-img) is not fetched (same-origin restriction, per the original note); a fixed picture is used instead.
+        string posterUrl = "http://static.mangahere.cc/v20230914/mangahere/images/nopicture.jpg";
+        string coverFileNameInCache = SaveCoverImageToCache(posterUrl, publicationId, RequestType.MangaCover);
+
+        List<string> authors = document.DocumentNode
+            .SelectNodes("//p[contains(concat(' ',normalize-space(@class),' '),' detail-info-right-say ')]/a")
+            ?.Select(node => node.InnerText)
+            .ToList() ?? new List<string>();
+
+        HashSet<string> tags = document.DocumentNode
+            .SelectNodes("//p[contains(concat(' ',normalize-space(@class),' '),' detail-info-right-tag-list ')]/a")
+            ?.Select(node => node.InnerText)
+            .ToHashSet() ?? new HashSet<string>();
+
+        string status = document.DocumentNode.SelectSingleNode("//span[contains(concat(' ',normalize-space(@class),' '),' detail-info-right-title-tip ')]")?.InnerText ?? "";
+        //Unknown or missing status texts stay Unreleased.
+        Manga.ReleaseStatusByte releaseStatus = status.ToLowerInvariant() switch
+        {
+            "cancelled" or "discontinued" => Manga.ReleaseStatusByte.Cancelled,
+            "hiatus" => Manga.ReleaseStatusByte.OnHiatus,
+            "complete" => Manga.ReleaseStatusByte.Completed,
+            "ongoing" => Manga.ReleaseStatusByte.Continuing,
+            _ => Manga.ReleaseStatusByte.Unreleased
+        };
+
+        string description = document.DocumentNode
+            .SelectSingleNode("//p[contains(concat(' ',normalize-space(@class),' '),' fullcontent ')]")?.InnerText ?? "";
+
+        Manga manga = new(sortName, authors, description, altTitles, tags.ToArray(), posterUrl,
+            coverFileNameInCache, links,
+            null, originalLanguage, publicationId, releaseStatus, websiteUrl: websiteUrl);
+        AddMangaToCache(manga);
+        return manga;
+    }
+
+    /// <summary>Reads the chapter list of <paramref name="manga"/> from its detail page.</summary>
+    /// <returns>The chapters in their default order; empty when the request fails or no chapter link is found.</returns>
+    public override Chapter[] GetChapters(Manga manga, string language="en")
+    {
+        Log($"Getting chapters {manga}");
+        string requestUrl = $"https://www.mangahere.cc/manga/{manga.publicationId}";
+        RequestResult requestResult =
+            downloadClient.MakeRequest(requestUrl, RequestType.Default);
+        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 || requestResult.htmlDocument is null)
+            return Array.Empty<Chapter>();
+
+        //SelectNodes returns null when no chapter link matches.
+        List<string> urls = requestResult.htmlDocument.DocumentNode.SelectNodes("//div[@id='list-2']/ul//li//a[contains(@href, '/manga/')]")
+            ?.Select(node => node.GetAttributeValue("href", "")).ToList() ?? new List<string>();
+        Regex chapterRex = new(@".*\/manga\/[a-zA-Z0-9\-\._\~\!\$\&\'\(\)\*\+\,\;\=\:\@]+\/v([0-9(TBD)]+)\/c([0-9\.]+)\/.*");
+
+        List<Chapter> chapters = new();
+        foreach (string url in urls)
+        {
+            Match rexMatch = chapterRex.Match(url);
+            if (!rexMatch.Success)
+            {
+                //An unmatched url would yield empty volume and chapter numbers.
+                Log($"Skipping chapter with unexpected url {url}");
+                continue;
+            }
+
+            string volumeNumber = rexMatch.Groups[1].Value == "TBD" ? "0" : rexMatch.Groups[1].Value;
+            string chapterNumber = rexMatch.Groups[2].Value;
+            string fullUrl = $"https://www.mangahere.cc{url}";
+            chapters.Add(new Chapter(manga, "", volumeNumber, chapterNumber, fullUrl));
+        }
+        //Return Chapters ordered by Chapter-Number
+        Log($"Got {chapters.Count} chapters. {manga}");
+        return chapters.Order().ToArray();
+    }
+
+    /// <summary>Walks the reader pages of <paramref name="chapter"/> one by one to collect the image urls, then downloads the images.</summary>
+    /// <returns>The status code of a failed page request, otherwise the result of DownloadChapterImages.</returns>
+    public override HttpStatusCode DownloadChapter(Chapter chapter, ProgressToken? progressToken = null)
+    {
+        if (progressToken?.cancellationRequested ?? false)
+        {
+            progressToken.Cancel();
+            return HttpStatusCode.RequestTimeout;
+        }
+
+        Manga chapterParentManga = chapter.parentManga;
+        Log($"Retrieving chapter-info {chapter} {chapterParentManga}");
+
+        List<string> imageUrls = new();
+
+        //Drop the last path segment (presumably "1.html") so the page number can be appended.
+        string url = string.Join('/', chapter.url.Split('/')[..^1]);
+        int images = 1;
+        //Pages are requested 1-based. The former "downloaded++ <= images" compared the old counter and could request page images+1.
+        for (int page = 1; page <= images; page++)
+        {
+            RequestResult requestResult =
+                downloadClient.MakeRequest($"{url}/{page}.html", RequestType.Default);
+            if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300)
+            {
+                progressToken?.Cancel();
+                return requestResult.statusCode;
+            }
+
+            if (requestResult.htmlDocument is null)
+            {
+                progressToken?.Cancel();
+                return HttpStatusCode.InternalServerError;
+            }
+
+            imageUrls.AddRange(ParseImageUrlsFromHtml(requestResult.htmlDocument));
+
+            //Highest data-page among the links of this page; SelectNodes returns null when there are none.
+            int highestPage = requestResult.htmlDocument.DocumentNode
+                .SelectNodes("//a[contains(@href, '/manga/')]")
+                ?.Max(node => node.GetAttributeValue("data-page", 0)) ?? 0;
+            //Keep the largest count seen, so a page reporting fewer does not cut the run short.
+            images = Math.Max(images, highestPage);
+            if (progressToken is not null)
+            {
+                progressToken.increments = images * 2;//we also have to download the images later
+                progressToken.Increment();
+            }
+        }
+
+        string comicInfoPath = Path.GetTempFileName();
+        File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());
+
+        if (progressToken is not null)
+            progressToken.increments = images;//we blip to normal length, in DownloadChapterImages it is increased by the amount of urls again
+        return DownloadChapterImages(imageUrls.ToArray(), chapter.GetArchiveFilePath(settings.downloadLocation), RequestType.MangaImage, comicInfoPath, progressToken:progressToken);
+    }
+
+    /// <summary>Returns the src of every reader image on the page; protocol-relative urls get an "https:" prefix.</summary>
+    private string[] ParseImageUrlsFromHtml(HtmlDocument document)
+    {
+        //SelectNodes returns null when the page holds no reader image.
+        HtmlNodeCollection? imageNodes = document.DocumentNode
+            .SelectNodes("//img[contains(concat(' ',normalize-space(@class),' '),' reader-main-img ')]");
+        if (imageNodes is null)
+            return Array.Empty<string>();
+
+        return imageNodes
+            .Select(node =>
+            {
+                string url = node.GetAttributeValue("src", "");
+                return url.StartsWith("//", StringComparison.Ordinal) ? $"https:{url}" : url;
+            })
+            .ToArray();
+    }
+}
\ No newline at end of file
diff --git a/Tranga/Tranga.cs b/Tranga/Tranga.cs
index b19720e..1471365 100644
--- a/Tranga/Tranga.cs
+++ b/Tranga/Tranga.cs
@@ -25,7 +25,8 @@ public partial class Tranga : GlobalBase
             new Mangaworld(this),
             new Bato(this),
             new MangaLife(this),
-            new ManhuaPlus(this)
+            new ManhuaPlus(this),
+            new MangaHere(this),
         };
         foreach(DirectoryInfo dir in new DirectoryInfo(Path.GetTempPath()).GetDirectories("trangatemp"))//Cleanup old temp folders
             dir.Delete();