From b39dbd5671229dfb0d862038bc7317ba7851f7b6 Mon Sep 17 00:00:00 2001 From: Alessandro Benetton Date: Fri, 10 Jan 2025 22:10:34 +0100 Subject: [PATCH 1/7] [cuttingedge] fix(weebcentral): Fixed regex to capture chapters with decimal (1.5, ..) --- Tranga/MangaConnectors/WeebCentral.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tranga/MangaConnectors/WeebCentral.cs b/Tranga/MangaConnectors/WeebCentral.cs index 6c08454..842afff 100644 --- a/Tranga/MangaConnectors/WeebCentral.cs +++ b/Tranga/MangaConnectors/WeebCentral.cs @@ -175,7 +175,7 @@ public class Weebcentral : MangaConnector { var chaptersWrapper = document.DocumentNode.SelectSingleNode("/html/body"); - Regex chapterRex = new(@".* (\d+)"); + Regex chapterRex = new(@"(\d+(?:.\d+)*)"); Regex idRex = new(@"https:\/\/weebcentral\.com\/chapters\/(\w*)"); var ret = chaptersWrapper.Descendants("a").Select(elem => From 7921dcb1cbb382afea8ba097040b43cc19a3a963 Mon Sep 17 00:00:00 2001 From: Alessandro Benetton Date: Fri, 24 Jan 2025 21:52:52 +0100 Subject: [PATCH 2/7] [cuttingedge] fix: Change condition for newChapters. Should solve #323 --- Tranga/MangaConnectors/MangaConnector.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tranga/MangaConnectors/MangaConnector.cs b/Tranga/MangaConnectors/MangaConnector.cs index c98bfad..03afa26 100644 --- a/Tranga/MangaConnectors/MangaConnector.cs +++ b/Tranga/MangaConnectors/MangaConnector.cs @@ -60,7 +60,7 @@ public abstract class MangaConnector : GlobalBase return Array.Empty(); Log($"Checking for duplicates {manga}"); - List newChaptersList = allChapters.Where(nChapter => nChapter.chapterNumber > manga.ignoreChaptersBelow + List newChaptersList = allChapters.Where(nChapter => nChapter.chapterNumber >= manga.ignoreChaptersBelow && !nChapter.CheckChapterIsDownloaded()).ToList(); Log($"{newChaptersList.Count} new chapters. {manga}"); try From f7a285aabd610d07b69ebe90db3d57dd93fafe42 Mon Sep 17 00:00:00 2001 From: Alessandro Benetton Date: Sat, 25 Jan 2025 11:40:00 +0100 Subject: [PATCH 3/7] [cuttingedge] fix: Add escape to Weebcentral regex --- Tranga/MangaConnectors/WeebCentral.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tranga/MangaConnectors/WeebCentral.cs b/Tranga/MangaConnectors/WeebCentral.cs index 842afff..50d63c4 100644 --- a/Tranga/MangaConnectors/WeebCentral.cs +++ b/Tranga/MangaConnectors/WeebCentral.cs @@ -175,7 +175,7 @@ public class Weebcentral : MangaConnector { var chaptersWrapper = document.DocumentNode.SelectSingleNode("/html/body"); - Regex chapterRex = new(@"(\d+(?:.\d+)*)"); + Regex chapterRex = new(@"(\d+(?:\.\d+)*)"); Regex idRex = new(@"https:\/\/weebcentral\.com\/chapters\/(\w*)"); var ret = chaptersWrapper.Descendants("a").Select(elem => From 0c9e3205c21366458ff95e75896ed01e6501b931 Mon Sep 17 00:00:00 2001 From: Makhuta Date: Thu, 6 Feb 2025 15:37:30 +0100 Subject: [PATCH 4/7] Add Manga Connector - added [Webtoon](https://www.webtoons.com) manga connector - modified/added support for saving covers with refferer --- Tranga/MangaConnectors/MangaConnector.cs | 4 +- .../MangaConnectorJsonConverter.cs | 1 + Tranga/MangaConnectors/Webtoons.cs | 272 ++++++++++++++++++ Tranga/Tranga.cs | 1 + 4 files changed, 276 insertions(+), 2 deletions(-) create mode 100644 Tranga/MangaConnectors/Webtoons.cs diff --git a/Tranga/MangaConnectors/MangaConnector.cs b/Tranga/MangaConnectors/MangaConnector.cs index 03afa26..5529b45 100644 --- a/Tranga/MangaConnectors/MangaConnector.cs +++ b/Tranga/MangaConnectors/MangaConnector.cs @@ -213,7 +213,7 @@ public abstract class MangaConnector : GlobalBase return HttpStatusCode.OK; } - protected string SaveCoverImageToCache(string url, string mangaInternalId, RequestType requestType) + protected string SaveCoverImageToCache(string url, string mangaInternalId, RequestType requestType, string? refferer = null) { Regex urlRex = new (@"https?:\/\/((?:[a-zA-Z0-9-]+\.)+[a-zA-Z0-9]+)\/(?:.+\/)*(.+\.([a-zA-Z]+))"); //https?:\/\/[a-zA-Z0-9-]+\.([a-zA-Z0-9-]+\.[a-zA-Z0-9]+)\/(?:.+\/)*(.+\.([a-zA-Z]+)) for only second level domains @@ -224,7 +224,7 @@ public abstract class MangaConnector : GlobalBase if (File.Exists(saveImagePath)) return saveImagePath; - RequestResult coverResult = downloadClient.MakeRequest(url, requestType); + RequestResult coverResult = downloadClient.MakeRequest(url, requestType, referrer); using MemoryStream ms = new(); coverResult.result.CopyTo(ms); Directory.CreateDirectory(TrangaSettings.coverImageCache); diff --git a/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs b/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs index f54dbe3..1baf08f 100644 --- a/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs +++ b/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs @@ -39,6 +39,7 @@ public class MangaConnectorJsonConverter : JsonConverter "MangaHere" => this._connectors.First(c => c is MangaHere), "AsuraToon" => this._connectors.First(c => c is AsuraToon), "Weebcentral" => this._connectors.First(c => c is Weebcentral), + "Webtoons" => this._connectors.First(c => c is Webtoons), _ => throw new UnreachableException($"Could not find Connector with name {connectorName}") }; } diff --git a/Tranga/MangaConnectors/Webtoons.cs b/Tranga/MangaConnectors/Webtoons.cs new file mode 100644 index 0000000..238bf51 --- /dev/null +++ b/Tranga/MangaConnectors/Webtoons.cs @@ -0,0 +1,272 @@ +using System.Net; +using System.Text.RegularExpressions; +using HtmlAgilityPack; +using Tranga.Jobs; + +namespace Tranga.MangaConnectors; + +public class Webtoons : MangaConnector +{ + + public Webtoons(GlobalBase clone) : base(clone, "Webtoons", ["en"]) + { + this.downloadClient = new HttpDownloadClient(clone); + } + + // Done + public override Manga[] GetManga(string publicationTitle = "") + { + string sanitizedTitle = string.Join(' ', Regex.Matches(publicationTitle, "[A-z]*").Where(m => m.Value.Length > 0)).ToLower(); + Log($"Searching Publications. Term=\"{publicationTitle}\""); + string requestUrl = $"https://www.webtoons.com/en/search?keyword={sanitizedTitle}&searchType=WEBTOON"; + RequestResult requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) { + Log($"Failed to retrieve site"); + return Array.Empty(); + } + + if (requestResult.htmlDocument is null) + { + Log($"Failed to retrieve site"); + return Array.Empty(); + } + + Manga[] publications = ParsePublicationsFromHtml(requestResult.htmlDocument); + Log($"Retrieved {publications.Length} publications. Term=\"{publicationTitle}\""); + return publications; + } + + // Done + public override Manga? GetMangaFromId(string publicationId) + { + PublicationManager pb = new PublicationManager(publicationId); + return GetMangaFromUrl($"https://www.webtoons.com/en/{pb.Category}/{pb.Title}/list?title_no={pb.Id}"); + } + + // Done + public override Manga? GetMangaFromUrl(string url) + { + RequestResult requestResult = downloadClient.MakeRequest(url, RequestType.MangaInfo); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) { + return null; + } + if (requestResult.htmlDocument is null) + { + Log($"Failed to retrieve site"); + return null; + } + Regex regex = new Regex(@".*webtoons\.com/en/(?[^/]+)/(?[^/]+)/list\?title_no=(?<id>\d+).*"); + Match match = regex.Match(url); + + if(match.Success) { + PublicationManager pm = new PublicationManager(match.Groups["title"].Value, match.Groups["category"].Value, match.Groups["id"].Value); + return ParseSinglePublicationFromHtml(requestResult.htmlDocument, pm.getPublicationId(), url); + } + Log($"Failed match Regex ID"); + return null; + } + + // Done + private Manga[] ParsePublicationsFromHtml(HtmlDocument document) + { + HtmlNode mangaList = document.DocumentNode.SelectSingleNode("//ul[contains(@class, 'card_lst')]"); + if (!mangaList.ChildNodes.Any(node => node.Name == "li")) { + Log($"Failed to parse publication"); + return Array.Empty<Manga>(); + } + + List<string> urls = document.DocumentNode + .SelectNodes("//ul[contains(@class, 'card_lst')]/li/a") + .Select(node => node.GetAttributeValue("href", "https://www.webtoons.com")) + .ToList(); + + HashSet<Manga> ret = new(); + foreach (string url in urls) + { + Manga? manga = GetMangaFromUrl(url); + if (manga is not null) + ret.Add((Manga)manga); + } + + return ret.ToArray(); + } + + private string capitalizeString(string str = "") { + if(str.Length == 0) return ""; + if(str.Length == 1) return str.ToUpper(); + return char.ToUpper(str[0]) + str.Substring(1).ToLower(); + } + + // Done + private Manga ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl) + { + HtmlNode infoNode1 = document.DocumentNode.SelectSingleNode("//*[@id='content']/div[2]/div[1]/div[1]"); + HtmlNode infoNode2 = document.DocumentNode.SelectSingleNode("//*[@id='content']/div[2]/div[2]/div[2]"); + + string sortName = infoNode1.SelectSingleNode(".//h1[contains(@class, 'subj')]").InnerText; + string description = infoNode2.SelectSingleNode(".//p[contains(@class, 'summary')]") + .InnerText.Trim(); + + HtmlNode posterNode = document.DocumentNode.SelectSingleNode("//div[contains(@class, 'detail_body') and contains(@class, 'banner')]"); + + Regex regex = new Regex(@"url\((?<url>.*?)\)"); + Match match = regex.Match(posterNode.GetAttributeValue("style", "")); + + string posterUrl = match.Groups["url"].Value; + string coverFileNameInCache = SaveCoverImageToCache(posterUrl, publicationId, RequestType.MangaCover, websiteUrl); + + string genre = infoNode1.SelectSingleNode(".//h2[contains(@class, 'genre')]") + .InnerText.Trim(); + string[] tags = [ genre ]; + + List<HtmlNode> authorsNodes = infoNode1.SelectSingleNode(".//div[contains(@class, 'author_area')]").Descendants("a").ToList(); + List<string> authors = authorsNodes.Select(node => node.InnerText.Trim()).ToList(); + + string originalLanguage = ""; + + int year = DateTime.Now.Year; + + string status1 = infoNode2.SelectSingleNode(".//p").InnerText; + string status2 = infoNode2.SelectSingleNode(".//p/span").InnerText; + Manga.ReleaseStatusByte releaseStatus = Manga.ReleaseStatusByte.Unreleased; + if(status2.Length == 0 || status1.ToLower() == "completed") { + releaseStatus = Manga.ReleaseStatusByte.Completed; + } else if(status2.ToLower() == "up") { + releaseStatus = Manga.ReleaseStatusByte.Continuing; + } + + Manga manga = new(sortName, authors, description, new Dictionary<string, string>(), tags, posterUrl, coverFileNameInCache, new Dictionary<string, string>(), + year, originalLanguage, publicationId, releaseStatus, websiteUrl: websiteUrl); + AddMangaToCache(manga); + return manga; + } + + // Done + public override Chapter[] GetChapters(Manga manga, string language = "en") + { + PublicationManager pm = new PublicationManager(manga.publicationId); + string requestUrl = $"https://www.webtoons.com/en/{pm.Category}/{pm.Title}/list?title_no={pm.Id}"; + // Leaving this in for verification if the page exists + RequestResult requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + return Array.Empty<Chapter>(); + + // Get number of pages + int pages = requestResult.htmlDocument.DocumentNode.SelectSingleNode("//div[contains(@class, 'paginate')]").ChildNodes.ToArray().Length; + List<Chapter> chapters = new List<Chapter>(); + + for(int page = 1; page <= pages; page++) { + string pageRequestUrl = $"{requestUrl}&page={page}"; + + chapters.AddRange(ParseChaptersFromHtml(manga, pageRequestUrl)); + } + + Log($"Got {chapters.Count} chapters. {manga}"); + return chapters.Order().ToArray(); + } + + // Done + private List<Chapter> ParseChaptersFromHtml(Manga manga, string mangaUrl) + { + RequestResult result = downloadClient.MakeRequest(mangaUrl, RequestType.Default); + if ((int)result.statusCode < 200 || (int)result.statusCode >= 300 || result.htmlDocument is null) + { + Log("Failed to load site"); + return new List<Chapter>(); + } + + List<Chapter> ret = new(); + + foreach (HtmlNode chapterInfo in result.htmlDocument.DocumentNode.SelectNodes("//ul/li[contains(@class, '_episodeItem')]")) + { + HtmlNode infoNode = chapterInfo.SelectSingleNode(".//a"); + string url = infoNode.GetAttributeValue("href", ""); + + string id = chapterInfo.GetAttributeValue("id", ""); + if(id == "") continue; + string? volumeNumber = null; + string chapterNumber = chapterInfo.GetAttributeValue("data-episode-no", ""); + if(chapterNumber == "") continue; + string chapterName = infoNode.SelectSingleNode(".//span[contains(@class, 'subj')]/span").InnerText.Trim(); + ret.Add(new Chapter(manga, chapterName, volumeNumber, chapterNumber, url)); + } + + return ret; + } + + public override HttpStatusCode DownloadChapter(Chapter chapter, ProgressToken? progressToken = null) + { + if (progressToken?.cancellationRequested ?? false) + { + progressToken.Cancel(); + return HttpStatusCode.RequestTimeout; + } + + Manga chapterParentManga = chapter.parentManga; + Log($"Retrieving chapter-info {chapter} {chapterParentManga}"); + string requestUrl = chapter.url; + // Leaving this in to check if the page exists + RequestResult requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + { + progressToken?.Cancel(); + return requestResult.statusCode; + } + + string[] imageUrls = ParseImageUrlsFromHtml(requestUrl); + return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage, progressToken:progressToken, referrer: requestUrl); + } + + private string[] ParseImageUrlsFromHtml(string mangaUrl) + { + RequestResult requestResult = + downloadClient.MakeRequest(mangaUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + { + return Array.Empty<string>(); + } + if (requestResult.htmlDocument is null) + { + Log($"Failed to retrieve site"); + return Array.Empty<string>(); + } + + return requestResult.htmlDocument.DocumentNode + .SelectNodes("//*[@id='_imageList']/img") + .Select(node => + node.GetAttributeValue("data-url", "")) + .ToArray(); + } +} + +internal class PublicationManager { + public PublicationManager(string title = "", string category = "", string id = "") { + this.Title = title; + this.Category = category; + this.Id = id; + } + + public PublicationManager(string publicationId) { + string[] parts = publicationId.Split("|"); + if(parts.Length == 3) { + this.Title = parts[0]; + this.Category = parts[1]; + this.Id = parts[2]; + } else { + this.Title = ""; + this.Category = ""; + this.Id = ""; + } + } + + public string getPublicationId() { + return $"{this.Title}|{this.Category}|{this.Id}"; + } + + public string Title { get; set; } + public string Category { get; set; } + public string Id { get; set; } +} \ No newline at end of file diff --git a/Tranga/Tranga.cs b/Tranga/Tranga.cs index 3292e6d..7384a78 100644 --- a/Tranga/Tranga.cs +++ b/Tranga/Tranga.cs @@ -27,6 +27,7 @@ public partial class Tranga : GlobalBase new MangaHere(this), new AsuraToon(this), new Weebcentral(this) + new Webtoons(this), }; foreach(DirectoryInfo dir in new DirectoryInfo(Path.GetTempPath()).GetDirectories("trangatemp"))//Cleanup old temp folders dir.Delete(); From 7f13d9b1e62eb237883f905010a92c7f6e9c3abe Mon Sep 17 00:00:00 2001 From: Makhuta <matykubla@gmail.com> Date: Thu, 6 Feb 2025 15:39:06 +0100 Subject: [PATCH 5/7] Fix - forgotten comma --- Tranga/Tranga.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tranga/Tranga.cs b/Tranga/Tranga.cs index 7384a78..260f8e8 100644 --- a/Tranga/Tranga.cs +++ b/Tranga/Tranga.cs @@ -26,7 +26,7 @@ public partial class Tranga : GlobalBase new ManhuaPlus(this), new MangaHere(this), new AsuraToon(this), - new Weebcentral(this) + new Weebcentral(this), new Webtoons(this), }; foreach(DirectoryInfo dir in new DirectoryInfo(Path.GetTempPath()).GetDirectories("trangatemp"))//Cleanup old temp folders From d97da26994f46e3fc55cc7df288ebc3e29458152 Mon Sep 17 00:00:00 2001 From: Glax <johanna@bernloehr.eu> Date: Sun, 9 Feb 2025 17:37:53 +0100 Subject: [PATCH 6/7] spelling error --- Tranga/MangaConnectors/MangaConnector.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tranga/MangaConnectors/MangaConnector.cs b/Tranga/MangaConnectors/MangaConnector.cs index 5529b45..7573cbc 100644 --- a/Tranga/MangaConnectors/MangaConnector.cs +++ b/Tranga/MangaConnectors/MangaConnector.cs @@ -213,7 +213,7 @@ public abstract class MangaConnector : GlobalBase return HttpStatusCode.OK; } - protected string SaveCoverImageToCache(string url, string mangaInternalId, RequestType requestType, string? refferer = null) + protected string SaveCoverImageToCache(string url, string mangaInternalId, RequestType requestType, string? referrer = null) { Regex urlRex = new (@"https?:\/\/((?:[a-zA-Z0-9-]+\.)+[a-zA-Z0-9]+)\/(?:.+\/)*(.+\.([a-zA-Z]+))"); //https?:\/\/[a-zA-Z0-9-]+\.([a-zA-Z0-9-]+\.[a-zA-Z0-9]+)\/(?:.+\/)*(.+\.([a-zA-Z]+)) for only second level domains From edacaaba8a8d55889cb0bcffb8e7ccc13de3944b Mon Sep 17 00:00:00 2001 From: Glax <johanna@bernloehr.eu> Date: Sun, 9 Feb 2025 17:38:54 +0100 Subject: [PATCH 7/7] Update Readme --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 0151452..3f81f44 100644 --- a/README.md +++ b/README.md @@ -45,14 +45,13 @@ Tranga can download Chapters and Metadata from "Scanlation" sites such as - [MangaDex.org](https://mangadex.org/) (Multilingual) - [Manganato.com](https://manganato.com/) (en) -- [Mangasee.com](https://mangasee123.com/) (en) - [MangaKatana.com](https://mangakatana.com) (en) - [Mangaworld.bz](https://www.mangaworld.bz/) (it) - [Bato.to](https://bato.to/v3x) (en) -- [Manga4Life](https://manga4life.com) (en) - [ManhuaPlus](https://manhuaplus.org/) (en) - [MangaHere](https://www.mangahere.cc/) (en) (Their covers aren't scrapeable.) - [Weebcentral](https://weebcentral.com) (en) +- [Webtoons](https://www.webtoons.com/en/) - ❓ Open an [issue](https://github.com/C9Glax/tranga/issues/new?assignees=&labels=New+Connector&projects=&template=new_connector.yml&title=%5BNew+Connector%5D%3A+) and trigger a library-scan with [Komga](https://komga.org/) and [Kavita](https://www.kavitareader.com/).