diff --git a/Dockerfile b/Dockerfile index 55a5f6c..35eb708 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,6 +5,8 @@ FROM --platform=$TARGETPLATFORM mcr.microsoft.com/dotnet/runtime:$DOTNET AS base WORKDIR /publish ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium +ENV XDG_CONFIG_HOME=/tmp/.chromium +ENV XDG_CACHE_HOME=/tmp/.chromium RUN apt-get update \ && apt-get install -y libx11-6 libx11-xcb1 libatk1.0-0 libgtk-3-0 libcups2 libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 libpango-1.0-0 libcairo2 libasound2 libxshmfence1 libnss3 chromium \ && apt-get autopurge -y \ diff --git a/README.md b/README.md index 2aca578..3f81f44 100644 --- a/README.md +++ b/README.md @@ -45,13 +45,13 @@ Tranga can download Chapters and Metadata from "Scanlation" sites such as - [MangaDex.org](https://mangadex.org/) (Multilingual) - [Manganato.com](https://manganato.com/) (en) -- [Mangasee.com](https://mangasee123.com/) (en) - [MangaKatana.com](https://mangakatana.com) (en) - [Mangaworld.bz](https://www.mangaworld.bz/) (it) - [Bato.to](https://bato.to/v3x) (en) -- [Manga4Life](https://manga4life.com) (en) - [ManhuaPlus](https://manhuaplus.org/) (en) - [MangaHere](https://www.mangahere.cc/) (en) (Their covers aren't scrapeable.) +- [Weebcentral](https://weebcentral.com) (en) +- [Webtoons](https://www.webtoons.com/en/) - ❓ Open an [issue](https://github.com/C9Glax/tranga/issues/new?assignees=&labels=New+Connector&projects=&template=new_connector.yml&title=%5BNew+Connector%5D%3A+) and trigger a library-scan with [Komga](https://komga.org/) and [Kavita](https://www.kavitareader.com/). diff --git a/Tranga/Chapter.cs b/Tranga/Chapter.cs index 5409096..7622bd7 100644 --- a/Tranga/Chapter.cs +++ b/Tranga/Chapter.cs @@ -14,8 +14,8 @@ public readonly struct Chapter : IComparable // ReSharper disable once MemberCanBePrivate.Global public Manga parentManga { get; } public string? 
name { get; } - public string volumeNumber { get; } - public string chapterNumber { get; } + public float volumeNumber { get; } + public float chapterNumber { get; } public string url { get; } // ReSharper disable once MemberCanBePrivate.Global public string fileName { get; } @@ -23,13 +23,19 @@ public readonly struct Chapter : IComparable private static readonly Regex LegalCharacters = new (@"([A-z]*[0-9]* *\.*-*,*\]*\[*'*\'*\)*\(*~*!*)*"); private static readonly Regex IllegalStrings = new(@"(Vol(ume)?|Ch(apter)?)\.?", RegexOptions.IgnoreCase); - private static readonly Regex Digits = new(@"[0-9\.]*"); + public Chapter(Manga parentManga, string? name, string? volumeNumber, string chapterNumber, string url, string? id = null) + : this(parentManga, name, float.Parse(volumeNumber??"0", GlobalBase.numberFormatDecimalPoint), + float.Parse(chapterNumber, GlobalBase.numberFormatDecimalPoint), url, id) + { + } + + public Chapter(Manga parentManga, string? name, float? volumeNumber, float chapterNumber, string url, string? id = null) { this.parentManga = parentManga; this.name = name; - this.volumeNumber = volumeNumber is not null ? 
string.Concat(Digits.Matches(volumeNumber).Select(x => x.Value)) : "0"; - this.chapterNumber = string.Concat(Digits.Matches(chapterNumber).Select(x => x.Value)); + this.volumeNumber = volumeNumber??0; + this.chapterNumber = chapterNumber; this.url = url; this.id = id; @@ -60,26 +66,12 @@ public readonly struct Chapter : IComparable { if(obj is not Chapter otherChapter) throw new ArgumentException($"{obj} can not be compared to {this}"); - - if (float.TryParse(volumeNumber, GlobalBase.numberFormatDecimalPoint, out float volumeNumberFloat) && - float.TryParse(chapterNumber, GlobalBase.numberFormatDecimalPoint, out float chapterNumberFloat) && - float.TryParse(otherChapter.volumeNumber, GlobalBase.numberFormatDecimalPoint, - out float otherVolumeNumberFloat) && - float.TryParse(otherChapter.chapterNumber, GlobalBase.numberFormatDecimalPoint, - out float otherChapterNumberFloat)) + return volumeNumber.CompareTo(otherChapter.volumeNumber) switch { - return volumeNumberFloat.CompareTo(otherVolumeNumberFloat) switch - { - <0 => -1, - >0 => 1, - _ => chapterNumberFloat.CompareTo(otherChapterNumberFloat) - }; - } - else throw new FormatException($"Value could not be parsed.\n" + - $"\tVolumeNumber: '{volumeNumber}' ChapterNumber: '{chapterNumber}'\n" + - $"\tOther-VolumeNumber: '{otherChapter.volumeNumber}' Other-ChapterNumber: '{otherChapter.chapterNumber}'\n" + - $"\t{this}\n" + - $"\t{otherChapter}"); + <0 => -1, + >0 => 1, + _ => chapterNumber.CompareTo(otherChapter.chapterNumber) + }; } /// @@ -111,9 +103,10 @@ public readonly struct Chapter : IComparable { Match m = volChRex.Match(archive.Name); if (m.Groups[1].Success) - return m.Groups[1].Value == t.volumeNumber && m.Groups[2].Value == t.chapterNumber; + return m.Groups[1].Value == t.volumeNumber.ToString(GlobalBase.numberFormatDecimalPoint) && + m.Groups[2].Value == t.chapterNumber.ToString(GlobalBase.numberFormatDecimalPoint); else - return m.Groups[2].Value == t.chapterNumber; + return m.Groups[2].Value == 
t.chapterNumber.ToString(GlobalBase.numberFormatDecimalPoint); }); } diff --git a/Tranga/Jobs/DownloadNewChapters.cs b/Tranga/Jobs/DownloadNewChapters.cs index 29c3e60..b07d46b 100644 --- a/Tranga/Jobs/DownloadNewChapters.cs +++ b/Tranga/Jobs/DownloadNewChapters.cs @@ -54,6 +54,6 @@ public class DownloadNewChapters : Job if (obj is not DownloadNewChapters otherJob) return false; return otherJob.mangaConnector == this.mangaConnector && - otherJob.manga.Equals(this.manga); + otherJob.manga.publicationId == this.manga.publicationId; } } \ No newline at end of file diff --git a/Tranga/Jobs/JobBoss.cs b/Tranga/Jobs/JobBoss.cs index 9a7e7c8..4ecf2e2 100644 --- a/Tranga/Jobs/JobBoss.cs +++ b/Tranga/Jobs/JobBoss.cs @@ -1,4 +1,5 @@ -using System.Runtime.InteropServices; +using System.Diagnostics; +using System.Runtime.InteropServices; using System.Text.RegularExpressions; using Newtonsoft.Json; using Tranga.MangaConnectors; @@ -70,7 +71,7 @@ public class JobBoss : GlobalBase RemoveJob(job); } - public IEnumerable GetJobsLike(string? connectorName = null, string? internalId = null, string? chapterNumber = null) + public IEnumerable GetJobsLike(string? connectorName = null, string? internalId = null, float? 
chapterNumber = null) { IEnumerable ret = this.jobs; if (connectorName is not null) @@ -82,7 +83,7 @@ public class JobBoss : GlobalBase if (jjob is not DownloadChapter job) return false; return job.chapter.parentManga.internalId == internalId && - job.chapter.chapterNumber == chapterNumber; + job.chapter.chapterNumber.Equals(chapterNumber); }); else if (internalId is not null) ret = ret.Where(jjob => @@ -145,34 +146,44 @@ public class JobBoss : GlobalBase private void LoadJobsList(HashSet connectors) { - Directory.CreateDirectory(TrangaSettings.jobsFolderPath); - if(RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) - File.SetUnixFileMode(TrangaSettings.jobsFolderPath, UserRead | UserWrite | UserExecute | GroupRead | OtherRead); if (!Directory.Exists(TrangaSettings.jobsFolderPath)) //No jobs to load + { + Directory.CreateDirectory(TrangaSettings.jobsFolderPath); + if(RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + File.SetUnixFileMode(TrangaSettings.jobsFolderPath, UserRead | UserWrite | UserExecute | GroupRead | OtherRead); return; + } //Load json-job-files foreach (FileInfo file in Directory.GetFiles(TrangaSettings.jobsFolderPath, "*.json").Select(f => new FileInfo(f))) { Log($"Adding {file.Name}"); - Job? job = JsonConvert.DeserializeObject(File.ReadAllText(file.FullName), - new JobJsonConverter(this, new MangaConnectorJsonConverter(this, connectors))); - if (job is null) - { - string newName = file.FullName + ".failed"; - Log($"Failed loading file {file.Name}.\nMoving to {newName}"); - File.Move(file.FullName, newName); - } - else + try { + Job? job = JsonConvert.DeserializeObject(File.ReadAllText(file.FullName), + new JobJsonConverter(this, new MangaConnectorJsonConverter(this, connectors))); + if (job is null) throw new NullReferenceException(); + Log($"Adding Job {job}"); if (!AddJob(job, file.FullName)) //If we detect a duplicate, delete the file. 
{ - string path = string.Concat(file.FullName, ".duplicate"); - file.MoveTo(path); - Log($"Duplicate detected or otherwise not able to add job to list.\nMoved job {job} to {path}"); + //string path = string.Concat(file.FullName, ".duplicate"); + //file.MoveTo(path); + //Log($"Duplicate detected or otherwise not able to add job to list.\nMoved job {job} to {path}"); + Log($"Duplicate detected or otherwise not able to add job to list. Removed the file {file.FullName} {job}"); } } + catch (Exception e) + { + if (e is not (UnreachableException or NullReferenceException)) //Rethrow everything except the two known load-failures; parentheses are required because `not` binds tighter than `or` + throw; + Log(e.Message); + string newName = file.FullName + ".failed"; + Log($"Failed loading file {file.Name}.\nMoving to {newName}.\n" + + $"If you think this is a bug, upload contents of the file to the Bugreport!"); + File.Move(file.FullName, newName); + continue; + } } //Connect jobs to parent-jobs and add Publications to cache @@ -197,7 +208,7 @@ public class JobBoss : GlobalBase internal void UpdateJobFile(Job job, string? 
oldFile = null) { string newJobFilePath = Path.Join(TrangaSettings.jobsFolderPath, $"{job.id}.json"); - string oldFilePath = Path.Join(TrangaSettings.jobsFolderPath, oldFile??$"{job.id}.json"); + string oldFilePath = oldFile??Path.Join(TrangaSettings.jobsFolderPath, $"{job.id}.json"); //Delete old file if (File.Exists(oldFilePath)) diff --git a/Tranga/Jobs/UpdateMetadata.cs b/Tranga/Jobs/UpdateMetadata.cs index 72b5a60..ac8bdd6 100644 --- a/Tranga/Jobs/UpdateMetadata.cs +++ b/Tranga/Jobs/UpdateMetadata.cs @@ -71,6 +71,6 @@ public class UpdateMetadata : Job if (obj is not UpdateMetadata otherJob) return false; return otherJob.mangaConnector == this.mangaConnector && - otherJob.manga.Equals(this.manga); + otherJob.manga.publicationId == this.manga.publicationId; } } \ No newline at end of file diff --git a/Tranga/MangaConnectors/AsuraToon.cs b/Tranga/MangaConnectors/AsuraToon.cs index bf3bdaa..8ec5265 100644 --- a/Tranga/MangaConnectors/AsuraToon.cs +++ b/Tranga/MangaConnectors/AsuraToon.cs @@ -10,7 +10,7 @@ public class AsuraToon : MangaConnector public AsuraToon(GlobalBase clone) : base(clone, "AsuraToon", ["en"]) { - this.downloadClient = new HttpDownloadClient(clone); + this.downloadClient = new ChromiumDownloadClient(clone); } public override Manga[] GetManga(string publicationTitle = "") @@ -55,8 +55,8 @@ public class AsuraToon : MangaConnector private Manga[] ParsePublicationsFromHtml(HtmlDocument document) { HtmlNodeCollection mangaList = document.DocumentNode.SelectNodes("//a[starts-with(@href,'series')]"); - if (mangaList.Count < 1) - return Array.Empty(); + if (mangaList is null || mangaList.Count < 1) + return []; IEnumerable urls = mangaList.Select(a => $"https://asuracomic.net/{a.GetAttributeValue("href", "")}"); @@ -102,11 +102,13 @@ public class AsuraToon : MangaConnector HtmlNode descriptionNode = document.DocumentNode.SelectSingleNode("//h3[starts-with(text(),'Synopsis')]/../span"); - string description = descriptionNode.InnerText; + string 
description = descriptionNode?.InnerText??""; HtmlNodeCollection authorNodes = document.DocumentNode.SelectNodes("//h3[text()='Author']/../h3[not(text()='Author' or text()='_')]"); - HtmlNodeCollection artistNodes = document.DocumentNode.SelectNodes("//h3[text()='Artist']/../h3[not(text()='Author' or text()='_')]"); - List authors = authorNodes.Select(a => a.InnerText).Concat(artistNodes.Select(a => a.InnerText)).ToList(); + HtmlNodeCollection artistNodes = document.DocumentNode.SelectNodes("//h3[text()='Artist']/../h3[not(text()='Artist' or text()='_')]"); + IEnumerable authorNames = authorNodes is null ? [] : authorNodes.Select(a => a.InnerText); + IEnumerable artistNames = artistNodes is null ? [] : artistNodes.Select(a => a.InnerText); + List authors = authorNames.Concat(artistNames).ToList(); HtmlNode? firstChapterNode = document.DocumentNode.SelectSingleNode("//a[contains(@href, 'chapter/1')]/../following-sibling::h3"); int? year = int.Parse(firstChapterNode?.InnerText.Split(' ')[^1] ?? "2000"); @@ -155,7 +157,14 @@ public class AsuraToon : MangaConnector string chapterNumber = match.Groups[1].Value; string? chapterName = match.Groups[2].Success && match.Groups[2].Length > 1 ? 
match.Groups[2].Value : null; string url = $"https://asuracomic.net/series/{chapterUrl}"; - ret.Add(new Chapter(manga, chapterName, null, chapterNumber, url)); + try + { + ret.Add(new Chapter(manga, chapterName, null, chapterNumber, url)); + } + catch (Exception e) + { + Log($"Failed to load chapter {chapterNumber}: {e.Message}"); + } } return ret; diff --git a/Tranga/MangaConnectors/Bato.cs b/Tranga/MangaConnectors/Bato.cs index c666a36..1e00ca7 100644 --- a/Tranga/MangaConnectors/Bato.cs +++ b/Tranga/MangaConnectors/Bato.cs @@ -163,7 +163,14 @@ public class Bato : MangaConnector string chapterNumber = match.Groups[3].Value; string chapterName = chapterNumber; string url = $"https://bato.to{chapterUrl}?load=2"; - ret.Add(new Chapter(manga, chapterName, volumeNumber, chapterNumber, url)); + try + { + ret.Add(new Chapter(manga, chapterName, volumeNumber, chapterNumber, url)); + } + catch (Exception e) + { + Log($"Failed to load chapter {chapterNumber}: {e.Message}"); + } } return ret; diff --git a/Tranga/MangaConnectors/ChromiumDownloadClient.cs b/Tranga/MangaConnectors/ChromiumDownloadClient.cs index f48b2ed..86ac881 100644 --- a/Tranga/MangaConnectors/ChromiumDownloadClient.cs +++ b/Tranga/MangaConnectors/ChromiumDownloadClient.cs @@ -10,7 +10,6 @@ namespace Tranga.MangaConnectors; internal class ChromiumDownloadClient : DownloadClient { private static IBrowser? _browser; - private const int StartTimeoutMs = 10000; private readonly HttpDownloadClient _httpDownloadClient; private static async Task StartBrowser(Logging.Logger? logger = null) @@ -24,7 +23,7 @@ internal class ChromiumDownloadClient : DownloadClient "--disable-dev-shm-usage", "--disable-setuid-sandbox", "--no-sandbox"}, - Timeout = StartTimeoutMs + Timeout = TrangaSettings.ChromiumStartupTimeoutMs }, new LoggerFactory([new LogProvider(logger)])); } @@ -43,6 +42,8 @@ internal class ChromiumDownloadClient : DownloadClient public void Log(LogLevel logLevel, EventId eventId, TState state, Exception? 
exception, Func formatter) { + if (logLevel <= LogLevel.Information) + return; logger?.WriteLine("Puppeteer", formatter.Invoke(state, exception)); } @@ -68,17 +69,20 @@ internal class ChromiumDownloadClient : DownloadClient private RequestResult MakeRequestBrowser(string url, string? referrer = null, string? clickButton = null) { + if (_browser is null) + return new RequestResult(HttpStatusCode.InternalServerError, null, Stream.Null); IPage page = _browser.NewPageAsync().Result; - page.DefaultTimeout = 10000; + page.DefaultTimeout = TrangaSettings.ChromiumPageTimeoutMs; + page.SetExtraHttpHeadersAsync(new() { { "Referer", referrer } }); IResponse response; try { response = page.GoToAsync(url, WaitUntilNavigation.Networkidle0).Result; - Log("Page loaded."); + Log($"Page loaded. {url}"); } catch (Exception e) { - Log($"Could not load Page:\n{e.Message}"); + Log($"Could not load Page {url}\n{e.Message}"); page.CloseAsync(); return new RequestResult(HttpStatusCode.InternalServerError, null, Stream.Null); } diff --git a/Tranga/MangaConnectors/MangaConnector.cs b/Tranga/MangaConnectors/MangaConnector.cs index f9fbf69..7573cbc 100644 --- a/Tranga/MangaConnectors/MangaConnector.cs +++ b/Tranga/MangaConnectors/MangaConnector.cs @@ -60,8 +60,7 @@ public abstract class MangaConnector : GlobalBase return Array.Empty(); Log($"Checking for duplicates {manga}"); - List newChaptersList = allChapters.Where(nChapter => float.TryParse(nChapter.chapterNumber, numberFormatDecimalPoint, out float chapterNumber) - && chapterNumber > manga.ignoreChaptersBelow + List newChaptersList = allChapters.Where(nChapter => nChapter.chapterNumber >= manga.ignoreChaptersBelow && !nChapter.CheckChapterIsDownloaded()).ToList(); Log($"{newChaptersList.Count} new chapters. {manga}"); try @@ -79,79 +78,6 @@ public abstract class MangaConnector : GlobalBase return newChaptersList.ToArray(); } - - public Chapter[] SelectChapters(Manga manga, string searchTerm, string? 
language = null) - { - Chapter[] availableChapters = this.GetChapters(manga, language??"en"); - Regex volumeRegex = new ("((v(ol)*(olume)*){1} *([0-9]+(-[0-9]+)?){1})", RegexOptions.IgnoreCase); - Regex chapterRegex = new ("((c(h)*(hapter)*){1} *([0-9]+(-[0-9]+)?){1})", RegexOptions.IgnoreCase); - Regex singleResultRegex = new("([0-9]+)", RegexOptions.IgnoreCase); - Regex rangeResultRegex = new("([0-9]+(-[0-9]+))", RegexOptions.IgnoreCase); - Regex allRegex = new("a(ll)?", RegexOptions.IgnoreCase); - if (volumeRegex.IsMatch(searchTerm) && chapterRegex.IsMatch(searchTerm)) - { - string volume = singleResultRegex.Match(volumeRegex.Match(searchTerm).Value).Value; - string chapter = singleResultRegex.Match(chapterRegex.Match(searchTerm).Value).Value; - return availableChapters.Where(aCh => aCh.volumeNumber is not null && - aCh.volumeNumber.Equals(volume, StringComparison.InvariantCultureIgnoreCase) && - aCh.chapterNumber.Equals(chapter, StringComparison.InvariantCultureIgnoreCase)) - .ToArray(); - } - else if (volumeRegex.IsMatch(searchTerm)) - { - string volume = volumeRegex.Match(searchTerm).Value; - if (rangeResultRegex.IsMatch(volume)) - { - string range = rangeResultRegex.Match(volume).Value; - int start = Convert.ToInt32(range.Split('-')[0]); - int end = Convert.ToInt32(range.Split('-')[1]); - return availableChapters.Where(aCh => aCh.volumeNumber is not null && - Convert.ToInt32(aCh.volumeNumber) >= start && - Convert.ToInt32(aCh.volumeNumber) <= end).ToArray(); - } - else if (singleResultRegex.IsMatch(volume)) - { - string volumeNumber = singleResultRegex.Match(volume).Value; - return availableChapters.Where(aCh => - aCh.volumeNumber is not null && - aCh.volumeNumber.Equals(volumeNumber, StringComparison.InvariantCultureIgnoreCase)).ToArray(); - } - - } - else if (chapterRegex.IsMatch(searchTerm)) - { - string chapter = chapterRegex.Match(searchTerm).Value; - if (rangeResultRegex.IsMatch(chapter)) - { - string range = rangeResultRegex.Match(chapter).Value; - 
int start = Convert.ToInt32(range.Split('-')[0]); - int end = Convert.ToInt32(range.Split('-')[1]); - return availableChapters.Where(aCh => Convert.ToInt32(aCh.chapterNumber) >= start && - Convert.ToInt32(aCh.chapterNumber) <= end).ToArray(); - } - else if (singleResultRegex.IsMatch(chapter)) - { - string chapterNumber = singleResultRegex.Match(chapter).Value; - return availableChapters.Where(aCh => - aCh.chapterNumber.Equals(chapterNumber, StringComparison.InvariantCultureIgnoreCase)).ToArray(); - } - } - else - { - if (rangeResultRegex.IsMatch(searchTerm)) - { - int start = Convert.ToInt32(searchTerm.Split('-')[0]); - int end = Convert.ToInt32(searchTerm.Split('-')[1]); - return availableChapters[start..(end + 1)]; - } - else if(singleResultRegex.IsMatch(searchTerm)) - return new [] { availableChapters[Convert.ToInt32(searchTerm)] }; - else if (allRegex.IsMatch(searchTerm)) - return availableChapters; - } - - return Array.Empty(); - } public abstract HttpStatusCode DownloadChapter(Chapter chapter, ProgressToken? progressToken = null); @@ -287,7 +213,7 @@ public abstract class MangaConnector : GlobalBase return HttpStatusCode.OK; } - protected string SaveCoverImageToCache(string url, string mangaInternalId, RequestType requestType) + protected string SaveCoverImageToCache(string url, string mangaInternalId, RequestType requestType, string? 
referrer = null) { Regex urlRex = new (@"https?:\/\/((?:[a-zA-Z0-9-]+\.)+[a-zA-Z0-9]+)\/(?:.+\/)*(.+\.([a-zA-Z]+))"); //https?:\/\/[a-zA-Z0-9-]+\.([a-zA-Z0-9-]+\.[a-zA-Z0-9]+)\/(?:.+\/)*(.+\.([a-zA-Z]+)) for only second level domains @@ -298,7 +224,7 @@ public abstract class MangaConnector : GlobalBase if (File.Exists(saveImagePath)) return saveImagePath; - RequestResult coverResult = downloadClient.MakeRequest(url, requestType); + RequestResult coverResult = downloadClient.MakeRequest(url, requestType, referrer); using MemoryStream ms = new(); coverResult.result.CopyTo(ms); Directory.CreateDirectory(TrangaSettings.coverImageCache); diff --git a/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs b/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs index 283d7e7..5809a59 100644 --- a/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs +++ b/Tranga/MangaConnectors/MangaConnectorJsonConverter.cs @@ -32,13 +32,13 @@ public class MangaConnectorJsonConverter : JsonConverter "MangaDex" => this._connectors.First(c => c is MangaDex), "Manganato" => this._connectors.First(c => c is Manganato), "MangaKatana" => this._connectors.First(c => c is MangaKatana), - "Mangasee" => this._connectors.First(c => c is Mangasee), "Mangaworld" => this._connectors.First(c => c is Mangaworld), "Bato" => this._connectors.First(c => c is Bato), - "Manga4Life" => this._connectors.First(c => c is MangaLife), "ManhuaPlus" => this._connectors.First(c => c is ManhuaPlus), "MangaHere" => this._connectors.First(c => c is MangaHere), "AsuraToon" => this._connectors.First(c => c is AsuraToon), + "Weebcentral" => this._connectors.First(c => c is Weebcentral), + "Webtoons" => this._connectors.First(c => c is Webtoons), _ => throw new UnreachableException($"Could not find Connector with name {connectorName}") }; } diff --git a/Tranga/MangaConnectors/MangaDex.cs b/Tranga/MangaConnectors/MangaDex.cs index 1dbb1d0..8b23c2d 100644 --- a/Tranga/MangaConnectors/MangaDex.cs +++ 
b/Tranga/MangaConnectors/MangaDex.cs @@ -246,8 +246,17 @@ public class MangaDex : MangaConnector continue; } - if(chapterNum is not "null" && !chapters.Any(chp => chp.volumeNumber.Equals(volume) && chp.chapterNumber.Equals(chapterNum))) + try + { + if(!chapters.Any(chp => + chp.volumeNumber.Equals(float.Parse(volume??"0", numberFormatDecimalPoint)) && + chp.chapterNumber.Equals(float.Parse(chapterNum, numberFormatDecimalPoint)))) chapters.Add(new Chapter(manga, title, volume, chapterNum, chapterId, chapterId)); + } + catch (Exception e) + { + Log($"Failed to load chapter {chapterNum}: {e.Message}"); + } } } diff --git a/Tranga/MangaConnectors/MangaHere.cs b/Tranga/MangaConnectors/MangaHere.cs index c14b697..18c04d6 100644 --- a/Tranga/MangaConnectors/MangaHere.cs +++ b/Tranga/MangaConnectors/MangaHere.cs @@ -129,7 +129,15 @@ public class MangaHere : MangaConnector string volumeNumber = rexMatch.Groups[1].Value == "TBD" ? "0" : rexMatch.Groups[1].Value; string chapterNumber = rexMatch.Groups[2].Value; string fullUrl = $"https://www.mangahere.cc{url}"; - chapters.Add(new Chapter(manga, "", volumeNumber, chapterNumber, fullUrl)); + + try + { + chapters.Add(new Chapter(manga, "", volumeNumber, chapterNumber, fullUrl)); + } + catch (Exception e) + { + Log($"Failed to load chapter {chapterNumber}: {e.Message}"); + } } //Return Chapters ordered by Chapter-Number Log($"Got {chapters.Count} chapters. {manga}"); diff --git a/Tranga/MangaConnectors/MangaKatana.cs b/Tranga/MangaConnectors/MangaKatana.cs index 5142c3b..8cd0c65 100644 --- a/Tranga/MangaConnectors/MangaKatana.cs +++ b/Tranga/MangaConnectors/MangaKatana.cs @@ -186,7 +186,14 @@ public class MangaKatana : MangaConnector string? volumeNumber = volumeRex.IsMatch(url) ? 
volumeRex.Match(url).Groups[1].Value : null; string chapterNumber = chapterNumRex.Match(url).Groups[1].Value; string chapterName = chapterNameRex.Match(fullString).Groups[1].Value; - ret.Add(new Chapter(manga, chapterName, volumeNumber, chapterNumber, url)); + try + { + ret.Add(new Chapter(manga, chapterName, volumeNumber, chapterNumber, url)); + } + catch (Exception e) + { + Log($"Failed to load chapter {chapterNumber}: {e.Message}"); + } } return ret; diff --git a/Tranga/MangaConnectors/MangaLife.cs b/Tranga/MangaConnectors/MangaLife.cs deleted file mode 100644 index a01a09d..0000000 --- a/Tranga/MangaConnectors/MangaLife.cs +++ /dev/null @@ -1,199 +0,0 @@ -using System.Net; -using System.Text.RegularExpressions; -using HtmlAgilityPack; -using Tranga.Jobs; - -namespace Tranga.MangaConnectors; - -public class MangaLife : MangaConnector -{ - public MangaLife(GlobalBase clone) : base(clone, "Manga4Life", ["en"]) - { - this.downloadClient = new ChromiumDownloadClient(clone); - } - - public override Manga[] GetManga(string publicationTitle = "") - { - Log($"Searching Publications. Term=\"{publicationTitle}\""); - string sanitizedTitle = WebUtility.UrlEncode(publicationTitle); - string requestUrl = $"https://manga4life.com/search/?name={sanitizedTitle}"; - RequestResult requestResult = - downloadClient.MakeRequest(requestUrl, RequestType.Default); - if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) - return Array.Empty(); - - if (requestResult.htmlDocument is null) - return Array.Empty(); - Manga[] publications = ParsePublicationsFromHtml(requestResult.htmlDocument); - Log($"Retrieved {publications.Length} publications. Term=\"{publicationTitle}\""); - return publications; - } - - public override Manga? GetMangaFromId(string publicationId) - { - return GetMangaFromUrl($"https://manga4life.com/manga/{publicationId}"); - } - - public override Manga? 
GetMangaFromUrl(string url) - { - Regex publicationIdRex = new(@"https:\/\/(www\.)?manga4life.com\/manga\/(.*)(\/.*)*"); - string publicationId = publicationIdRex.Match(url).Groups[2].Value; - - RequestResult requestResult = this.downloadClient.MakeRequest(url, RequestType.MangaInfo); - if(requestResult.htmlDocument is not null) - return ParseSinglePublicationFromHtml(requestResult.htmlDocument, publicationId, url); - return null; - } - - private Manga[] ParsePublicationsFromHtml(HtmlDocument document) - { - HtmlNode resultsNode = document.DocumentNode.SelectSingleNode("//div[@class='BoxBody']/div[last()]/div[1]/div"); - if (resultsNode.Descendants("div").Count() == 1 && resultsNode.Descendants("div").First().HasClass("NoResults")) - { - Log("No results."); - return Array.Empty(); - } - Log($"{resultsNode.SelectNodes("div").Count} items."); - - HashSet ret = new(); - - foreach (HtmlNode resultNode in resultsNode.SelectNodes("div")) - { - string url = resultNode.Descendants().First(d => d.HasClass("SeriesName")).GetAttributeValue("href", ""); - Manga? 
manga = GetMangaFromUrl($"https://manga4life.com{url}"); - if (manga is not null) - ret.Add((Manga)manga); - } - - return ret.ToArray(); - } - - - private Manga ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl) - { - string originalLanguage = "", status = ""; - Dictionary altTitles = new(), links = new(); - HashSet tags = new(); - Manga.ReleaseStatusByte releaseStatus = Manga.ReleaseStatusByte.Unreleased; - - HtmlNode posterNode = document.DocumentNode.SelectSingleNode("//div[@class='BoxBody']//div[@class='row']//img"); - string posterUrl = posterNode.GetAttributeValue("src", ""); - string coverFileNameInCache = SaveCoverImageToCache(posterUrl, publicationId, RequestType.MangaCover); - - HtmlNode titleNode = document.DocumentNode.SelectSingleNode("//div[@class='BoxBody']//div[@class='row']//h1"); - string sortName = titleNode.InnerText; - - HtmlNode[] authorsNodes = document.DocumentNode - .SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Author(s):']/..").Descendants("a") - .ToArray(); - List authors = new(); - foreach (HtmlNode authorNode in authorsNodes) - authors.Add(authorNode.InnerText); - - HtmlNode[] genreNodes = document.DocumentNode - .SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Genre(s):']/..").Descendants("a") - .ToArray(); - foreach (HtmlNode genreNode in genreNodes) - tags.Add(genreNode.InnerText); - - HtmlNode yearNode = document.DocumentNode - .SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Released:']/..").Descendants("a") - .First(); - int year = Convert.ToInt32(yearNode.InnerText); - - HtmlNode[] statusNodes = document.DocumentNode - .SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Status:']/..").Descendants("a") - .ToArray(); - foreach (HtmlNode statusNode in statusNodes) - if (statusNode.InnerText.Contains("publish", StringComparison.CurrentCultureIgnoreCase)) - status = statusNode.InnerText.Split(' ')[0]; - 
switch (status.ToLower()) - { - case "cancelled": releaseStatus = Manga.ReleaseStatusByte.Cancelled; break; - case "hiatus": releaseStatus = Manga.ReleaseStatusByte.OnHiatus; break; - case "discontinued": releaseStatus = Manga.ReleaseStatusByte.Cancelled; break; - case "complete": releaseStatus = Manga.ReleaseStatusByte.Completed; break; - case "ongoing": releaseStatus = Manga.ReleaseStatusByte.Continuing; break; - } - - HtmlNode descriptionNode = document.DocumentNode - .SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Description:']/..") - .Descendants("div").First(); - string description = descriptionNode.InnerText; - - Manga manga = new(sortName, authors.ToList(), description, altTitles, tags.ToArray(), posterUrl, - coverFileNameInCache, links, year, originalLanguage, publicationId, releaseStatus, websiteUrl: websiteUrl); - AddMangaToCache(manga); - return manga; - } - - public override Chapter[] GetChapters(Manga manga, string language="en") - { - Log($"Getting chapters {manga}"); - RequestResult result = downloadClient.MakeRequest($"https://manga4life.com/manga/{manga.publicationId}", RequestType.Default, clickButton:"[class*='ShowAllChapters']"); - if ((int)result.statusCode < 200 || (int)result.statusCode >= 300 || result.htmlDocument is null) - { - return Array.Empty(); - } - - HtmlNodeCollection chapterNodes = result.htmlDocument.DocumentNode.SelectNodes( - "//a[contains(concat(' ',normalize-space(@class),' '),' ChapterLink ')]"); - string[] urls = chapterNodes.Select(node => node.GetAttributeValue("href", "")).ToArray(); - Regex urlRex = new (@"-chapter-([0-9\\.]+)(-index-([0-9\\.]+))?"); - - List chapters = new(); - foreach (string url in urls) - { - Match rexMatch = urlRex.Match(url); - - string volumeNumber = "1"; - if (rexMatch.Groups[3].Value.Length > 0) - volumeNumber = rexMatch.Groups[3].Value; - string chapterNumber = rexMatch.Groups[1].Value; - string fullUrl = $"https://manga4life.com{url}"; - fullUrl = 
fullUrl.Replace(Regex.Match(url,"(-page-[0-9])").Value,""); - chapters.Add(new Chapter(manga, "", volumeNumber, chapterNumber, fullUrl)); - } - //Return Chapters ordered by Chapter-Number - Log($"Got {chapters.Count} chapters. {manga}"); - return chapters.Order().ToArray(); - } - - public override HttpStatusCode DownloadChapter(Chapter chapter, ProgressToken? progressToken = null) - { - if (progressToken?.cancellationRequested ?? false) - { - progressToken.Cancel(); - return HttpStatusCode.RequestTimeout; - } - - Manga chapterParentManga = chapter.parentManga; - if (progressToken?.cancellationRequested ?? false) - { - progressToken.Cancel(); - return HttpStatusCode.RequestTimeout; - } - - Log($"Retrieving chapter-info {chapter} {chapterParentManga}"); - - RequestResult requestResult = this.downloadClient.MakeRequest(chapter.url, RequestType.Default); - if (requestResult.htmlDocument is null) - { - progressToken?.Cancel(); - return HttpStatusCode.RequestTimeout; - } - - HtmlDocument document = requestResult.htmlDocument; - - HtmlNode gallery = document.DocumentNode.Descendants("div").First(div => div.HasClass("ImageGallery")); - HtmlNode[] images = gallery.Descendants("img").Where(img => img.HasClass("img-fluid")).ToArray(); - List urls = new(); - foreach(HtmlNode galleryImage in images) - urls.Add(galleryImage.GetAttributeValue("src", "")); - - string comicInfoPath = Path.GetTempFileName(); - File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); - - return DownloadChapterImages(urls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken); - } -} \ No newline at end of file diff --git a/Tranga/MangaConnectors/Manganato.cs b/Tranga/MangaConnectors/Manganato.cs index c3b337b..7d79414 100644 --- a/Tranga/MangaConnectors/Manganato.cs +++ b/Tranga/MangaConnectors/Manganato.cs @@ -181,7 +181,14 @@ public class Manganato : MangaConnector string? volumeNumber = volRex.IsMatch(fullString) ? 
volRex.Match(fullString).Groups[1].Value : null; string chapterNumber = chapterRex.Match(url).Groups[1].Value; string chapterName = nameRex.Match(fullString).Groups[3].Value; - ret.Add(new Chapter(manga, chapterName, volumeNumber, chapterNumber, url)); + try + { + ret.Add(new Chapter(manga, chapterName, volumeNumber, chapterNumber, url)); + } + catch (Exception e) + { + Log($"Failed to load chapter {chapterNumber}: {e.Message}"); + } } ret.Reverse(); return ret; diff --git a/Tranga/MangaConnectors/Mangasee.cs b/Tranga/MangaConnectors/Mangasee.cs deleted file mode 100644 index 78b5271..0000000 --- a/Tranga/MangaConnectors/Mangasee.cs +++ /dev/null @@ -1,227 +0,0 @@ -using System.Data; -using System.Net; -using System.Text.RegularExpressions; -using System.Xml.Linq; -using HtmlAgilityPack; -using Newtonsoft.Json; -using Soenneker.Utils.String.NeedlemanWunsch; -using Tranga.Jobs; - -namespace Tranga.MangaConnectors; - -public class Mangasee : MangaConnector -{ - public Mangasee(GlobalBase clone) : base(clone, "Mangasee", ["en"]) - { - this.downloadClient = new ChromiumDownloadClient(clone); - } - - private struct SearchResult - { - public string i { get; set; } - public string s { get; set; } - public string[] a { get; set; } - } - - public override Manga[] GetManga(string publicationTitle = "") - { - Log($"Searching Publications. Term=\"{publicationTitle}\""); - string requestUrl = "https://mangasee123.com/_search.php"; - RequestResult requestResult = - downloadClient.MakeRequest(requestUrl, RequestType.Default); - if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) - { - Log($"Failed to retrieve search: {requestResult.statusCode}"); - return Array.Empty(); - } - - try - { - SearchResult[] searchResults = JsonConvert.DeserializeObject(requestResult.htmlDocument!.DocumentNode.InnerText) ?? 
- throw new NoNullAllowedException(); - SearchResult[] filteredResults = FilteredResults(publicationTitle, searchResults); - Log($"Total available manga: {searchResults.Length} Filtered down to: {filteredResults.Length}"); - - - string[] urls = filteredResults.Select(result => $"https://mangasee123.com/manga/{result.i}").ToArray(); - List searchResultManga = new(); - foreach (string url in urls) - { - Manga? newManga = GetMangaFromUrl(url); - if(newManga is { } manga) - searchResultManga.Add(manga); - } - Log($"Retrieved {searchResultManga.Count} publications. Term=\"{publicationTitle}\""); - return searchResultManga.ToArray(); - } - catch (NoNullAllowedException) - { - Log("Failed to retrieve search"); - return Array.Empty(); - } - } - - private readonly string[] _filterWords = {"a", "the", "of", "as", "to", "no", "for", "on", "with", "be", "and", "in", "wa", "at", "be", "ni"}; - private string ToFilteredString(string input) => string.Join(' ', input.ToLower().Split(' ').Where(word => _filterWords.Contains(word) == false)); - private SearchResult[] FilteredResults(string publicationTitle, SearchResult[] unfilteredSearchResults) - { - Dictionary similarity = new(); - foreach (SearchResult sr in unfilteredSearchResults) - { - List scores = new(); - string filteredPublicationString = ToFilteredString(publicationTitle); - string filteredSString = ToFilteredString(sr.s); - scores.Add(NeedlemanWunschStringUtil.CalculateSimilarity(filteredSString, filteredPublicationString)); - foreach (string srA in sr.a) - { - string filteredAString = ToFilteredString(srA); - scores.Add(NeedlemanWunschStringUtil.CalculateSimilarity(filteredAString, filteredPublicationString)); - } - similarity.Add(sr, scores.Sum() / scores.Count); - } - - List ret = similarity.OrderBy(s => s.Value).Take(10).Select(s => s.Key).ToList(); - return ret.ToArray(); - } - - public override Manga? 
GetMangaFromId(string publicationId) - { - return GetMangaFromUrl($"https://mangasee123.com/manga/{publicationId}"); - } - - public override Manga? GetMangaFromUrl(string url) - { - Regex publicationIdRex = new(@"https:\/\/mangasee123.com\/manga\/(.*)(\/.*)*"); - string publicationId = publicationIdRex.Match(url).Groups[1].Value; - - RequestResult requestResult = this.downloadClient.MakeRequest(url, RequestType.MangaInfo); - if((int)requestResult.statusCode < 300 && (int)requestResult.statusCode >= 200 && requestResult.htmlDocument is not null) - return ParseSinglePublicationFromHtml(requestResult.htmlDocument, publicationId, url); - return null; - } - - private Manga ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl) - { - string originalLanguage = "", status = ""; - Dictionary altTitles = new(), links = new(); - HashSet tags = new(); - Manga.ReleaseStatusByte releaseStatus = Manga.ReleaseStatusByte.Unreleased; - - HtmlNode posterNode = document.DocumentNode.SelectSingleNode("//div[@class='BoxBody']//div[@class='row']//img"); - string posterUrl = posterNode.GetAttributeValue("src", ""); - string coverFileNameInCache = SaveCoverImageToCache(posterUrl, publicationId, RequestType.MangaCover); - - HtmlNode titleNode = document.DocumentNode.SelectSingleNode("//div[@class='BoxBody']//div[@class='row']//h1"); - string sortName = titleNode.InnerText; - - HtmlNode[] authorsNodes = document.DocumentNode - .SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Author(s):']/..").Descendants("a") - .ToArray(); - List authors = new(); - foreach (HtmlNode authorNode in authorsNodes) - authors.Add(authorNode.InnerText); - - HtmlNode[] genreNodes = document.DocumentNode - .SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Genre(s):']/..").Descendants("a") - .ToArray(); - foreach (HtmlNode genreNode in genreNodes) - tags.Add(genreNode.InnerText); - - HtmlNode yearNode = document.DocumentNode - 
.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Released:']/..").Descendants("a") - .First(); - int year = Convert.ToInt32(yearNode.InnerText); - - HtmlNode[] statusNodes = document.DocumentNode - .SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Status:']/..").Descendants("a") - .ToArray(); - foreach (HtmlNode statusNode in statusNodes) - if (statusNode.InnerText.Contains("publish", StringComparison.CurrentCultureIgnoreCase)) - status = statusNode.InnerText.Split(' ')[0]; - switch (status.ToLower()) - { - case "cancelled": releaseStatus = Manga.ReleaseStatusByte.Cancelled; break; - case "hiatus": releaseStatus = Manga.ReleaseStatusByte.OnHiatus; break; - case "discontinued": releaseStatus = Manga.ReleaseStatusByte.Cancelled; break; - case "complete": releaseStatus = Manga.ReleaseStatusByte.Completed; break; - case "ongoing": releaseStatus = Manga.ReleaseStatusByte.Continuing; break; - } - - HtmlNode descriptionNode = document.DocumentNode - .SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Description:']/..") - .Descendants("div").First(); - string description = descriptionNode.InnerText; - - Manga manga = new(sortName, authors.ToList(), description, altTitles, tags.ToArray(), posterUrl, - coverFileNameInCache, links, - year, originalLanguage, publicationId, releaseStatus, websiteUrl: websiteUrl); - AddMangaToCache(manga); - return manga; - } - - public override Chapter[] GetChapters(Manga manga, string language="en") - { - Log($"Getting chapters {manga}"); - try - { - XDocument doc = XDocument.Load($"https://mangasee123.com/rss/{manga.publicationId}.xml"); - XElement[] chapterItems = doc.Descendants("item").ToArray(); - List chapters = new(); - Regex chVolRex = new(@".*chapter-([0-9\.]+)(?:-index-([0-9\.]+))?.*"); - foreach (XElement chapter in chapterItems) - { - string url = chapter.Descendants("link").First().Value; - Match m = chVolRex.Match(url); - string? volumeNumber = m.Groups[2].Success ? 
m.Groups[2].Value : "1"; - string chapterNumber = m.Groups[1].Value; - - string chapterUrl = Regex.Replace(url, @"-page-[0-9]+(\.html)", ".html"); - chapters.Add(new Chapter(manga, "", volumeNumber, chapterNumber, chapterUrl)); - } - - //Return Chapters ordered by Chapter-Number - Log($"Got {chapters.Count} chapters. {manga}"); - return chapters.Order().ToArray(); - } - catch (HttpRequestException e) - { - Log($"Failed to load https://mangasee123.com/rss/{manga.publicationId}.xml \n\r{e}"); - return Array.Empty(); - } - } - - public override HttpStatusCode DownloadChapter(Chapter chapter, ProgressToken? progressToken = null) - { - if (progressToken?.cancellationRequested ?? false) - { - progressToken.Cancel(); - return HttpStatusCode.RequestTimeout; - } - - Manga chapterParentManga = chapter.parentManga; - if (progressToken?.cancellationRequested ?? false) - { - progressToken.Cancel(); - return HttpStatusCode.RequestTimeout; - } - - Log($"Retrieving chapter-info {chapter} {chapterParentManga}"); - - RequestResult requestResult = this.downloadClient.MakeRequest(chapter.url, RequestType.Default); - if (requestResult.htmlDocument is null) - { - progressToken?.Cancel(); - return HttpStatusCode.RequestTimeout; - } - - HtmlDocument document = requestResult.htmlDocument; - - HtmlNode gallery = document.DocumentNode.Descendants("div").First(div => div.HasClass("ImageGallery")); - HtmlNode[] images = gallery.Descendants("img").Where(img => img.HasClass("img-fluid")).ToArray(); - List urls = new(); - foreach(HtmlNode galleryImage in images) - urls.Add(galleryImage.GetAttributeValue("src", "")); - - return DownloadChapterImages(urls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken); - } -} \ No newline at end of file diff --git a/Tranga/MangaConnectors/Mangaworld.cs b/Tranga/MangaConnectors/Mangaworld.cs index 71f5581..f54876c 100644 --- a/Tranga/MangaConnectors/Mangaworld.cs +++ b/Tranga/MangaConnectors/Mangaworld.cs @@ -9,7 +9,7 @@ public class 
Mangaworld: MangaConnector { public Mangaworld(GlobalBase clone) : base(clone, "Mangaworld", ["it"]) { - this.downloadClient = new HttpDownloadClient(clone); + this.downloadClient = new ChromiumDownloadClient(clone); } public override Manga[] GetManga(string publicationTitle = "") @@ -163,7 +163,14 @@ public class Mangaworld: MangaConnector string number = chapterRex.Match(chNode.SelectSingleNode("a").SelectSingleNode("span").InnerText).Groups[1].Value; string url = chNode.SelectSingleNode("a").GetAttributeValue("href", ""); string id = idRex.Match(chNode.SelectSingleNode("a").GetAttributeValue("href", "")).Groups[1].Value; - ret.Add(new Chapter(manga, null, volume, number, url, id)); + try + { + ret.Add(new Chapter(manga, null, volume, number, url, id)); + } + catch (Exception e) + { + Log($"Failed to load chapter {number}: {e.Message}"); + } } } } @@ -174,7 +181,14 @@ public class Mangaworld: MangaConnector string number = chapterRex.Match(chNode.SelectSingleNode("a").SelectSingleNode("span").InnerText).Groups[1].Value; string url = chNode.SelectSingleNode("a").GetAttributeValue("href", ""); string id = idRex.Match(chNode.SelectSingleNode("a").GetAttributeValue("href", "")).Groups[1].Value; - ret.Add(new Chapter(manga, null, null, number, url, id)); + try + { + ret.Add(new Chapter(manga, null, null, number, url, id)); + } + catch (Exception e) + { + Log($"Failed to load chapter {number}: {e.Message}"); + } } } diff --git a/Tranga/MangaConnectors/ManhuaPlus.cs b/Tranga/MangaConnectors/ManhuaPlus.cs index d66fede..28bbad2 100644 --- a/Tranga/MangaConnectors/ManhuaPlus.cs +++ b/Tranga/MangaConnectors/ManhuaPlus.cs @@ -155,7 +155,14 @@ public class ManhuaPlus : MangaConnector string volumeNumber = "1"; string chapterNumber = rexMatch.Groups[1].Value; string fullUrl = url; - chapters.Add(new Chapter(manga, "", volumeNumber, chapterNumber, fullUrl)); + try + { + chapters.Add(new Chapter(manga, "", volumeNumber, chapterNumber, fullUrl)); + } + catch (Exception e) + { + 
Log($"Failed to load chapter {chapterNumber}: {e.Message}"); + } } //Return Chapters ordered by Chapter-Number Log($"Got {chapters.Count} chapters. {manga}"); diff --git a/Tranga/MangaConnectors/Webtoons.cs b/Tranga/MangaConnectors/Webtoons.cs new file mode 100644 index 0000000..9e78290 --- /dev/null +++ b/Tranga/MangaConnectors/Webtoons.cs @@ -0,0 +1,273 @@ +using System.Net; +using System.Text.RegularExpressions; +using HtmlAgilityPack; +using Tranga.Jobs; + +namespace Tranga.MangaConnectors; + +public class Webtoons : MangaConnector +{ + + public Webtoons(GlobalBase clone) : base(clone, "Webtoons", ["en"]) + { + this.downloadClient = new HttpDownloadClient(clone); + } + + // Done + public override Manga[] GetManga(string publicationTitle = "") + { + string sanitizedTitle = string.Join(' ', Regex.Matches(publicationTitle, "[A-z]*").Where(m => m.Value.Length > 0)).ToLower(); + Log($"Searching Publications. Term=\"{publicationTitle}\""); + string requestUrl = $"https://www.webtoons.com/en/search?keyword={sanitizedTitle}&searchType=WEBTOON"; + RequestResult requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) { + Log($"Failed to retrieve site"); + return Array.Empty(); + } + + if (requestResult.htmlDocument is null) + { + Log($"Failed to retrieve site"); + return Array.Empty(); + } + + Manga[] publications = ParsePublicationsFromHtml(requestResult.htmlDocument); + Log($"Retrieved {publications.Length} publications. Term=\"{publicationTitle}\""); + return publications; + } + + // Done + public override Manga? GetMangaFromId(string publicationId) + { + PublicationManager pb = new PublicationManager(publicationId); + return GetMangaFromUrl($"https://www.webtoons.com/en/{pb.Category}/{pb.Title}/list?title_no={pb.Id}"); + } + + // Done + public override Manga? 
GetMangaFromUrl(string url) + { + RequestResult requestResult = downloadClient.MakeRequest(url, RequestType.MangaInfo); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) { + return null; + } + if (requestResult.htmlDocument is null) + { + Log($"Failed to retrieve site"); + return null; + } + Regex regex = new Regex(@".*webtoons\.com/en/(?[^/]+)/(?[^/]+)/list\?title_no=(?<id>\d+).*"); + Match match = regex.Match(url); + + if(match.Success) { + PublicationManager pm = new PublicationManager(match.Groups["title"].Value, match.Groups["category"].Value, match.Groups["id"].Value); + return ParseSinglePublicationFromHtml(requestResult.htmlDocument, pm.getPublicationId(), url); + } + Log($"Failed match Regex ID"); + return null; + } + + // Done + private Manga[] ParsePublicationsFromHtml(HtmlDocument document) + { + HtmlNode mangaList = document.DocumentNode.SelectSingleNode("//ul[contains(@class, 'card_lst')]"); + if (!mangaList.ChildNodes.Any(node => node.Name == "li")) { + Log($"Failed to parse publication"); + return Array.Empty<Manga>(); + } + + List<string> urls = document.DocumentNode + .SelectNodes("//ul[contains(@class, 'card_lst')]/li/a") + .Select(node => node.GetAttributeValue("href", "https://www.webtoons.com")) + .ToList(); + + HashSet<Manga> ret = new(); + foreach (string url in urls) + { + Manga? 
manga = GetMangaFromUrl(url); + if (manga is not null) + ret.Add((Manga)manga); + } + + return ret.ToArray(); + } + + private string capitalizeString(string str = "") { + if(str.Length == 0) return ""; + if(str.Length == 1) return str.ToUpper(); + return char.ToUpper(str[0]) + str.Substring(1).ToLower(); + } + + // Done + private Manga ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl) + { + HtmlNode infoNode1 = document.DocumentNode.SelectSingleNode("//*[@id='content']/div[2]/div[1]/div[1]"); + HtmlNode infoNode2 = document.DocumentNode.SelectSingleNode("//*[@id='content']/div[2]/div[2]/div[2]"); + + string sortName = infoNode1.SelectSingleNode(".//h1[contains(@class, 'subj')]").InnerText; + string description = infoNode2.SelectSingleNode(".//p[contains(@class, 'summary')]") + .InnerText.Trim(); + + HtmlNode posterNode = document.DocumentNode.SelectSingleNode("//div[contains(@class, 'detail_body') and contains(@class, 'banner')]"); + + Regex regex = new Regex(@"url\((?<url>.*?)\)"); + Match match = regex.Match(posterNode.GetAttributeValue("style", "")); + + string posterUrl = match.Groups["url"].Value; + string coverFileNameInCache = SaveCoverImageToCache(posterUrl, publicationId, RequestType.MangaCover, websiteUrl); + + string genre = infoNode1.SelectSingleNode(".//h2[contains(@class, 'genre')]") + .InnerText.Trim(); + string[] tags = [ genre ]; + + List<HtmlNode> authorsNodes = infoNode1.SelectSingleNode(".//div[contains(@class, 'author_area')]").Descendants("a").ToList(); + List<string> authors = authorsNodes.Select(node => node.InnerText.Trim()).ToList(); + + string originalLanguage = ""; + + int year = DateTime.Now.Year; + + string status1 = infoNode2.SelectSingleNode(".//p").InnerText; + string status2 = infoNode2.SelectSingleNode(".//p/span").InnerText; + Manga.ReleaseStatusByte releaseStatus = Manga.ReleaseStatusByte.Unreleased; + if(status2.Length == 0 || status1.ToLower() == "completed") { + releaseStatus = 
Manga.ReleaseStatusByte.Completed; + } else if(status2.ToLower() == "up") { + releaseStatus = Manga.ReleaseStatusByte.Continuing; + } + + Manga manga = new(sortName, authors, description, new Dictionary<string, string>(), tags, posterUrl, coverFileNameInCache, new Dictionary<string, string>(), + year, originalLanguage, publicationId, releaseStatus, websiteUrl: websiteUrl); + AddMangaToCache(manga); + return manga; + } + + // Done + public override Chapter[] GetChapters(Manga manga, string language = "en") + { + PublicationManager pm = new PublicationManager(manga.publicationId); + string requestUrl = $"https://www.webtoons.com/en/{pm.Category}/{pm.Title}/list?title_no={pm.Id}"; + // Leaving this in for verification if the page exists + RequestResult requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + return Array.Empty<Chapter>(); + + // Get number of pages + int pages = requestResult.htmlDocument.DocumentNode + .SelectNodes("//div[contains(@class, 'paginate')]/a") + .ToList() + .Count; + List<Chapter> chapters = new List<Chapter>(); + + for(int page = 1; page <= pages; page++) { + string pageRequestUrl = $"{requestUrl}&page={page}"; + chapters.AddRange(ParseChaptersFromHtml(manga, pageRequestUrl)); + } + Log($"Got {chapters.Count} chapters. 
{manga}"); + return chapters.Order().ToArray(); + } + + // Done + private List<Chapter> ParseChaptersFromHtml(Manga manga, string mangaUrl) + { + RequestResult result = downloadClient.MakeRequest(mangaUrl, RequestType.Default); + if ((int)result.statusCode < 200 || (int)result.statusCode >= 300 || result.htmlDocument is null) + { + Log("Failed to load site"); + return new List<Chapter>(); + } + + List<Chapter> ret = new(); + + foreach (HtmlNode chapterInfo in result.htmlDocument.DocumentNode.SelectNodes("//ul/li[contains(@class, '_episodeItem')]")) + { + HtmlNode infoNode = chapterInfo.SelectSingleNode(".//a"); + string url = infoNode.GetAttributeValue("href", ""); + + string id = chapterInfo.GetAttributeValue("id", ""); + if(id == "") continue; + string? volumeNumber = null; + string chapterNumber = chapterInfo.GetAttributeValue("data-episode-no", ""); + if(chapterNumber == "") continue; + string chapterName = infoNode.SelectSingleNode(".//span[contains(@class, 'subj')]/span").InnerText.Trim(); + ret.Add(new Chapter(manga, chapterName, volumeNumber, chapterNumber, url)); + } + + return ret; + } + + public override HttpStatusCode DownloadChapter(Chapter chapter, ProgressToken? progressToken = null) + { + if (progressToken?.cancellationRequested ?? 
false) + { + progressToken.Cancel(); + return HttpStatusCode.RequestTimeout; + } + + Manga chapterParentManga = chapter.parentManga; + Log($"Retrieving chapter-info {chapter} {chapterParentManga}"); + string requestUrl = chapter.url; + // Leaving this in to check if the page exists + RequestResult requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + { + progressToken?.Cancel(); + return requestResult.statusCode; + } + + string[] imageUrls = ParseImageUrlsFromHtml(requestUrl); + return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage, progressToken:progressToken, referrer: requestUrl); + } + + private string[] ParseImageUrlsFromHtml(string mangaUrl) + { + RequestResult requestResult = + downloadClient.MakeRequest(mangaUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + { + return Array.Empty<string>(); + } + if (requestResult.htmlDocument is null) + { + Log($"Failed to retrieve site"); + return Array.Empty<string>(); + } + + return requestResult.htmlDocument.DocumentNode + .SelectNodes("//*[@id='_imageList']/img") + .Select(node => + node.GetAttributeValue("data-url", "")) + .ToArray(); + } +} + +internal class PublicationManager { + public PublicationManager(string title = "", string category = "", string id = "") { + this.Title = title; + this.Category = category; + this.Id = id; + } + + public PublicationManager(string publicationId) { + string[] parts = publicationId.Split("|"); + if(parts.Length == 3) { + this.Title = parts[0]; + this.Category = parts[1]; + this.Id = parts[2]; + } else { + this.Title = ""; + this.Category = ""; + this.Id = ""; + } + } + + public string getPublicationId() { + return $"{this.Title}|{this.Category}|{this.Id}"; + } + + public string Title { get; set; } + public string Category { get; set; } + public string Id { get; set; } +} \ No newline at end of 
file diff --git a/Tranga/MangaConnectors/WeebCentral.cs b/Tranga/MangaConnectors/WeebCentral.cs new file mode 100644 index 0000000..c5d355f --- /dev/null +++ b/Tranga/MangaConnectors/WeebCentral.cs @@ -0,0 +1,209 @@ +using System.Net; +using System.Text.RegularExpressions; +using HtmlAgilityPack; +using Soenneker.Utils.String.NeedlemanWunsch; +using Tranga.Jobs; + +namespace Tranga.MangaConnectors; + +public class Weebcentral : MangaConnector +{ + private readonly string _baseUrl = "https://weebcentral.com"; + + private readonly string[] _filterWords = + { "a", "the", "of", "as", "to", "no", "for", "on", "with", "be", "and", "in", "wa", "at", "be", "ni" }; + + public Weebcentral(GlobalBase clone) : base(clone, "Weebcentral", ["en"]) + { + downloadClient = new ChromiumDownloadClient(clone); + } + + public override Manga[] GetManga(string publicationTitle = "") + { + Log($"Searching Publications. Term=\"{publicationTitle}\""); + const int limit = 32; //How many values we want returned at once + var offset = 0; //"Page" + var requestUrl = + $"{_baseUrl}/search/data?limit={limit}&offset={offset}&text={publicationTitle}&sort=Best+Match&order=Ascending&official=Any&display_mode=Minimal%20Display"; + var requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 || + requestResult.htmlDocument == null) + { + Log($"Failed to retrieve search: {requestResult.statusCode}"); + return []; + } + + var publications = ParsePublicationsFromHtml(requestResult.htmlDocument); + Log($"Retrieved {publications.Length} publications. 
Term=\"{publicationTitle}\""); + + return publications; + } + + private Manga[] ParsePublicationsFromHtml(HtmlDocument document) + { + if (document.DocumentNode.SelectNodes("//article") == null) + return []; + + var urls = document.DocumentNode.SelectNodes("/html/body/article/a[@class='link link-hover']") + .Select(elem => elem.GetAttributeValue("href", "")).ToList(); + + HashSet<Manga> ret = new(); + foreach (var url in urls) + { + var manga = GetMangaFromUrl(url); + if (manga is not null) + ret.Add((Manga)manga); + } + + return ret.ToArray(); + } + + public override Manga? GetMangaFromUrl(string url) + { + Regex publicationIdRex = new(@"https:\/\/weebcentral\.com\/series\/(\w*)\/(.*)"); + var publicationId = publicationIdRex.Match(url).Groups[1].Value; + + var requestResult = downloadClient.MakeRequest(url, RequestType.MangaInfo); + if ((int)requestResult.statusCode < 300 && (int)requestResult.statusCode >= 200 && + requestResult.htmlDocument is not null) + return ParseSinglePublicationFromHtml(requestResult.htmlDocument, publicationId, url); + return null; + } + + private Manga ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl) + { + var posterNode = + document.DocumentNode.SelectSingleNode("//section[@class='flex items-center justify-center']/picture/img"); + var posterUrl = posterNode?.GetAttributeValue("src", "") ?? ""; + var coverFileNameInCache = SaveCoverImageToCache(posterUrl, publicationId, RequestType.MangaCover); + + var titleNode = document.DocumentNode.SelectSingleNode("//section/h1"); + var sortName = titleNode?.InnerText ?? "Undefined"; + + HtmlNode[] authorsNodes = + document.DocumentNode.SelectNodes("//ul/li[strong/text() = 'Author(s): ']/span")?.ToArray() ?? []; + var authors = authorsNodes.Select(n => n.InnerText).ToList(); + + HtmlNode[] genreNodes = + document.DocumentNode.SelectNodes("//ul/li[strong/text() = 'Tags(s): ']/span")?.ToArray() ?? 
[]; + HashSet<string> tags = genreNodes.Select(n => n.InnerText).ToHashSet(); + + var statusNode = document.DocumentNode.SelectSingleNode("//ul/li[strong/text() = 'Status: ']/a"); + var status = statusNode?.InnerText ?? ""; + Log("unable to parse status"); + var releaseStatus = Manga.ReleaseStatusByte.Unreleased; + switch (status.ToLower()) + { + case "cancelled": releaseStatus = Manga.ReleaseStatusByte.Cancelled; break; + case "hiatus": releaseStatus = Manga.ReleaseStatusByte.OnHiatus; break; + case "complete": releaseStatus = Manga.ReleaseStatusByte.Completed; break; + case "ongoing": releaseStatus = Manga.ReleaseStatusByte.Continuing; break; + } + + var yearNode = document.DocumentNode.SelectSingleNode("//ul/li[strong/text() = 'Released: ']/span"); + var year = Convert.ToInt32(yearNode?.InnerText ?? "0"); + + var descriptionNode = document.DocumentNode.SelectSingleNode("//ul/li[strong/text() = 'Description']/p"); + var description = descriptionNode?.InnerText ?? "Undefined"; + + HtmlNode[] altTitleNodes = document.DocumentNode + .SelectNodes("//ul/li[strong/text() = 'Associated Name(s)']/ul/li")?.ToArray() ?? []; + Dictionary<string, string> altTitles = new(), links = new(); + for (var i = 0; i < altTitleNodes.Length; i++) + altTitles.Add(i.ToString(), altTitleNodes[i].InnerText); + + var originalLanguage = ""; + + Manga manga = new(sortName, authors.ToList(), description, altTitles, tags.ToArray(), posterUrl, + coverFileNameInCache, links, + year, originalLanguage, publicationId, releaseStatus, websiteUrl); + AddMangaToCache(manga); + return manga; + } + + public override Manga? 
GetMangaFromId(string publicationId) + { + return GetMangaFromUrl($"https://weebcentral.com/series/{publicationId}"); + } + + public override Chapter[] GetChapters(Manga manga, string language = "en") + { + Log($"Getting chapters {manga}"); + var requestUrl = $"{_baseUrl}/series/{manga.publicationId}/full-chapter-list"; + var requestResult = + downloadClient.MakeRequest(requestUrl, RequestType.Default); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + return []; + + //Return Chapters ordered by Chapter-Number + if (requestResult.htmlDocument is null) + return []; + var chapters = ParseChaptersFromHtml(manga, requestResult.htmlDocument); + Log($"Got {chapters.Count} chapters. {manga}"); + return chapters.Order().ToArray(); + } + + private List<Chapter> ParseChaptersFromHtml(Manga manga, HtmlDocument document) + { + var chaptersWrapper = document.DocumentNode.SelectSingleNode("/html/body"); + + Regex chapterRex = new(@"(\d+(?:\.\d+)*)"); + Regex idRex = new(@"https:\/\/weebcentral\.com\/chapters\/(\w*)"); + + var ret = chaptersWrapper.Descendants("a").Select(elem => + { + var url = elem.GetAttributeValue("href", "") ?? "Undefined"; + + if (!url.StartsWith("https://") && !url.StartsWith("http://")) + return new Chapter(manga, null, null, "-1", "undefined"); + + var idMatch = idRex.Match(url); + var id = idMatch.Success ? idMatch.Groups[1].Value : null; + + var chapterNode = elem.SelectSingleNode("span[@class='grow flex items-center gap-2']/span")?.InnerText ?? + "Undefined"; + + var chapterNumberMatch = chapterRex.Match(chapterNode); + var chapterNumber = chapterNumberMatch.Success ? chapterNumberMatch.Groups[1].Value : "-1"; + + return new Chapter(manga, null, null, chapterNumber, url, id); + }).Where(elem => elem.chapterNumber != -1 && elem.url != "undefined").ToList(); + + ret.Reverse(); + return ret; + } + + public override HttpStatusCode DownloadChapter(Chapter chapter, ProgressToken? 
progressToken = null) + { + if (progressToken?.cancellationRequested ?? false) + { + progressToken.Cancel(); + return HttpStatusCode.RequestTimeout; + } + + var chapterParentManga = chapter.parentManga; + if (progressToken?.cancellationRequested ?? false) + { + progressToken.Cancel(); + return HttpStatusCode.RequestTimeout; + } + + Log($"Retrieving chapter-info {chapter} {chapterParentManga}"); + + var requestResult = downloadClient.MakeRequest(chapter.url, RequestType.Default); + if (requestResult.htmlDocument is null) + { + progressToken?.Cancel(); + return HttpStatusCode.RequestTimeout; + } + + var document = requestResult.htmlDocument; + + var imageNodes = + document.DocumentNode.SelectNodes($"//section[@hx-get='{chapter.url}/images']/img")?.ToArray() ?? []; + var urls = imageNodes.Select(imgNode => imgNode.GetAttributeValue("src", "")).ToArray(); + + return DownloadChapterImages(urls, chapter, RequestType.MangaImage, progressToken: progressToken); + } +} \ No newline at end of file diff --git a/Tranga/Tranga.cs b/Tranga/Tranga.cs index 3209a5e..38f8370 100644 --- a/Tranga/Tranga.cs +++ b/Tranga/Tranga.cs @@ -18,15 +18,15 @@ public partial class Tranga : GlobalBase _connectors = new HashSet<MangaConnector>() { new Manganato(this), - new Mangasee(this), new MangaDex(this), new MangaKatana(this), new Mangaworld(this), new Bato(this), - new MangaLife(this), new ManhuaPlus(this), new MangaHere(this), new AsuraToon(this), + new Weebcentral(this), + new Webtoons(this), }; foreach(DirectoryInfo dir in new DirectoryInfo(Path.GetTempPath()).GetDirectories("trangatemp"))//Cleanup old temp folders dir.Delete(); diff --git a/Tranga/Tranga.csproj b/Tranga/Tranga.csproj index 1ca1c49..6447817 100644 --- a/Tranga/Tranga.csproj +++ b/Tranga/Tranga.csproj @@ -10,9 +10,9 @@ <ItemGroup> <PackageReference Include="GlaxArguments" Version="1.1.0" /> - <PackageReference Include="HtmlAgilityPack" Version="1.11.71" /> + <PackageReference Include="HtmlAgilityPack" Version="1.11.72" /> 
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" /> - <PackageReference Include="PuppeteerSharp" Version="20.0.5" /> + <PackageReference Include="PuppeteerSharp" Version="20.1.0" /> <PackageReference Include="Soenneker.Utils.String.NeedlemanWunsch" Version="2.1.301" /> </ItemGroup> diff --git a/Tranga/TrangaSettings.cs b/Tranga/TrangaSettings.cs index ccfa997..bb8b54f 100644 --- a/Tranga/TrangaSettings.cs +++ b/Tranga/TrangaSettings.cs @@ -35,6 +35,8 @@ public static class TrangaSettings }; public static Dictionary<RequestType, int> requestLimits { get; set; } = DefaultRequestLimits; + public static int ChromiumStartupTimeoutMs { get; set; } = 30000; + public static int ChromiumPageTimeoutMs { get; set; } = 30000; public static void LoadFromWorkingDirectory(string directory) { @@ -167,6 +169,8 @@ public static class TrangaSettings jobj.Add("requestLimits", JToken.FromObject(requestLimits)); jobj.Add("bufferLibraryUpdates", JToken.FromObject(bufferLibraryUpdates)); jobj.Add("bufferNotifications", JToken.FromObject(bufferNotifications)); + jobj.Add("chromiumStartTimeout", JToken.FromObject(ChromiumStartupTimeoutMs)); + jobj.Add("chromiumPageTimeout", JToken.FromObject(ChromiumPageTimeoutMs)); return jobj; } @@ -191,5 +195,9 @@ public static class TrangaSettings bufferLibraryUpdates = blu.Value<bool>()!; if (jobj.TryGetValue("bufferNotifications", out JToken? bn)) bufferNotifications = bn.Value<bool>()!; + if (jobj.TryGetValue("chromiumStartTimeout", out JToken? cst)) + ChromiumStartupTimeoutMs = cst.Value<int>(); + if (jobj.TryGetValue("chromiumPageTimeout", out JToken? cpt)) + ChromiumPageTimeoutMs = cpt.Value<int>(); } } \ No newline at end of file