From c58adf64fa1879e42ed2d954dfdf52fd2e6ae093 Mon Sep 17 00:00:00 2001 From: Glax Date: Sun, 27 Oct 2024 02:41:28 +0200 Subject: [PATCH] #271 Create Marker-files for Chapters. If a Connector provides a unique ID for a chapter, Tranga will create a marker file containing the current archive file path of the chapter. This should prevent duplicate or missing chapters. --- Tranga/Chapter.cs | 65 +++++++++++++++--------- Tranga/MangaConnectors/Bato.cs | 10 ++-- Tranga/MangaConnectors/MangaConnector.cs | 22 ++++---- Tranga/MangaConnectors/MangaDex.cs | 7 +-- Tranga/MangaConnectors/MangaHere.cs | 5 +- Tranga/MangaConnectors/MangaKatana.cs | 7 +-- Tranga/MangaConnectors/MangaLife.cs | 2 +- Tranga/MangaConnectors/Manganato.cs | 5 +- Tranga/MangaConnectors/Mangasee.cs | 5 +- Tranga/MangaConnectors/Mangaworld.cs | 23 ++++----- Tranga/MangaConnectors/ManhuaPlus.cs | 5 +- 11 files changed, 77 insertions(+), 79 deletions(-) diff --git a/Tranga/Chapter.cs b/Tranga/Chapter.cs index 6ae48a2..3bd2a9f 100644 --- a/Tranga/Chapter.cs +++ b/Tranga/Chapter.cs @@ -1,5 +1,7 @@ -using System.Text.RegularExpressions; +using System.Runtime.InteropServices; +using System.Text.RegularExpressions; using System.Xml.Linq; +using static System.IO.UnixFileMode; namespace Tranga; @@ -17,23 +19,21 @@ public readonly struct Chapter : IComparable public string url { get; } // ReSharper disable once MemberCanBePrivate.Global public string fileName { get; } + public string? id { get; } private static readonly Regex LegalCharacters = new (@"([A-z]*[0-9]* *\.*-*,*\]*\[*'*\'*\)*\(*~*!*)*"); private static readonly Regex IllegalStrings = new(@"(Vol(ume)?|Ch(apter)?)\.?", RegexOptions.IgnoreCase); private static readonly Regex Digits = new(@"[0-9\.]*"); - public Chapter(Manga parentManga, string? name, string? volumeNumber, string chapterNumber, string url) + public Chapter(Manga parentManga, string? name, string? volumeNumber, string chapterNumber, string url, string? 
id = null) { this.parentManga = parentManga; this.name = name; this.volumeNumber = volumeNumber is not null ? string.Concat(Digits.Matches(volumeNumber).Select(x => x.Value)) : "0"; this.chapterNumber = string.Concat(Digits.Matches(chapterNumber).Select(x => x.Value)); this.url = url; + this.id = id; - string chapterVolNumStr; - if (volumeNumber is not null && volumeNumber.Length > 0) - chapterVolNumStr = $"Vol.{volumeNumber} Ch.{chapterNumber}"; - else - chapterVolNumStr = $"Ch.{chapterNumber}"; + string chapterVolNumStr = $"Vol.{this.volumeNumber} Ch.{chapterNumber}"; if (name is not null && name.Length > 0) { @@ -87,24 +87,43 @@ public readonly struct Chapter : IComparable string mangaDirectory = Path.Join(TrangaSettings.downloadLocation, parentManga.folderName); if (!Directory.Exists(mangaDirectory)) return false; - FileInfo[] archives = new DirectoryInfo(mangaDirectory).GetFiles("*.cbz"); - Regex volChRex = new(@"(?:Vol(?:ume)?\.([0-9]+)\D*)?Ch(?:apter)?\.([0-9]+(?:\.[0-9]+)*)"); - - Chapter t = this; - string correctPath = GetArchiveFilePath(); - FileInfo? archive = archives.FirstOrDefault(archive => + FileInfo? 
mangaArchive = null; + string markerPath = Path.Join(mangaDirectory, $".{id}"); + if (this.id is not null + && File.Exists(markerPath) + && File.Exists(File.ReadAllText(markerPath))) { - Match m = volChRex.Match(archive.Name); - /*Uncommenting this section will only allow *Version without Volume number* -> *Version with Volume number* but not the other way - if (m.Groups[1].Success) - return m.Groups[1].Value == t.volumeNumber && m.Groups[2].Value == t.chapterNumber; - else*/ - return m.Groups[2].Value == t.chapterNumber; - }); - if(archive is not null && archive.FullName != correctPath) - archive.MoveTo(correctPath, true); - return (archive is not null); + mangaArchive = new FileInfo(File.ReadAllText(markerPath)); + } + else + { + FileInfo[] archives = new DirectoryInfo(mangaDirectory).GetFiles("*.cbz"); + Regex volChRex = new(@"(?:Vol(?:ume)?\.([0-9]+)\D*)?Ch(?:apter)?\.([0-9]+(?:\.[0-9]+)*)"); + + Chapter t = this; + mangaArchive = archives.FirstOrDefault(archive => + { + Match m = volChRex.Match(archive.Name); + if (m.Groups[1].Success) + return m.Groups[1].Value == t.volumeNumber && m.Groups[2].Value == t.chapterNumber; + else + return m.Groups[2].Value == t.chapterNumber; + }); + } + string correctPath = GetArchiveFilePath(); + if(mangaArchive is not null && mangaArchive.FullName != correctPath) + mangaArchive.MoveTo(correctPath, true); + return (mangaArchive is not null); } + + public void CreateChapterMarker() + { + string path = Path.Join(TrangaSettings.downloadLocation, parentManga.folderName, $".{id}"); + File.WriteAllText(path, GetArchiveFilePath()); + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + File.SetUnixFileMode(path, UserRead | UserWrite | UserExecute | GroupRead | GroupWrite | GroupExecute | OtherRead | OtherExecute); + } + /// /// Creates full file path of chapter-archive /// diff --git a/Tranga/MangaConnectors/Bato.cs b/Tranga/MangaConnectors/Bato.cs index 798967a..c666a36 100644 --- a/Tranga/MangaConnectors/Bato.cs +++ 
b/Tranga/MangaConnectors/Bato.cs @@ -150,7 +150,7 @@ public class Bato : MangaConnector HtmlNode chapterList = result.htmlDocument.DocumentNode.SelectSingleNode("/html/body/div/main/div[3]/astro-island/div/div[2]/div/div/astro-slot"); - Regex numberRex = new(@"\/title\/.+\/[0-9]+(-vol_([0-9]+))?-ch_([0-9\.]+)"); + Regex numberRex = new(@"\/title\/.+\/([0-9]+)(?:-vol_([0-9]+))?-ch_([0-9\.]+)"); /* groups: 1 = full numeric chapter id, 2 = volume (optional), 3 = chapter number */ foreach (HtmlNode chapterInfo in chapterList.SelectNodes("div")) { @@ -158,6 +158,7 @@ public class Bato : MangaConnector string chapterUrl = infoNode.GetAttributeValue("href", ""); Match match = numberRex.Match(chapterUrl); + string id = match.Groups[1].Value; string? volumeNumber = match.Groups[2].Success ? match.Groups[2].Value : null; string chapterNumber = match.Groups[3].Value; string chapterName = chapterNumber; @@ -189,11 +190,8 @@ public class Bato : MangaConnector } string[] imageUrls = ParseImageUrlsFromHtml(requestUrl); - - string comicInfoPath = Path.GetTempFileName(); - File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); - - return DownloadChapterImages(imageUrls, chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, "https://mangakatana.com/", progressToken:progressToken); + + return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage, progressToken:progressToken); } private string[] ParseImageUrlsFromHtml(string mangaUrl) diff --git a/Tranga/MangaConnectors/MangaConnector.cs b/Tranga/MangaConnectors/MangaConnector.cs index 0a4adc0..f0c8fe0 100644 --- a/Tranga/MangaConnectors/MangaConnector.cs +++ b/Tranga/MangaConnectors/MangaConnector.cs @@ -214,8 +214,10 @@ public abstract class MangaConnector : GlobalBase return requestResult.statusCode; } - protected HttpStatusCode DownloadChapterImages(string[] imageUrls, string saveArchiveFilePath, RequestType requestType, string? comicInfoPath = null, string? referrer = null, ProgressToken? 
progressToken = null) + protected HttpStatusCode DownloadChapterImages(string[] imageUrls, Chapter chapter, RequestType requestType, string? referrer = null, ProgressToken? progressToken = null) { + string saveArchiveFilePath = chapter.GetArchiveFilePath(); + if (progressToken?.cancellationRequested ?? false) return HttpStatusCode.RequestTimeout; Log($"Downloading Images for {saveArchiveFilePath}"); @@ -239,7 +241,7 @@ public abstract class MangaConnector : GlobalBase //Create a temporary folder to store images string tempFolder = Directory.CreateTempSubdirectory("trangatemp").FullName; - int chapter = 0; + int chapterNum = 0; //Download all Images to temporary Folder if (imageUrls.Length == 0) { @@ -253,9 +255,9 @@ public abstract class MangaConnector : GlobalBase foreach (string imageUrl in imageUrls) { string extension = imageUrl.Split('.')[^1].Split('?')[0]; - Log($"Downloading image {chapter + 1:000}/{imageUrls.Length:000}"); //TODO - HttpStatusCode status = DownloadImage(imageUrl, Path.Join(tempFolder, $"{chapter++}.{extension}"), requestType, referrer); - Log($"{saveArchiveFilePath} {chapter + 1:000}/{imageUrls.Length:000} {status}"); + Log($"Downloading image {chapterNum + 1:000}/{imageUrls.Length:000}"); //TODO + HttpStatusCode status = DownloadImage(imageUrl, Path.Join(tempFolder, $"{chapterNum++}.{extension}"), requestType, referrer); + Log($"{saveArchiveFilePath} {chapterNum:000}/{imageUrls.Length:000} {status}"); /* chapterNum was already incremented by the download call above */ if ((int)status < 200 || (int)status >= 300) { progressToken?.Complete(); @@ -269,16 +271,14 @@ public abstract class MangaConnector : GlobalBase progressToken?.Increment(); } - if(comicInfoPath is not null){ - File.Copy(comicInfoPath, Path.Join(tempFolder, "ComicInfo.xml")); - File.Delete(comicInfoPath); //Delete tmp-file - } + File.WriteAllText(Path.Join(tempFolder, "ComicInfo.xml"), chapter.GetComicInfoXmlString()); Log($"Creating archive {saveArchiveFilePath}"); //ZIP-it and ship-it ZipFile.CreateFromDirectory(tempFolder, 
saveArchiveFilePath); - if(RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) - File.SetUnixFileMode(saveArchiveFilePath, UserRead | UserWrite | UserExecute | GroupRead | GroupWrite | GroupExecute); + if (!string.IsNullOrEmpty(chapter.id)) chapter.CreateChapterMarker(); /* without an id the marker path "." would resolve to the manga directory itself */ + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + File.SetUnixFileMode(saveArchiveFilePath, UserRead | UserWrite | UserExecute | GroupRead | GroupWrite | GroupExecute | OtherRead | OtherExecute); Directory.Delete(tempFolder, true); //Cleanup progressToken?.Complete(); diff --git a/Tranga/MangaConnectors/MangaDex.cs b/Tranga/MangaConnectors/MangaDex.cs index 07480f5..1dbb1d0 100644 --- a/Tranga/MangaConnectors/MangaDex.cs +++ b/Tranga/MangaConnectors/MangaDex.cs @@ -247,7 +247,7 @@ public class MangaDex : MangaConnector } if(chapterNum is not "null" && !chapters.Any(chp => chp.volumeNumber.Equals(volume) && chp.chapterNumber.Equals(chapterNum))) - chapters.Add(new Chapter(manga, title, volume, chapterNum, chapterId)); + chapters.Add(new Chapter(manga, title, volume, chapterNum, chapterId, chapterId)); } } @@ -288,11 +288,8 @@ public class MangaDex : MangaConnector HashSet imageUrls = new(); foreach (JsonNode? 
image in imageFileNames) imageUrls.Add($"{baseUrl}/data/{hash}/{image!.GetValue()}"); - - string comicInfoPath = Path.GetTempFileName(); - File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); //Download Chapter-Images - return DownloadChapterImages(imageUrls.ToArray(), chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, progressToken:progressToken); + return DownloadChapterImages(imageUrls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken); } } \ No newline at end of file diff --git a/Tranga/MangaConnectors/MangaHere.cs b/Tranga/MangaConnectors/MangaHere.cs index 000b07a..c14b697 100644 --- a/Tranga/MangaConnectors/MangaHere.cs +++ b/Tranga/MangaConnectors/MangaHere.cs @@ -181,12 +181,9 @@ public class MangaHere : MangaConnector } } while (downloaded++ <= images); - string comicInfoPath = Path.GetTempFileName(); - File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); - if (progressToken is not null) progressToken.increments = images;//we blip to normal length, in downloadchapterimages it is increasaed by the amount of urls again - return DownloadChapterImages(imageUrls.ToArray(), chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, progressToken:progressToken); + return DownloadChapterImages(imageUrls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken); } private string[] ParseImageUrlsFromHtml(HtmlDocument document) diff --git a/Tranga/MangaConnectors/MangaKatana.cs b/Tranga/MangaConnectors/MangaKatana.cs index bc77637..5142c3b 100644 --- a/Tranga/MangaConnectors/MangaKatana.cs +++ b/Tranga/MangaConnectors/MangaKatana.cs @@ -213,11 +213,8 @@ public class MangaKatana : MangaConnector } string[] imageUrls = ParseImageUrlsFromHtml(requestUrl); - - string comicInfoPath = Path.GetTempFileName(); - File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); - - return DownloadChapterImages(imageUrls, chapter.GetArchiveFilePath(), RequestType.MangaImage, 
comicInfoPath, "https://mangakatana.com/", progressToken:progressToken); + + return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage, "https://mangakatana.com/", progressToken:progressToken); } private string[] ParseImageUrlsFromHtml(string mangaUrl) diff --git a/Tranga/MangaConnectors/MangaLife.cs b/Tranga/MangaConnectors/MangaLife.cs index 9c65a4b..a01a09d 100644 --- a/Tranga/MangaConnectors/MangaLife.cs +++ b/Tranga/MangaConnectors/MangaLife.cs @@ -194,6 +194,4 @@ public class MangaLife : MangaConnector - string comicInfoPath = Path.GetTempFileName(); - File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); - return DownloadChapterImages(urls.ToArray(), chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, progressToken:progressToken); + return DownloadChapterImages(urls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken); } } \ No newline at end of file diff --git a/Tranga/MangaConnectors/Manganato.cs b/Tranga/MangaConnectors/Manganato.cs index 3422b34..795fc8c 100644 --- a/Tranga/MangaConnectors/Manganato.cs +++ b/Tranga/MangaConnectors/Manganato.cs @@ -214,10 +214,7 @@ public class Manganato : MangaConnector string[] imageUrls = ParseImageUrlsFromHtml(requestResult.htmlDocument); - string comicInfoPath = Path.GetTempFileName(); - File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); - - return DownloadChapterImages(imageUrls, chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, "https://chapmanganato.com/", progressToken:progressToken); + return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage, "https://chapmanganato.com/", progressToken:progressToken); } private string[] ParseImageUrlsFromHtml(HtmlDocument document) diff --git a/Tranga/MangaConnectors/Mangasee.cs b/Tranga/MangaConnectors/Mangasee.cs index a4de0cc..78b5271 100644 --- a/Tranga/MangaConnectors/Mangasee.cs +++ b/Tranga/MangaConnectors/Mangasee.cs @@ -221,10 +221,7 @@ public class Mangasee : MangaConnector List urls = 
new(); foreach(HtmlNode galleryImage in images) urls.Add(galleryImage.GetAttributeValue("src", "")); - - string comicInfoPath = Path.GetTempFileName(); - File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); - return DownloadChapterImages(urls.ToArray(), chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, progressToken:progressToken); + return DownloadChapterImages(urls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken); } } \ No newline at end of file diff --git a/Tranga/MangaConnectors/Mangaworld.cs b/Tranga/MangaConnectors/Mangaworld.cs index d21ae77..4096ccd 100644 --- a/Tranga/MangaConnectors/Mangaworld.cs +++ b/Tranga/MangaConnectors/Mangaworld.cs @@ -149,19 +149,21 @@ public class Mangaworld: MangaConnector document.DocumentNode.SelectSingleNode( "//div[contains(concat(' ',normalize-space(@class),' '),'chapters-wrapper')]"); + Regex volumeRex = new(@"[Vv]olume ([0-9]+).*"); + Regex chapterRex = new(@"[Cc]apitolo ([0-9]+).*"); + Regex idRex = new(@".*\/read\/([a-z0-9]+)(?:[?\/].*)?"); if (chaptersWrapper.Descendants("div").Any(descendant => descendant.HasClass("volume-element"))) { foreach (HtmlNode volNode in document.DocumentNode.SelectNodes("//div[contains(concat(' ',normalize-space(@class),' '),'volume-element')]")) { - string volume = Regex.Match(volNode.SelectNodes("div").First(node => node.HasClass("volume")).SelectSingleNode("p").InnerText, - @"[Vv]olume ([0-9]+).*").Groups[1].Value; + string volume = volumeRex.Match(volNode.SelectNodes("div").First(node => node.HasClass("volume")).SelectSingleNode("p").InnerText).Groups[1].Value; foreach (HtmlNode chNode in volNode.SelectNodes("div").First(node => node.HasClass("volume-chapters")).SelectNodes("div")) { - string number = Regex.Match(chNode.SelectSingleNode("a").SelectSingleNode("span").InnerText, - @"[Cc]apitolo ([0-9]+).*").Groups[1].Value; + string number = 
chapterRex.Match(chNode.SelectSingleNode("a").SelectSingleNode("span").InnerText).Groups[1].Value; string url = chNode.SelectSingleNode("a").GetAttributeValue("href", ""); - ret.Add(new Chapter(manga, null, volume, number, url)); + string id = idRex.Match(chNode.SelectSingleNode("a").GetAttributeValue("href", "")).Groups[1].Value; + ret.Add(new Chapter(manga, null, volume, number, url, id)); } } } @@ -169,10 +171,10 @@ public class Mangaworld: MangaConnector { foreach (HtmlNode chNode in chaptersWrapper.SelectNodes("div").Where(node => node.HasClass("chapter"))) { - string number = Regex.Match(chNode.SelectSingleNode("a").SelectSingleNode("span").InnerText, - @"[Cc]apitolo ([0-9]+).*").Groups[1].Value; + string number = chapterRex.Match(chNode.SelectSingleNode("a").SelectSingleNode("span").InnerText).Groups[1].Value; string url = chNode.SelectSingleNode("a").GetAttributeValue("href", ""); - ret.Add(new Chapter(manga, null, null, number, url)); + string id = idRex.Match(chNode.SelectSingleNode("a").GetAttributeValue("href", "")).Groups[1].Value; + ret.Add(new Chapter(manga, null, null, number, url, id)); } } @@ -207,10 +209,7 @@ public class Mangaworld: MangaConnector string[] imageUrls = ParseImageUrlsFromHtml(requestResult.htmlDocument); - string comicInfoPath = Path.GetTempFileName(); - File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); - - return DownloadChapterImages(imageUrls, chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, "https://www.mangaworld.bz/", progressToken:progressToken); + return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage,"https://www.mangaworld.bz/", progressToken:progressToken); } private string[] ParseImageUrlsFromHtml(HtmlDocument document) diff --git a/Tranga/MangaConnectors/ManhuaPlus.cs b/Tranga/MangaConnectors/ManhuaPlus.cs index 87dd3e1..98bc368 100644 --- a/Tranga/MangaConnectors/ManhuaPlus.cs +++ b/Tranga/MangaConnectors/ManhuaPlus.cs @@ -189,10 +189,7 @@ public class ManhuaPlus 
: MangaConnector HtmlNode[] images = document.DocumentNode.SelectNodes("//a[contains(concat(' ',normalize-space(@class),' '),' readImg ')]/img").ToArray(); List urls = images.Select(node => node.GetAttributeValue("src", "")).ToList(); - - string comicInfoPath = Path.GetTempFileName(); - File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); - return DownloadChapterImages(urls.ToArray(), chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, progressToken:progressToken); + return DownloadChapterImages(urls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken); } } \ No newline at end of file