#271 Create Marker-files for Chapters.

If a Connector provides a unique ID for a chapter, Tranga will create a markerfile, containing the current name of the Chapter
This should prevent duplicates, or missing chapters.
This commit is contained in:
Glax 2024-10-27 02:41:28 +02:00
parent 957debea01
commit c58adf64fa
11 changed files with 77 additions and 79 deletions

View File

@ -1,5 +1,7 @@
using System.Text.RegularExpressions; using System.Runtime.InteropServices;
using System.Text.RegularExpressions;
using System.Xml.Linq; using System.Xml.Linq;
using static System.IO.UnixFileMode;
namespace Tranga; namespace Tranga;
@ -17,23 +19,21 @@ public readonly struct Chapter : IComparable
public string url { get; } public string url { get; }
// ReSharper disable once MemberCanBePrivate.Global // ReSharper disable once MemberCanBePrivate.Global
public string fileName { get; } public string fileName { get; }
public string? id { get; }
private static readonly Regex LegalCharacters = new (@"([A-z]*[0-9]* *\.*-*,*\]*\[*'*\'*\)*\(*~*!*)*"); private static readonly Regex LegalCharacters = new (@"([A-z]*[0-9]* *\.*-*,*\]*\[*'*\'*\)*\(*~*!*)*");
private static readonly Regex IllegalStrings = new(@"(Vol(ume)?|Ch(apter)?)\.?", RegexOptions.IgnoreCase); private static readonly Regex IllegalStrings = new(@"(Vol(ume)?|Ch(apter)?)\.?", RegexOptions.IgnoreCase);
private static readonly Regex Digits = new(@"[0-9\.]*"); private static readonly Regex Digits = new(@"[0-9\.]*");
public Chapter(Manga parentManga, string? name, string? volumeNumber, string chapterNumber, string url) public Chapter(Manga parentManga, string? name, string? volumeNumber, string chapterNumber, string url, string? id = null)
{ {
this.parentManga = parentManga; this.parentManga = parentManga;
this.name = name; this.name = name;
this.volumeNumber = volumeNumber is not null ? string.Concat(Digits.Matches(volumeNumber).Select(x => x.Value)) : "0"; this.volumeNumber = volumeNumber is not null ? string.Concat(Digits.Matches(volumeNumber).Select(x => x.Value)) : "0";
this.chapterNumber = string.Concat(Digits.Matches(chapterNumber).Select(x => x.Value)); this.chapterNumber = string.Concat(Digits.Matches(chapterNumber).Select(x => x.Value));
this.url = url; this.url = url;
this.id = id;
string chapterVolNumStr; string chapterVolNumStr = $"Vol.{this.volumeNumber} Ch.{chapterNumber}";
if (volumeNumber is not null && volumeNumber.Length > 0)
chapterVolNumStr = $"Vol.{volumeNumber} Ch.{chapterNumber}";
else
chapterVolNumStr = $"Ch.{chapterNumber}";
if (name is not null && name.Length > 0) if (name is not null && name.Length > 0)
{ {
@ -87,24 +87,43 @@ public readonly struct Chapter : IComparable
string mangaDirectory = Path.Join(TrangaSettings.downloadLocation, parentManga.folderName); string mangaDirectory = Path.Join(TrangaSettings.downloadLocation, parentManga.folderName);
if (!Directory.Exists(mangaDirectory)) if (!Directory.Exists(mangaDirectory))
return false; return false;
FileInfo? mangaArchive = null;
string markerPath = Path.Join(mangaDirectory, $".{id}");
if (this.id is not null
&& File.Exists(markerPath)
&& File.Exists(File.ReadAllText(markerPath)))
{
mangaArchive = new FileInfo(File.ReadAllText(markerPath));
}
else
{
FileInfo[] archives = new DirectoryInfo(mangaDirectory).GetFiles("*.cbz"); FileInfo[] archives = new DirectoryInfo(mangaDirectory).GetFiles("*.cbz");
Regex volChRex = new(@"(?:Vol(?:ume)?\.([0-9]+)\D*)?Ch(?:apter)?\.([0-9]+(?:\.[0-9]+)*)"); Regex volChRex = new(@"(?:Vol(?:ume)?\.([0-9]+)\D*)?Ch(?:apter)?\.([0-9]+(?:\.[0-9]+)*)");
Chapter t = this; Chapter t = this;
string correctPath = GetArchiveFilePath(); mangaArchive = archives.FirstOrDefault(archive =>
FileInfo? archive = archives.FirstOrDefault(archive =>
{ {
Match m = volChRex.Match(archive.Name); Match m = volChRex.Match(archive.Name);
/*Uncommenting this section will only allow *Version without Volume number* -> *Version with Volume number* but not the other way
if (m.Groups[1].Success) if (m.Groups[1].Success)
return m.Groups[1].Value == t.volumeNumber && m.Groups[2].Value == t.chapterNumber; return m.Groups[1].Value == t.volumeNumber && m.Groups[2].Value == t.chapterNumber;
else*/ else
return m.Groups[2].Value == t.chapterNumber; return m.Groups[2].Value == t.chapterNumber;
}); });
if(archive is not null && archive.FullName != correctPath)
archive.MoveTo(correctPath, true);
return (archive is not null);
} }
string correctPath = GetArchiveFilePath();
if(mangaArchive is not null && mangaArchive.FullName != correctPath)
mangaArchive.MoveTo(correctPath, true);
return (mangaArchive is not null);
}
public void CreateChapterMarker()
{
string path = Path.Join(TrangaSettings.downloadLocation, parentManga.folderName, $".{id}");
File.WriteAllText(path, GetArchiveFilePath());
if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
File.SetUnixFileMode(path, UserRead | UserWrite | UserExecute | GroupRead | GroupWrite | GroupExecute | OtherRead | OtherExecute);
}
/// <summary> /// <summary>
/// Creates full file path of chapter-archive /// Creates full file path of chapter-archive
/// </summary> /// </summary>

View File

@ -150,7 +150,7 @@ public class Bato : MangaConnector
HtmlNode chapterList = HtmlNode chapterList =
result.htmlDocument.DocumentNode.SelectSingleNode("/html/body/div/main/div[3]/astro-island/div/div[2]/div/div/astro-slot"); result.htmlDocument.DocumentNode.SelectSingleNode("/html/body/div/main/div[3]/astro-island/div/div[2]/div/div/astro-slot");
Regex numberRex = new(@"\/title\/.+\/[0-9]+(-vol_([0-9]+))?-ch_([0-9\.]+)"); Regex numberRex = new(@"\/title\/.+\/([0-9])+(?:-vol_([0-9]+))?-ch_([0-9\.]+)");
foreach (HtmlNode chapterInfo in chapterList.SelectNodes("div")) foreach (HtmlNode chapterInfo in chapterList.SelectNodes("div"))
{ {
@ -158,6 +158,7 @@ public class Bato : MangaConnector
string chapterUrl = infoNode.GetAttributeValue("href", ""); string chapterUrl = infoNode.GetAttributeValue("href", "");
Match match = numberRex.Match(chapterUrl); Match match = numberRex.Match(chapterUrl);
string id = match.Groups[1].Value;
string? volumeNumber = match.Groups[2].Success ? match.Groups[2].Value : null; string? volumeNumber = match.Groups[2].Success ? match.Groups[2].Value : null;
string chapterNumber = match.Groups[3].Value; string chapterNumber = match.Groups[3].Value;
string chapterName = chapterNumber; string chapterName = chapterNumber;
@ -190,10 +191,7 @@ public class Bato : MangaConnector
string[] imageUrls = ParseImageUrlsFromHtml(requestUrl); string[] imageUrls = ParseImageUrlsFromHtml(requestUrl);
string comicInfoPath = Path.GetTempFileName(); return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage, progressToken:progressToken);
File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());
return DownloadChapterImages(imageUrls, chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, "https://mangakatana.com/", progressToken:progressToken);
} }
private string[] ParseImageUrlsFromHtml(string mangaUrl) private string[] ParseImageUrlsFromHtml(string mangaUrl)

View File

@ -214,8 +214,10 @@ public abstract class MangaConnector : GlobalBase
return requestResult.statusCode; return requestResult.statusCode;
} }
protected HttpStatusCode DownloadChapterImages(string[] imageUrls, string saveArchiveFilePath, RequestType requestType, string? comicInfoPath = null, string? referrer = null, ProgressToken? progressToken = null) protected HttpStatusCode DownloadChapterImages(string[] imageUrls, Chapter chapter, RequestType requestType, string? referrer = null, ProgressToken? progressToken = null)
{ {
string saveArchiveFilePath = chapter.GetArchiveFilePath();
if (progressToken?.cancellationRequested ?? false) if (progressToken?.cancellationRequested ?? false)
return HttpStatusCode.RequestTimeout; return HttpStatusCode.RequestTimeout;
Log($"Downloading Images for {saveArchiveFilePath}"); Log($"Downloading Images for {saveArchiveFilePath}");
@ -239,7 +241,7 @@ public abstract class MangaConnector : GlobalBase
//Create a temporary folder to store images //Create a temporary folder to store images
string tempFolder = Directory.CreateTempSubdirectory("trangatemp").FullName; string tempFolder = Directory.CreateTempSubdirectory("trangatemp").FullName;
int chapter = 0; int chapterNum = 0;
//Download all Images to temporary Folder //Download all Images to temporary Folder
if (imageUrls.Length == 0) if (imageUrls.Length == 0)
{ {
@ -253,9 +255,9 @@ public abstract class MangaConnector : GlobalBase
foreach (string imageUrl in imageUrls) foreach (string imageUrl in imageUrls)
{ {
string extension = imageUrl.Split('.')[^1].Split('?')[0]; string extension = imageUrl.Split('.')[^1].Split('?')[0];
Log($"Downloading image {chapter + 1:000}/{imageUrls.Length:000}"); //TODO Log($"Downloading image {chapterNum + 1:000}/{imageUrls.Length:000}"); //TODO
HttpStatusCode status = DownloadImage(imageUrl, Path.Join(tempFolder, $"{chapter++}.{extension}"), requestType, referrer); HttpStatusCode status = DownloadImage(imageUrl, Path.Join(tempFolder, $"{chapterNum++}.{extension}"), requestType, referrer);
Log($"{saveArchiveFilePath} {chapter + 1:000}/{imageUrls.Length:000} {status}"); Log($"{saveArchiveFilePath} {chapterNum + 1:000}/{imageUrls.Length:000} {status}");
if ((int)status < 200 || (int)status >= 300) if ((int)status < 200 || (int)status >= 300)
{ {
progressToken?.Complete(); progressToken?.Complete();
@ -269,16 +271,14 @@ public abstract class MangaConnector : GlobalBase
progressToken?.Increment(); progressToken?.Increment();
} }
if(comicInfoPath is not null){ File.WriteAllText(Path.Join(tempFolder, "ComicInfo.xml"), chapter.GetComicInfoXmlString());
File.Copy(comicInfoPath, Path.Join(tempFolder, "ComicInfo.xml"));
File.Delete(comicInfoPath); //Delete tmp-file
}
Log($"Creating archive {saveArchiveFilePath}"); Log($"Creating archive {saveArchiveFilePath}");
//ZIP-it and ship-it //ZIP-it and ship-it
ZipFile.CreateFromDirectory(tempFolder, saveArchiveFilePath); ZipFile.CreateFromDirectory(tempFolder, saveArchiveFilePath);
chapter.CreateChapterMarker();
if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
File.SetUnixFileMode(saveArchiveFilePath, UserRead | UserWrite | UserExecute | GroupRead | GroupWrite | GroupExecute); File.SetUnixFileMode(saveArchiveFilePath, UserRead | UserWrite | UserExecute | GroupRead | GroupWrite | GroupExecute | OtherRead | OtherExecute);
Directory.Delete(tempFolder, true); //Cleanup Directory.Delete(tempFolder, true); //Cleanup
progressToken?.Complete(); progressToken?.Complete();

View File

@ -247,7 +247,7 @@ public class MangaDex : MangaConnector
} }
if(chapterNum is not "null" && !chapters.Any(chp => chp.volumeNumber.Equals(volume) && chp.chapterNumber.Equals(chapterNum))) if(chapterNum is not "null" && !chapters.Any(chp => chp.volumeNumber.Equals(volume) && chp.chapterNumber.Equals(chapterNum)))
chapters.Add(new Chapter(manga, title, volume, chapterNum, chapterId)); chapters.Add(new Chapter(manga, title, volume, chapterNum, chapterId, chapterId));
} }
} }
@ -289,10 +289,7 @@ public class MangaDex : MangaConnector
foreach (JsonNode? image in imageFileNames) foreach (JsonNode? image in imageFileNames)
imageUrls.Add($"{baseUrl}/data/{hash}/{image!.GetValue<string>()}"); imageUrls.Add($"{baseUrl}/data/{hash}/{image!.GetValue<string>()}");
string comicInfoPath = Path.GetTempFileName();
File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());
//Download Chapter-Images //Download Chapter-Images
return DownloadChapterImages(imageUrls.ToArray(), chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, progressToken:progressToken); return DownloadChapterImages(imageUrls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken);
} }
} }

View File

@ -181,12 +181,9 @@ public class MangaHere : MangaConnector
} }
} while (downloaded++ <= images); } while (downloaded++ <= images);
string comicInfoPath = Path.GetTempFileName();
File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());
if (progressToken is not null) if (progressToken is not null)
progressToken.increments = images;//we blip to normal length, in downloadchapterimages it is increasaed by the amount of urls again progressToken.increments = images;//we blip to normal length, in downloadchapterimages it is increasaed by the amount of urls again
return DownloadChapterImages(imageUrls.ToArray(), chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, progressToken:progressToken); return DownloadChapterImages(imageUrls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken);
} }
private string[] ParseImageUrlsFromHtml(HtmlDocument document) private string[] ParseImageUrlsFromHtml(HtmlDocument document)

View File

@ -214,10 +214,7 @@ public class MangaKatana : MangaConnector
string[] imageUrls = ParseImageUrlsFromHtml(requestUrl); string[] imageUrls = ParseImageUrlsFromHtml(requestUrl);
string comicInfoPath = Path.GetTempFileName(); return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage, progressToken:progressToken);
File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());
return DownloadChapterImages(imageUrls, chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, "https://mangakatana.com/", progressToken:progressToken);
} }
private string[] ParseImageUrlsFromHtml(string mangaUrl) private string[] ParseImageUrlsFromHtml(string mangaUrl)

View File

@ -194,6 +194,6 @@ public class MangaLife : MangaConnector
string comicInfoPath = Path.GetTempFileName(); string comicInfoPath = Path.GetTempFileName();
File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString()); File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());
return DownloadChapterImages(urls.ToArray(), chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, progressToken:progressToken); return DownloadChapterImages(urls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken);
} }
} }

View File

@ -214,10 +214,7 @@ public class Manganato : MangaConnector
string[] imageUrls = ParseImageUrlsFromHtml(requestResult.htmlDocument); string[] imageUrls = ParseImageUrlsFromHtml(requestResult.htmlDocument);
string comicInfoPath = Path.GetTempFileName(); return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage, "https://chapmanganato.com/", progressToken:progressToken);
File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());
return DownloadChapterImages(imageUrls, chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, "https://chapmanganato.com/", progressToken:progressToken);
} }
private string[] ParseImageUrlsFromHtml(HtmlDocument document) private string[] ParseImageUrlsFromHtml(HtmlDocument document)

View File

@ -222,9 +222,6 @@ public class Mangasee : MangaConnector
foreach(HtmlNode galleryImage in images) foreach(HtmlNode galleryImage in images)
urls.Add(galleryImage.GetAttributeValue("src", "")); urls.Add(galleryImage.GetAttributeValue("src", ""));
string comicInfoPath = Path.GetTempFileName(); return DownloadChapterImages(urls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken);
File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());
return DownloadChapterImages(urls.ToArray(), chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, progressToken:progressToken);
} }
} }

View File

@ -149,19 +149,21 @@ public class Mangaworld: MangaConnector
document.DocumentNode.SelectSingleNode( document.DocumentNode.SelectSingleNode(
"//div[contains(concat(' ',normalize-space(@class),' '),'chapters-wrapper')]"); "//div[contains(concat(' ',normalize-space(@class),' '),'chapters-wrapper')]");
Regex volumeRex = new(@"[Vv]olume ([0-9]+).*");
Regex chapterRex = new(@"[Cc]apitolo ([0-9]+).*");
Regex idRex = new(@".*\/read\/([a-z0-9]+)(?:[?\/].*)?");
if (chaptersWrapper.Descendants("div").Any(descendant => descendant.HasClass("volume-element"))) if (chaptersWrapper.Descendants("div").Any(descendant => descendant.HasClass("volume-element")))
{ {
foreach (HtmlNode volNode in document.DocumentNode.SelectNodes("//div[contains(concat(' ',normalize-space(@class),' '),'volume-element')]")) foreach (HtmlNode volNode in document.DocumentNode.SelectNodes("//div[contains(concat(' ',normalize-space(@class),' '),'volume-element')]"))
{ {
string volume = Regex.Match(volNode.SelectNodes("div").First(node => node.HasClass("volume")).SelectSingleNode("p").InnerText, string volume = volumeRex.Match(volNode.SelectNodes("div").First(node => node.HasClass("volume")).SelectSingleNode("p").InnerText).Groups[1].Value;
@"[Vv]olume ([0-9]+).*").Groups[1].Value;
foreach (HtmlNode chNode in volNode.SelectNodes("div").First(node => node.HasClass("volume-chapters")).SelectNodes("div")) foreach (HtmlNode chNode in volNode.SelectNodes("div").First(node => node.HasClass("volume-chapters")).SelectNodes("div"))
{ {
string number = Regex.Match(chNode.SelectSingleNode("a").SelectSingleNode("span").InnerText, string number = chapterRex.Match(chNode.SelectSingleNode("a").SelectSingleNode("span").InnerText).Groups[1].Value;
@"[Cc]apitolo ([0-9]+).*").Groups[1].Value;
string url = chNode.SelectSingleNode("a").GetAttributeValue("href", ""); string url = chNode.SelectSingleNode("a").GetAttributeValue("href", "");
ret.Add(new Chapter(manga, null, volume, number, url)); string id = idRex.Match(chNode.SelectSingleNode("a").GetAttributeValue("href", "")).Groups[1].Value;
ret.Add(new Chapter(manga, null, volume, number, url, id));
} }
} }
} }
@ -169,10 +171,10 @@ public class Mangaworld: MangaConnector
{ {
foreach (HtmlNode chNode in chaptersWrapper.SelectNodes("div").Where(node => node.HasClass("chapter"))) foreach (HtmlNode chNode in chaptersWrapper.SelectNodes("div").Where(node => node.HasClass("chapter")))
{ {
string number = Regex.Match(chNode.SelectSingleNode("a").SelectSingleNode("span").InnerText, string number = chapterRex.Match(chNode.SelectSingleNode("a").SelectSingleNode("span").InnerText).Groups[1].Value;
@"[Cc]apitolo ([0-9]+).*").Groups[1].Value;
string url = chNode.SelectSingleNode("a").GetAttributeValue("href", ""); string url = chNode.SelectSingleNode("a").GetAttributeValue("href", "");
ret.Add(new Chapter(manga, null, null, number, url)); string id = idRex.Match(chNode.SelectSingleNode("a").GetAttributeValue("href", "")).Groups[1].Value;
ret.Add(new Chapter(manga, null, null, number, url, id));
} }
} }
@ -207,10 +209,7 @@ public class Mangaworld: MangaConnector
string[] imageUrls = ParseImageUrlsFromHtml(requestResult.htmlDocument); string[] imageUrls = ParseImageUrlsFromHtml(requestResult.htmlDocument);
string comicInfoPath = Path.GetTempFileName(); return DownloadChapterImages(imageUrls, chapter, RequestType.MangaImage,"https://www.mangaworld.bz/", progressToken:progressToken);
File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());
return DownloadChapterImages(imageUrls, chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, "https://www.mangaworld.bz/", progressToken:progressToken);
} }
private string[] ParseImageUrlsFromHtml(HtmlDocument document) private string[] ParseImageUrlsFromHtml(HtmlDocument document)

View File

@ -190,9 +190,6 @@ public class ManhuaPlus : MangaConnector
HtmlNode[] images = document.DocumentNode.SelectNodes("//a[contains(concat(' ',normalize-space(@class),' '),' readImg ')]/img").ToArray(); HtmlNode[] images = document.DocumentNode.SelectNodes("//a[contains(concat(' ',normalize-space(@class),' '),' readImg ')]/img").ToArray();
List<string> urls = images.Select(node => node.GetAttributeValue("src", "")).ToList(); List<string> urls = images.Select(node => node.GetAttributeValue("src", "")).ToList();
string comicInfoPath = Path.GetTempFileName(); return DownloadChapterImages(urls.ToArray(), chapter, RequestType.MangaImage, progressToken:progressToken);
File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());
return DownloadChapterImages(urls.ToArray(), chapter.GetArchiveFilePath(), RequestType.MangaImage, comicInfoPath, progressToken:progressToken);
} }
} }