Merge pull request 'cuttingedge' (!58) from cuttingedge into master

Reviewed-on: #58
glax 2023-10-12 20:48:15 +02:00
commit f01a786e59
7 changed files with 254 additions and 15 deletions

View File

@@ -0,0 +1,23 @@
name: New Connector Request
description: Request a new site to be added
title: "[New Connector]: "
labels: ["New Connector"]
body:
  - type: input
    attributes:
      label: Website-Link
      placeholder: https://
    validations:
      required: true
  - type: checkboxes
    attributes:
      label: Is the Website free to access?
      description: We can't support pay-to-use sites.
      options:
        - label: The Website is freely accessible.
          required: true
  - type: textarea
    attributes:
      label: Anything else?
    validations:
      required: false

View File

@@ -21,11 +21,6 @@
 </p>
 </div>
-# Important for existing users:
-Tranga just had a complete rewrite. Old settings, tasks, etc. will not work.
-~~For the time being the docker-tag `latest` will be the old, discontinued branch.~~ `cuttingedge` is the active branch and
-will soon be moved to the `latest` branch (This is now the case). There is no migration-tool. Make a backup of old files.
 <!-- TABLE OF CONTENTS -->
 <details>
 <summary>Table of Contents</summary>
@@ -57,10 +52,12 @@ will soon be moved to the `latest` branch (This is now the case). There is no mi
 Tranga can download Chapters and Metadata from "Scanlation" sites such as
-- [MangaDex.org](https://mangadex.org/)
-- [Manganato.com](https://manganato.com/)
-- [Mangasee](https://mangasee123.com/)
-- [MangaKatana](https://mangakatana.com)
+- [MangaDex.org](https://mangadex.org/) (Multilingual)
+- [Manganato.com](https://manganato.com/) (en)
+- [Mangasee.com](https://mangasee123.com/) (en)
+- [MangaKatana.com](https://mangakatana.com) (en)
+- [Mangaworld.bz](https://www.mangaworld.bz/) (it)
+- [Bato.to](https://bato.to/v3x) (en)
 - ❓ Open an [issue](https://github.com/C9Glax/tranga/issues)
 and trigger an scan with [Komga](https://komga.org/) and [Kavita](https://www.kavitareader.com/).

View File

@@ -162,14 +162,21 @@ public class JobBoss : GlobalBase
                 new JobJsonConverter(this, new MangaConnectorJsonConverter(this, connectors)))!;
             this.jobs.Add(job);
         }
         //Connect jobs to parent-jobs and add Publications to cache
         foreach (Job job in this.jobs)
         {
             this.jobs.FirstOrDefault(jjob => jjob.id == job.parentJobId)?.AddSubJob(job);
-            if(job is DownloadNewChapters dncJob)
+            if (job is DownloadNewChapters dncJob)
                 cachedPublications.Add(dncJob.manga);
         }
+        HashSet<string> coverFileNames = cachedPublications.Select(manga => manga.coverFileNameInCache!).ToHashSet();
+        foreach (string fileName in Directory.GetFiles(settings.coverImageCache))
+        {
+            if(!coverFileNames.Any(existingManga => fileName.Contains(existingManga)))
+                File.Delete(fileName);
+        }
     }

     private void UpdateJobFile(Job job)
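The block added above prunes cached cover images that no longer belong to any cached publication. A minimal standalone sketch of the same idea, using a hypothetical cache directory and made-up cover names rather than Tranga's actual settings object:

```csharp
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

// Hypothetical stand-ins for settings.coverImageCache and the covers referenced by cached publications.
string coverImageCache = "covers-cache";
Directory.CreateDirectory(coverImageCache); // ensure the directory exists for this sketch
HashSet<string> coverFileNames = new() { "638323455123456789.jpg", "638323455987654321.png" };

foreach (string fileName in Directory.GetFiles(coverImageCache))
{
    // A file is kept only if some referenced cover name appears in its path; otherwise it is orphaned.
    if (!coverFileNames.Any(existing => fileName.Contains(existing)))
        File.Delete(fileName);
}
```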

View File

@@ -0,0 +1,209 @@
using System.Net;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
using Tranga.Jobs;

namespace Tranga.MangaConnectors;

public class Bato : MangaConnector
{
    public Bato(GlobalBase clone) : base(clone, "Bato")
    {
        this.downloadClient = new HttpDownloadClient(clone, new Dictionary<byte, int>()
        {
            {1, 60}
        });
    }

    public override Manga[] GetManga(string publicationTitle = "")
    {
        Log($"Searching Publications. Term=\"{publicationTitle}\"");
        string sanitizedTitle = string.Join(' ', Regex.Matches(publicationTitle, "[A-z]*").Where(m => m.Value.Length > 0)).ToLower();
        string requestUrl = $"https://bato.to/v3x-search?word={sanitizedTitle}&lang=en";
        DownloadClient.RequestResult requestResult =
            downloadClient.MakeRequest(requestUrl, 1);
        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300)
            return Array.Empty<Manga>();

        if (requestResult.htmlDocument is null)
        {
            Log($"Failed to retrieve site");
            return Array.Empty<Manga>();
        }

        Manga[] publications = ParsePublicationsFromHtml(requestResult.htmlDocument);
        Log($"Retrieved {publications.Length} publications. Term=\"{publicationTitle}\"");
        return publications;
    }

    public override Manga? GetMangaFromUrl(string url)
    {
        DownloadClient.RequestResult requestResult =
            downloadClient.MakeRequest(url, 1);
        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300)
            return null;
        if (requestResult.htmlDocument is null)
        {
            Log($"Failed to retrieve site");
            return null;
        }
        return ParseSinglePublicationFromHtml(requestResult.htmlDocument, url.Split('/')[^1]);
    }

    private Manga[] ParsePublicationsFromHtml(HtmlDocument document)
    {
        HtmlNode mangaList = document.DocumentNode.SelectSingleNode("//div[@data-hk='0-0-2']");
        if (!mangaList.ChildNodes.Any(node => node.Name == "div"))
            return Array.Empty<Manga>();

        List<string> urls = mangaList.ChildNodes
            .Select(node => $"https://bato.to{node.Descendants("div").First().FirstChild.GetAttributeValue("href", "")}").ToList();

        HashSet<Manga> ret = new();
        foreach (string url in urls)
        {
            Manga? manga = GetMangaFromUrl(url);
            if (manga is not null)
                ret.Add((Manga)manga);
        }

        return ret.ToArray();
    }

    private Manga ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId)
    {
        HtmlNode infoNode = document.DocumentNode.SelectSingleNode("/html/body/div/main/div[1]/div[2]");

        string sortName = infoNode.Descendants("h3").First().InnerText;
        string description = document.DocumentNode
            .SelectSingleNode("//div[contains(concat(' ',normalize-space(@class),' '),'prose')]").InnerText;

        string[] altTitlesList = infoNode.ChildNodes[1].ChildNodes[2].InnerText.Split('/');
        int i = 0;
        Dictionary<string, string> altTitles = altTitlesList.ToDictionary(s => i++.ToString(), s => s);

        string posterUrl = document.DocumentNode.SelectNodes("//img")
            .First(child => child.GetAttributeValue("data-hk", "") == "0-1-0").GetAttributeValue("src", "").Replace("&amp;", "&");
        string coverFileNameInCache = SaveCoverImageToCache(posterUrl, 1);

        List<HtmlNode> genreNodes = document.DocumentNode.SelectSingleNode("//b[text()='Genres:']/..").SelectNodes("span").ToList();
        string[] tags = genreNodes.Select(node => node.FirstChild.InnerText).ToArray();

        List<HtmlNode> authorsNodes = infoNode.ChildNodes[1].ChildNodes[3].Descendants("a").ToList();
        List<string> authors = authorsNodes.Select(node => node.InnerText.Replace("amp;", "")).ToList();

        HtmlNode? originalLanguageNode = document.DocumentNode.SelectSingleNode("//span[text()='Tr From']/..");
        string originalLanguage = originalLanguageNode is not null ? originalLanguageNode.LastChild.InnerText : "";

        if (!int.TryParse(
                document.DocumentNode.SelectSingleNode("//span[text()='Original Publication:']/..").LastChild.InnerText.Split('-')[0],
                out int year))
            year = DateTime.Now.Year;

        string status = document.DocumentNode.SelectSingleNode("//span[text()='Original Publication:']/..")
            .ChildNodes[2].InnerText;

        Manga manga = new (sortName, authors, description, altTitles, tags, posterUrl, coverFileNameInCache, new Dictionary<string, string>(),
            year, originalLanguage, status, publicationId);
        cachedPublications.Add(manga);
        return manga;
    }

    public override Chapter[] GetChapters(Manga manga, string language="en")
    {
        Log($"Getting chapters {manga}");
        string requestUrl = $"https://bato.to/title/{manga.publicationId}";
        // Leaving this in for verification if the page exists
        DownloadClient.RequestResult requestResult =
            downloadClient.MakeRequest(requestUrl, 1);
        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300)
            return Array.Empty<Chapter>();

        //Return Chapters ordered by Chapter-Number
        List<Chapter> chapters = ParseChaptersFromHtml(manga, requestUrl);
        Log($"Got {chapters.Count} chapters. {manga}");
        return chapters.OrderBy(chapter => Convert.ToSingle(chapter.chapterNumber, numberFormatDecimalPoint)).ToArray();
    }

    private List<Chapter> ParseChaptersFromHtml(Manga manga, string mangaUrl)
    {
        // Using HtmlWeb will include the chapters since they are loaded with js
        HtmlWeb web = new();
        HtmlDocument document = web.Load(mangaUrl);

        List<Chapter> ret = new();

        HtmlNode chapterList =
            document.DocumentNode.SelectSingleNode("/html/body/div/main/div[3]/astro-island/div/div[2]/div/div/astro-slot");

        Regex chapterNumberRex = new(@"Chapter ([0-9\.]+)");

        foreach (HtmlNode chapterInfo in chapterList.SelectNodes("div"))
        {
            HtmlNode infoNode = chapterInfo.FirstChild.FirstChild;
            string fullString = infoNode.InnerText;

            string? volumeNumber = null;
            string chapterNumber = chapterNumberRex.Match(fullString).Groups[1].Value;
            string chapterName = chapterNumber;
            string url = $"https://bato.to{infoNode.GetAttributeValue("href", "")}?load=2";
            ret.Add(new Chapter(manga, chapterName, volumeNumber, chapterNumber, url));
        }

        return ret;
    }

    public override HttpStatusCode DownloadChapter(Chapter chapter, ProgressToken? progressToken = null)
    {
        if (progressToken?.cancellationRequested ?? false)
        {
            progressToken.Cancel();
            return HttpStatusCode.RequestTimeout;
        }

        Manga chapterParentManga = chapter.parentManga;
        Log($"Retrieving chapter-info {chapter} {chapterParentManga}");
        string requestUrl = chapter.url;
        // Leaving this in to check if the page exists
        DownloadClient.RequestResult requestResult =
            downloadClient.MakeRequest(requestUrl, 1);
        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300)
        {
            progressToken?.Cancel();
            return requestResult.statusCode;
        }

        string[] imageUrls = ParseImageUrlsFromHtml(requestUrl);

        string comicInfoPath = Path.GetTempFileName();
        File.WriteAllText(comicInfoPath, chapter.GetComicInfoXmlString());

        return DownloadChapterImages(imageUrls, chapter.GetArchiveFilePath(settings.downloadLocation), 1, comicInfoPath, "https://mangakatana.com/", progressToken:progressToken);
    }

    private string[] ParseImageUrlsFromHtml(string mangaUrl)
    {
        DownloadClient.RequestResult requestResult =
            downloadClient.MakeRequest(mangaUrl, 1);
        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300)
        {
            return Array.Empty<string>();
        }
        if (requestResult.htmlDocument is null)
        {
            Log($"Failed to retrieve site");
            return Array.Empty<string>();
        }

        HtmlDocument document = requestResult.htmlDocument;

        HtmlNode images = document.DocumentNode.SelectNodes("//astro-island").First(node =>
            node.GetAttributeValue("component-url", "").Contains("/_astro/ImageList."));

        string weirdString = images.OuterHtml;
        string weirdString2 = Regex.Match(weirdString, @"props=\""(.*)}\""").Groups[1].Value;
        string[] urls = Regex.Matches(weirdString2, @"https:\/\/[A-z\-0-9\.\?\&\;\=\/]*").Select(m => m.Value.Replace("\\&quot;]", "").Replace("amp;", "")).ToArray();

        return urls;
    }
}
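For illustration, the `Chapter ([0-9\.]+)` regex used in ParseChaptersFromHtml above can be exercised on its own; the sample strings below are made up and only mimic the kind of text found in Bato.to's chapter list:

```csharp
using System;
using System.Text.RegularExpressions;

// Hypothetical chapter-title strings; real values come from the chapter-list markup on Bato.to.
string[] samples = { "Volume 2 Chapter 15.5 : New Year", "Chapter 3" };
Regex chapterNumberRex = new(@"Chapter ([0-9\.]+)");

foreach (string s in samples)
{
    Match m = chapterNumberRex.Match(s);
    // Group 1 captures only the numeric part, e.g. "15.5" and "3".
    Console.WriteLine(m.Success ? m.Groups[1].Value : "<no match>");
}
```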

View File

@@ -213,8 +213,7 @@ public abstract class MangaConnector : GlobalBase
         //Download all Images to temporary Folder
         foreach (string imageUrl in imageUrls)
         {
-            string[] split = imageUrl.Split('.');
-            string extension = split[^1];
+            string extension = imageUrl.Split('.')[^1].Split('?')[0];
             Log($"Downloading image {chapter + 1:000}/{imageUrls.Length:000}"); //TODO
             HttpStatusCode status = DownloadImage(imageUrl, Path.Join(tempFolder, $"{chapter++}.{extension}"), requestType, referrer);
             Log($"{saveArchiveFilePath} {chapter + 1:000}/{imageUrls.Length:000} {status}");
@@ -247,7 +246,8 @@
     protected string SaveCoverImageToCache(string url, byte requestType)
     {
-        string filename = url.Split('/')[^1].Split('?')[0];
+        string filetype = url.Split('/')[^1].Split('?')[0].Split('.')[^1];
+        string filename = $"{DateTime.Now.Ticks.ToString()}.{filetype}";
         string saveImagePath = Path.Join(settings.coverImageCache, filename);
         if (File.Exists(saveImagePath))
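The two changes above alter how image extensions and cover-cache filenames are derived (query strings are now stripped, and cover files get tick-based names). A small standalone sketch of that string handling, using a hypothetical URL that is not from the codebase:

```csharp
using System;

// Hypothetical image/cover URL, used only to illustrate the parsing above.
string url = "https://example.org/covers/poster.jpg?width=300&v=2";

// Image extension: last dot-separated segment, with any query string stripped.
string extension = url.Split('.')[^1].Split('?')[0];                      // "jpg"

// Cover-cache name: file type taken from the URL, filename from the current tick count.
string filetype = url.Split('/')[^1].Split('?')[0].Split('.')[^1];        // "jpg"
string filename = $"{DateTime.Now.Ticks.ToString()}.{filetype}";          // e.g. "638323455123456789.jpg"

Console.WriteLine($"{extension} / {filename}");
```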

View File

@@ -34,6 +34,8 @@ public class MangaConnectorJsonConverter : JsonConverter
                 return this._connectors.First(c => c is Mangasee);
             case "Mangaworld":
                 return this._connectors.First(c => c is Mangaworld);
+            case "Bato":
+                return this._connectors.First(c => c is Bato);
         }
         throw new Exception();

View File

@@ -22,7 +22,8 @@ public partial class Tranga : GlobalBase
             new Mangasee(this),
             new MangaDex(this),
             new MangaKatana(this),
-            new Mangaworld(this)
+            new Mangaworld(this),
+            new Bato(this)
         };
         jobBoss = new(this, this._connectors);
         StartJobBoss();