// Web-view residue captured with this file (not part of the original source):
// 190 lines
// 7.3 KiB
// C#
// Raw Normal View History
//
// 2024-12-14 21:53:29 +01:00
using System.Text.RegularExpressions;
using API.MangaDownloadClients;
using HtmlAgilityPack;
namespace API.Schema.MangaConnectors;
/// <summary>
/// Connector that scrapes series, chapter and page-image information from asuracomic.net.
/// All pages are fetched through a <see cref="ChromiumDownloadClient"/>.
/// </summary>
public class AsuraToon : MangaConnector
{
    private const string AsuraBaseUrl = "https://asuracomic.net";

    // Year reported when no release year can be read from the series page.
    private const uint FallbackYear = 2000;

    // ASCII letters only. The former class "[A-z]" also matched '[', '\', ']', '^', '_' and '`',
    // because those characters sit between 'Z' and 'a' in ASCII.
    private static readonly Regex TitleWordRex = new(@"[A-Za-z]+");
    private static readonly Regex PageTitleRex = new(@"(.*) - Asura Scans");
    private static readonly Regex ChapterInfoRex = new(@"Chapter ([0-9]+)(.*)?");

    public AsuraToon() : base("AsuraToon", ["en"], [AsuraBaseUrl])
    {
        this.downloadClient = new ChromiumDownloadClient();
    }

    /// <summary>
    /// Searches the site for series matching <paramref name="publicationTitle"/>.
    /// </summary>
    /// <param name="publicationTitle">Free-text title; only its ASCII-letter words are sent to the site.</param>
    /// <returns>One entry per series found; empty when the request fails or returns no document.</returns>
    public override (Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?)[] GetManga(string publicationTitle = "")
    {
        IEnumerable<string> words = TitleWordRex.Matches(publicationTitle).Select(m => m.Value);
        string sanitizedTitle = string.Join(' ', words).ToLowerInvariant();
        // Escape the term so the spaces between words form a valid query string.
        string requestUrl = $"{AsuraBaseUrl}/series?name={Uri.EscapeDataString(sanitizedTitle)}";

        RequestResult requestResult = downloadClient.MakeRequest(requestUrl, RequestType.Default);
        if (!IsSuccessful(requestResult) || requestResult.htmlDocument is null)
        {
            return [];
        }

        return ParsePublicationsFromHtml(requestResult.htmlDocument);
    }

    /// <summary>
    /// Loads a single series by its site id (the last path segment of its series URL).
    /// </summary>
    /// <returns>The parsed series, or <c>null</c> when the page could not be loaded.</returns>
    public override (Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?)? GetMangaFromId(string publicationId)
    {
        return GetMangaFromUrl($"{AsuraBaseUrl}/series/{publicationId}");
    }

    /// <summary>
    /// Loads a single series from its full URL.
    /// </summary>
    /// <param name="url">Series page URL; its last non-empty path segment is used as the publication id.</param>
    /// <returns>The parsed series, or <c>null</c> when the request fails or returns no document.</returns>
    public override (Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?)? GetMangaFromUrl(string url)
    {
        RequestResult requestResult = downloadClient.MakeRequest(url, RequestType.MangaInfo);
        if (!IsSuccessful(requestResult) || requestResult.htmlDocument is null)
        {
            return null;
        }

        // Trim a trailing '/' first, otherwise the last segment (and therefore the id) would be empty.
        string publicationId = url.TrimEnd('/').Split('/')[^1];
        return ParseSinglePublicationFromHtml(requestResult.htmlDocument, publicationId, url);
    }

    /// <summary>
    /// Returns whether the status code of <paramref name="result"/> lies in the 2xx range.
    /// </summary>
    private static bool IsSuccessful(RequestResult result)
    {
        int code = (int)result.statusCode;
        return code is >= 200 and < 300;
    }

    /// <summary>
    /// Collects every series link on a search-result page and loads each linked series.
    /// </summary>
    /// <returns>The series that could be loaded; links whose page fails to load are skipped.</returns>
    private (Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?)[] ParsePublicationsFromHtml(HtmlDocument document)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        HtmlNodeCollection? mangaList = document.DocumentNode.SelectNodes("//a[starts-with(@href,'series')]");
        if (mangaList is null || mangaList.Count < 1)
        {
            return [];
        }

        // De-duplicate so a series linked more than once is fetched and returned only once.
        IEnumerable<string> urls = mangaList
            .Select(a => $"{AsuraBaseUrl}/{a.GetAttributeValue("href", "")}")
            .Distinct();

        List<(Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?)> ret = new();
        foreach (string url in urls)
        {
            if (GetMangaFromUrl(url) is { } manga)
            {
                ret.Add(manga);
            }
        }

        return ret.ToArray();
    }

    /// <summary>
    /// Extracts tags, release status, cover, title, synopsis, authors/artists and year from a series page.
    /// Sections that are missing from the page fall back to empty or default values instead of throwing.
    /// </summary>
    /// <param name="document">Parsed series page.</param>
    /// <param name="publicationId">Site id of the series.</param>
    /// <param name="websiteUrl">URL the page was loaded from.</param>
    private (Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?) ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl)
    {
        HtmlNode root = document.DocumentNode;
        string? originalLanguage = null;

        HtmlNodeCollection? genreNodes = root.SelectNodes("//h3[text()='Genres']/../div/button");
        List<MangaTag> mangaTags = genreNodes?.Select(b => new MangaTag(b.InnerText)).ToList()
                                   ?? new List<MangaTag>();

        HtmlNode? statusNode = root.SelectSingleNode("//h3[text()='Status']/../h3[2]");
        string statusText = statusNode?.InnerText.Trim().ToLowerInvariant() ?? "";
        MangaReleaseStatus releaseStatus = statusText switch
        {
            "ongoing" => MangaReleaseStatus.Continuing,
            "hiatus" => MangaReleaseStatus.OnHiatus,
            "completed" => MangaReleaseStatus.Completed,
            "dropped" => MangaReleaseStatus.Cancelled,
            "season end" => MangaReleaseStatus.Continuing,
            "coming soon" => MangaReleaseStatus.Unreleased,
            _ => MangaReleaseStatus.Unreleased
        };

        HtmlNode? coverNode = root.SelectSingleNode("//img[@alt='poster']");
        string coverUrl = coverNode?.GetAttributeValue("src", "") ?? "";

        // The series name is the part of the <title> text in front of " - Asura Scans".
        HtmlNode? titleNode = root.SelectSingleNode("//title");
        string sortName = titleNode is null
            ? ""
            : PageTitleRex.Match(titleNode.InnerText).Groups[1].Value;

        HtmlNode? descriptionNode = root.SelectSingleNode("//h3[starts-with(text(),'Synopsis')]/../span");
        string description = descriptionNode?.InnerText ?? "";

        // Authors and artists end up in one list; "_" entries are excluded by the XPath itself.
        HtmlNodeCollection? authorNodes = root.SelectNodes("//h3[text()='Author']/../h3[not(text()='Author' or text()='_')]");
        HtmlNodeCollection? artistNodes = root.SelectNodes("//h3[text()='Artist']/../h3[not(text()='Artist' or text()='_')]");
        IEnumerable<string> authorNames = authorNodes?.Select(a => a.InnerText) ?? Enumerable.Empty<string>();
        IEnumerable<string> artistNames = artistNodes?.Select(a => a.InnerText) ?? Enumerable.Empty<string>();
        List<Author> authors = authorNames.Concat(artistNames).Select(name => new Author(name)).ToList();

        // The year is the last space-separated token of the heading next to the first matching chapter link.
        // NOTE(review): contains(@href, 'chapter/1') also matches chapter/10, chapter/11, ... - confirm that
        // the first match in document order is the intended chapter.
        HtmlNode? firstChapterNode = root.SelectSingleNode("//a[contains(@href, 'chapter/1')]/../following-sibling::h3");
        string? yearText = firstChapterNode?.InnerText.Split(' ')[^1];
        uint year = uint.TryParse(yearText, out uint parsedYear) ? parsedYear : FallbackYear;

        Manga manga = new (publicationId, sortName, description, websiteUrl, coverUrl, null, year,
            originalLanguage, releaseStatus, -1,
            this,
            authors,
            mangaTags,
            [],
            []);

        return (manga, authors, mangaTags, [], []);
    }

    /// <summary>
    /// Lists the chapters of <paramref name="manga"/>, ordered by the default ordering of <see cref="Chapter"/>.
    /// </summary>
    /// <returns>The ordered chapters; empty when the series page cannot be loaded.</returns>
    public override Chapter[] GetChapters(Manga manga, string language="en")
    {
        string requestUrl = $"{AsuraBaseUrl}/series/{manga.MangaId}";
        // ParseChaptersFromHtml loads the page and validates the response itself, so no separate
        // existence check is made here; doing both would request the same page twice.
        List<Chapter> chapters = ParseChaptersFromHtml(manga, requestUrl);
        return chapters.Order().ToArray();
    }

    /// <summary>
    /// Loads the series page at <paramref name="mangaUrl"/> and builds a chapter for every chapter link on it.
    /// </summary>
    /// <returns>The chapters found; empty when the page cannot be loaded or contains no chapter links.</returns>
    private List<Chapter> ParseChaptersFromHtml(Manga manga, string mangaUrl)
    {
        RequestResult result = downloadClient.MakeRequest(mangaUrl, RequestType.Default);
        if (!IsSuccessful(result) || result.htmlDocument is null)
        {
            return new List<Chapter>();
        }

        // SelectNodes yields null (not an empty collection) when nothing matches.
        HtmlNodeCollection? chapterUrlNodes = result.htmlDocument.DocumentNode.SelectNodes("//a[contains(@href, '/chapter/')]");
        if (chapterUrlNodes is null)
        {
            return new List<Chapter>();
        }

        List<Chapter> ret = new();
        foreach (HtmlNode chapterInfo in chapterUrlNodes)
        {
            Match match = ChapterInfoRex.Match(chapterInfo.InnerText);
            if (!match.Success)
            {
                // Link text carries no "Chapter <number>", so there is nothing to build a chapter from.
                continue;
            }

            string chapterUrl = chapterInfo.GetAttributeValue("href", "");
            string chapterNumber = match.Groups[1].Value;
            // A remainder of at most one character after the number is not treated as a name.
            string? chapterName = match.Groups[2].Success && match.Groups[2].Length > 1 ? match.Groups[2].Value : null;
            string url = $"{AsuraBaseUrl}/series/{chapterUrl}";
            try
            {
                ret.Add(new Chapter(manga, url, chapterNumber, null, chapterName));
            }
            catch (Exception)
            {
                // Best effort: an entry that cannot be turned into a Chapter is skipped
                // so that one bad link does not discard the rest of the list.
            }
        }

        return ret;
    }

    /// <summary>
    /// Loads the page of <paramref name="chapter"/> and returns the source URLs of its page images.
    /// </summary>
    /// <returns>The image URLs; empty when the chapter page cannot be loaded.</returns>
    internal override string[] GetChapterImageUrls(Chapter chapter)
    {
        string requestUrl = chapter.Url;
        RequestResult requestResult = downloadClient.MakeRequest(requestUrl, RequestType.Default);
        if (!IsSuccessful(requestResult) || requestResult.htmlDocument is null)
        {
            return [];
        }

        return ParseImageUrlsFromHtml(requestResult.htmlDocument);
    }

    /// <summary>
    /// Returns the <c>src</c> of every image whose <c>alt</c> text contains "chapter page".
    /// </summary>
    private string[] ParseImageUrlsFromHtml(HtmlDocument document)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        HtmlNodeCollection? images = document.DocumentNode.SelectNodes("//img[contains(@alt, 'chapter page')]");
        if (images is null)
        {
            return [];
        }

        return images.Select(i => i.GetAttributeValue("src", "")).ToArray();
    }
}