From 07716625d6d325887dae8dba2e198682bd11cdc7 Mon Sep 17 00:00:00 2001 From: glax Date: Mon, 8 Sep 2025 18:43:55 +0200 Subject: [PATCH] Update to fit upstream --- API/MangaConnectors/Mangaworld.cs | 507 +++++++++++++++++++++ API/Schema/MangaConnectors/Mangaworld.cs | 542 ----------------------- API/Tranga.cs | 2 +- README.md | 3 +- 4 files changed, 510 insertions(+), 544 deletions(-) create mode 100644 API/MangaConnectors/Mangaworld.cs delete mode 100644 API/Schema/MangaConnectors/Mangaworld.cs diff --git a/API/MangaConnectors/Mangaworld.cs b/API/MangaConnectors/Mangaworld.cs new file mode 100644 index 0000000..a6d450c --- /dev/null +++ b/API/MangaConnectors/Mangaworld.cs @@ -0,0 +1,507 @@ +using System.Text.RegularExpressions; +using API.MangaDownloadClients; +using API.Schema.MangaContext; +using HtmlAgilityPack; +// ReSharper disable StringLiteralTypo + +namespace API.MangaConnectors; + +public sealed class Mangaworld : MangaConnector +{ + public Mangaworld() : base( + "Mangaworld", + ["it"], + [ + "mangaworld.cx","www.mangaworld.cx", + "mangaworld.bz","www.mangaworld.bz", + "mangaworld.fun","www.mangaworld.fun", + "mangaworld.ac","www.mangaworld.ac" + ], + "https://www.mangaworld.cx/public/assets/seo/favicon-96x96.png?v=3" + ) + { + downloadClient = new HttpDownloadClient(); + } + + // ============================ SEARCH ============================ + public override (Manga, MangaConnectorId)[] SearchManga(string mangaSearchName) + { + Uri baseUri = new ("https://www.mangaworld.cx/"); + Uri searchUrl = new (baseUri, "archive?keyword=" + Uri.EscapeDataString(mangaSearchName)); + + RequestResult res = downloadClient.MakeRequest(searchUrl.ToString(), RequestType.Default); + if ((int)res.statusCode < 200 || (int)res.statusCode >= 300) + return []; + + using StreamReader sr = new (res.result); + string html = sr.ReadToEnd(); + + HtmlDocument doc = new (); + doc.LoadHtml(html); + + HtmlNodeCollection? 
anchors = doc.DocumentNode.SelectNodes("//a[@href and contains(@href,'/manga/')]"); + // ReSharper disable once ConditionIsAlwaysTrueOrFalseAccordingToNullableAPIContract Apparently it does return null. Ask AgilityPack why the return type isnt marked as such... + if (anchors is null || anchors.Count < 1) + return []; + + List<(Manga, MangaConnectorId)> list = []; + + foreach (HtmlNode a in anchors) + { + string href = a.GetAttributeValue("href", ""); + if (string.IsNullOrEmpty(href)) + continue; + + string canonical = new Uri(baseUri, href).ToString(); + + (Manga, MangaConnectorId)? manga = GetMangaFromUrl(canonical); + if(manga is null) + continue; + + list.Add(((Manga, MangaConnectorId))manga); + } + + return list.ToArray(); + } + + // ======================== URL → Manga =========================== + public override (Manga, MangaConnectorId)? GetMangaFromUrl(string url) + { + Match m = SeriesUrl.Match(url); + if (!m.Success) + return null; + return GetMangaFromId($"{m.Groups["id"].Value}/{m.Groups["slug"].Value}"); + } + + // ======================== ID → Manga ============================ + public override (Manga, MangaConnectorId)? GetMangaFromId(string mangaIdOnSite) + { + string[] parts = mangaIdOnSite.Split('/', 2); + if (parts.Length != 2) + return null; + + string id = parts[0]; + string slug = parts[1]; + + string url = $"https://www.mangaworld.cx/manga/{id}/{slug}/"; + RequestResult res = downloadClient.MakeRequest(url, RequestType.MangaInfo); + if ((int)res.statusCode < 200 || (int)res.statusCode >= 300) + return null; + + using StreamReader sr = new (res.result); + string html = sr.ReadToEnd(); + + HtmlDocument doc = new (); + doc.LoadHtml(html); + + string title = + doc.DocumentNode.SelectSingleNode("//meta[@property='og:title']")?.GetAttributeValue("content", null) + ?? doc.DocumentNode.SelectSingleNode("//h1")?.InnerText?.Trim() + ?? 
slug.Replace('-', ' '); + + title = CleanTitleSuffix(title); + + string cover = + ExtractOgImage(html, new Uri(url)) + ?? doc.DocumentNode.SelectSingleNode("//div[contains(@class,'cover') or contains(@class,'poster')]//img[@src or @data-src]")?.GetAttributeValue("data-src", null) + ?? doc.DocumentNode.SelectSingleNode("//div[contains(@class,'cover') or contains(@class,'poster')]//img[@src or @data-src]")?.GetAttributeValue("src", null) + ?? string.Empty; + + if (!string.IsNullOrEmpty(cover)) + cover = MakeAbsoluteUrl(new Uri(url), cover); + + string description = + doc.DocumentNode.SelectSingleNode("//meta[@name='description']")?.GetAttributeValue("content", null) + ?? HtmlEntity.DeEntitize( + doc.DocumentNode.SelectSingleNode("//div[contains(@class,'description') or contains(@class,'trama')]") + ?.InnerText ?? string.Empty + ).Trim(); + + // === STATO (scheda dettaglio) === + MangaReleaseStatus status = MangaReleaseStatus.Unreleased; + string? detailRawStatus = ExtractItalianStatus(doc); + if (!string.IsNullOrWhiteSpace(detailRawStatus)) + status = MapItalianStatus(detailRawStatus); + + Manga m = new ( + HtmlEntity.DeEntitize(title).Trim(), + description, + cover, + status, + [], + [], + [], + [], + originalLanguage: "it"); + MangaConnectorId mcId = new (m, + this, + $"{id}/{slug}", + $"https://www.mangaworld.cx/manga/{id}/{slug}/"); + m.MangaConnectorIds.Add(mcId); + return (m, mcId); + } + + // ========================== CAPITOLI ============================ + public override (Chapter, MangaConnectorId)[] GetChapters(MangaConnectorId mangaId, string? 
language = null) + { + string[] parts = mangaId.IdOnConnectorSite.Split('/', 2); + if (parts.Length != 2) + return []; + + string id = parts[0]; + string slug = parts[1]; + string seriesUrl = $"https://www.mangaworld.cx/manga/{id}/{slug}/"; + + string html = FetchHtmlWithFallback(seriesUrl, out Uri baseUri); + if (string.IsNullOrEmpty(html)) + return []; + + HtmlDocument doc = new (); + doc.LoadHtml(html); + + List<(Chapter, MangaConnectorId)> chapters = ParseChaptersFromHtml(mangaId.Obj ,doc, baseUri); + + // Ordinamento finale: Volume → Capitolo (numerico) + return chapters + .OrderBy(c => c.Item1, new Chapter.ChapterComparer()) + .ToArray(); + } + + // ===================== IMMAGINI CAPITOLO ======================= + private static readonly Regex ImagesArray = new(@"images\s*=\s*\[(?<arr>.*?)\]", RegexOptions.Singleline | RegexOptions.IgnoreCase); + private static readonly Regex UrlInQuotes = new("\"(https?[^\"\\]]+)\""); + internal override string[] GetChapterImageUrls(MangaConnectorId chapterId) + { + string url = EnsureListStyle(chapterId.WebsiteUrl ?? $"https://www.mangaworld.cx/manga/{chapterId.IdOnConnectorSite}"); + + RequestResult res = downloadClient.MakeRequest(url, RequestType.MangaInfo); + if ((int)res.statusCode < 200 || (int)res.statusCode >= 300) + return []; + + using StreamReader sr = new (res.result); + string html = sr.ReadToEnd(); + + Uri baseUri = new (url); + + HtmlDocument doc = new (); + doc.LoadHtml(html); + + HtmlNodeCollection imageNodes = doc.DocumentNode.SelectNodes("//img[@data-src or @src or @srcset]") ?? 
new HtmlNodeCollection(null); + + IEnumerable<string> fromDom = imageNodes + .SelectMany(i => + { + var list = new List<string>(); + string ds = i.GetAttributeValue("data-src", ""); + string s = i.GetAttributeValue("src", ""); + string ss = i.GetAttributeValue("srcset", ""); + + if (!string.IsNullOrEmpty(ds)) + list.Add(ds); + if (!string.IsNullOrEmpty(s)) + list.Add(s); + if (!string.IsNullOrEmpty(ss)) + { + foreach (string part in ss.Split(',')) + { + string p = part.Trim().Split(' ')[0]; + if (!string.IsNullOrWhiteSpace(p)) + list.Add(p); + } + } + return list; + }) + .Select(x => MakeAbsoluteUrl(baseUri, x)) + .Where(u => + { + string z = u.ToLowerInvariant(); + return z.StartsWith("http") && (z.Contains(".jpg") || z.Contains(".jpeg") || z.Contains(".png") || z.Contains(".webp")); + }); + + Match m = ImagesArray.Match(html); + IEnumerable<string> fromJs = []; + if (m.Success) + { + MatchCollection urls = UrlInQuotes.Matches(m.Groups["arr"].Value); + fromJs = urls.Select(mm => MakeAbsoluteUrl(baseUri, mm.Groups[1].Value)); + } + + List<string> final = new (); + HashSet<string> seen = new (StringComparer.OrdinalIgnoreCase); + foreach (string u in fromDom.Concat(fromJs)) + if (seen.Add(u)) + final.Add(u); + + return final.ToArray(); + } + + // ============================ PARSER CAPITOLI =================== + private static readonly Regex RexVolume = new(@"[Vv]olume\s+([0-9]+)", RegexOptions.Compiled); + private static readonly Regex RexChapter = new(@"(?:\b[Cc]apitolo|\b[Cc]h(?:apter)?)\s*([0-9]+(?:\.[0-9]+)?)", RegexOptions.Compiled); + private static readonly Regex RexChapterId = new(@"manga\/([0-9]+\/[a-z0-9\-]+\/read\/[a-z0-9]+)\/", RegexOptions.Compiled); + + private List<(Chapter, MangaConnectorId)> ParseChaptersFromHtml(Manga manga, HtmlDocument document, Uri baseUri) + { + List<(Chapter, MangaConnectorId)> ret = new (); + + // wrapper principale + HtmlNode? 
chaptersWrapper = document.DocumentNode.SelectSingleNode("//div[contains(@class,'chapters-wrapper')]"); + // layout A: volumi raggruppati + HtmlNodeCollection? volumeElements = document.DocumentNode.SelectNodes("//div[contains(@class,'volume-element')]"); + + // ReSharper disable once ConditionIsAlwaysTrueOrFalseAccordingToNullableAPIContract + if (volumeElements is not null && volumeElements.Count > 0) + { + foreach (HtmlNode volNode in volumeElements) + { + // titolo volume, es. "

<div class="volume"><p>Volume 24</p></div>

" + string volText = volNode.SelectSingleNode(".//div[contains(@class,'volume')]/p")?.InnerText ?? string.Empty; + + int? volumeNumber = null; + Match vm = RexVolume.Match(volText); + if (vm.Success && int.TryParse(vm.Groups[1].Value, out int volParsed)) + volumeNumber = volParsed; + + // capitoli dentro il blocco volume + HtmlNodeCollection chapterNodes = volNode + .SelectSingleNode(".//div[contains(@class,'volume-chapters')]") + ?.SelectNodes(".//div") ?? new HtmlNodeCollection(null); + + foreach (HtmlNode chNode in chapterNodes) + { + HtmlNode? anchor = chNode.SelectSingleNode(".//a[@href]"); + // ReSharper disable once ConditionIsAlwaysTrueOrFalseAccordingToNullableAPIContract + if (anchor is null) + continue; + + string spanText = anchor.SelectSingleNode(".//span")?.InnerText ?? anchor.InnerText ?? string.Empty; + + Match cm = RexChapter.Match(spanText); + if (!cm.Success) + continue; + + string chapterNumber = NormalizeNumber(cm.Groups[1].Value); + string href = anchor.GetAttributeValue("href", ""); + if (string.IsNullOrWhiteSpace(href)) + continue; + + string rel = MakeAbsoluteUrl(baseUri, href); + string ensured = EnsureListStyle(EnsureReaderUrlHasPage(rel)); + + Match idMatch = RexChapterId.Match(ensured); + if(!idMatch.Success) + continue; + string id = idMatch.Groups[1].Value; + + Chapter chapter = new (manga, chapterNumber, volumeNumber); + MangaConnectorId chId = new(chapter, this, id, ensured); + chapter.MangaConnectorIds.Add(chId); + + // title:null per evitare duplicazioni nel filename + ret.Add((chapter, chId)); + } + } + } + else + { + // layout B: lista piatta (niente blocchi volume) → v1: Volume 0 + HtmlNodeCollection chapterNodes = chaptersWrapper?.SelectNodes(".//div[contains(@class,'chapter')]") + ?? document.DocumentNode.SelectNodes("//div[contains(@class,'chapter')]") + ?? new HtmlNodeCollection(null); + + foreach (HtmlNode chNode in chapterNodes) + { + HtmlNode? anchor = chNode.SelectSingleNode(".//a[@href]") ?? 
chNode.SelectSingleNode(".//a"); + if (anchor is null) + continue; + + string spanText = anchor.SelectSingleNode(".//span")?.InnerText ?? anchor.InnerText ?? string.Empty; + + Match cm = RexChapter.Match(spanText); + if (!cm.Success) + continue; + + string chapterNumber = NormalizeNumber(cm.Groups[1].Value); + string href = anchor.GetAttributeValue("href", ""); + if (string.IsNullOrWhiteSpace(href)) + continue; + + string rel = MakeAbsoluteUrl(baseUri, href); + string ensured = EnsureListStyle(EnsureReaderUrlHasPage(rel)); + + Match idMatch = RexChapterId.Match(ensured); + if(!idMatch.Success) + continue; + string id = idMatch.Groups[1].Value; + + // v1 behaviour: senza volumi → Volume 0 + Chapter chapter = new (manga, chapterNumber, null); + MangaConnectorId chId = new(chapter, this, id, ensured); + + ret.Add((chapter, chId)); + } + } + + return ret; + } + + // ============================ HELPERS =========================== + private static readonly Regex SeriesUrl = new(@"https?://[^/]+/manga/(?<id>\d+)/(?<slug>[^/]+)/?", RegexOptions.IgnoreCase); + + private string FetchHtmlWithFallback(string seriesUrl, out Uri baseUri) + { + baseUri = new (seriesUrl); + + // 1) tenta client "Default" + RequestResult res = downloadClient.MakeRequest(seriesUrl, RequestType.Default); + if ((int)res.statusCode >= 200 && (int)res.statusCode < 300) + { + using StreamReader sr = new (res.result); + string html = sr.ReadToEnd(); + if (!LooksLikeChallenge(html)) + return html; + } + + // 2) fallback: client “MangaInfo” (proxy/Flare se configurato) + RequestResult res2 = downloadClient.MakeRequest(seriesUrl, RequestType.MangaInfo); + if ((int)res2.statusCode >= 200 && (int)res2.statusCode < 300) + { + using StreamReader sr2 = new StreamReader(res2.result); + return sr2.ReadToEnd(); + } + + return string.Empty; + } + + private static bool LooksLikeChallenge(string html) + { + if (string.IsNullOrEmpty(html)) return true; + string h = html.ToLowerInvariant(); + return h.Contains("cf-challenge") || 
+ h.Contains("cf-browser-verification") || + h.Contains("just a moment") || + h.Contains("verify you are human") || + h.Contains("captcha"); + } + + private static string EnsureReaderUrlHasPage(string url) + { + Match m = Regex.Match(url, @"(/read/[0-9a-fA-F]{16,64})(/(\d+))?", RegexOptions.IgnoreCase); + if (m.Success && string.IsNullOrEmpty(m.Groups[2].Value)) + { + int qIdx = url.IndexOf('?', StringComparison.Ordinal); + if (qIdx >= 0) + url = url.Insert(qIdx, "/1"); + else + url = url.TrimEnd('/') + "/1"; + } + return url; + } + + private static string EnsureListStyle(string url) + { + if (string.IsNullOrEmpty(url)) + return url; + if (url.Contains("style=list", StringComparison.OrdinalIgnoreCase)) + return url; + return url.Contains('?') ? (url + "&style=list") : (url + "?style=list"); + } + + private static string NormalizeNumber(string s) + { + if (string.IsNullOrWhiteSpace(s)) + return "0"; + s = s.Trim(); + Match m = Regex.Match(s, @"^\s*0*(\d+)(?:\.(\d+))?\s*$"); + if (!m.Success) + return s; + string intPart = m.Groups[1].Value.TrimStart('0'); + if (intPart.Length == 0) + intPart = "0"; + string frac = m.Groups[2].Success + ? "." + m.Groups[2].Value + : ""; + return intPart + frac; + } + + private static string MakeAbsoluteUrl(Uri baseUri, string s) + { + s = s.Trim(); + if (s.StartsWith("//")) + return "https:" + s; + if (s.StartsWith("http://", StringComparison.OrdinalIgnoreCase) || + s.StartsWith("https://", StringComparison.OrdinalIgnoreCase)) + return s; + if (s.StartsWith("/")) + return new Uri(baseUri, s).ToString(); + return new Uri(baseUri, s).ToString(); + } + + private static string? ExtractOgImage(string html, Uri baseUri) + { + HtmlDocument doc = new (); + doc.LoadHtml(html); + string? og = doc.DocumentNode.SelectSingleNode("//meta[@property='og:image']")?.GetAttributeValue("content", null); + return string.IsNullOrWhiteSpace(og) ? 
null : MakeAbsoluteUrl(baseUri, og!); + } + + // ===================== TITLE CLEANUP (suffisso MW) ============== + private static readonly Regex MwSuffix = new(@"\s*(Scan\s\w+\s-\sMangaWorld)$", RegexOptions.IgnoreCase); + + private static string CleanTitleSuffix(string? t) + { + if (string.IsNullOrWhiteSpace(t)) + return t ?? string.Empty; + return MwSuffix.Replace(t, "").Trim(); + } + + // ===================== STATO (estrazione + mapping) ============= + private static string? ExtractItalianStatus(HtmlDocument doc) + { + // 1) Percorso più comune: "Stato: " + HtmlNode? node = doc.DocumentNode.SelectSingleNode("//span[normalize-space(text())='Stato:']/following-sibling::*[1]") + ?? doc.DocumentNode.SelectSingleNode("//span[contains(translate(., 'STATO', 'stato'), 'stato')]/following-sibling::*[1]"); + string? val = node?.InnerText?.Trim(); + if (!string.IsNullOrWhiteSpace(val)) return HtmlEntity.DeEntitize(val); + + // 2) Blocchi info vari (tollerante a cambi DOM) + HtmlNodeCollection? blocks = doc.DocumentNode.SelectNodes("//*[contains(@class,'info') or contains(@class,'details') or contains(@class,'meta') or contains(@class,'attributes') or contains(@class,'list-group')]"); + // ReSharper disable once ConditionIsAlwaysTrueOrFalseAccordingToNullableAPIContract + if (blocks is not null) + { + foreach (HtmlNode block in blocks) + { + HtmlNodeCollection labels = block.SelectNodes(".//dt|.//li|.//div|.//span|.//strong") ?? new HtmlNodeCollection(null); + foreach (HtmlNode label in labels) + { + string? t = label.InnerText?.Trim()?.ToLowerInvariant(); + if (string.IsNullOrEmpty(t)) + continue; + if (t != "stato" && t != "stato:" && !t.Contains("stato")) + continue; + string? vv = label.SelectSingleNode("./following-sibling::*[1]")?.InnerText?.Trim() + ?? 
label.ParentNode?.SelectSingleNode(".//a|.//span|.//strong")?.InnerText?.Trim(); + if (!string.IsNullOrWhiteSpace(vv)) + return HtmlEntity.DeEntitize(vv); + } + } + } + + // 3) Fallback testuale grezzo + string body = doc.DocumentNode.InnerText; + Match m = Regex.Match(body, @"Stato\s*:\s*([A-Za-zÀ-ÿ\s\-]+)", RegexOptions.IgnoreCase); + return m.Success + ? m.Groups[1].Value.Trim() + : null; + } + + private static MangaReleaseStatus MapItalianStatus(string s) => s.Trim().ToLowerInvariant() switch + { + "in corso" or "ongoing" or "attivo" => MangaReleaseStatus.Continuing, + "completo" or "concluso" or "finito" or "terminato" or "completed" => MangaReleaseStatus.Completed, + "in pausa" or "pausa" or "hiatus" or "sospeso" => MangaReleaseStatus.OnHiatus, + "droppato" or "cancellato" or "abbandonato" or "cancelled" or "interrotto" => MangaReleaseStatus.Cancelled, + _ => MangaReleaseStatus.Unreleased + }; +} + diff --git a/API/Schema/MangaConnectors/Mangaworld.cs b/API/Schema/MangaConnectors/Mangaworld.cs deleted file mode 100644 index 35c46fa..0000000 --- a/API/Schema/MangaConnectors/Mangaworld.cs +++ /dev/null @@ -1,542 +0,0 @@ -using System.Text.RegularExpressions; -using API.MangaDownloadClients; -using HtmlAgilityPack; - -namespace API.Schema.MangaConnectors; - -public sealed class Mangaworld : MangaConnector -{ - public Mangaworld() : base( - "Mangaworld", - new[] { "it" }, - new[] - { - "mangaworld.cx","www.mangaworld.cx", - "mangaworld.bz","www.mangaworld.bz", - "mangaworld.fun","www.mangaworld.fun", - "mangaworld.ac","www.mangaworld.ac" - }, - "https://www.mangaworld.cx/public/assets/seo/favicon-96x96.png?v=3" - ) - { - downloadClient = new HttpDownloadClient(); - } - - // ============================ SEARCH ============================ - public override Manga[] SearchManga(string mangaSearchName) - { - var baseUri = new Uri("https://www.mangaworld.cx/"); - var searchUrl = new Uri(baseUri, "archive?keyword=" + Uri.EscapeDataString(mangaSearchName)); - - var res 
= downloadClient.MakeRequest(searchUrl.ToString(), RequestType.Default); - if ((int)res.statusCode < 200 || (int)res.statusCode >= 300) return Array.Empty(); - - using var sr = new StreamReader(res.result); - var html = sr.ReadToEnd(); - - var doc = new HtmlDocument(); - doc.LoadHtml(html); - - var anchors = doc.DocumentNode.SelectNodes("//a[@href and contains(@href,'/manga/')]"); - if (anchors is null) return Array.Empty(); - - var seen = new HashSet(StringComparer.OrdinalIgnoreCase); - var list = new List(); - - foreach (var a in anchors) - { - var href = a.GetAttributeValue("href", null); - if (string.IsNullOrWhiteSpace(href)) continue; - - var canonical = new Uri(baseUri, href).ToString(); - var ms = _seriesUrl.Match(canonical); - if (!ms.Success) continue; - - var id = ms.Groups["id"].Value; - var slug = ms.Groups["slug"].Value; - var key = $"{id}/{slug}"; - if (!seen.Add(key)) continue; - - string title = slug.Replace('-', ' '); - string cover = string.Empty; - - // default status: verrà corretto se troviamo lo stato nella scheda - var status = MangaReleaseStatus.Unreleased; - - // preferisci sempre la pagina serie per titolo/cover corretti - var seriesRes = downloadClient.MakeRequest(canonical, RequestType.MangaInfo); - if ((int)seriesRes.statusCode >= 200 && (int)seriesRes.statusCode < 300) - { - using var srs = new StreamReader(seriesRes.result); - var seriesHtml = srs.ReadToEnd(); - - var sdoc = new HtmlDocument(); - sdoc.LoadHtml(seriesHtml); - - title = - sdoc.DocumentNode.SelectSingleNode("//meta[@property='og:title']")?.GetAttributeValue("content", null) - ?? sdoc.DocumentNode.SelectSingleNode("//h1")?.InnerText?.Trim() - ?? title; - - title = CleanTitleSuffix(title); - - cover = - ExtractOgImage(seriesHtml, new Uri(canonical)) - ?? sdoc.DocumentNode.SelectSingleNode("//div[contains(@class,'cover') or contains(@class,'poster')]//img[@src or @data-src]")?.GetAttributeValue("data-src", null) - ?? 
sdoc.DocumentNode.SelectSingleNode("//div[contains(@class,'cover') or contains(@class,'poster')]//img[@src or @data-src]")?.GetAttributeValue("src", null) - ?? string.Empty; - - if (!string.IsNullOrEmpty(cover)) - cover = MakeAbsoluteUrl(new Uri(canonical), cover); - - // === STATO (anche in lista, se abbiamo già aperto la scheda) === - var rawStatus = ExtractItalianStatus(sdoc); - if (!string.IsNullOrWhiteSpace(rawStatus)) - status = MapItalianStatus(rawStatus); - } - else - { - var fallbackTitle = HtmlEntity.DeEntitize(a.InnerText).Trim(); - if (!string.IsNullOrWhiteSpace(fallbackTitle)) title = fallbackTitle; - title = CleanTitleSuffix(title); - cover = TryExtractCoverFromSearchCard(a, baseUri); - } - - list.Add(new Manga( - $"{id}/{slug}", - HtmlEntity.DeEntitize(title).Trim(), - string.Empty, - canonical, - cover, - status, // <-- STATO in lista se ricavato - this, - new List(), - new List(), - new List(), - new List(), - year: null, - originalLanguage: "it" - )); - } - - return list.ToArray(); - } - - // ======================== URL → Manga =========================== - public override Manga? GetMangaFromUrl(string url) - { - var m = _seriesUrl.Match(url); - if (!m.Success) return null; - return GetMangaFromId($"{m.Groups["id"].Value}/{m.Groups["slug"].Value}"); - } - - // ======================== ID → Manga ============================ - public override Manga? 
GetMangaFromId(string mangaIdOnSite) - { - var parts = mangaIdOnSite.Split('/', 2); - if (parts.Length != 2) return null; - - var id = parts[0]; - var slug = parts[1]; - - var url = $"https://www.mangaworld.cx/manga/{id}/{slug}/"; - var res = downloadClient.MakeRequest(url, RequestType.MangaInfo); - if ((int)res.statusCode < 200 || (int)res.statusCode >= 300) return null; - - using var sr = new StreamReader(res.result); - var html = sr.ReadToEnd(); - - var doc = new HtmlDocument(); - doc.LoadHtml(html); - - var title = - doc.DocumentNode.SelectSingleNode("//meta[@property='og:title']")?.GetAttributeValue("content", null) - ?? doc.DocumentNode.SelectSingleNode("//h1")?.InnerText?.Trim() - ?? slug.Replace('-', ' '); - - title = CleanTitleSuffix(title); - - var cover = - ExtractOgImage(html, new Uri(url)) - ?? doc.DocumentNode.SelectSingleNode("//div[contains(@class,'cover') or contains(@class,'poster')]//img[@src or @data-src]")?.GetAttributeValue("data-src", null) - ?? doc.DocumentNode.SelectSingleNode("//div[contains(@class,'cover') or contains(@class,'poster')]//img[@src or @data-src]")?.GetAttributeValue("src", null) - ?? string.Empty; - - if (!string.IsNullOrEmpty(cover)) - cover = MakeAbsoluteUrl(new Uri(url), cover); - - var description = - doc.DocumentNode.SelectSingleNode("//meta[@name='description']")?.GetAttributeValue("content", null) - ?? HtmlEntity.DeEntitize( - doc.DocumentNode.SelectSingleNode("//div[contains(@class,'description') or contains(@class,'trama')]") - ?.InnerText ?? 
string.Empty - ).Trim(); - - // === STATO (scheda dettaglio) === - var detailStatus = MangaReleaseStatus.Unreleased; - var detailRawStatus = ExtractItalianStatus(doc); - if (!string.IsNullOrWhiteSpace(detailRawStatus)) - detailStatus = MapItalianStatus(detailRawStatus); - - return new Manga( - mangaIdOnSite, - HtmlEntity.DeEntitize(title), - description, - url, - cover, - detailStatus, // <-- STATO corretto in dettaglio - this, - new List(), - new List(), - new List(), - new List(), - year: null, - originalLanguage: "it" - ); - } - - // ========================== CAPITOLI ============================ - public override Chapter[] GetChapters(Manga manga, string? language = null) - { - var parts = manga.IdOnConnectorSite.Split('/', 2); - if (parts.Length != 2) return Array.Empty(); - - var id = parts[0]; - var slug = parts[1]; - var seriesUrl = $"https://www.mangaworld.cx/manga/{id}/{slug}/"; - - string html = FetchHtmlWithFallback(seriesUrl, out var baseUri); - if (string.IsNullOrEmpty(html)) return Array.Empty(); - - var doc = new HtmlDocument(); - doc.LoadHtml(html); - - var chapters = ParseChaptersFromHtml(manga, doc, baseUri); - - // Ordinamento finale: Volume → Capitolo (numerico) - return chapters - .OrderBy(c => c.VolumeNumber ?? 
0) - .ThenBy(c => TryParseDouble(c.ChapterNumber)) - .ToArray(); - } - - // ===================== IMMAGINI CAPITOLO ======================= - private static readonly Regex _imagesArray = new(@"images\s*=\s*\[(?.*?)\]", RegexOptions.Singleline | RegexOptions.IgnoreCase); - private static readonly Regex _urlInQuotes = new("\"(https?[^\"\\]]+)\""); - internal override string[] GetChapterImageUrls(Chapter chapter) - { - var url = EnsureListStyle(chapter.Url); - - var res = downloadClient.MakeRequest(url, RequestType.MangaInfo); - if ((int)res.statusCode < 200 || (int)res.statusCode >= 300) return Array.Empty(); - - using var sr = new StreamReader(res.result); - var html = sr.ReadToEnd(); - - var baseUri = new Uri(url); - - var doc = new HtmlDocument(); - doc.LoadHtml(html); - - var imgs = doc.DocumentNode.SelectNodes("//img[@data-src or @src or @srcset]") ?? new HtmlNodeCollection(null); - - IEnumerable fromDom = imgs - .SelectMany(i => - { - var list = new List(); - var ds = i.GetAttributeValue("data-src", null); - var s = i.GetAttributeValue("src", null); - var ss = i.GetAttributeValue("srcset", null); - - if (!string.IsNullOrWhiteSpace(ds)) list.Add(ds); - if (!string.IsNullOrWhiteSpace(s)) list.Add(s); - if (!string.IsNullOrWhiteSpace(ss)) - { - foreach (var part in ss.Split(',')) - { - var p = part.Trim().Split(' ')[0]; - if (!string.IsNullOrWhiteSpace(p)) list.Add(p); - } - } - return list; - }) - .Select(x => MakeAbsoluteUrl(baseUri, x)) - .Where(u => - { - var z = u.ToLowerInvariant(); - return z.StartsWith("http") && (z.Contains(".jpg") || z.Contains(".jpeg") || z.Contains(".png") || z.Contains(".webp")); - }); - - var m = _imagesArray.Match(html); - IEnumerable fromJs = Enumerable.Empty(); - if (m.Success) - { - var urls = _urlInQuotes.Matches(m.Groups["arr"].Value); - fromJs = urls.Select(mm => MakeAbsoluteUrl(baseUri, mm.Groups[1].Value)); - } - - var final = new List(); - var seen = new HashSet(StringComparer.OrdinalIgnoreCase); - foreach (var u in 
fromDom.Concat(fromJs)) - if (seen.Add(u)) final.Add(u); - - return final.ToArray(); - } - - // ============================ PARSER CAPITOLI =================== - private static readonly Regex RexVolume = new(@"[Vv]olume\s+([0-9]+)", RegexOptions.Compiled); - private static readonly Regex RexChapter = new(@"(?:\b[Cc]apitolo|\b[Cc]h(?:apter)?)\s*([0-9]+(?:\.[0-9]+)?)", RegexOptions.Compiled); - - private List ParseChaptersFromHtml(Manga manga, HtmlDocument document, Uri baseUri) - { - var ret = new List(); - - // wrapper principale - var chaptersWrapper = document.DocumentNode.SelectSingleNode("//div[contains(@class,'chapters-wrapper')]"); - // layout A: volumi raggruppati - var volumeElements = document.DocumentNode.SelectNodes("//div[contains(@class,'volume-element')]"); - - if (volumeElements is not null && volumeElements.Count > 0) - { - foreach (var volNode in volumeElements) - { - // titolo volume, es. "

Volume 24

" - var volText = volNode.SelectSingleNode(".//div[contains(@class,'volume')]/p")?.InnerText ?? string.Empty; - - int? volumeNumber = null; - var vm = RexVolume.Match(volText); - if (vm.Success && int.TryParse(vm.Groups[1].Value, out var volParsed)) - volumeNumber = volParsed; - - // capitoli dentro il blocco volume - var chapterNodes = volNode - .SelectSingleNode(".//div[contains(@class,'volume-chapters')]") - ?.SelectNodes(".//div") ?? new HtmlNodeCollection(null); - - foreach (var chNode in chapterNodes) - { - var anchor = chNode.SelectSingleNode(".//a[@href]"); - if (anchor is null) continue; - - var spanText = anchor.SelectSingleNode(".//span")?.InnerText ?? anchor.InnerText ?? string.Empty; - - var cm = RexChapter.Match(spanText); - if (!cm.Success) continue; - - string chapterNumber = NormalizeNumber(cm.Groups[1].Value); - string href = anchor.GetAttributeValue("href", ""); - if (string.IsNullOrWhiteSpace(href)) continue; - - var rel = MakeAbsoluteUrl(baseUri, href); - var ensured = EnsureListStyle(EnsureReaderUrlHasPage(rel)); - - // title:null per evitare duplicazioni nel filename - ret.Add(new Chapter(manga, ensured, chapterNumber, volumeNumber, title: null)); - } - } - } - else - { - // layout B: lista piatta (niente blocchi volume) → v1: Volume 0 - var chapterNodes = chaptersWrapper?.SelectNodes(".//div[contains(@class,'chapter')]") - ?? document.DocumentNode.SelectNodes("//div[contains(@class,'chapter')]") - ?? new HtmlNodeCollection(null); - - foreach (var chNode in chapterNodes) - { - var anchor = chNode.SelectSingleNode(".//a[@href]") ?? chNode.SelectSingleNode(".//a"); - if (anchor is null) continue; - - var spanText = anchor.SelectSingleNode(".//span")?.InnerText ?? anchor.InnerText ?? 
string.Empty; - - var cm = RexChapter.Match(spanText); - if (!cm.Success) continue; - - string chapterNumber = NormalizeNumber(cm.Groups[1].Value); - string href = anchor.GetAttributeValue("href", ""); - if (string.IsNullOrWhiteSpace(href)) continue; - - var rel = MakeAbsoluteUrl(baseUri, href); - var ensured = EnsureListStyle(EnsureReaderUrlHasPage(rel)); - - // v1 behaviour: senza volumi → Volume 0 - ret.Add(new Chapter(manga, ensured, chapterNumber, 0, title: null)); - } - } - - return ret; - } - - // ============================ HELPERS =========================== - private static readonly Regex _seriesUrl = new(@"https?://[^/]+/manga/(?\d+)/(?[^/]+)/?", RegexOptions.IgnoreCase); - - private string FetchHtmlWithFallback(string seriesUrl, out Uri baseUri) - { - baseUri = new Uri(seriesUrl); - - // 1) tenta client "Default" - var res = downloadClient.MakeRequest(seriesUrl, RequestType.Default); - if ((int)res.statusCode >= 200 && (int)res.statusCode < 300) - { - using var sr = new StreamReader(res.result); - var html = sr.ReadToEnd(); - if (!LooksLikeChallenge(html)) return html; - } - - // 2) fallback: client “MangaInfo” (proxy/Flare se configurato) - var res2 = downloadClient.MakeRequest(seriesUrl, RequestType.MangaInfo); - if ((int)res2.statusCode >= 200 && (int)res2.statusCode < 300) - { - using var sr2 = new StreamReader(res2.result); - return sr2.ReadToEnd(); - } - - return string.Empty; - } - - private static bool LooksLikeChallenge(string html) - { - if (string.IsNullOrEmpty(html)) return true; - var h = html.ToLowerInvariant(); - return h.Contains("cf-challenge") || - h.Contains("cf-browser-verification") || - h.Contains("just a moment") || - h.Contains("verify you are human") || - h.Contains("captcha"); - } - - private static string EnsureReaderUrlHasPage(string url) - { - var u = url ?? 
string.Empty; - var m = Regex.Match(u, @"(/read/[0-9a-fA-F]{16,64})(/(\d+))?", RegexOptions.IgnoreCase); - if (m.Success && string.IsNullOrEmpty(m.Groups[2].Value)) - { - var qIdx = u.IndexOf('?', StringComparison.Ordinal); - if (qIdx >= 0) u = u.Insert(qIdx, "/1"); - else u = u.TrimEnd('/') + "/1"; - } - return u; - } - - private static string EnsureListStyle(string url) - { - if (string.IsNullOrEmpty(url)) return url; - if (url.Contains("style=list", StringComparison.OrdinalIgnoreCase)) return url; - return url.Contains('?') ? (url + "&style=list") : (url + "?style=list"); - } - - private static string NormalizeNumber(string s) - { - if (string.IsNullOrWhiteSpace(s)) return "0"; - s = s.Trim(); - var m = Regex.Match(s, @"^\s*0*(\d+)(?:\.(\d+))?\s*$"); - if (!m.Success) return s; - var intPart = m.Groups[1].Value.TrimStart('0'); - if (intPart.Length == 0) intPart = "0"; - var frac = m.Groups[2].Success ? "." + m.Groups[2].Value : ""; - return intPart + frac; - } - - private static double TryParseDouble(string s) - => double.TryParse(s.Replace(',', '.'), System.Globalization.NumberStyles.Any, System.Globalization.CultureInfo.InvariantCulture, out var d) ? 
d : double.MaxValue; - - private static string MakeAbsoluteUrl(Uri baseUri, string s) - { - s = s.Trim(); - if (s.StartsWith("//")) return "https:" + s; - if (s.StartsWith("http://", StringComparison.OrdinalIgnoreCase) || - s.StartsWith("https://", StringComparison.OrdinalIgnoreCase)) return s; - if (s.StartsWith("/")) return new Uri(baseUri, s).ToString(); - return new Uri(baseUri, s).ToString(); - } - - private static string TryExtractCoverFromSearchCard(HtmlNode linkNode, Uri baseUri) - { - var container = linkNode.Ancestors("div") - .FirstOrDefault(div => - { - var cls = div.GetAttributeValue("class", ""); - return cls.Contains("card") || cls.Contains("manga") || cls.Contains("item") || cls.Contains("poster") || cls.Contains("thumb"); - }); - - var img = container?.SelectSingleNode(".//img[@data-src or @src]"); - if (img is null) return string.Empty; - - var raw = img.GetAttributeValue("data-src", null) ?? img.GetAttributeValue("src", null); - return string.IsNullOrWhiteSpace(raw) ? string.Empty : MakeAbsoluteUrl(baseUri, raw!); - } - - private static string? ExtractOgImage(string html, Uri baseUri) - { - var doc = new HtmlDocument(); - doc.LoadHtml(html); - var og = doc.DocumentNode.SelectSingleNode("//meta[@property='og:image']")?.GetAttributeValue("content", null); - return string.IsNullOrWhiteSpace(og) ? null : MakeAbsoluteUrl(baseUri, og!); - } - - // ===================== TITLE CLEANUP (suffisso MW) ============== - private static readonly Regex _mwSuffix = new(@"\s*(Scan\s\w+\s-\sMangaWorld)$", RegexOptions.IgnoreCase); - - private static string CleanTitleSuffix(string? t) - { - if (string.IsNullOrWhiteSpace(t)) return t ?? string.Empty; - return _mwSuffix.Replace(t, "").Trim(); - } - - // ===================== STATO (estrazione + mapping) ============= - private static string? 
ExtractItalianStatus(HtmlDocument doc) - { - // 1) Percorso più comune: "Stato: " - var node = doc.DocumentNode.SelectSingleNode("//span[normalize-space(text())='Stato:']/following-sibling::*[1]") - ?? doc.DocumentNode.SelectSingleNode("//span[contains(translate(., 'STATO', 'stato'), 'stato')]/following-sibling::*[1]"); - var val = node?.InnerText?.Trim(); - if (!string.IsNullOrWhiteSpace(val)) return HtmlEntity.DeEntitize(val); - - // 2) Blocchi info vari (tollerante a cambi DOM) - var blocks = doc.DocumentNode.SelectNodes("//*[contains(@class,'info') or contains(@class,'details') or contains(@class,'meta') or contains(@class,'attributes') or contains(@class,'list-group')]"); - if (blocks != null) - { - foreach (var b in blocks) - { - var labels = b.SelectNodes(".//dt|.//li|.//div|.//span|.//strong") ?? new HtmlNodeCollection(null); - foreach (var lab in labels) - { - var t = lab.InnerText?.Trim()?.ToLowerInvariant(); - if (string.IsNullOrEmpty(t)) continue; - if (t == "stato" || t == "stato:" || t.Contains("stato")) - { - var vv = lab.SelectSingleNode("./following-sibling::*[1]")?.InnerText?.Trim() - ?? lab.ParentNode?.SelectSingleNode(".//a|.//span|.//strong")?.InnerText?.Trim(); - if (!string.IsNullOrWhiteSpace(vv)) return HtmlEntity.DeEntitize(vv!); - } - } - } - } - - // 3) Fallback testuale grezzo - var body = doc.DocumentNode.InnerText ?? ""; - var m = Regex.Match(body, @"Stato\s*:\s*([A-Za-zÀ-ÿ\s\-]+)", RegexOptions.IgnoreCase); - return m.Success ? m.Groups[1].Value.Trim() : null; - } - - private static MangaReleaseStatus MapItalianStatus(string s) - { - var k = (s ?? 
"").Trim().ToLowerInvariant(); - - if (k.Contains("in corso") || k.Contains("ongoing") || k.Contains("attivo")) - return MangaReleaseStatus.Continuing; - - if (k.Contains("completo") || k.Contains("concluso") || k.Contains("finito") || k.Contains("terminato") || k.Contains("completed")) - return MangaReleaseStatus.Completed; - - if (k.Contains("in pausa") || k.Contains("pausa") || k.Contains("hiatus") || k.Contains("sospeso")) - return MangaReleaseStatus.OnHiatus; - - if (k.Contains("droppato") || k.Contains("cancellato") || k.Contains("abbandonato") || k.Contains("cancelled") || k.Contains("interrotto")) - return MangaReleaseStatus.Cancelled; - - // fallback neutro nel tuo enum - return MangaReleaseStatus.Unreleased; - } -} - diff --git a/API/Tranga.cs b/API/Tranga.cs index f210cbf..b360d51 100644 --- a/API/Tranga.cs +++ b/API/Tranga.cs @@ -19,7 +19,7 @@ public static class Tranga private static readonly ILog Log = LogManager.GetLogger(typeof(Tranga)); internal static readonly MetadataFetcher[] MetadataFetchers = [new MyAnimeList()]; - internal static readonly MangaConnector[] MangaConnectors = [new Global(), new MangaDex(), new ComickIo()]; + internal static readonly MangaConnector[] MangaConnectors = [new Global(), new MangaDex(), new ComickIo(), new Mangaworld()]; internal static TrangaSettings Settings = TrangaSettings.Load(); internal static readonly UpdateMetadataWorker UpdateMetadataWorker = new (); diff --git a/README.md b/README.md index 267298c..c2556580 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,8 @@ Tranga can download Chapters and Metadata from "Scanlation" sites such as - [MangaDex.org](https://mangadex.org/) (Multilingual) -- [Comick.io](https://comick.io/) +- [Comick.io](https://comick.io/) (Multilingual) +- [MangaWorld](https://www.mangaworld.cx) (it) - ❓ Open an [issue](https://github.com/C9Glax/tranga/issues/new?assignees=&labels=New+Connector&projects=&template=new_connector.yml&title=%5BNew+Connector%5D%3A+) and trigger a library-scan 
with [Komga](https://komga.org/) and [Kavita](https://www.kavitareader.com/).