using System.Text.RegularExpressions;
using API.MangaDownloadClients;
using HtmlAgilityPack;

namespace API.Schema.MangaConnectors;

/// <summary>
/// Connector that scrapes the Mangaworld website (Italian, language code "it"):
/// searching series, resolving a series from a URL or id, listing chapters and
/// collecting the page-image URLs of a chapter.
/// </summary>
/// <remarks>
/// Series are identified on this connector by the string <c>"{numericId}/{slug}"</c>,
/// taken from series URLs of the form <c>/manga/{numericId}/{slug}/</c>.
/// </remarks>
public sealed class Mangaworld : MangaConnector
{
    /// <summary>Root used to build search and series URLs.</summary>
    private const string SiteRoot = "https://www.mangaworld.cx/";

    /// <summary>XPath of the cover image inside a series page (fallback when no og:image is present).</summary>
    private const string CoverImageXPath =
        "//div[contains(@class,'cover') or contains(@class,'poster')]//img[@src or @data-src]";

    // ============================ REGEXES ===========================

    /// <summary>Matches a series URL and captures its numeric <c>id</c> and its <c>slug</c>.</summary>
    /// <remarks>The slug stops at '/', '?' or '#', so query strings and fragments never leak into the id.</remarks>
    private static readonly Regex _seriesUrl =
        new(@"https?://[^/]+/manga/(?<id>\d+)/(?<slug>[^/?#]+)/?", RegexOptions.IgnoreCase);

    /// <summary>Matches an inline script assignment <c>images = [ ... ]</c> and captures the array content as <c>arr</c>.</summary>
    private static readonly Regex _imagesArray =
        new(@"images\s*=\s*\[(?<arr>.*?)\]", RegexOptions.Singleline | RegexOptions.IgnoreCase);

    /// <summary>Matches a double-quoted http(s) URL inside the captured image array.</summary>
    private static readonly Regex _urlInQuotes = new("\"(https?[^\"\\]]+)\"");

    /// <summary>Captures the number of a "Volume N" heading.</summary>
    private static readonly Regex RexVolume = new(@"[Vv]olume\s+([0-9]+)", RegexOptions.Compiled);

    /// <summary>Captures the (optionally decimal) number after "Capitolo", "Ch" or "Chapter".</summary>
    private static readonly Regex RexChapter =
        new(@"(?:\b[Cc]apitolo|\b[Cc]h(?:apter)?)\s*([0-9]+(?:\.[0-9]+)?)", RegexOptions.Compiled);

    /// <summary>Matches a reader URL (<c>/read/{hex}</c>) with an optional trailing page number.</summary>
    private static readonly Regex _readerUrl =
        new(@"(/read/[0-9a-fA-F]{16,64})(/(\d+))?", RegexOptions.IgnoreCase);

    /// <summary>Splits a chapter/volume number into integer and fractional digits, ignoring leading zeros.</summary>
    private static readonly Regex _paddedNumber = new(@"^\s*0*(\d+)(?:\.(\d+))?\s*$");

    /// <summary>Site suffix appended to page titles ("Scan ITA - MangaWorld" and similar).</summary>
    private static readonly Regex _mwSuffix =
        new(@"\s*(Scan\s\w+\s-\sMangaWorld)$", RegexOptions.IgnoreCase);

    /// <summary>Last-resort plain-text match of "Stato: value" anywhere in the page text.</summary>
    private static readonly Regex _statusInText =
        new(@"Stato\s*:\s*([A-Za-zÀ-ÿ\s\-]+)", RegexOptions.IgnoreCase);

    /// <summary>
    /// Keyword table used by <see cref="MapItalianStatus"/>. Rows are evaluated in order and
    /// the first row with a matching keyword wins, so the order is significant.
    /// </summary>
    private static readonly (MangaReleaseStatus Status, string[] Keywords)[] StatusKeywords =
    {
        (MangaReleaseStatus.Continuing, new[] { "in corso", "ongoing", "attivo" }),
        (MangaReleaseStatus.Completed, new[] { "completo", "concluso", "finito", "terminato", "completed" }),
        (MangaReleaseStatus.OnHiatus, new[] { "pausa", "hiatus", "sospeso" }),
        (MangaReleaseStatus.Cancelled, new[] { "droppato", "cancellato", "abbandonato", "cancelled", "interrotto" }),
    };

    /// <summary>Registers the connector name, language, accepted host names and icon, and sets up the HTTP client.</summary>
    public Mangaworld() : base(
        "Mangaworld",
        new[] { "it" },
        new[]
        {
            "mangaworld.cx", "www.mangaworld.cx",
            "mangaworld.bz", "www.mangaworld.bz",
            "mangaworld.fun", "www.mangaworld.fun",
            "mangaworld.ac", "www.mangaworld.ac"
        },
        "https://www.mangaworld.cx/public/assets/seo/favicon-96x96.png?v=3")
    {
        downloadClient = new HttpDownloadClient();
    }

    // ============================ SEARCH ============================

    /// <summary>
    /// Searches the site archive for <paramref name="mangaSearchName"/> and returns one
    /// <see cref="Manga"/> per distinct series link found on the result page.
    /// </summary>
    /// <param name="mangaSearchName">Free-text search term; it is URL-escaped before use.</param>
    /// <returns>The series found, or an empty array when the request fails or no series link is present.</returns>
    /// <remarks>
    /// Each result triggers one additional request for the series page, because title, cover
    /// and status are read from there. When that request fails, the anchor text and the
    /// search-card image are used instead and the status stays <c>Unreleased</c>.
    /// </remarks>
    public override Manga[] SearchManga(string mangaSearchName)
    {
        var baseUri = new Uri(SiteRoot);
        var searchUrl = new Uri(baseUri, "archive?keyword=" + Uri.EscapeDataString(mangaSearchName));

        if (!TryDownloadHtml(searchUrl.ToString(), RequestType.Default, out var html))
            return Array.Empty<Manga>();

        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        var anchors = doc.DocumentNode.SelectNodes("//a[@href and contains(@href,'/manga/')]");
        if (anchors is null)
            return Array.Empty<Manga>();

        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        var results = new List<Manga>();

        foreach (var a in anchors)
        {
            var href = a.GetAttributeValue("href", null);
            if (string.IsNullOrWhiteSpace(href))
                continue;

            // A single unresolvable href must not abort the whole search.
            if (!Uri.TryCreate(baseUri, href, out var resolved))
                continue;

            var canonical = resolved.ToString();
            var ms = _seriesUrl.Match(canonical);
            if (!ms.Success)
                continue;

            var slug = ms.Groups["slug"].Value;
            var key = $"{ms.Groups["id"].Value}/{slug}";
            if (!seen.Add(key))
                continue; // the same series is usually linked several times per card

            var slugTitle = slug.Replace('-', ' ');
            string title;
            string cover;
            // Default status; replaced when the series page states one.
            var status = MangaReleaseStatus.Unreleased;

            // Prefer the series page: it carries the correct title and cover.
            if (TryDownloadHtml(canonical, RequestType.MangaInfo, out var seriesHtml))
            {
                var seriesDoc = new HtmlDocument();
                seriesDoc.LoadHtml(seriesHtml);

                title = ExtractTitle(seriesDoc, slugTitle);
                cover = ExtractCover(seriesDoc, new Uri(canonical));
                status = ResolveStatus(seriesDoc);
            }
            else
            {
                var anchorText = HtmlEntity.DeEntitize(a.InnerText).Trim();
                title = CleanTitleSuffix(string.IsNullOrWhiteSpace(anchorText) ? slugTitle : anchorText);
                cover = TryExtractCoverFromSearchCard(a, baseUri);
            }

            // NOTE(review): the element types of the four collection arguments are not visible
            // in this file, so empty collection expressions are used (requires C# 12).
            // If the project targets an older language version, replace them with the
            // concrete empty lists the Manga constructor expects.
            results.Add(new Manga(
                key,
                HtmlEntity.DeEntitize(title).Trim(),
                string.Empty,
                canonical,
                cover,
                status,
                this,
                [],
                [],
                [],
                [],
                year: null,
                originalLanguage: "it"
            ));
        }

        return results.ToArray();
    }

    // ======================== URL -> Manga ==========================

    /// <summary>Resolves a series from any URL that matches the series URL pattern.</summary>
    /// <param name="url">A series URL on one of the site's hosts.</param>
    /// <returns>The series, or <c>null</c> when the URL is not a series URL or the lookup fails.</returns>
    public override Manga? GetMangaFromUrl(string url)
    {
        var m = _seriesUrl.Match(url);
        return m.Success
            ? GetMangaFromId($"{m.Groups["id"].Value}/{m.Groups["slug"].Value}")
            : null;
    }

    // ======================== ID -> Manga ===========================

    /// <summary>Downloads the series page for an id of the form <c>"{numericId}/{slug}"</c> and builds the series from it.</summary>
    /// <param name="mangaIdOnSite">Connector id, <c>"{numericId}/{slug}"</c>.</param>
    /// <returns>The series, or <c>null</c> when the id is malformed or the page cannot be downloaded.</returns>
    public override Manga? GetMangaFromId(string mangaIdOnSite)
    {
        var parts = mangaIdOnSite.Split('/', 2);
        if (parts.Length != 2)
            return null;

        var slug = parts[1];
        var url = BuildSeriesUrl(parts[0], slug);

        if (!TryDownloadHtml(url, RequestType.MangaInfo, out var html))
            return null;

        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        var title = ExtractTitle(doc, slug.Replace('-', ' '));
        var cover = ExtractCover(doc, new Uri(url));

        // Meta description first, then the on-page synopsis block.
        var rawDescription =
            NullIfBlank(doc.DocumentNode.SelectSingleNode("//meta[@name='description']")
                ?.GetAttributeValue("content", null))
            ?? doc.DocumentNode
                .SelectSingleNode("//div[contains(@class,'description') or contains(@class,'trama')]")
                ?.InnerText
            ?? string.Empty;
        var description = HtmlEntity.DeEntitize(rawDescription).Trim();

        // NOTE(review): see SearchManga regarding the empty collection expressions.
        return new Manga(
            mangaIdOnSite,
            HtmlEntity.DeEntitize(title).Trim(),
            description,
            url,
            cover,
            ResolveStatus(doc),
            this,
            [],
            [],
            [],
            [],
            year: null,
            originalLanguage: "it"
        );
    }

    // ========================== CHAPTERS ============================

    /// <summary>Lists the chapters of <paramref name="manga"/>, sorted by volume and then by chapter number.</summary>
    /// <param name="manga">Series whose <c>IdOnConnectorSite</c> is <c>"{numericId}/{slug}"</c>.</param>
    /// <param name="language">Not used by this connector.</param>
    /// <returns>
    /// The chapters found; empty when the id is malformed or the page cannot be downloaded.
    /// A missing volume sorts as 0 and a chapter number that does not parse sorts last.
    /// </returns>
    public override Chapter[] GetChapters(Manga manga, string? language = null)
    {
        var parts = manga.IdOnConnectorSite.Split('/', 2);
        if (parts.Length != 2)
            return Array.Empty<Chapter>();

        var seriesUrl = BuildSeriesUrl(parts[0], parts[1]);

        var html = FetchHtmlWithFallback(seriesUrl, out var baseUri);
        if (string.IsNullOrEmpty(html))
            return Array.Empty<Chapter>();

        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        return ParseChaptersFromHtml(manga, doc, baseUri)
            .OrderBy(c => c.VolumeNumber ?? 0)
            .ThenBy(c => TryParseDouble(c.ChapterNumber))
            .ToArray();
    }

    // ======================== CHAPTER IMAGES ========================

    /// <summary>
    /// Collects the image URLs of a chapter from the list-style reader page: first from
    /// <c>img</c> elements (data-src, src, srcset), then from an inline <c>images = [...]</c>
    /// script array. Duplicates (case-insensitive) are dropped, first occurrence wins.
    /// </summary>
    /// <param name="chapter">Chapter whose <c>Url</c> points at the reader.</param>
    /// <returns>Absolute image URLs in discovery order; empty when the page cannot be downloaded.</returns>
    internal override string[] GetChapterImageUrls(Chapter chapter)
    {
        var url = EnsureListStyle(chapter.Url);

        if (!TryDownloadHtml(url, RequestType.MangaInfo, out var html))
            return Array.Empty<string>();

        var baseUri = new Uri(url);
        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        var ordered = new List<string>();
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        void Keep(string candidate)
        {
            if (seen.Add(candidate))
                ordered.Add(candidate);
        }

        var imgs = doc.DocumentNode.SelectNodes("//img[@data-src or @src or @srcset]");
        if (imgs is not null)
        {
            foreach (var img in imgs)
            {
                foreach (var raw in ImageSources(img))
                {
                    var absolute = MakeAbsoluteUrl(baseUri, raw);
                    if (LooksLikeImageUrl(absolute))
                        Keep(absolute);
                }
            }
        }

        var scriptArray = _imagesArray.Match(html);
        if (scriptArray.Success)
        {
            foreach (Match quoted in _urlInQuotes.Matches(scriptArray.Groups["arr"].Value))
            {
                // Inline JSON commonly escapes slashes ("https:\/\/..."); undo that.
                var raw = quoted.Groups[1].Value.Replace("\\/", "/");
                Keep(MakeAbsoluteUrl(baseUri, raw));
            }
        }

        return ordered.ToArray();
    }

    /// <summary>Yields the non-blank URL candidates of an <c>img</c> element: data-src, src, then each srcset entry.</summary>
    private static IEnumerable<string> ImageSources(HtmlNode img)
    {
        var dataSrc = img.GetAttributeValue("data-src", null);
        if (!string.IsNullOrWhiteSpace(dataSrc))
            yield return dataSrc;

        var src = img.GetAttributeValue("src", null);
        if (!string.IsNullOrWhiteSpace(src))
            yield return src;

        var srcset = img.GetAttributeValue("srcset", null);
        if (string.IsNullOrWhiteSpace(srcset))
            yield break;

        foreach (var entry in srcset.Split(','))
        {
            // Each srcset entry is "url descriptor"; only the URL part is wanted.
            var candidate = entry.Trim().Split(' ')[0];
            if (!string.IsNullOrWhiteSpace(candidate))
                yield return candidate;
        }
    }

    /// <summary>True for http(s) URLs that contain a .jpg, .jpeg, .png or .webp extension.</summary>
    private static bool LooksLikeImageUrl(string url) =>
        url.StartsWith("http", StringComparison.OrdinalIgnoreCase)
        && (url.Contains(".jpg", StringComparison.OrdinalIgnoreCase)
            || url.Contains(".jpeg", StringComparison.OrdinalIgnoreCase)
            || url.Contains(".png", StringComparison.OrdinalIgnoreCase)
            || url.Contains(".webp", StringComparison.OrdinalIgnoreCase));

    // ========================= CHAPTER PARSER =======================

    /// <summary>
    /// Extracts chapters from a series page. Two layouts are handled: chapters grouped in
    /// "volume-element" blocks (volume number read from the block heading), and a flat list
    /// without volume blocks (every chapter gets volume 0).
    /// </summary>
    /// <param name="manga">Series the chapters belong to.</param>
    /// <param name="document">Parsed series page.</param>
    /// <param name="baseUri">Base used to resolve relative chapter links.</param>
    /// <returns>Chapters in document order, one per distinct chapter URL.</returns>
    private List<Chapter> ParseChaptersFromHtml(Manga manga, HtmlDocument document, Uri baseUri)
    {
        var ret = new List<Chapter>();
        // The div selectors below are broad and can reach the same anchor through nested
        // divs; remembering the URLs keeps each chapter once.
        var seenUrls = new HashSet<string>(StringComparer.Ordinal);

        void AddChapter(HtmlNode? anchor, int? volumeNumber)
        {
            if (anchor is null)
                return;

            var label = anchor.SelectSingleNode(".//span")?.InnerText
                        ?? anchor.InnerText
                        ?? string.Empty;
            var cm = RexChapter.Match(label);
            if (!cm.Success)
                return;

            var href = anchor.GetAttributeValue("href", "");
            if (string.IsNullOrWhiteSpace(href))
                return;

            var chapterUrl = EnsureListStyle(EnsureReaderUrlHasPage(MakeAbsoluteUrl(baseUri, href)));
            if (!seenUrls.Add(chapterUrl))
                return;

            // title: null avoids repeating the chapter label in the file name.
            ret.Add(new Chapter(manga, chapterUrl, NormalizeNumber(cm.Groups[1].Value), volumeNumber, title: null));
        }

        // Layout A: chapters grouped by volume.
        var volumeElements = document.DocumentNode.SelectNodes("//div[contains(@class,'volume-element')]");
        if (volumeElements is not null && volumeElements.Count > 0)
        {
            foreach (var volNode in volumeElements)
            {
                // Volume heading text, e.g. "Volume 24" (read from the <p> under the volume div).
                var volText = volNode.SelectSingleNode(".//div[contains(@class,'volume')]/p")?.InnerText
                              ?? string.Empty;

                int? volumeNumber = null;
                var vm = RexVolume.Match(volText);
                if (vm.Success && int.TryParse(vm.Groups[1].Value, out var volParsed))
                    volumeNumber = volParsed;

                var chapterNodes = volNode
                    .SelectSingleNode(".//div[contains(@class,'volume-chapters')]")
                    ?.SelectNodes(".//div");
                if (chapterNodes is null)
                    continue;

                foreach (var chNode in chapterNodes)
                    AddChapter(chNode.SelectSingleNode(".//a[@href]"), volumeNumber);
            }

            return ret;
        }

        // Layout B: flat list, no volume blocks -> volume 0 (v1 behaviour).
        var chaptersWrapper = document.DocumentNode.SelectSingleNode("//div[contains(@class,'chapters-wrapper')]");
        var flatNodes = chaptersWrapper?.SelectNodes(".//div[contains(@class,'chapter')]")
                        ?? document.DocumentNode.SelectNodes("//div[contains(@class,'chapter')]");
        if (flatNodes is null)
            return ret;

        foreach (var chNode in flatNodes)
            AddChapter(chNode.SelectSingleNode(".//a[@href]") ?? chNode.SelectSingleNode(".//a"), 0);

        return ret;
    }

    // ============================ HELPERS ===========================

    /// <summary>Builds the canonical series URL for a numeric id and slug.</summary>
    private static string BuildSeriesUrl(string id, string slug) =>
        $"https://www.mangaworld.cx/manga/{id}/{slug}/";

    /// <summary>Returns <c>null</c> for null, empty or whitespace-only text, otherwise the text unchanged.</summary>
    private static string? NullIfBlank(string? value) =>
        string.IsNullOrWhiteSpace(value) ? null : value;

    /// <summary>
    /// Requests <paramref name="url"/> and reads the response body as text.
    /// </summary>
    /// <param name="url">Absolute URL to request.</param>
    /// <param name="requestType">Request type passed through to the download client.</param>
    /// <param name="html">Response body on success, empty string otherwise.</param>
    /// <returns><c>true</c> when the status code is in the 200-299 range.</returns>
    private bool TryDownloadHtml(string url, RequestType requestType, out string html)
    {
        var res = downloadClient.MakeRequest(url, requestType);
        var code = (int)res.statusCode;
        if (code < 200 || code >= 300)
        {
            html = string.Empty;
            return false;
        }

        using var reader = new StreamReader(res.result);
        html = reader.ReadToEnd();
        return true;
    }

    /// <summary>
    /// Downloads a series page with <c>RequestType.Default</c>; when that fails or the body
    /// looks like an anti-bot challenge, retries once with <c>RequestType.MangaInfo</c>.
    /// </summary>
    /// <param name="seriesUrl">Absolute series URL.</param>
    /// <param name="baseUri"><paramref name="seriesUrl"/> as a <see cref="Uri"/>, for resolving relative links.</param>
    /// <returns>The page HTML, or an empty string when both attempts fail.</returns>
    /// <remarks>
    /// Per the original author's note the second request type may be routed through a
    /// proxy/Flare setup when one is configured; that is not verifiable from this file.
    /// The body of the second attempt is returned without a challenge check.
    /// </remarks>
    private string FetchHtmlWithFallback(string seriesUrl, out Uri baseUri)
    {
        baseUri = new Uri(seriesUrl);

        if (TryDownloadHtml(seriesUrl, RequestType.Default, out var html) && !LooksLikeChallenge(html))
            return html;

        return TryDownloadHtml(seriesUrl, RequestType.MangaInfo, out var fallbackHtml)
            ? fallbackHtml
            : string.Empty;
    }

    /// <summary>True when the HTML is empty or contains one of the known challenge/captcha markers.</summary>
    private static bool LooksLikeChallenge(string html)
    {
        if (string.IsNullOrEmpty(html))
            return true;

        return html.Contains("cf-challenge", StringComparison.OrdinalIgnoreCase)
               || html.Contains("cf-browser-verification", StringComparison.OrdinalIgnoreCase)
               || html.Contains("just a moment", StringComparison.OrdinalIgnoreCase)
               || html.Contains("verify you are human", StringComparison.OrdinalIgnoreCase)
               || html.Contains("captcha", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Appends the page segment <c>/1</c> to a reader URL (<c>/read/{hex}</c>) that has no
    /// page number yet. The query string, if any, is kept after the new segment, and a
    /// trailing slash on the path is removed first so no "//" is produced.
    /// </summary>
    /// <param name="url">Chapter URL; <c>null</c> is treated as empty.</param>
    /// <returns>The URL with a page segment, or the input unchanged when it is not a page-less reader URL.</returns>
    private static string EnsureReaderUrlHasPage(string url)
    {
        var u = url ?? string.Empty;

        var m = _readerUrl.Match(u);
        if (!m.Success || m.Groups[2].Success)
            return u;

        var queryStart = u.IndexOf('?');
        var path = queryStart >= 0 ? u[..queryStart] : u;
        var query = queryStart >= 0 ? u[queryStart..] : string.Empty;

        return path.TrimEnd('/') + "/1" + query;
    }

    /// <summary>Adds the <c>style=list</c> query parameter unless the URL is empty or already contains it.</summary>
    private static string EnsureListStyle(string url)
    {
        if (string.IsNullOrEmpty(url))
            return url;
        if (url.Contains("style=list", StringComparison.OrdinalIgnoreCase))
            return url;

        return url.Contains('?') ? url + "&style=list" : url + "?style=list";
    }

    /// <summary>
    /// Strips leading zeros from a number such as "007" or "01.5" ("7", "1.5").
    /// Blank input yields "0"; text that is not a plain number is returned trimmed.
    /// </summary>
    private static string NormalizeNumber(string s)
    {
        if (string.IsNullOrWhiteSpace(s))
            return "0";

        s = s.Trim();
        var m = _paddedNumber.Match(s);
        if (!m.Success)
            return s;

        var intPart = m.Groups[1].Value.TrimStart('0');
        if (intPart.Length == 0)
            intPart = "0";

        return m.Groups[2].Success ? intPart + "." + m.Groups[2].Value : intPart;
    }

    /// <summary>Parses a number (',' accepted as decimal separator); unparsable text yields <see cref="double.MaxValue"/> so it sorts last.</summary>
    private static double TryParseDouble(string s) =>
        double.TryParse(
            s.Replace(',', '.'),
            System.Globalization.NumberStyles.Any,
            System.Globalization.CultureInfo.InvariantCulture,
            out var d)
            ? d
            : double.MaxValue;

    /// <summary>
    /// Turns a link into an absolute URL: protocol-relative links get "https:", absolute
    /// http(s) links are returned as they are, everything else is resolved against
    /// <paramref name="baseUri"/>.
    /// </summary>
    private static string MakeAbsoluteUrl(Uri baseUri, string s)
    {
        s = s.Trim();

        if (s.StartsWith("//", StringComparison.Ordinal))
            return "https:" + s;

        if (s.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || s.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            return s;

        return new Uri(baseUri, s).ToString();
    }

    /// <summary>
    /// Finds the cover of a search result: looks for the nearest ancestor div whose class
    /// contains card/manga/item/poster/thumb and takes the first image inside it.
    /// </summary>
    /// <returns>The absolute image URL, or an empty string when none is found.</returns>
    private static string TryExtractCoverFromSearchCard(HtmlNode linkNode, Uri baseUri)
    {
        var container = linkNode.Ancestors("div")
            .FirstOrDefault(div =>
            {
                var cls = div.GetAttributeValue("class", "");
                return cls.Contains("card") || cls.Contains("manga") || cls.Contains("item")
                       || cls.Contains("poster") || cls.Contains("thumb");
            });

        var img = container?.SelectSingleNode(".//img[@data-src or @src]");
        if (img is null)
            return string.Empty;

        var raw = img.GetAttributeValue("data-src", null) ?? img.GetAttributeValue("src", null);
        return string.IsNullOrWhiteSpace(raw) ? string.Empty : MakeAbsoluteUrl(baseUri, raw);
    }

    /// <summary>
    /// Reads the series title from a series page: og:title, then the first h1, then
    /// <paramref name="fallback"/>. Blank values are skipped and the site suffix is removed.
    /// </summary>
    private static string ExtractTitle(HtmlDocument doc, string fallback)
    {
        var raw =
            NullIfBlank(doc.DocumentNode.SelectSingleNode("//meta[@property='og:title']")
                ?.GetAttributeValue("content", null))
            ?? NullIfBlank(doc.DocumentNode.SelectSingleNode("//h1")?.InnerText?.Trim())
            ?? fallback;

        return CleanTitleSuffix(raw);
    }

    /// <summary>
    /// Reads the cover URL from a series page: og:image, then the data-src or src of the
    /// cover/poster image. Works on the already parsed document.
    /// </summary>
    /// <returns>The absolute cover URL, or an empty string when none is found.</returns>
    private static string ExtractCover(HtmlDocument doc, Uri pageUri)
    {
        var og = doc.DocumentNode.SelectSingleNode("//meta[@property='og:image']")
            ?.GetAttributeValue("content", null);
        if (!string.IsNullOrWhiteSpace(og))
            return MakeAbsoluteUrl(pageUri, og);

        var img = doc.DocumentNode.SelectSingleNode(CoverImageXPath);
        var raw = img?.GetAttributeValue("data-src", null) ?? img?.GetAttributeValue("src", null);

        return string.IsNullOrWhiteSpace(raw) ? string.Empty : MakeAbsoluteUrl(pageUri, raw);
    }

    // ===================== TITLE CLEANUP (site suffix) ==============

    /// <summary>Removes a trailing "Scan xxx - MangaWorld" suffix and trims; blank input is returned as is (null becomes empty).</summary>
    private static string CleanTitleSuffix(string? t)
    {
        if (string.IsNullOrWhiteSpace(t))
            return t ?? string.Empty;

        return _mwSuffix.Replace(t, "").Trim();
    }

    // ===================== STATUS (extraction + mapping) ============

    /// <summary>Reads the status text of a series page and maps it; <c>Unreleased</c> when no status text is found.</summary>
    private static MangaReleaseStatus ResolveStatus(HtmlDocument doc)
    {
        var raw = ExtractItalianStatus(doc);
        return string.IsNullOrWhiteSpace(raw) ? MangaReleaseStatus.Unreleased : MapItalianStatus(raw);
    }

    /// <summary>
    /// Extracts the raw text that follows the "Stato" label on a series page, trying in order:
    /// the element right after a span labelled "Stato:", a label inside common info blocks,
    /// and finally a plain-text "Stato: value" match over the whole page text.
    /// </summary>
    /// <returns>The status text as found on the page, or <c>null</c> when nothing matches.</returns>
    private static string? ExtractItalianStatus(HtmlDocument doc)
    {
        // 1) Most common shape: a span with the label, followed by the value element.
        var node = doc.DocumentNode.SelectSingleNode("//span[normalize-space(text())='Stato:']/following-sibling::*[1]")
                   ?? doc.DocumentNode.SelectSingleNode("//span[contains(translate(., 'STATO', 'stato'), 'stato')]/following-sibling::*[1]");
        var direct = node?.InnerText?.Trim();
        if (!string.IsNullOrWhiteSpace(direct))
            return HtmlEntity.DeEntitize(direct);

        // 2) Assorted info blocks (tolerant to DOM changes).
        var blocks = doc.DocumentNode.SelectNodes("//*[contains(@class,'info') or contains(@class,'details') or contains(@class,'meta') or contains(@class,'attributes') or contains(@class,'list-group')]");
        if (blocks is not null)
        {
            foreach (var block in blocks)
            {
                var labels = block.SelectNodes(".//dt|.//li|.//div|.//span|.//strong");
                if (labels is null)
                    continue;

                foreach (var label in labels)
                {
                    var text = label.InnerText?.Trim();
                    if (string.IsNullOrEmpty(text) || !text.Contains("stato", StringComparison.OrdinalIgnoreCase))
                        continue;

                    var value = label.SelectSingleNode("./following-sibling::*[1]")?.InnerText?.Trim()
                                ?? label.ParentNode?.SelectSingleNode(".//a|.//span|.//strong")?.InnerText?.Trim();
                    if (!string.IsNullOrWhiteSpace(value))
                        return HtmlEntity.DeEntitize(value);
                }
            }
        }

        // 3) Raw text fallback.
        var body = doc.DocumentNode.InnerText ?? "";
        var m = _statusInText.Match(body);
        return m.Success ? m.Groups[1].Value.Trim() : null;
    }

    /// <summary>
    /// Maps a status text to a <see cref="MangaReleaseStatus"/> by keyword
    /// (case-insensitive, Italian and English terms). Unknown text maps to <c>Unreleased</c>.
    /// </summary>
    private static MangaReleaseStatus MapItalianStatus(string s)
    {
        var text = (s ?? "").Trim();

        foreach (var (status, keywords) in StatusKeywords)
        {
            if (keywords.Any(k => text.Contains(k, StringComparison.OrdinalIgnoreCase)))
                return status;
        }

        // Neutral fallback within the available enum values.
        return MangaReleaseStatus.Unreleased;
    }
}