diff --git a/API/Program.cs b/API/Program.cs
index 3efffca..cc90c1f 100644
--- a/API/Program.cs
+++ b/API/Program.cs
@@ -103,7 +103,8 @@ using (var scope = app.Services.CreateScope())
         new Manganato(),
         new Mangasee(),
         new Mangaworld(),
-        new ManhuaPlus()
+        new ManhuaPlus(),
+        new Weebcentral()
     ];
     MangaConnector[] newConnectors = context.MangaConnectors.Where(c => !connectors.Contains(c)).ToArray();
     context.MangaConnectors.AddRange(newConnectors);
diff --git a/API/Schema/MangaConnectors/ManhuaPlus.cs b/API/Schema/MangaConnectors/ManhuaPlus.cs
index efa8c5d..8525473 100644
--- a/API/Schema/MangaConnectors/ManhuaPlus.cs
+++ b/API/Schema/MangaConnectors/ManhuaPlus.cs
@@ -18,11 +18,9 @@ public class ManhuaPlus : MangaConnector
         string requestUrl = $"https://manhuaplus.org/search?keyword={sanitizedTitle}";
         RequestResult requestResult =
             downloadClient.MakeRequest(requestUrl, RequestType.Default);
-        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300)
-            return Array.Empty<Manga>();
-
-        if (requestResult.htmlDocument is null)
-            return Array.Empty<Manga>();
+        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 || requestResult.htmlDocument is null)
+            return [];
+
         Manga[] publications = ParsePublicationsFromHtml(requestResult.htmlDocument);
         return publications;
     }
diff --git a/API/Schema/MangaConnectors/WeebCentral.cs b/API/Schema/MangaConnectors/WeebCentral.cs
new file mode 100644
index 0000000..1c178ed
--- /dev/null
+++ b/API/Schema/MangaConnectors/WeebCentral.cs
@@ -0,0 +1,246 @@
+using System.Globalization;
+using System.Text.RegularExpressions;
+using API.MangaDownloadClients;
+using HtmlAgilityPack;
+using Soenneker.Utils.String.NeedlemanWunsch;
+
+namespace API.Schema.MangaConnectors;
+
+/// <summary>
+/// Connector for weebcentral.com. Searches series, reads series pages, and lists chapters and
+/// chapter images by parsing the HTML returned by the download client.
+/// </summary>
+public class Weebcentral : MangaConnector
+{
+    private readonly string _baseUrl = "https://weebcentral.com";
+
+    // Words removed from titles before FilteredResults compares them.
+    private readonly string[] _filterWords =
+        { "a", "the", "of", "as", "to", "no", "for", "on", "with", "be", "and", "in", "wa", "at", "ni" };
+
+    // Captures the series id. The "/slug" tail is optional so that the slug-less URLs built by
+    // GetMangaFromId still yield an id instead of an empty string.
+    private static readonly Regex PublicationIdRex =
+        new(@"https:\/\/weebcentral\.com\/series\/(\w*)(?:\/.*)?");
+
+    // Captures the chapter number of a chapter label, including an optional decimal part.
+    private static readonly Regex ChapterRex = new(@".* (\d+(?:\.\d+)?)");
+
+    public Weebcentral() : base("Weebcentral", ["en"], ["https://weebcentral.com"])
+    {
+        downloadClient = new ChromiumDownloadClient();
+    }
+
+    /// <summary>Searches the site for series matching <paramref name="publicationTitle"/>.</summary>
+    /// <returns>Every series parsed from the result page; empty when the request fails.</returns>
+    public override Manga[] GetManga(string publicationTitle = "")
+    {
+        const int limit = 32; //How many values we want returned at once
+        const int offset = 0; //"Page"
+        // The title is free text: escape it so spaces, '&' or '#' cannot break the query string.
+        var searchText = Uri.EscapeDataString(publicationTitle ?? "");
+        var requestUrl =
+            $"{_baseUrl}/search/data?limit={limit}&offset={offset}&text={searchText}&sort=Best+Match&order=Ascending&official=Any&display_mode=Minimal%20Display";
+        var requestResult = downloadClient.MakeRequest(requestUrl, RequestType.Default);
+        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 ||
+            requestResult.htmlDocument is null)
+            return [];
+
+        return ParsePublicationsFromHtml(requestResult.htmlDocument);
+    }
+
+    /// <summary>Follows every series link of a search result page and parses each series.</summary>
+    private Manga[] ParsePublicationsFromHtml(HtmlDocument document)
+    {
+        // SelectNodes returns null, not an empty collection, when nothing matches.
+        var linkNodes =
+            document.DocumentNode.SelectNodes("/html/body/article/a[@class='link link-hover']");
+        if (linkNodes is null)
+            return [];
+
+        HashSet<Manga> ret = new();
+        foreach (var linkNode in linkNodes)
+        {
+            var url = linkNode.GetAttributeValue("href", "");
+            if (url.Length == 0)
+                continue; // nothing to request for a link without a target
+
+            if (GetMangaFromUrl(url) is { } manga)
+                ret.Add(manga);
+        }
+
+        return ret.ToArray();
+    }
+
+    /// <summary>Downloads the series page at <paramref name="url"/> and parses it.</summary>
+    /// <returns>The parsed series, or null when the request fails.</returns>
+    public override Manga? GetMangaFromUrl(string url)
+    {
+        var publicationId = PublicationIdRex.Match(url).Groups[1].Value;
+
+        var requestResult = downloadClient.MakeRequest(url, RequestType.MangaInfo);
+        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 ||
+            requestResult.htmlDocument is null)
+            return null;
+
+        return ParseSinglePublicationFromHtml(requestResult.htmlDocument, publicationId, url);
+    }
+
+    /// <summary>Reads the metadata fields of a series page.</summary>
+    private Manga ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl)
+    {
+        var root = document.DocumentNode;
+
+        var posterNode =
+            root.SelectSingleNode("//section[@class='flex items-center justify-center']/picture/img");
+        var posterUrl = posterNode?.GetAttributeValue("src", "") ?? "";
+
+        var sortName = root.SelectSingleNode("//section/h1")?.InnerText ?? "Undefined";
+
+        HtmlNode[] authorsNodes =
+            root.SelectNodes("//ul/li[strong/text() = 'Author(s): ']/span")?.ToArray() ?? [];
+        var authors = authorsNodes.Select(n => n.InnerText).ToList();
+
+        HtmlNode[] genreNodes =
+            root.SelectNodes("//ul/li[strong/text() = 'Tags(s): ']/span")?.ToArray() ?? [];
+        HashSet<string> tags = genreNodes.Select(n => n.InnerText).ToHashSet();
+
+        var status = root.SelectSingleNode("//ul/li[strong/text() = 'Status: ']/a")?.InnerText ?? "";
+        // ToLowerInvariant keeps the comparison independent of the host culture.
+        var releaseStatus = status.ToLowerInvariant() switch
+        {
+            "cancelled" => MangaReleaseStatus.Cancelled,
+            "hiatus" => MangaReleaseStatus.OnHiatus,
+            "complete" => MangaReleaseStatus.Completed,
+            "ongoing" => MangaReleaseStatus.Continuing,
+            _ => MangaReleaseStatus.Unreleased
+        };
+
+        // TryParse leaves year at 0 for a missing or non-numeric value, where Convert.ToInt32 threw.
+        var yearText = root.SelectSingleNode("//ul/li[strong/text() = 'Released: ']/span")?.InnerText;
+        int.TryParse(yearText, NumberStyles.Integer, CultureInfo.InvariantCulture, out var year);
+
+        var description =
+            root.SelectSingleNode("//ul/li[strong/text() = 'Description']/p")?.InnerText ?? "Undefined";
+
+        HtmlNode[] altTitleNodes =
+            root.SelectNodes("//ul/li[strong/text() = 'Associated Name(s)']/ul/li")?.ToArray() ?? [];
+        Dictionary<string, string> altTitles = new(), links = new();
+        for (var i = 0; i < altTitleNodes.Length; i++)
+            altTitles.Add(i.ToString(), altTitleNodes[i].InnerText);
+
+        var originalLanguage = "";
+
+        // Still open from the original change: build the Manga from the values parsed above.
+        // The Manga constructor is not visible in this patch, so the call is left for the author.
+        Manga manga = //TODO
+        return manga;
+    }
+
+    /// <summary>Loads a series by its site id.</summary>
+    public override Manga? GetMangaFromId(string publicationId)
+    {
+        return GetMangaFromUrl($"{_baseUrl}/series/{publicationId}");
+    }
+
+    /// <summary>Lower-cases a title and drops the words listed in _filterWords.</summary>
+    private string ToFilteredString(string input)
+    {
+        return string.Join(' ',
+            input.ToLowerInvariant().Split(' ').Where(word => !_filterWords.Contains(word)));
+    }
+
+    /// <summary>Scores every result against the searched title and returns ten of them.</summary>
+    private SearchResult[] FilteredResults(string publicationTitle, SearchResult[] unfilteredSearchResults)
+    {
+        // The filtered search title is the same for every result, so it is computed once.
+        var filteredPublicationString = ToFilteredString(publicationTitle);
+
+        Dictionary<SearchResult, double> similarity = new();
+        foreach (var sr in unfilteredSearchResults)
+        {
+            // One score for the main title plus one per alternative title.
+            List<double> scores = new()
+            {
+                NeedlemanWunschStringUtil.CalculateSimilarity(ToFilteredString(sr.s), filteredPublicationString)
+            };
+            foreach (var srA in sr.a)
+                scores.Add(NeedlemanWunschStringUtil.CalculateSimilarity(ToFilteredString(srA),
+                    filteredPublicationString));
+
+            similarity.Add(sr, scores.Sum() / scores.Count);
+        }
+
+        // NOTE(review): OrderBy is ascending, so this keeps the ten lowest scores. Confirm that a
+        // lower CalculateSimilarity value means a closer match.
+        return similarity.OrderBy(s => s.Value).Take(10).Select(s => s.Key).ToArray();
+    }
+
+    /// <summary>Lists the chapters of <paramref name="manga"/>, ordered by chapter number.</summary>
+    /// <returns>The parsed chapters; empty when the request fails.</returns>
+    public override Chapter[] GetChapters(Manga manga, string language = "en")
+    {
+        var requestUrl = $"{_baseUrl}/series/{manga.MangaId}/full-chapter-list";
+        var requestResult = downloadClient.MakeRequest(requestUrl, RequestType.Default);
+        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 ||
+            requestResult.htmlDocument is null)
+            return [];
+
+        //Return Chapters ordered by Chapter-Number
+        return ParseChaptersFromHtml(manga, requestResult.htmlDocument).Order().ToArray();
+    }
+
+    /// <summary>Builds a chapter for every absolute link that carries a readable chapter number.</summary>
+    private List<Chapter> ParseChaptersFromHtml(Manga manga, HtmlDocument document)
+    {
+        var chaptersWrapper = document.DocumentNode.SelectSingleNode("/html/body");
+        if (chaptersWrapper is null)
+            return [];
+
+        List<Chapter> ret = new();
+        foreach (var elem in chaptersWrapper.Descendants("a"))
+        {
+            var url = elem.GetAttributeValue("href", "");
+            if (!url.StartsWith("https://", StringComparison.Ordinal) &&
+                !url.StartsWith("http://", StringComparison.Ordinal))
+                continue; // only absolute links are kept
+
+            var chapterLabel =
+                elem.SelectSingleNode("span[@class='grow flex items-center gap-2']/span")?.InnerText ??
+                "Undefined";
+
+            // Links without a parsable chapter number are skipped.
+            var chapterNumberMatch = ChapterRex.Match(chapterLabel);
+            if (!chapterNumberMatch.Success ||
+                !float.TryParse(chapterNumberMatch.Groups[1].Value, NumberStyles.Float,
+                    CultureInfo.InvariantCulture, out var chapterNumber))
+                continue;
+
+            ret.Add(new Chapter(manga, url, chapterNumber, null, null));
+        }
+
+        ret.Reverse();
+        return ret;
+    }
+
+    /// <summary>Collects the image URLs of a chapter page.</summary>
+    /// <returns>The src of every matching image; empty when the request fails.</returns>
+    internal override string[] GetChapterImageUrls(Chapter chapter)
+    {
+        var requestResult = downloadClient.MakeRequest(chapter.Url, RequestType.Default);
+        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 ||
+            requestResult.htmlDocument is null)
+            return [];
+
+        var imageNodes = requestResult.htmlDocument.DocumentNode
+            .SelectNodes($"//section[@hx-get='{chapter.Url}/images']/img")?.ToArray() ?? [];
+        return imageNodes.Select(imgNode => imgNode.GetAttributeValue("src", "")).ToArray();
+    }
+
+    private struct SearchResult
+    {
+        public string i { get; set; }
+        public string s { get; set; }
+        public string[] a { get; set; }
+    }
+}
\ No newline at end of file