From ad1d4dfe23a9a5b2815c73abcca43543698ff59b Mon Sep 17 00:00:00 2001 From: glax Date: Sat, 2 Sep 2023 21:52:48 +0200 Subject: [PATCH] Fixed naming errors containing Manga Added GetMangaFromUrl(url) to Mangaconnector --- Tranga/MangaConnectors/MangaConnector.cs | 4 +- Tranga/MangaConnectors/MangaDex.cs | 202 +++++++++++++---------- Tranga/MangaConnectors/MangaKatana.cs | 20 ++- Tranga/MangaConnectors/Manganato.cs | 21 ++- Tranga/MangaConnectors/Mangasee.cs | 80 ++++----- Tranga/Server.cs | 2 +- 6 files changed, 183 insertions(+), 146 deletions(-) diff --git a/Tranga/MangaConnectors/MangaConnector.cs b/Tranga/MangaConnectors/MangaConnector.cs index 4ebfa46..0fe756d 100644 --- a/Tranga/MangaConnectors/MangaConnector.cs +++ b/Tranga/MangaConnectors/MangaConnector.cs @@ -29,7 +29,9 @@ public abstract class MangaConnector : GlobalBase /// /// Search-Query /// Publications matching the query - public abstract Manga[] GetPublications(string publicationTitle = ""); + public abstract Manga[] GetManga(string publicationTitle = ""); + + public abstract Manga? GetMangaFromUrl(string url); /// /// Returns all Chapters of the publication in the provided language. diff --git a/Tranga/MangaConnectors/MangaDex.cs b/Tranga/MangaConnectors/MangaDex.cs index a484c20..afc84ef 100644 --- a/Tranga/MangaConnectors/MangaDex.cs +++ b/Tranga/MangaConnectors/MangaDex.cs @@ -1,8 +1,9 @@ using System.Globalization; using System.Net; -using System.Text.Json; using System.Text.Json.Nodes; +using System.Text.RegularExpressions; using Tranga.Jobs; +using JsonSerializer = System.Text.Json.JsonSerializer; namespace Tranga.MangaConnectors; public class MangaDex : MangaConnector @@ -31,13 +32,13 @@ public class MangaDex : MangaConnector }); } - public override Manga[] GetPublications(string publicationTitle = "") + public override Manga[] GetManga(string publicationTitle = "") { Log($"Searching Publications. Term=\"{publicationTitle}\""); const int limit = 100; //How many values we want returned at once int offset = 0; //"Page" int total = int.MaxValue; //How many total results are there, is updated on first request - HashSet publications = new(); + HashSet retManga = new(); int loadedPublicationData = 0; while (offset < total) //As long as we haven't requested all "Pages" { @@ -57,98 +58,119 @@ public class MangaDex : MangaConnector JsonArray mangaInResult = result["data"]!.AsArray(); //Manga-data-Array //Loop each Manga and extract information from JSON - foreach (JsonNode? mangeNode in mangaInResult) + foreach (JsonNode? mangaNode in mangaInResult) { Log($"Getting publication data. {++loadedPublicationData}/{total}"); - JsonObject manga = (JsonObject)mangeNode!; - JsonObject attributes = manga["attributes"]!.AsObject(); - - string publicationId = manga["id"]!.GetValue(); - - string title = attributes["title"]!.AsObject().ContainsKey("en") && attributes["title"]!["en"] is not null - ? attributes["title"]!["en"]!.GetValue() - : attributes["title"]![((IDictionary)attributes["title"]!.AsObject()).Keys.First()]!.GetValue(); - - string? description = attributes["description"]!.AsObject().ContainsKey("en") && attributes["description"]!["en"] is not null - ? attributes["description"]!["en"]!.GetValue() - : null; - - JsonArray altTitlesObject = attributes["altTitles"]!.AsArray(); - Dictionary altTitlesDict = new(); - foreach (JsonNode? altTitleNode in altTitlesObject) - { - JsonObject altTitleObject = (JsonObject)altTitleNode!; - string key = ((IDictionary)altTitleObject).Keys.ToArray()[0]; - altTitlesDict.TryAdd(key, altTitleObject[key]!.GetValue()); - } - - JsonArray tagsObject = attributes["tags"]!.AsArray(); - HashSet tags = new(); - foreach (JsonNode? tagNode in tagsObject) - { - JsonObject tagObject = (JsonObject)tagNode!; - if(tagObject["attributes"]!["name"]!.AsObject().ContainsKey("en")) - tags.Add(tagObject["attributes"]!["name"]!["en"]!.GetValue()); - } - - string? posterId = null; - HashSet authorIds = new(); - if (manga.ContainsKey("relationships") && manga["relationships"] is not null) - { - JsonArray relationships = manga["relationships"]!.AsArray(); - posterId = relationships.FirstOrDefault(relationship => relationship!["type"]!.GetValue() == "cover_art")!["id"]!.GetValue(); - foreach (JsonNode? node in relationships.Where(relationship => - relationship!["type"]!.GetValue() == "author")) - authorIds.Add(node!["id"]!.GetValue()); - } - string? coverUrl = GetCoverUrl(publicationId, posterId); - string? coverCacheName = null; - if (coverUrl is not null) - coverCacheName = SaveCoverImageToCache(coverUrl, (byte)RequestType.AtHomeServer); - - List authors = GetAuthors(authorIds); - - Dictionary linksDict = new(); - if (attributes.ContainsKey("links") && attributes["links"] is not null) - { - JsonObject linksObject = attributes["links"]!.AsObject(); - foreach (string key in ((IDictionary)linksObject).Keys) - { - linksDict.Add(key, linksObject[key]!.GetValue()); - } - } - - int? year = attributes.ContainsKey("year") && attributes["year"] is not null - ? attributes["year"]!.GetValue() - : null; - - string? originalLanguage = attributes.ContainsKey("originalLanguage") && attributes["originalLanguage"] is not null - ? attributes["originalLanguage"]!.GetValue() - : null; - - string status = attributes["status"]!.GetValue(); - - Manga pub = new ( - title, - authors, - description, - altTitlesDict, - tags.ToArray(), - coverUrl, - coverCacheName, - linksDict, - year, - originalLanguage, - status, - publicationId - ); - publications.Add(pub); //Add Publication (Manga) to result + Manga manga = MangaFromJsonObject((JsonObject)mangaNode); + retManga.Add(manga); //Add Publication (Manga) to result } } - cachedPublications.AddRange(publications); - Log($"Retrieved {publications.Count} publications. Term=\"{publicationTitle}\""); - return publications.ToArray(); + cachedPublications.AddRange(retManga); + Log($"Retrieved {retManga.Count} publications. Term=\"{publicationTitle}\""); + return retManga.ToArray(); + } + + public override Manga? GetMangaFromUrl(string url) + { + Regex idRex = new (@"https:\/\/mangadex.org\/title\/([A-z0-9-]*)\/.*"); + string id = idRex.Match(url).Value; + Log($"Got id {id} from {url}"); + DownloadClient.RequestResult requestResult = + downloadClient.MakeRequest($"https://api.mangadex.org/manga/{id}", (byte)RequestType.Manga); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + return null; + JsonObject? result = JsonSerializer.Deserialize(requestResult.result); + if(result is not null) + return MangaFromJsonObject(result); + return null; + } + + private Manga MangaFromJsonObject(JsonObject manga) + { + JsonObject attributes = manga["attributes"]!.AsObject(); + + string publicationId = manga["id"]!.GetValue(); + + string title = attributes["title"]!.AsObject().ContainsKey("en") && attributes["title"]!["en"] is not null + ? attributes["title"]!["en"]!.GetValue() + : attributes["title"]![((IDictionary)attributes["title"]!.AsObject()).Keys.First()]!.GetValue(); + + string? description = attributes["description"]!.AsObject().ContainsKey("en") && attributes["description"]!["en"] is not null + ? attributes["description"]!["en"]!.GetValue() + : null; + + JsonArray altTitlesObject = attributes["altTitles"]!.AsArray(); + Dictionary altTitlesDict = new(); + foreach (JsonNode? altTitleNode in altTitlesObject) + { + JsonObject altTitleObject = (JsonObject)altTitleNode!; + string key = ((IDictionary)altTitleObject).Keys.ToArray()[0]; + altTitlesDict.TryAdd(key, altTitleObject[key]!.GetValue()); + } + + JsonArray tagsObject = attributes["tags"]!.AsArray(); + HashSet tags = new(); + foreach (JsonNode? tagNode in tagsObject) + { + JsonObject tagObject = (JsonObject)tagNode!; + if(tagObject["attributes"]!["name"]!.AsObject().ContainsKey("en")) + tags.Add(tagObject["attributes"]!["name"]!["en"]!.GetValue()); + } + + string? posterId = null; + HashSet authorIds = new(); + if (manga.ContainsKey("relationships") && manga["relationships"] is not null) + { + JsonArray relationships = manga["relationships"]!.AsArray(); + posterId = relationships.FirstOrDefault(relationship => relationship!["type"]!.GetValue() == "cover_art")!["id"]!.GetValue(); + foreach (JsonNode? node in relationships.Where(relationship => + relationship!["type"]!.GetValue() == "author")) + authorIds.Add(node!["id"]!.GetValue()); + } + string? coverUrl = GetCoverUrl(publicationId, posterId); + string? coverCacheName = null; + if (coverUrl is not null) + coverCacheName = SaveCoverImageToCache(coverUrl, (byte)RequestType.AtHomeServer); + + List authors = GetAuthors(authorIds); + + Dictionary linksDict = new(); + if (attributes.ContainsKey("links") && attributes["links"] is not null) + { + JsonObject linksObject = attributes["links"]!.AsObject(); + foreach (string key in ((IDictionary)linksObject).Keys) + { + linksDict.Add(key, linksObject[key]!.GetValue()); + } + } + + int? year = attributes.ContainsKey("year") && attributes["year"] is not null + ? attributes["year"]!.GetValue() + : null; + + string? originalLanguage = + attributes.ContainsKey("originalLanguage") && attributes["originalLanguage"] is not null + ? attributes["originalLanguage"]!.GetValue() + : null; + + string status = attributes["status"]!.GetValue(); + + Manga pub = new( + title, + authors, + description, + altTitlesDict, + tags.ToArray(), + coverUrl, + coverCacheName, + linksDict, + year, + originalLanguage, + status, + publicationId + ); + return pub; } public override Chapter[] GetChapters(Manga manga, string language="en") diff --git a/Tranga/MangaConnectors/MangaKatana.cs b/Tranga/MangaConnectors/MangaKatana.cs index 7803aa0..51986c8 100644 --- a/Tranga/MangaConnectors/MangaKatana.cs +++ b/Tranga/MangaConnectors/MangaKatana.cs @@ -19,7 +19,7 @@ public class MangaKatana : MangaConnector }); } - public override Manga[] GetPublications(string publicationTitle = "") + public override Manga[] GetManga(string publicationTitle = "") { Log($"Searching Publications. Term=\"{publicationTitle}\""); string sanitizedTitle = string.Join('_', Regex.Matches(publicationTitle, "[A-z]*").Where(m => m.Value.Length > 0)).ToLower(); @@ -44,6 +44,15 @@ public class MangaKatana : MangaConnector return publications; } + public override Manga? GetMangaFromUrl(string url) + { + DownloadClient.RequestResult requestResult = + downloadClient.MakeRequest(url, 1); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + return null; + return ParseSinglePublicationFromHtml(requestResult.result, url.Split('/')[^1]); + } + private Manga[] ParsePublicationsFromHtml(Stream html) { StreamReader reader = new(html); @@ -63,12 +72,9 @@ public class MangaKatana : MangaConnector HashSet ret = new(); foreach (string url in urls) { - DownloadClient.RequestResult requestResult = - downloadClient.MakeRequest(url, 1); - if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) - return Array.Empty(); - - ret.Add(ParseSinglePublicationFromHtml(requestResult.result, url.Split('/')[^1])); + Manga? manga = GetMangaFromUrl(url); + if (manga is not null) + ret.Add((Manga)manga); } return ret.ToArray(); diff --git a/Tranga/MangaConnectors/Manganato.cs b/Tranga/MangaConnectors/Manganato.cs index b22b17f..f2d6342 100644 --- a/Tranga/MangaConnectors/Manganato.cs +++ b/Tranga/MangaConnectors/Manganato.cs @@ -19,7 +19,7 @@ public class Manganato : MangaConnector }); } - public override Manga[] GetPublications(string publicationTitle = "") + public override Manga[] GetManga(string publicationTitle = "") { Log($"Searching Publications. Term=\"{publicationTitle}\""); string sanitizedTitle = string.Join('_', Regex.Matches(publicationTitle, "[A-z]*")).ToLower(); @@ -52,17 +52,24 @@ public class Manganato : MangaConnector HashSet ret = new(); foreach (string url in urls) { - DownloadClient.RequestResult requestResult = - downloadClient.MakeRequest(url, 1); - if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) - return Array.Empty(); - - ret.Add(ParseSinglePublicationFromHtml(requestResult.result, url.Split('/')[^1])); + Manga? manga = GetMangaFromUrl(url); + if (manga is not null) + ret.Add((Manga)manga); } return ret.ToArray(); } + public override Manga? GetMangaFromUrl(string url) + { + DownloadClient.RequestResult requestResult = + downloadClient.MakeRequest(url, 1); + if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) + return null; + + return ParseSinglePublicationFromHtml(requestResult.result, url.Split('/')[^1]); + } + private Manga ParseSinglePublicationFromHtml(Stream html, string publicationId) { StreamReader reader = new (html); diff --git a/Tranga/MangaConnectors/Mangasee.cs b/Tranga/MangaConnectors/Mangasee.cs index 0932436..d2e16a1 100644 --- a/Tranga/MangaConnectors/Mangasee.cs +++ b/Tranga/MangaConnectors/Mangasee.cs @@ -69,7 +69,7 @@ public class Mangasee : MangaConnector }); } - public override Manga[] GetPublications(string publicationTitle = "") + public override Manga[] GetManga(string publicationTitle = "") { Log($"Searching Publications. Term=\"{publicationTitle}\""); string requestUrl = $"https://mangasee123.com/_search.php"; @@ -84,6 +84,28 @@ public class Mangasee : MangaConnector return publications; } + public override Manga? GetMangaFromUrl(string url) + { + while (this._browser is null) + { + Log("Waiting for headless browser to download..."); + Thread.Sleep(1000); + } + + + IPage page = _browser!.NewPageAsync().Result; + IResponse response = page.GoToAsync(url, WaitUntilNavigation.DOMContentLoaded).Result; + if (response.Ok) + { + HtmlDocument document = new(); + document.LoadHtml(page.GetContentAsync().Result); + page.CloseAsync(); + return ParseSinglePublicationFromHtml(document); + } + + return null; + } + private Manga[] ParsePublicationsFromHtml(Stream html, string publicationTitle) { string jsonString = new StreamReader(html).ReadToEnd(); @@ -105,73 +127,51 @@ public class Mangasee : MangaConnector List orderedFiltered = queryFiltered.OrderBy(item => item.Value).ToDictionary(item => item.Key, item => item.Value).Keys.ToList(); - uint index = 1; foreach (SearchResultItem orderedItem in orderedFiltered) { - DownloadClient.RequestResult requestResult = - downloadClient.MakeRequest($"https://mangasee123.com/manga/{orderedItem.i}", 1); - if ((int)requestResult.statusCode >= 200 || (int)requestResult.statusCode < 300) - { - Log($"Retrieving Publication info: {orderedItem.s} {index++}/{orderedFiltered.Count}"); - ret.Add(ParseSinglePublicationFromHtml(requestResult.result, orderedItem.s, orderedItem.i, orderedItem.a)); - } + Manga? manga = GetMangaFromUrl($"https://mangasee123.com/manga/{orderedItem.i}"); + if (manga is not null) + ret.Add((Manga)manga); } return ret.ToArray(); } - private Manga ParseSinglePublicationFromHtml(Stream html, string sortName, string publicationId, string[] a) + private Manga ParseSinglePublicationFromHtml(HtmlDocument document) { - StreamReader reader = new (html); - HtmlDocument document = new (); - document.LoadHtml(reader.ReadToEnd()); - string originalLanguage = "", status = ""; Dictionary altTitles = new(), links = new(); HashSet tags = new(); - HtmlNode posterNode = - document.DocumentNode.Descendants("img").First(img => img.HasClass("img-fluid") && img.HasClass("bottom-5")); + HtmlNode posterNode = document.DocumentNode.SelectSingleNode("//div[@class='BoxBody']//div[@class='row']//img"); string posterUrl = posterNode.GetAttributeValue("src", ""); string coverFileNameInCache = SaveCoverImageToCache(posterUrl, 1); - HtmlNode attributes = document.DocumentNode.Descendants("div") - .First(div => div.HasClass("col-md-9") && div.HasClass("col-sm-8") && div.HasClass("top-5")) - .Descendants("ul").First(); + HtmlNode titleNode = document.DocumentNode.SelectSingleNode("//div[@class='BoxBody']//div[@class='row']//h1"); + string title = titleNode.InnerText; + string publicationId = title; - HtmlNode[] authorsNodes = attributes.Descendants("li") - .First(node => node.InnerText.Contains("author(s):", StringComparison.CurrentCultureIgnoreCase)) - .Descendants("a").ToArray(); + HtmlNode[] authorsNodes = document.DocumentNode.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Author(s):']/..").Descendants("a").ToArray(); List authors = new(); foreach(HtmlNode authorNode in authorsNodes) authors.Add(authorNode.InnerText); - - HtmlNode[] genreNodes = attributes.Descendants("li") - .First(node => node.InnerText.Contains("genre(s):", StringComparison.CurrentCultureIgnoreCase)) - .Descendants("a").ToArray(); + + HtmlNode[] genreNodes = document.DocumentNode.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Genre(s):']/..").Descendants("a").ToArray(); foreach (HtmlNode genreNode in genreNodes) tags.Add(genreNode.InnerText); - - HtmlNode yearNode = attributes.Descendants("li") - .First(node => node.InnerText.Contains("released:", StringComparison.CurrentCultureIgnoreCase)) - .Descendants("a").First(); + + HtmlNode yearNode = document.DocumentNode.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Released:']/..").Descendants("a").First(); int year = Convert.ToInt32(yearNode.InnerText); - - HtmlNode[] statusNodes = attributes.Descendants("li") - .First(node => node.InnerText.Contains("status:", StringComparison.CurrentCultureIgnoreCase)) - .Descendants("a").ToArray(); + + HtmlNode[] statusNodes = document.DocumentNode.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Status:']/..").Descendants("a").ToArray(); foreach(HtmlNode statusNode in statusNodes) if (statusNode.InnerText.Contains("publish", StringComparison.CurrentCultureIgnoreCase)) status = statusNode.InnerText.Split(' ')[0]; - HtmlNode descriptionNode = attributes.Descendants("li").First(node => node.InnerText.Contains("description:", StringComparison.CurrentCultureIgnoreCase)).Descendants("div").First(); + HtmlNode descriptionNode = document.DocumentNode.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Description:']/..").Descendants("div").First(); string description = descriptionNode.InnerText; - int i = 0; - foreach(string at in a) - altTitles.Add((i++).ToString(), at); - - return new Manga(sortName, authors, description, altTitles, tags.ToArray(), posterUrl, coverFileNameInCache, links, + return new Manga(title, authors, description, altTitles, tags.ToArray(), posterUrl, coverFileNameInCache, links, year, originalLanguage, status, publicationId); } diff --git a/Tranga/Server.cs b/Tranga/Server.cs index f5ef7c7..544ada1 100644 --- a/Tranga/Server.cs +++ b/Tranga/Server.cs @@ -141,7 +141,7 @@ public class Server : GlobalBase SendResponse(HttpStatusCode.BadRequest, response); break; } - SendResponse(HttpStatusCode.OK, response, connector!.GetPublications(title)); + SendResponse(HttpStatusCode.OK, response, connector!.GetManga(title)); break; case "Manga/Chapters": if(!requestVariables.TryGetValue("connector", out connectorName) ||