Fixed naming errors in identifiers containing Manga

Added GetMangaFromUrl(url) to MangaConnector
This commit is contained in:
glax 2023-09-02 21:52:48 +02:00
parent 14ba71005f
commit ad1d4dfe23
6 changed files with 183 additions and 146 deletions

View File

@ -29,7 +29,9 @@ public abstract class MangaConnector : GlobalBase
/// </summary> /// </summary>
/// <param name="publicationTitle">Search-Query</param> /// <param name="publicationTitle">Search-Query</param>
/// <returns>Publications matching the query</returns> /// <returns>Publications matching the query</returns>
public abstract Manga[] GetPublications(string publicationTitle = ""); public abstract Manga[] GetManga(string publicationTitle = "");
/// <summary>
/// Retrieves a single Manga identified by the URL of its page.
/// </summary>
/// <param name="url">URL identifying the Manga</param>
/// <returns>The Manga, or null if it could not be retrieved</returns>
public abstract Manga? GetMangaFromUrl(string url);
/// <summary> /// <summary>
/// Returns all Chapters of the publication in the provided language. /// Returns all Chapters of the publication in the provided language.

View File

@ -1,8 +1,9 @@
using System.Globalization; using System.Globalization;
using System.Net; using System.Net;
using System.Text.Json;
using System.Text.Json.Nodes; using System.Text.Json.Nodes;
using System.Text.RegularExpressions;
using Tranga.Jobs; using Tranga.Jobs;
using JsonSerializer = System.Text.Json.JsonSerializer;
namespace Tranga.MangaConnectors; namespace Tranga.MangaConnectors;
public class MangaDex : MangaConnector public class MangaDex : MangaConnector
@ -31,13 +32,13 @@ public class MangaDex : MangaConnector
}); });
} }
public override Manga[] GetPublications(string publicationTitle = "") public override Manga[] GetManga(string publicationTitle = "")
{ {
Log($"Searching Publications. Term=\"{publicationTitle}\""); Log($"Searching Publications. Term=\"{publicationTitle}\"");
const int limit = 100; //How many values we want returned at once const int limit = 100; //How many values we want returned at once
int offset = 0; //"Page" int offset = 0; //"Page"
int total = int.MaxValue; //How many total results are there, is updated on first request int total = int.MaxValue; //How many total results are there, is updated on first request
HashSet<Manga> publications = new(); HashSet<Manga> retManga = new();
int loadedPublicationData = 0; int loadedPublicationData = 0;
while (offset < total) //As long as we haven't requested all "Pages" while (offset < total) //As long as we haven't requested all "Pages"
{ {
@ -57,10 +58,36 @@ public class MangaDex : MangaConnector
JsonArray mangaInResult = result["data"]!.AsArray(); //Manga-data-Array JsonArray mangaInResult = result["data"]!.AsArray(); //Manga-data-Array
//Loop each Manga and extract information from JSON //Loop each Manga and extract information from JSON
foreach (JsonNode? mangeNode in mangaInResult) foreach (JsonNode? mangaNode in mangaInResult)
{ {
Log($"Getting publication data. {++loadedPublicationData}/{total}"); Log($"Getting publication data. {++loadedPublicationData}/{total}");
JsonObject manga = (JsonObject)mangeNode!; Manga manga = MangaFromJsonObject((JsonObject)mangaNode);
retManga.Add(manga); //Add Publication (Manga) to result
}
}
cachedPublications.AddRange(retManga);
Log($"Retrieved {retManga.Count} publications. Term=\"{publicationTitle}\"");
return retManga.ToArray();
}
/// <summary>
/// Looks up a single Manga on MangaDex from its title-page URL.
/// </summary>
/// <param name="url">Title URL of the form https://mangadex.org/title/{id}, optionally followed by further path segments</param>
/// <returns>
/// The parsed Manga, or null if no id could be extracted from the URL, the API request
/// did not return a 2xx status, or the response did not contain a manga object
/// </returns>
public override Manga? GetMangaFromUrl(string url)
{
    // Only the id segment is captured; anything following it (e.g. a title slug) is ignored.
    Regex idRex = new(@"https:\/\/mangadex\.org\/title\/([A-Za-z0-9-]+)");
    Match idMatch = idRex.Match(url);
    if (!idMatch.Success)
    {
        Log($"Could not extract id from {url}");
        return null;
    }

    // Groups[1] is the captured id; Match.Value would be the entire matched URL.
    string id = idMatch.Groups[1].Value;
    Log($"Got id {id} from {url}");

    DownloadClient.RequestResult requestResult =
        downloadClient.MakeRequest($"https://api.mangadex.org/manga/{id}", (byte)RequestType.Manga);
    if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300)
        return null;

    JsonObject? result = JsonSerializer.Deserialize<JsonObject>(requestResult.result);
    // MangaFromJsonObject reads "id" and "attributes" from the manga entity itself,
    // which the API nests under the top-level "data" member of the response.
    if (result?["data"] is JsonObject mangaData)
        return MangaFromJsonObject(mangaData);
    return null;
}
private Manga MangaFromJsonObject(JsonObject manga)
{
JsonObject attributes = manga["attributes"]!.AsObject(); JsonObject attributes = manga["attributes"]!.AsObject();
string publicationId = manga["id"]!.GetValue<string>(); string publicationId = manga["id"]!.GetValue<string>();
@ -122,7 +149,8 @@ public class MangaDex : MangaConnector
? attributes["year"]!.GetValue<int?>() ? attributes["year"]!.GetValue<int?>()
: null; : null;
string? originalLanguage = attributes.ContainsKey("originalLanguage") && attributes["originalLanguage"] is not null string? originalLanguage =
attributes.ContainsKey("originalLanguage") && attributes["originalLanguage"] is not null
? attributes["originalLanguage"]!.GetValue<string?>() ? attributes["originalLanguage"]!.GetValue<string?>()
: null; : null;
@ -142,13 +170,7 @@ public class MangaDex : MangaConnector
status, status,
publicationId publicationId
); );
publications.Add(pub); //Add Publication (Manga) to result return pub;
}
}
cachedPublications.AddRange(publications);
Log($"Retrieved {publications.Count} publications. Term=\"{publicationTitle}\"");
return publications.ToArray();
} }
public override Chapter[] GetChapters(Manga manga, string language="en") public override Chapter[] GetChapters(Manga manga, string language="en")

View File

@ -19,7 +19,7 @@ public class MangaKatana : MangaConnector
}); });
} }
public override Manga[] GetPublications(string publicationTitle = "") public override Manga[] GetManga(string publicationTitle = "")
{ {
Log($"Searching Publications. Term=\"{publicationTitle}\""); Log($"Searching Publications. Term=\"{publicationTitle}\"");
string sanitizedTitle = string.Join('_', Regex.Matches(publicationTitle, "[A-z]*").Where(m => m.Value.Length > 0)).ToLower(); string sanitizedTitle = string.Join('_', Regex.Matches(publicationTitle, "[A-z]*").Where(m => m.Value.Length > 0)).ToLower();
@ -44,6 +44,15 @@ public class MangaKatana : MangaConnector
return publications; return publications;
} }
/// <summary>
/// Requests the page at <paramref name="url"/> and parses it into a Manga.
/// </summary>
/// <param name="url">URL of the manga page; its last '/'-separated segment is passed on as the publication id</param>
/// <returns>The parsed Manga, or null when the response status is outside 200-299</returns>
public override Manga? GetMangaFromUrl(string url)
{
    DownloadClient.RequestResult response = downloadClient.MakeRequest(url, 1);

    int status = (int)response.statusCode;
    bool isSuccess = status >= 200 && status < 300;
    if (!isSuccess)
        return null;

    string publicationId = url.Split('/')[^1];
    return ParseSinglePublicationFromHtml(response.result, publicationId);
}
private Manga[] ParsePublicationsFromHtml(Stream html) private Manga[] ParsePublicationsFromHtml(Stream html)
{ {
StreamReader reader = new(html); StreamReader reader = new(html);
@ -63,12 +72,9 @@ public class MangaKatana : MangaConnector
HashSet<Manga> ret = new(); HashSet<Manga> ret = new();
foreach (string url in urls) foreach (string url in urls)
{ {
DownloadClient.RequestResult requestResult = Manga? manga = GetMangaFromUrl(url);
downloadClient.MakeRequest(url, 1); if (manga is not null)
if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) ret.Add((Manga)manga);
return Array.Empty<Manga>();
ret.Add(ParseSinglePublicationFromHtml(requestResult.result, url.Split('/')[^1]));
} }
return ret.ToArray(); return ret.ToArray();

View File

@ -19,7 +19,7 @@ public class Manganato : MangaConnector
}); });
} }
public override Manga[] GetPublications(string publicationTitle = "") public override Manga[] GetManga(string publicationTitle = "")
{ {
Log($"Searching Publications. Term=\"{publicationTitle}\""); Log($"Searching Publications. Term=\"{publicationTitle}\"");
string sanitizedTitle = string.Join('_', Regex.Matches(publicationTitle, "[A-z]*")).ToLower(); string sanitizedTitle = string.Join('_', Regex.Matches(publicationTitle, "[A-z]*")).ToLower();
@ -52,17 +52,24 @@ public class Manganato : MangaConnector
HashSet<Manga> ret = new(); HashSet<Manga> ret = new();
foreach (string url in urls) foreach (string url in urls)
{ {
DownloadClient.RequestResult requestResult = Manga? manga = GetMangaFromUrl(url);
downloadClient.MakeRequest(url, 1); if (manga is not null)
if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300) ret.Add((Manga)manga);
return Array.Empty<Manga>();
ret.Add(ParseSinglePublicationFromHtml(requestResult.result, url.Split('/')[^1]));
} }
return ret.ToArray(); return ret.ToArray();
} }
/// <summary>
/// Downloads the page behind <paramref name="url"/> and converts it into a Manga.
/// </summary>
/// <param name="url">URL of the manga page; the text after its final '/' is used as the publication id</param>
/// <returns>The parsed Manga, or null if the request did not answer with a 2xx status code</returns>
public override Manga? GetMangaFromUrl(string url)
{
    DownloadClient.RequestResult pageRequest = downloadClient.MakeRequest(url, 1);
    int httpStatus = (int)pageRequest.statusCode;

    // Anything outside the 2xx range is treated as "not found".
    if (httpStatus is < 200 or >= 300)
        return null;

    string[] urlSegments = url.Split('/');
    return ParseSinglePublicationFromHtml(pageRequest.result, urlSegments[^1]);
}
private Manga ParseSinglePublicationFromHtml(Stream html, string publicationId) private Manga ParseSinglePublicationFromHtml(Stream html, string publicationId)
{ {
StreamReader reader = new (html); StreamReader reader = new (html);

View File

@ -69,7 +69,7 @@ public class Mangasee : MangaConnector
}); });
} }
public override Manga[] GetPublications(string publicationTitle = "") public override Manga[] GetManga(string publicationTitle = "")
{ {
Log($"Searching Publications. Term=\"{publicationTitle}\""); Log($"Searching Publications. Term=\"{publicationTitle}\"");
string requestUrl = $"https://mangasee123.com/_search.php"; string requestUrl = $"https://mangasee123.com/_search.php";
@ -84,6 +84,28 @@ public class Mangasee : MangaConnector
return publications; return publications;
} }
/// <summary>
/// Opens <paramref name="url"/> in the headless browser and parses the loaded page into a Manga.
/// </summary>
/// <param name="url">URL of the manga page</param>
/// <returns>The parsed Manga, or null if the navigation did not produce an OK response</returns>
public override Manga? GetMangaFromUrl(string url)
{
    // Block until the browser instance has been assigned.
    while (this._browser is null)
    {
        Log("Waiting for headless browser to download...");
        Thread.Sleep(1000);
    }

    IPage page = _browser!.NewPageAsync().Result;
    try
    {
        IResponse response = page.GoToAsync(url, WaitUntilNavigation.DOMContentLoaded).Result;
        if (response is not { Ok: true })
            return null;

        HtmlDocument document = new();
        document.LoadHtml(page.GetContentAsync().Result);
        return ParseSinglePublicationFromHtml(document);
    }
    finally
    {
        // Close the page on every exit path (failed response or exception included),
        // so unsuccessful lookups do not leave pages open in the browser.
        page.CloseAsync();
    }
}
private Manga[] ParsePublicationsFromHtml(Stream html, string publicationTitle) private Manga[] ParsePublicationsFromHtml(Stream html, string publicationTitle)
{ {
string jsonString = new StreamReader(html).ReadToEnd(); string jsonString = new StreamReader(html).ReadToEnd();
@ -105,73 +127,51 @@ public class Mangasee : MangaConnector
List<SearchResultItem> orderedFiltered = List<SearchResultItem> orderedFiltered =
queryFiltered.OrderBy(item => item.Value).ToDictionary(item => item.Key, item => item.Value).Keys.ToList(); queryFiltered.OrderBy(item => item.Value).ToDictionary(item => item.Key, item => item.Value).Keys.ToList();
uint index = 1;
foreach (SearchResultItem orderedItem in orderedFiltered) foreach (SearchResultItem orderedItem in orderedFiltered)
{ {
DownloadClient.RequestResult requestResult = Manga? manga = GetMangaFromUrl($"https://mangasee123.com/manga/{orderedItem.i}");
downloadClient.MakeRequest($"https://mangasee123.com/manga/{orderedItem.i}", 1); if (manga is not null)
if ((int)requestResult.statusCode >= 200 || (int)requestResult.statusCode < 300) ret.Add((Manga)manga);
{
Log($"Retrieving Publication info: {orderedItem.s} {index++}/{orderedFiltered.Count}");
ret.Add(ParseSinglePublicationFromHtml(requestResult.result, orderedItem.s, orderedItem.i, orderedItem.a));
}
} }
return ret.ToArray(); return ret.ToArray();
} }
private Manga ParseSinglePublicationFromHtml(Stream html, string sortName, string publicationId, string[] a) private Manga ParseSinglePublicationFromHtml(HtmlDocument document)
{ {
StreamReader reader = new (html);
HtmlDocument document = new ();
document.LoadHtml(reader.ReadToEnd());
string originalLanguage = "", status = ""; string originalLanguage = "", status = "";
Dictionary<string, string> altTitles = new(), links = new(); Dictionary<string, string> altTitles = new(), links = new();
HashSet<string> tags = new(); HashSet<string> tags = new();
HtmlNode posterNode = HtmlNode posterNode = document.DocumentNode.SelectSingleNode("//div[@class='BoxBody']//div[@class='row']//img");
document.DocumentNode.Descendants("img").First(img => img.HasClass("img-fluid") && img.HasClass("bottom-5"));
string posterUrl = posterNode.GetAttributeValue("src", ""); string posterUrl = posterNode.GetAttributeValue("src", "");
string coverFileNameInCache = SaveCoverImageToCache(posterUrl, 1); string coverFileNameInCache = SaveCoverImageToCache(posterUrl, 1);
HtmlNode attributes = document.DocumentNode.Descendants("div") HtmlNode titleNode = document.DocumentNode.SelectSingleNode("//div[@class='BoxBody']//div[@class='row']//h1");
.First(div => div.HasClass("col-md-9") && div.HasClass("col-sm-8") && div.HasClass("top-5")) string title = titleNode.InnerText;
.Descendants("ul").First(); string publicationId = title;
HtmlNode[] authorsNodes = attributes.Descendants("li") HtmlNode[] authorsNodes = document.DocumentNode.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Author(s):']/..").Descendants("a").ToArray();
.First(node => node.InnerText.Contains("author(s):", StringComparison.CurrentCultureIgnoreCase))
.Descendants("a").ToArray();
List<string> authors = new(); List<string> authors = new();
foreach(HtmlNode authorNode in authorsNodes) foreach(HtmlNode authorNode in authorsNodes)
authors.Add(authorNode.InnerText); authors.Add(authorNode.InnerText);
HtmlNode[] genreNodes = attributes.Descendants("li") HtmlNode[] genreNodes = document.DocumentNode.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Genre(s):']/..").Descendants("a").ToArray();
.First(node => node.InnerText.Contains("genre(s):", StringComparison.CurrentCultureIgnoreCase))
.Descendants("a").ToArray();
foreach (HtmlNode genreNode in genreNodes) foreach (HtmlNode genreNode in genreNodes)
tags.Add(genreNode.InnerText); tags.Add(genreNode.InnerText);
HtmlNode yearNode = attributes.Descendants("li") HtmlNode yearNode = document.DocumentNode.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Released:']/..").Descendants("a").First();
.First(node => node.InnerText.Contains("released:", StringComparison.CurrentCultureIgnoreCase))
.Descendants("a").First();
int year = Convert.ToInt32(yearNode.InnerText); int year = Convert.ToInt32(yearNode.InnerText);
HtmlNode[] statusNodes = attributes.Descendants("li") HtmlNode[] statusNodes = document.DocumentNode.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Status:']/..").Descendants("a").ToArray();
.First(node => node.InnerText.Contains("status:", StringComparison.CurrentCultureIgnoreCase))
.Descendants("a").ToArray();
foreach(HtmlNode statusNode in statusNodes) foreach(HtmlNode statusNode in statusNodes)
if (statusNode.InnerText.Contains("publish", StringComparison.CurrentCultureIgnoreCase)) if (statusNode.InnerText.Contains("publish", StringComparison.CurrentCultureIgnoreCase))
status = statusNode.InnerText.Split(' ')[0]; status = statusNode.InnerText.Split(' ')[0];
HtmlNode descriptionNode = attributes.Descendants("li").First(node => node.InnerText.Contains("description:", StringComparison.CurrentCultureIgnoreCase)).Descendants("div").First(); HtmlNode descriptionNode = document.DocumentNode.SelectNodes("//div[@class='BoxBody']//div[@class='row']//span[text()='Description:']/..").Descendants("div").First();
string description = descriptionNode.InnerText; string description = descriptionNode.InnerText;
int i = 0; return new Manga(title, authors, description, altTitles, tags.ToArray(), posterUrl, coverFileNameInCache, links,
foreach(string at in a)
altTitles.Add((i++).ToString(), at);
return new Manga(sortName, authors, description, altTitles, tags.ToArray(), posterUrl, coverFileNameInCache, links,
year, originalLanguage, status, publicationId); year, originalLanguage, status, publicationId);
} }

View File

@ -141,7 +141,7 @@ public class Server : GlobalBase
SendResponse(HttpStatusCode.BadRequest, response); SendResponse(HttpStatusCode.BadRequest, response);
break; break;
} }
SendResponse(HttpStatusCode.OK, response, connector!.GetPublications(title)); SendResponse(HttpStatusCode.OK, response, connector!.GetManga(title));
break; break;
case "Manga/Chapters": case "Manga/Chapters":
if(!requestVariables.TryGetValue("connector", out connectorName) || if(!requestVariables.TryGetValue("connector", out connectorName) ||