2024-12-14 21:53:29 +01:00
using System.Text.RegularExpressions ;
using API.MangaDownloadClients ;
using HtmlAgilityPack ;
namespace API.Schema.MangaConnectors ;
public class MangaHere : MangaConnector
{
/// <summary>
/// Creates the MangaHere connector. Passes the name "MangaHere", the list ["en"] and the
/// list ["www.mangahere.cc"] to the base connector and installs a
/// <see cref="ChromiumDownloadClient"/> as the client used for every request made by this class.
/// </summary>
public MangaHere() : base("MangaHere", ["en"], ["www.mangahere.cc"])
    => downloadClient = new ChromiumDownloadClient();
2024-12-16 22:54:23 +01:00
/// <summary>
/// Searches mangahere.cc for publications matching the given title.
/// </summary>
/// <param name="publicationTitle">
/// Free-text title. Only runs of ASCII letters are kept; they are joined with '+' and
/// lower-cased to form the <c>title</c> query parameter.
/// </param>
/// <returns>
/// The publications parsed from the search result page, or an empty array when the request
/// does not return a 2xx status or yields no HTML document.
/// </returns>
public override (Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?)[] GetManga(string publicationTitle = "")
{
    // [A-Za-z] instead of [A-z]: the range A-z also covers '[', '\', ']', '^', '_' and '`',
    // which would otherwise end up unescaped in the query string.
    IEnumerable<string> words = Regex.Matches(publicationTitle, "[A-Za-z]+").Select(match => match.Value);

    // Invariant lower-casing keeps the URL identical regardless of the host culture
    // (e.g. 'I' must not become a dotless 'ı' under a Turkish locale).
    string sanitizedTitle = string.Join('+', words).ToLowerInvariant();
    string requestUrl = $"https://www.mangahere.cc/search?title={sanitizedTitle}";

    RequestResult requestResult = downloadClient.MakeRequest(requestUrl, RequestType.Default);
    if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 || requestResult.htmlDocument is null)
    {
        return [];
    }

    return ParsePublicationsFromHtml(requestResult.htmlDocument);
}
2024-12-16 22:54:23 +01:00
/// <summary>
/// Extracts every publication linked from a search result page and loads each one through
/// <c>GetMangaFromUrl</c>.
/// </summary>
/// <param name="document">The parsed search result page.</param>
/// <returns>
/// The distinct publications that could be loaded. Empty when a "container" div has a child
/// with the class "search-keywords", or when the page contains no publication links.
/// </returns>
private (Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?)[] ParsePublicationsFromHtml(HtmlDocument document)
{
    // HtmlAgilityPack returns null (not an empty collection) when an XPath matches nothing,
    // so every SelectNodes result has to be null-checked before use.
    HtmlNodeCollection? containers = document.DocumentNode
        .SelectNodes("//div[contains(concat(' ',normalize-space(@class),' '),' container ')]");

    // NOTE(review): "search-keywords" is presumably the site's marker for a result-less
    // search page - confirm against the live markup.
    if (containers is not null &&
        containers.Any(node => node.ChildNodes.Any(cNode => cNode.HasClass("search-keywords"))))
    {
        return [];
    }

    HtmlNodeCollection? publicationLinks = document.DocumentNode
        .SelectNodes("//a[contains(@href, '/manga/') and not(contains(@href, '.html'))]");
    if (publicationLinks is null)
    {
        return [];
    }

    // Links on the page are site-relative; prefix the host and drop duplicates.
    List<string> urls = publicationLinks
        .Select(link =>
        {
            string href = link.GetAttributeValue("href", "");
            return $"https://www.mangahere.cc{href}";
        })
        .Distinct()
        .ToList();

    HashSet<(Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?)> ret = new();
    foreach (string url in urls)
    {
        // Publications that fail to load (null) are skipped.
        if (GetMangaFromUrl(url) is { } manga)
        {
            ret.Add(manga);
        }
    }

    return ret.ToArray();
}
2024-12-16 22:54:23 +01:00
/// <summary>
/// Loads a publication by its site id by building the URL
/// <c>https://www.mangahere.cc/manga/{publicationId}</c> and delegating to <c>GetMangaFromUrl</c>.
/// </summary>
/// <param name="publicationId">The id segment that follows <c>/manga/</c> in the publication URL.</param>
/// <returns>Whatever <c>GetMangaFromUrl</c> returns for the constructed URL.</returns>
public override (Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?)? GetMangaFromId(string publicationId)
    => GetMangaFromUrl($"https://www.mangahere.cc/manga/{publicationId}");
2024-12-16 22:54:23 +01:00
/// <summary>
/// Downloads a publication page and parses it into a manga with its related entities.
/// </summary>
/// <param name="url">Absolute URL of the publication page.</param>
/// <returns>
/// The parsed publication, or <c>null</c> when the request does not return a 2xx status or
/// yields no HTML document.
/// </returns>
public override (Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?)? GetMangaFromUrl(string url)
{
    RequestResult response = downloadClient.MakeRequest(url, RequestType.MangaInfo);

    bool succeeded = (int)response.statusCode >= 200 && (int)response.statusCode < 300;
    if (!succeeded || response.htmlDocument is null)
    {
        return null;
    }

    // The publication id is the path segment right after "/manga/".
    // When the URL does not match, the captured group is empty and an empty id is passed on.
    Match idMatch = Regex.Match(url, @"https:\/\/www\.mangahere\.[a-z]{0,63}\/manga\/([0-9A-z\-]+).*");
    string publicationId = idMatch.Groups[1].Value;

    return ParseSinglePublicationFromHtml(response.htmlDocument, publicationId, url);
}
2024-12-16 22:54:23 +01:00
/// <summary>
/// Builds a <see cref="Manga"/> and its authors and tags from a publication detail page.
/// </summary>
/// <param name="document">The parsed publication page.</param>
/// <param name="publicationId">Site id of the publication, stored on the created manga.</param>
/// <param name="websiteUrl">URL the page was loaded from, stored on the created manga.</param>
/// <returns>
/// The manga together with its authors and tags; the link and alternative-title lists are
/// always empty because this connector does not read them from the page.
/// </returns>
private (Manga, List<Author>?, List<MangaTag>?, List<Link>?, List<MangaAltTitle>?) ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl)
{
    HtmlNode root = document.DocumentNode;

    // The original language is not read from the page and stays empty.
    string originalLanguage = "";

    // Cover images are not scraped (the original author cites same-origin restrictions);
    // the site's "no picture" placeholder is used for every publication instead.
    string coverUrl = "http://static.mangahere.cc/v20230914/mangahere/images/nopicture.jpg";

    // NOTE(review): the title node is still assumed to exist; a page without it throws here
    // exactly as before. Decide whether such pages should be rejected by the caller instead.
    HtmlNode titleNode = root.SelectSingleNode("//span[contains(concat(' ',normalize-space(@class),' '),' detail-info-right-title-font ')]");
    string sortName = titleNode.InnerText;

    // HtmlAgilityPack returns null when an XPath matches nothing, so a publication without
    // authors or tags must yield empty lists rather than a NullReferenceException.
    HtmlNodeCollection? authorNodes = root
        .SelectNodes("//p[contains(concat(' ',normalize-space(@class),' '),' detail-info-right-say ')]/a");
    List<Author> authors = authorNodes?
        .Select(node => new Author(node.InnerText))
        .ToList() ?? new List<Author>();

    HtmlNodeCollection? tagNodes = root
        .SelectNodes("//p[contains(concat(' ',normalize-space(@class),' '),' detail-info-right-tag-list ')]/a");
    List<MangaTag> mangaTags = tagNodes?
        .Select(node => node.InnerText)
        .Distinct() // each tag name is only kept once
        .Select(tagName => new MangaTag(tagName))
        .ToList() ?? new List<MangaTag>();

    // A missing or unrecognised status text maps to Unreleased.
    string status = root
        .SelectSingleNode("//span[contains(concat(' ',normalize-space(@class),' '),' detail-info-right-title-tip ')]")
        ?.InnerText ?? "";
    MangaReleaseStatus releaseStatus = status.ToLowerInvariant() switch
    {
        "cancelled" or "discontinued" => MangaReleaseStatus.Cancelled,
        "hiatus" => MangaReleaseStatus.OnHiatus,
        "complete" => MangaReleaseStatus.Completed,
        "ongoing" => MangaReleaseStatus.Continuing,
        _ => MangaReleaseStatus.Unreleased
    };

    // A publication without a description block gets an empty description.
    string description = root
        .SelectSingleNode("//p[contains(concat(' ',normalize-space(@class),' '),' fullcontent ')]")
        ?.InnerText ?? "";

    Manga manga = new(publicationId, sortName, description, websiteUrl, coverUrl, null, 0,
        originalLanguage, releaseStatus, -1,
        this,
        authors,
        mangaTags,
        [],
        []);

    return (manga, authors, mangaTags, [], []);
}
/// <summary>
/// Lists the chapters of a publication by scraping the chapter links on its detail page.
/// </summary>
/// <param name="manga">The publication whose <c>MangaId</c> is used to build the page URL.</param>
/// <param name="language">Not used by this method.</param>
/// <returns>
/// The chapters in their natural sort order, or an empty array when the request does not
/// return a 2xx status, yields no HTML document, or the page has no chapter links.
/// </returns>
public override Chapter[] GetChapters(Manga manga, string language = "en")
{
    string requestUrl = $"https://www.mangahere.cc/manga/{manga.MangaId}";
    RequestResult requestResult = downloadClient.MakeRequest(requestUrl, RequestType.Default);
    if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 || requestResult.htmlDocument is null)
    {
        return Array.Empty<Chapter>();
    }

    // HtmlAgilityPack returns null when the XPath matches nothing (e.g. no chapters listed).
    HtmlNodeCollection? chapterLinks = requestResult.htmlDocument.DocumentNode
        .SelectNodes("//div[@id='list-1']/ul//li//a[contains(@href, '/manga/')]");
    if (chapterLinks is null)
    {
        return Array.Empty<Chapter>();
    }

    // Group 1: volume (digits or the literal characters of "TBD"), group 2: chapter number.
    Regex chapterRex = new(@".*\/manga\/[a-zA-Z0-9\-\._\~\!\$\&\'\(\)\*\+\,\;\=\:\@]+\/v([0-9(TBD)]+)\/c([0-9\.]+)\/.*");
    List<Chapter> chapters = new();

    foreach (HtmlNode chapterLink in chapterLinks)
    {
        string url = chapterLink.GetAttributeValue("href", "");
        Match rexMatch = chapterRex.Match(url);
        if (!rexMatch.Success)
        {
            // Links that do not follow the /v<volume>/c<chapter>/ layout carry no usable
            // numbers; previously they made int.Parse throw and aborted the whole listing.
            continue;
        }

        // "TBD" (or any other non-numeric volume text) means the volume is unknown.
        int? volumeNumber = int.TryParse(rexMatch.Groups[1].Value, out int parsedVolume) ? parsedVolume : null;
        string chapterNumber = rexMatch.Groups[2].Value;
        string fullUrl = $"https://www.mangahere.cc{url}";

        try
        {
            chapters.Add(new Chapter(manga, fullUrl, chapterNumber, volumeNumber, null));
        }
        catch (Exception)
        {
            // Best effort: a chapter that cannot be constructed is skipped so that the
            // remaining chapters are still returned.
        }
    }

    // Return chapters ordered by chapter number (Chapter's own ordering).
    return chapters.Order().ToArray();
}
/// <summary>
/// Collects the image URLs of a chapter by walking its reader pages
/// (<c>1.html</c>, <c>2.html</c>, ...) and reading the images shown on each page.
/// </summary>
/// <param name="chapter">The chapter; the last path segment of its URL is replaced by the page file name.</param>
/// <returns>
/// All image URLs found, in page order. Empty when any page request does not return a 2xx
/// status or yields no HTML document (images collected so far are discarded in that case).
/// </returns>
internal override string[] GetChapterImageUrls(Chapter chapter)
{
    List<string> imageUrls = new();
    int downloaded = 1;
    int images;

    // Drop the trailing segment of the chapter URL; page file names are appended below.
    string url = string.Join('/', chapter.Url.Split('/')[..^1]);

    do
    {
        RequestResult requestResult = downloadClient.MakeRequest($"{url}/{downloaded}.html", RequestType.Default);
        if ((int)requestResult.statusCode < 200 || (int)requestResult.statusCode >= 300 || requestResult.htmlDocument is null)
        {
            return [];
        }

        imageUrls.AddRange(ParseImageUrlsFromHtml(requestResult.htmlDocument));

        // The highest "data-page" value among the links is used as the page count.
        // SelectNodes returns null when nothing matches; treat that as "no further pages"
        // instead of failing with a NullReferenceException.
        HtmlNodeCollection? pageLinks = requestResult.htmlDocument.DocumentNode
            .SelectNodes("//a[contains(@href, '/manga/')]");
        images = pageLinks?
            .Select(node => node.GetAttributeValue("data-page", 0))
            .DefaultIfEmpty(0)
            .Max() ?? 0;

        // NOTE(review): with a post-increment and "<=" this requests page (images + 1) as
        // well if data-page is 1-based - confirm against the site before tightening.
    } while (downloaded++ <= images);

    return imageUrls.ToArray();
}
/// <summary>
/// Reads the <c>src</c> of every image with the class "reader-main-img" on a reader page.
/// </summary>
/// <param name="document">The parsed reader page.</param>
/// <returns>
/// The image URLs in document order; protocol-relative URLs ("//host/...") are prefixed with
/// "https:". Empty when the page contains no such image.
/// </returns>
private string[] ParseImageUrlsFromHtml(HtmlDocument document)
{
    // HtmlAgilityPack returns null (not an empty collection) when the XPath matches nothing.
    HtmlNodeCollection? imageNodes = document.DocumentNode
        .SelectNodes("//img[contains(concat(' ',normalize-space(@class),' '),' reader-main-img ')]");
    if (imageNodes is null)
    {
        return [];
    }

    return imageNodes
        .Select(node => node.GetAttributeValue("src", ""))
        // Ordinal comparison: this is a URL prefix check, not linguistic text.
        .Select(src => src.StartsWith("//", StringComparison.Ordinal) ? $"https:{src}" : src)
        .ToArray();
}
}