280 lines
12 KiB
PHP
280 lines
12 KiB
PHP
<?php
|
||
|
||
namespace Animarr\Extractor;
|
||
|
||
use Animarr\AniDB;
|
||
use Animarr\Database;
|
||
use Animarr\Release\MultiRelease;
|
||
use Animarr\Release\Release;
|
||
|
||
/**
 * Extractor for "scene"-style release titles of the shape
 * "[Group] Title - 01v2 [tag tag].ext".
 *
 * The title is first split into group / name / extra / extension by a base
 * pattern, then the name part is tried against volume, special, episode and
 * batch patterns (in that order) to decide which kind of release it is.
 */
class SceneExtractor implements Extractor{

	/** @var AniDB|null Optional title database used to validate extracted titles. */
	private $db;

	/**
	 * @param AniDB|null $aniDB Title database; when omitted every extracted title is accepted.
	 */
	public function __construct(AniDB $aniDB = null){
		$this->db = $aniDB;
	}

	/**
	 * Checks whether a candidate title is known to the title database.
	 *
	 * @param string|null $title Candidate title (cast to string before lookup).
	 *
	 * @return bool Always true when no database was injected; otherwise true when
	 *              AniDB::matchTitle() returns something other than null.
	 */
	private function tryMatch($title){
		if($this->db === null){
			return true;
		}

		return $this->db->matchTitle((string) $title) !== null;
	}

	/**
	 * Returns the captured "version" group, or "v1" when it is absent or empty.
	 *
	 * @param array $matches preg_match() result.
	 *
	 * @return string
	 */
	private function versionFrom(array $matches){
		return (isset($matches["version"]) and $matches["version"] !== "") ? $matches["version"] : "v1";
	}

	/**
	 * Parses a release title into a Release / MultiRelease.
	 *
	 * A first pass uses the strict pattern set. If that pass produces nothing,
	 * the method calls itself once more with $deepMatch = true, which appends
	 * looser episode, batch and base patterns.
	 *
	 * @param string     $releaseTitle Raw release title / file name.
	 * @param bool       $deepMatch    Enable the additional, looser patterns.
	 * @param array|null $debug        When an array is passed, it is filled with the extracted
	 *                                 fields and the patterns that matched (see setDebug()).
	 *
	 * @return Release|MultiRelease|null Null when the extension is listed in the
	 *         "filter.extension.ignore" config key, when a half episode is detected,
	 *         or when no pattern yields a release even in deep mode.
	 */
	public function extractInformation($releaseTitle, $deepMatch = false, &$debug = null){
		$baseExt = pathinfo(trim(strtolower($releaseTitle)), PATHINFO_EXTENSION);
		if(in_array($baseExt, Database::getConfigKey("filter.extension.ignore", []))){
			return null;
		}

		// Character-class fragments interpolated into the patterns below.
		$id_start = "\\[\\(\\{";
		$id_end = "\\]\\)\\}";
		$space = " _\\.";
		$tag = "^$id_end";
		$tag_separator = "$space,-";
		$split = "\\-‒–\\/#";

		// Normalise Spanish keywords and the fraction slash before matching.
		$releaseTitle = str_ireplace(["capitulo", "especial", "⁄"], ["episode", "special", "/"], $releaseTitle);

		$episodeWordRegex = "[Ee][Pp](|[\\.]|isode)";

		$episode = "0-9";
		// A name is anything that is not a bracket character.
		$name = "^$id_start$id_end";

		$specials = "(O[Pp]|E[Dd]|OVA|ONA|OAD|SP|PV|CM|Teaser)";

		$specials_find = [" opening ", " ending ", " movie ", /*" special ", */" picture drama ", " trailer "];

		$episode_match = [
			"/^(?P<title>.*)[{$split}]?[{$space}]?{$episodeWordRegex}[{$space}]*(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))(?P<ename>(([{$space}]+[{$split}][{$space}]+|[{$space}]+).*|))$/u",
			"/^(?P<title>.*)[{$split}][{$space}]+(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))(?P<ename>[{$space}]+[{$split}][{$space}]+.+)$/u",
			"/^(?P<title>.*)[{$split}][{$space}]+(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))(?P<ename>(([{$space}]+[{$split}][{$space}]+|[{$space}]+).*|))$/u",
		];

		$volume_match = [
			"/^(?P<title>.*)[{$split}]?[{$space}]+Vol(|[\\.]|ume)[{$space}]*(?P<number>[{$episode}]+)[\\- ]+((Vol(|[\\.]|ume))|)[\\-{$episode} ]+(END|End|)$/u",
			"/^(?P<title>.*)[{$split}]?[{$space}]+[$id_start]Vol(|[\\.]|ume)[{$space}]*(?P<number>[{$episode}]+)[$id_end](|[\\- ][\\-{$episode} ]+)$/u",
			"/^(?P<title>.*)[{$split}]?[{$space}]+Vol(|[\\.]|ume)[{$space}]*(?P<number>[{$episode}]+)$/u",
		];

		$batch_match = [
			"/^(?P<title>.*)[{$space}]+(?P<number>[{$episode}]+[{$split}][{$episode}]+)[{$space}]+(.*)$/u",
			"/^(?P<title>.*)[{$space}]+[{$split}][{$space}]+(.*)$/u",
			"/^(?P<title>.*)[{$space}]+(?P<number>[{$episode}]+[{$split}][{$episode}]+)(.*)$/u",
		];

		$special_match = [
			"/^(?P<title>.*)[{$split}]+[{$space}]*(?P<type>NC$specials)[{$space}]*[{$split}]?[{$space}]*(?P<number>[{$episode}]*)[A-Za-z]?(?P<version>([v\\. ][0-9]|))$/u",
			"/^(?P<title>.*)([{$split}]+|[{$space}]+)(?P<type>NC$specials)[{$space}]*[{$split}]?[{$space}]*(?P<number>[{$episode}]*)[A-Za-z]?(?P<version>([v\\. ][0-9]|))$/u",
			"/^(?P<title>.*)[{$split}]+[{$space}]*(?P<type>$specials)[{$space}]*[{$split}]?[{$space}]*(?P<number>[{$episode}]*)[A-Za-z]?(?P<version>([v\\. ][0-9]|))$/u",
			"/^(?P<title>.*)([{$split}]+|[{$space}]+)(?P<type>$specials)[{$space}]*[{$split}]?[{$space}]*(?P<number>[{$episode}]*)[A-Za-z]?(?P<version>([v\\. ][0-9]|))$/u",
			"/^(?P<title>.*)[{$split}]+[{$space}]*(?P<type>S)(?P<number>[{$episode}]+)[A-Za-z]?(?P<version>([v\\. ][0-9]|))$/u",
		];

		$tag_match = "/[{$id_start}](?P<tag>[{$tag}]+)[{$id_end}]/u";

		$base_match = [
			"/^[{$id_start}](?P<group>[{$tag}]+)[{$id_end}][{$split}]?(?P<name>[{$name}]+ (?P<name2>\\([{$name}]+\\))[{$name}]*)(?P<extra>.*)(?P<ext>(\\.[a-z0-9]{2,7}|))$/u",
			"/^[{$id_start}](?P<group>[{$tag}]+)[{$id_end}][{$split}]?(?P<name>[{$name}]+(\\(([0-9]{4}|Season [0-9]+|S[0-9]+|TV)\\)[{$name}]*|))(?P<extra>.*)(?P<ext>(\\.[a-z0-9]{2,7}|))$/u",
		];

		if($deepMatch){
			$episode_match[] = "/^(?P<title>.*)[{$space}]+(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))(?P<ename>(([{$space}]+[{$split}][{$space}]+|[{$space}]+).*|))$/u";

			//Bad match: [Commie] Silver Spoon 2 [BD 720p AAC]
			$episode_match[] = "/^(?P<title>.*)[{$space}]+{$episodeWordRegex}[{$space}]*(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))$/u";

			$episode_match[] = "/^(?P<title>.*)([{$space}]+[{$split}]|)[{$space}]+S?(?P<season>[{$episode}]+)([xX]|E)(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))(?P<ename>(([{$space}]+[{$split}][{$space}]+|[{$space}]+).*|))$/u";

			$batch_match[] = "/^(?P<title>.*)[{$space}]*(?P<version>[v\\. ][0-9])(.*)$/u";
			$batch_match[] = "/^(?P<title>[{$name}]+(\\([0-9]{4}\\)|))(.*)$/u";

			// NOTE(review): the alternation below reads as "(YYYY" OR "TV)"; compared with the
			// strict base pattern the parentheses look misplaced. Left as-is; confirm intent.
			$base_match[] = "/^[{$id_start}](?P<group>[{$tag}]+)[{$id_end}][{$split}]?(?P<name>[{$name}]+(\\([0-9]{4}|TV\\)|))(?P<extra>.*)(?P<ext>(\\.[a-z0-9]{2,7}|))$/u";

			// Group-less fallback; this pattern has no "ext" capture group.
			$base_match[] = "/^(?P<group>)(?P<name>[{$name}]+(\\([0-9]{4}\\)|))(?P<extra>.*)$/u";
		}

		$spaceChars = str_split($space);
		$tagSeparatorChars = str_split($tag_separator);

		foreach($base_match as $bmatch){
			if(preg_match($bmatch, $releaseTitle, $matches) !== 1){
				continue;
			}

			$group = $matches["group"];

			if(isset($matches["name2"])){
				// A parenthesised year/season/TV marker belongs to the second base pattern.
				if(preg_match("#^\\(([0-9]{4}|Season [0-9]+|S[0-9]+|TV)\\)#", $matches["name2"]) > 0){
					continue;
				}
				$rname = trim(str_replace($spaceChars, " ", str_replace($matches["name2"], "", $matches["name"])));
			}else{
				$rname = trim(str_replace($spaceChars, " ", $matches["name"]));
			}

			// The group-less deep pattern does not capture "ext"; treat that as no extension.
			$ext = ltrim(strtolower(isset($matches["ext"]) ? $matches["ext"] : ""), ".");

			if(in_array($ext, Database::getConfigKey("filter.extension.ignore", []))){
				return null;
			}

			// Collect every bracketed tag of the "extra" part, split into single words.
			$tags = [];
			if(preg_match_all($tag_match, $matches["extra"], $tagMatches) > 0){
				foreach($tagMatches["tag"] as $rawTag){
					$rawTag = str_replace($tagSeparatorChars, " ", $rawTag);
					foreach(explode(" ", $rawTag) as $word){
						$tags[] = $word;
					}
				}
			}

			$vmatch = null;
			$smatch = null;
			$ematch = null;
			$bcmatch = null;

			$isVolume = false;
			$isSpecial = false;
			$isBatch = false;

			$number = null;
			$title = null;
			$version = null;
			$specialType = null;

			foreach($volume_match as $r){
				if(preg_match($r, $rname, $matches) > 0){
					$isVolume = true;
					$title = trim($matches["title"], " {$split}");
					$number = $matches["number"];
					$version = $this->versionFrom($matches);
					$vmatch = $r;
					break;
				}
			}

			if(!$isVolume){
				foreach($special_match as $r){
					if(preg_match($r, $rname, $matches) > 0){
						$title = trim($matches["title"], " -");
						// NOTE(review): may be "" for un-numbered specials, which later casts to 0.
						$number = $matches["number"];
						$version = $this->versionFrom($matches);
						$isSpecial = true;

						// Resolve the constant's value, not its name, so that this branch
						// yields the same kind of value as the SPECIAL_TYPE_UNKNOWN fallback.
						$typeConstant = Release::class . "::SPECIAL_TYPE_" . strtoupper($matches["type"]);
						$specialType = defined($typeConstant) ? constant($typeConstant) : Release::SPECIAL_TYPE_UNKNOWN;

						$smatch = $r;
						break;
					}
				}

				if(!$isSpecial and !$deepMatch){
					foreach($specials_find as $needle){
						if(stripos($rname . " ", $needle) !== false){
							$isSpecial = true; //Really strange special, TODO
							break;
						}
					}
				}
			}

			if(!$isVolume and !$isSpecial){
				foreach($episode_match as $r){
					if(preg_match($r, $rname, $matches) > 0){
						$title = trim($matches["title"], " -");
						$number = $matches["number"];
						$version = $this->versionFrom($matches);
						$ematch = $r;
						break;
					}
				}
			}

			// $title may still be null here, hence the cast before stripos().
			if(($deepMatch and $number === null) or stripos((string) $title, " batch") !== false or in_array("Batch", $tags) or in_array("batch", $tags) or (!$this->tryMatch($title) and $number === null)){
				foreach($batch_match as $r){
					if(preg_match($r, $rname, $matches) > 0){
						$title = trim($matches["title"], " -");
						$bcmatch = $r;
						break;
					}
				}

				if($title === null){
					$title = $rname;
				}

				// A batch overrides any earlier volume/special classification.
				$isBatch = true;
				$isVolume = false;
				$isSpecial = false;
			}

			if(!$this->tryMatch($title)){
				continue;
			}

			if($group === ""){
				$group = "UNKNOWN";
			}

			// Dots were replaced by spaces in $rname, so "12.5" surfaces as version " 5".
			if($version === " 5"){
				//Half episode, abort!
				return null; //???
			}

			$r = null;

			if($isVolume){
				$this->setDebug($debug, $title, $group, $tags, $rname, $version, $number, Release::TYPE_VOLUME, $bmatch, $vmatch, $smatch, $ematch, $bcmatch);
				$r = new MultiRelease($releaseTitle, $title, $group, Release::TYPE_VOLUME, $tags, $number === null ? -1 : (int) $number);
			}else if($isBatch){
				$this->setDebug($debug, $title, $group, $tags, $rname, $version, $number, Release::TYPE_BATCH, $bmatch, $vmatch, $smatch, $ematch, $bcmatch);
				$r = new MultiRelease($releaseTitle, $title, $group, Release::TYPE_BATCH, $tags);
			}else if($isSpecial){
				$this->setDebug($debug, $title, $group, $tags, $rname, $version, $number, Release::TYPE_SPECIAL, $bmatch, $vmatch, $smatch, $ematch, $bcmatch);
				$r = new Release($releaseTitle, $title, $group, Release::TYPE_SPECIAL, $tags, $number === null ? -1 : (int) $number);
				if($specialType !== null){
					$r->setSpecialType($specialType);
				}
			}else if($title !== null and $number !== null){
				$this->setDebug($debug, $title, $group, $tags, $rname, $version, $number, Release::TYPE_SINGLE, $bmatch, $vmatch, $smatch, $ematch, $bcmatch);
				$r = new Release($releaseTitle, $title, $group, Release::TYPE_SINGLE, $tags, (int) $number);
			}

			if($r !== null){
				// Only an explicit "vN" marker carries a version number.
				if($version !== null and $version[0] === "v"){
					$r->setVersion((int) trim($version, " v."));
				}

				return $r;
			}
		}

		// Nothing matched with the strict patterns: retry once with the loose ones.
		return (!$deepMatch) ? $this->extractInformation($releaseTitle, true, $debug) : null;
	}

	/**
	 * Records the extracted fields and the patterns that produced them.
	 *
	 * Does nothing unless $debug is already an array. Patterns are stored
	 * HTML-escaped; patterns that did not take part are stored as "".
	 *
	 * @param array|null  $debug   Target array, written in place.
	 * @param string|null $title   Extracted title.
	 * @param string      $group   Release group.
	 * @param string[]    $tags    Extracted tags (stored space-joined).
	 * @param string      $rname   Normalised name part the patterns ran against.
	 * @param string|null $version Raw version capture.
	 * @param string|null $number  Raw number capture.
	 * @param mixed       $type    Release::TYPE_* value.
	 * @param string      $bmatch  Base pattern that matched.
	 * @param string|null $vmatch  Volume pattern that matched, if any.
	 * @param string|null $smatch  Special pattern that matched, if any.
	 * @param string|null $ematch  Episode pattern that matched, if any.
	 * @param string|null $bcmatch Batch pattern that matched, if any.
	 */
	private function setDebug(&$debug, $title, $group, $tags, $rname, $version, $number, $type, $bmatch, $vmatch, $smatch, $ematch, $bcmatch){
		if(!is_array($debug)){
			return;
		}

		$debug["title"] = $title;
		$debug["group"] = $group;
		$debug["tags"] = implode(" ", $tags);
		$debug["rname"] = $rname;
		$debug["number"] = $number;
		$debug["version"] = $version;
		$debug["type"] = $type;

		// Cast first: unused patterns are null, which htmlentities() no longer accepts silently.
		$debug["base_match"] = htmlentities((string) $bmatch);
		$debug["volume_match"] = htmlentities((string) $vmatch);
		$debug["special_match"] = htmlentities((string) $smatch);
		$debug["episode_match"] = htmlentities((string) $ematch);
		$debug["batch_match"] = htmlentities((string) $bcmatch);
	}
}
|