Animarr/src/Animarr/Extractor/SceneExtractor.php

280 lines
12 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace Animarr\Extractor;
use Animarr\AniDB;
use Animarr\Database;
use Animarr\Release\MultiRelease;
use Animarr\Release\Release;
class SceneExtractor implements Extractor{
private $db;
/**
 * Creates the extractor.
 *
 * @param AniDB|null $aniDB Optional title source consulted by tryMatch().
 *                          When omitted (null), tryMatch() accepts every title.
 */
public function __construct(AniDB $aniDB = null){
    // Kept as given; a null value is a valid "no title validation" state.
    $this->db = $aniDB;
}
/**
 * Tells whether a candidate title is acceptable.
 *
 * @param mixed $title Candidate title; it is cast to string before lookup.
 *
 * @return bool true when no AniDB instance was supplied to the constructor,
 *              otherwise whether matchTitle() returned something other than null.
 */
private function tryMatch($title){
    // Short-circuit: without a database there is nothing to validate against.
    return $this->db === null
        || $this->db->matchTitle((string) $title) !== null;
}
/**
 * Parses a scene-style release title into a release object.
 *
 * The title is first matched against the "base" patterns, which split it into
 * release group, name, bracketed tags and extension. The name part is then
 * classified, in this order, as volume, special, single episode or batch.
 * Every candidate title is validated with tryMatch(); when a base pattern
 * produces a title that is rejected, the next base pattern is tried.
 *
 * If the normal pass yields nothing, the method calls itself once more with
 * $deepMatch = true, which enables additional, more permissive patterns.
 *
 * @param string     $releaseTitle Raw release title / file name.
 * @param bool       $deepMatch    Enables the permissive fallback patterns. Callers
 *                                 normally leave this false; the method retries by itself.
 * @param array|null $debug        Passed by reference. When it is an array, it is filled
 *                                 by setDebug() with the intermediate parsing results.
 *
 * @return Release|MultiRelease|null The parsed release, or null when the extension is
 *                                   on the "filter.extension.ignore" list, the version
 *                                   fragment is exactly " 5", or nothing matched.
 */
public function extractInformation($releaseTitle, $deepMatch = false, &$debug = null){
    // Reject early when the trailing file extension is configured as ignored.
    $baseExt = pathinfo(trim(strtolower($releaseTitle)), PATHINFO_EXTENSION);
    if(in_array($baseExt, Database::getConfigKey("filter.extension.ignore", []))){
        return null;
    }

    // Regex character-class fragments, interpolated into the patterns below.
    $id_start = "\\[\\(\\{";
    $id_end = "\\]\\)\\}";
    $space = " _\\.";
    $tag = "^$id_end";
    $tag_separator = "$space,-";
    $split = "\\-\\/#";

    // Normalise Spanish keywords to their English counterparts.
    // NOTE(review): the third search needle appears empty in this view of the file
    // (possibly a non-ASCII slash look-alike that was lost); str_ireplace() skips
    // an empty needle, so as written the "/" replacement never happens - confirm.
    $releaseTitle = str_ireplace(["capitulo", "especial", ""], ["episode", "special", "/"], $releaseTitle);

    $episodeWordRegex = "[Ee][Pp](|[\\.]|isode)";
    $episode = "0-9";
    // A name is any run of characters that are not opening/closing brackets.
    $name = "^$id_start$id_end";
    $specials = "(O[Pp]|E[Dd]|OVA|ONA|OAD|SP|PV|CM|Teaser)";
    // Keywords that mark a release as a special even when no special pattern matched.
    $specials_find = [" opening ", " ending ", " movie ", /*" special ", */" picture drama ", " trailer "];

    $episode_match = [
        "/^(?P<title>.*)[{$split}]?[{$space}]?{$episodeWordRegex}[{$space}]*(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))(?P<ename>(([{$space}]+[{$split}][{$space}]+|[{$space}]+).*|))$/u",
        "/^(?P<title>.*)[{$split}][{$space}]+(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))(?P<ename>[{$space}]+[{$split}][{$space}]+.+)$/u",
        "/^(?P<title>.*)[{$split}][{$space}]+(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))(?P<ename>(([{$space}]+[{$split}][{$space}]+|[{$space}]+).*|))$/u",
    ];
    $volume_match = [
        "/^(?P<title>.*)[{$split}]?[{$space}]+Vol(|[\\.]|ume)[{$space}]*(?P<number>[{$episode}]+)[\\- ]+((Vol(|[\\.]|ume))|)[\\-{$episode} ]+(END|End|)$/u",
        "/^(?P<title>.*)[{$split}]?[{$space}]+[$id_start]Vol(|[\\.]|ume)[{$space}]*(?P<number>[{$episode}]+)[$id_end](|[\\- ][\\-{$episode} ]+)$/u",
        "/^(?P<title>.*)[{$split}]?[{$space}]+Vol(|[\\.]|ume)[{$space}]*(?P<number>[{$episode}]+)$/u",
    ];
    $batch_match = [
        "/^(?P<title>.*)[{$space}]+(?P<number>[{$episode}]+[{$split}][{$episode}]+)[{$space}]+(.*)$/u",
        "/^(?P<title>.*)[{$space}]+[{$split}][{$space}]+(.*)$/u",
        "/^(?P<title>.*)[{$space}]+(?P<number>[{$episode}]+[{$split}][{$episode}]+)(.*)$/u",
    ];
    $special_match = [
        "/^(?P<title>.*)[{$split}]+[{$space}]*(?P<type>NC$specials)[{$space}]*[{$split}]?[{$space}]*(?P<number>[{$episode}]*)[A-Za-z]?(?P<version>([v\\. ][0-9]|))$/u",
        "/^(?P<title>.*)([{$split}]+|[{$space}]+)(?P<type>NC$specials)[{$space}]*[{$split}]?[{$space}]*(?P<number>[{$episode}]*)[A-Za-z]?(?P<version>([v\\. ][0-9]|))$/u",
        "/^(?P<title>.*)[{$split}]+[{$space}]*(?P<type>$specials)[{$space}]*[{$split}]?[{$space}]*(?P<number>[{$episode}]*)[A-Za-z]?(?P<version>([v\\. ][0-9]|))$/u",
        "/^(?P<title>.*)([{$split}]+|[{$space}]+)(?P<type>$specials)[{$space}]*[{$split}]?[{$space}]*(?P<number>[{$episode}]*)[A-Za-z]?(?P<version>([v\\. ][0-9]|))$/u",
        "/^(?P<title>.*)[{$split}]+[{$space}]*(?P<type>S)(?P<number>[{$episode}]+)[A-Za-z]?(?P<version>([v\\. ][0-9]|))$/u",
    ];
    $tag_match = "/[{$id_start}](?P<tag>[{$tag}]+)[{$id_end}]/u";
    $base_match = [
        "/^[{$id_start}](?P<group>[{$tag}]+)[{$id_end}][{$split}]?(?P<name>[{$name}]+ (?P<name2>\\([{$name}]+\\))[{$name}]*)(?P<extra>.*)(?P<ext>(\\.[a-z0-9]{2,7}|))$/u",
        "/^[{$id_start}](?P<group>[{$tag}]+)[{$id_end}][{$split}]?(?P<name>[{$name}]+(\\(([0-9]{4}|Season [0-9]+|S[0-9]+|TV)\\)[{$name}]*|))(?P<extra>.*)(?P<ext>(\\.[a-z0-9]{2,7}|))$/u",
    ];

    if($deepMatch){
        // Permissive fallbacks; these are appended so the strict patterns still run first.
        $episode_match[] = "/^(?P<title>.*)[{$space}]+(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))(?P<ename>(([{$space}]+[{$split}][{$space}]+|[{$space}]+).*|))$/u";
        //Bad match: [Commie] Silver Spoon 2 [BD 720p AAC]
        $episode_match[] = "/^(?P<title>.*)[{$space}]+{$episodeWordRegex}[{$space}]*(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))$/u";
        $episode_match[] = "/^(?P<title>.*)([{$space}]+[{$split}]|)[{$space}]+S?(?P<season>[{$episode}]+)([xX]|E)(?P<number>[{$episode}]+)(?P<version>([v\\. ][0-9]|))(?P<ename>(([{$space}]+[{$split}][{$space}]+|[{$space}]+).*|))$/u";
        $batch_match[] = "/^(?P<title>.*)[{$space}]*(?P<version>[v\\. ][0-9])(.*)$/u";
        $batch_match[] = "/^(?P<title>[{$name}]+(\\([0-9]{4}\\)|))(.*)$/u";
        $base_match[] = "/^[{$id_start}](?P<group>[{$tag}]+)[{$id_end}][{$split}]?(?P<name>[{$name}]+(\\([0-9]{4}|TV\\)|))(?P<extra>.*)(?P<ext>(\\.[a-z0-9]{2,7}|))$/u";
        // Last resort: no release group at all (the "group" capture is empty, no "ext" capture).
        $base_match[] = "/^(?P<group>)(?P<name>[{$name}]+(\\([0-9]{4}\\)|))(?P<extra>.*)$/u";
    }

    foreach($base_match as $bmatch){
        if(preg_match($bmatch, $releaseTitle, $matches) <= 0){
            continue;
        }

        $group = $matches["group"];
        if(isset($matches["name2"])){
            // A parenthesised year/season/TV marker is handled by the next base pattern.
            if(preg_match("#^\\(([0-9]{4}|Season [0-9]+|S[0-9]+|TV)\\)#", $matches["name2"]) > 0){
                continue;
            }
            // Otherwise the parenthesised part is dropped from the name entirely.
            $rname = trim(str_replace(str_split("$space"), " ", str_replace($matches["name2"], "", $matches["name"])));
        }else{
            $rname = trim(str_replace(str_split("$space"), " ", $matches["name"]));
        }

        // Not every base pattern captures "ext"; treat a missing capture as "no extension".
        $ext = isset($matches["ext"]) ? ltrim(strtolower($matches["ext"]), ".") : "";
        if(in_array($ext, Database::getConfigKey("filter.extension.ignore", []))){
            return null;
        }

        // Collect the bracketed tags, splitting each bracket on the tag separators.
        $tags = [];
        if(preg_match_all($tag_match, $matches["extra"], $tagMatches) > 0){
            foreach($tagMatches["tag"] as $rawTag){
                $rawTag = str_replace(str_split("$tag_separator"), " ", $rawTag);
                foreach(explode(" ", $rawTag) as $tagPart){
                    $tags[] = $tagPart;
                }
            }
        }

        // Which pattern of each family matched (debug output only).
        $vmatch = null;
        $smatch = null;
        $ematch = null;
        $bcmatch = null;

        $isVolume = false;
        $isSpecial = false;
        $isBatch = false;
        $number = null;
        $title = null;
        $version = null;
        $specialType = null;

        // 1) Volume
        foreach($volume_match as $r){
            if(preg_match($r, $rname, $matches) > 0){
                $isVolume = true;
                $title = trim($matches["title"], " {$split}");
                $number = $matches["number"];
                $version = (isset($matches["version"]) and $matches["version"] !== "") ? $matches["version"] : "v1";
                $vmatch = $r;
                break;
            }
        }

        // 2) Special
        if(!$isVolume){
            foreach($special_match as $r){
                if(preg_match($r, $rname, $matches) > 0){
                    $title = trim($matches["title"], " -");
                    $number = $matches["number"];
                    $version = (isset($matches["version"]) and $matches["version"] !== "") ? $matches["version"] : "v1";
                    $isSpecial = true;
                    // Map the matched type onto Release::SPECIAL_TYPE_<TYPE> when such a
                    // constant exists. The constant's VALUE is used (via constant()), so
                    // both branches hand the same kind of value to setSpecialType().
                    $typeConstant = Release::class . "::SPECIAL_TYPE_" . strtoupper($matches["type"]);
                    if(defined($typeConstant)){
                        $specialType = constant($typeConstant);
                    }else{
                        $specialType = Release::SPECIAL_TYPE_UNKNOWN;
                    }
                    $smatch = $r;
                    break;
                }
            }
            if(!$isSpecial and !$deepMatch){
                // Keyword-only specials: flagged as special without title/number. TODO
                foreach($specials_find as $needle){
                    if(stripos($rname . " ", $needle) !== false){
                        $isSpecial = true;
                        break;
                    }
                }
            }
        }

        // 3) Single episode
        if(!$isVolume and !$isSpecial){
            foreach($episode_match as $r){
                if(preg_match($r, $rname, $matches) > 0){
                    $title = trim($matches["title"], " -");
                    $number = $matches["number"];
                    $version = (isset($matches["version"]) and $matches["version"] !== "") ? $matches["version"] : "v1";
                    $ematch = $r;
                    break;
                }
            }
        }

        // 4) Batch: forced by deep matching without a number, by a "batch" marker in the
        //    title or tags, or when the title is unknown and no number was found.
        //    $title may still be null here, hence the explicit string cast for stripos().
        if(($deepMatch and $number === null) or stripos((string) $title, " batch") !== false or in_array("Batch", $tags) or in_array("batch", $tags) or (!$this->tryMatch($title) and $number === null)){
            foreach($batch_match as $r){
                if(preg_match($r, $rname, $matches) > 0){
                    $title = trim($matches["title"], " -");
                    $bcmatch = $r;
                    break;
                }
            }
            if($title === null){
                $title = $rname;
            }
            $isBatch = true;
            $isVolume = false;
            $isSpecial = false;
        }

        // Title rejected: give the next base pattern a chance.
        if(!$this->tryMatch($title)){
            continue;
        }

        if($group === ""){
            $group = "UNKNOWN";
        }

        // A version fragment of exactly " 5" is treated as a half episode and aborts
        // the whole extraction (including the deep-match retry).
        // NOTE(review): a ".5" fragment is not covered by this check - confirm intent.
        if($version === " 5"){
            return null;
        }

        $release = null;
        if($isVolume){
            $this->setDebug($debug, $title, $group, $tags, $rname, $version, $number, Release::TYPE_VOLUME, $bmatch, $vmatch, $smatch, $ematch, $bcmatch);
            $release = new MultiRelease($releaseTitle, $title, $group, Release::TYPE_VOLUME, $tags, $number === null ? -1 : (int) $number);
        }else if($isBatch){
            $this->setDebug($debug, $title, $group, $tags, $rname, $version, $number, Release::TYPE_BATCH, $bmatch, $vmatch, $smatch, $ematch, $bcmatch);
            $release = new MultiRelease($releaseTitle, $title, $group, Release::TYPE_BATCH, $tags);
        }else if($isSpecial){
            $this->setDebug($debug, $title, $group, $tags, $rname, $version, $number, Release::TYPE_SPECIAL, $bmatch, $vmatch, $smatch, $ematch, $bcmatch);
            $release = new Release($releaseTitle, $title, $group, Release::TYPE_SPECIAL, $tags, $number === null ? -1 : (int) $number);
            if($specialType !== null){
                $release->setSpecialType($specialType);
            }
        }else if($title !== null and $number !== null){
            $this->setDebug($debug, $title, $group, $tags, $rname, $version, $number, Release::TYPE_SINGLE, $bmatch, $vmatch, $smatch, $ematch, $bcmatch);
            // $number is known to be non-null in this branch.
            $release = new Release($releaseTitle, $title, $group, Release::TYPE_SINGLE, $tags, (int) $number);
        }

        if($release !== null){
            // Only "vN" fragments carry a version number; ".N" / " N" fragments are ignored.
            if($version !== null and $version !== "" and $version[0] === "v"){
                $release->setVersion((int) trim($version, " v."));
            }
            return $release;
        }
    }

    // Nothing matched: retry once with the permissive patterns, then give up.
    return (!$deepMatch) ? $this->extractInformation($releaseTitle, true, $debug) : null;
}
/**
 * Records the intermediate parsing results into the caller's debug array.
 *
 * Nothing happens unless $debug is already an array. The regex patterns are
 * stored HTML-escaped via htmlentities().
 *
 * @param mixed       $debug   Passed by reference; only written to when it is an array.
 * @param mixed       $title   Stored under "title".
 * @param mixed       $group   Stored under "group".
 * @param array       $tags    Joined with single spaces and stored under "tags".
 * @param mixed       $rname   Stored under "rname".
 * @param mixed       $version Stored under "version".
 * @param mixed       $number  Stored under "number".
 * @param mixed       $type    Stored under "type".
 * @param string|null $bmatch  Pattern stored under "base_match".
 * @param string|null $vmatch  Pattern stored under "volume_match".
 * @param string|null $smatch  Pattern stored under "special_match".
 * @param string|null $ematch  Pattern stored under "episode_match".
 * @param string|null $bcmatch Pattern stored under "batch_match".
 */
private function setDebug(&$debug, $title, $group, $tags, $rname, $version, $number, $type, $bmatch, $vmatch, $smatch, $ematch, $bcmatch){
    if(!is_array($debug)){
        return;
    }

    $debug["title"] = $title;
    $debug["group"] = $group;
    $debug["tags"] = implode(" ", $tags);
    $debug["rname"] = $rname;
    $debug["number"] = $number;
    $debug["version"] = $version;
    $debug["type"] = $type;

    $patterns = [
        "base_match" => $bmatch,
        "volume_match" => $vmatch,
        "special_match" => $smatch,
        "episode_match" => $ematch,
        "batch_match" => $bcmatch,
    ];
    foreach($patterns as $key => $pattern){
        // Patterns that did not match are null; cast explicitly because passing
        // null to htmlentities() is deprecated since PHP 8.1 (result is "" either way).
        $debug[$key] = htmlentities((string) $pattern);
    }
}
}