METANOIA/metadata/thwiki.cc/source.go

764 lines
20 KiB
Go

package thwiki_cc
import (
"encoding/json"
"fmt"
"git.gammaspectra.live/S.O.N.G/METANOIA/metadata"
wikitext_parser "git.gammaspectra.live/S.O.N.G/wikitext-parser"
"github.com/oriser/regroup"
"golang.org/x/text/unicode/norm"
"io/ioutil"
"log"
"math"
"net/http"
"net/url"
"strconv"
"strings"
"time"
)
// Endpoints of the THBWiki services queried by this package.
var (
	// baseURL is the root of the main wiki; "api.php", "index.php" and
	// article titles are appended to it.
	baseURL = "https://thwiki.cc/"
	// baseLyricsURL is the root of the host from which ".lrc" lyric files
	// are fetched.
	baseLyricsURL = "https://cd.thwiki.cc/"
)
// Source is the metadata source backed by THBWiki (see GetName and GetURL).
type Source struct {
// client performs every HTTP request issued by this source; it is
// created in NewSource.
client *metadata.CachingClient
}
// NewSource creates a Source whose caching client is bound to the wiki's base
// URL.
func NewSource() *Source {
	src := new(Source)
	// NOTE(review): the half-second argument is presumably the minimum delay
	// between requests — confirm against metadata.NewCachingClient.
	src.client = metadata.NewCachingClient(src.GetURL(), 500*time.Millisecond)
	return src
}
// GetName returns the display name of this source.
func (s *Source) GetName() string {
	const displayName = "THBWiki"
	return displayName
}
// GetURL returns the root URL of the wiki this source reads from.
func (s *Source) GetURL() string {
	wikiRoot := baseURL
	return wikiRoot
}
// GetLicense returns the license attached to data obtained from this source:
// the license code, a link to the wiki's copyright section and an attribution
// string of the form "<name> (<url>)".
func (s *Source) GetLicense() metadata.License {
	var license metadata.License
	license.Code = metadata.CC_BY_NC_SA_30
	license.URL = baseURL + "THBWiki:关于#.E7.89.88.E6.9D.83.E4.BF.A1.E6.81.AF"
	license.Attribution = s.GetName() + " (" + s.GetURL() + ")"
	return license
}
// FindByTOC looks up albums by the CDDB1 identifier derived from toc and, for
// single-disc candidates, drops those whose track durations disagree with the
// TOC.
//
// A candidate is rejected as soon as one track with a known (non-zero)
// duration differs from the corresponding TOC track by more than six seconds.
// Candidates with a disc count other than one are accepted without this
// check.
func (s *Source) FindByTOC(toc metadata.TOC) (albums []*metadata.Album) {
	const matchThreshold = 6 * time.Second

	candidates := s.FindByCDDB1Group([]metadata.CDDB1{toc.GetCDDB1()})

nextCandidate:
	for _, candidate := range candidates {
		if len(candidate.Discs) == 1 {
			for idx, track := range candidate.Discs[0].Tracks {
				delta := track.Duration - toc.GetTrackDuration(idx)
				if delta < 0 {
					delta = -delta
				}
				// A zero duration means the wiki has no length for this
				// track, so it cannot disqualify the candidate.
				if track.Duration != 0 && delta > matchThreshold {
					continue nextCandidate
				}
			}
		}
		albums = append(albums, candidate)
	}
	return albums
}
// FindByCDDB1 searches for albums matching a single disc identifier by
// delegating to FindByCDDB1Group with a one-element group.
func (s *Source) FindByCDDB1(cddb metadata.CDDB1) []*metadata.Album {
	singleDisc := []metadata.CDDB1{cddb}
	return s.FindByCDDB1Group(singleDisc)
}
// FindByCDDB1Group searches for albums whose disc count, track count and
// total duration match the given group of disc identifiers.
//
// For every disc, half of a two-second gap per track is subtracted from the
// summed duration and added to the search tolerance (which starts at one
// second). The three resulting criteria are sent to the wiki's advanced
// search as "G3", "H3" and "I3" lines, each carrying a lower and an upper
// bound separated by a tab.
// NOTE(review): the G3/H3/I3 field codes are taken as-is from the query
// format; their exact meaning is defined by the remote API — confirm there.
func (s *Source) FindByCDDB1Group(group []metadata.CDDB1) []*metadata.Album {
	const gapLength = 2 * time.Second

	var (
		trackCount    int
		totalDuration time.Duration
	)
	tolerance := 1.0

	for _, disc := range group {
		n := disc.GetTrackNumber()
		preGap := (gapLength * time.Duration(n)) / 2 // pre-gap length

		trackCount += n
		totalDuration += disc.GetDuration() - preGap
		tolerance += preGap.Seconds()
	}

	seconds := totalDuration.Seconds()
	lower := int(math.Floor(seconds - tolerance))
	upper := int(math.Ceil(seconds + tolerance))
	discCount := len(group)

	query := fmt.Sprintf("G3%d\t%d\nH3%d\t%d\nI3%d\t%d\n",
		discCount, discCount,
		trackCount, trackCount,
		lower, upper,
	)
	return s.FindAdvancedQueryArguments(query)
}
// FindByAlbumNames searches for albums whose wiki infobox lists any of the
// given names either as the name ("名称") or as the translated name ("译名").
// All alternatives are quoted and combined with OR into one search
// expression.
func (s *Source) FindByAlbumNames(names []metadata.Name) []*metadata.Album {
	clauses := make([]string, 0, len(names))
	for _, candidate := range names {
		clause := fmt.Sprintf("%q OR %q", "名称 = "+candidate.Name, "译名 = "+candidate.Name)
		clauses = append(clauses, clause)
	}
	return s.FindQueryArguments(strings.Join(clauses, " OR "))
}
// FindByCatalogNumber searches for albums whose infobox catalog number
// ("编号") equals catalog. When the number contains dashes, the dash-less
// spelling is searched as an alternative.
func (s *Source) FindByCatalogNumber(catalog metadata.CatalogNumber) []*metadata.Album {
	terms := []string{fmt.Sprintf("%q", "编号 = "+catalog)}

	if plain := string(catalog); strings.Contains(plain, "-") {
		withoutDashes := strings.ReplaceAll(plain, "-", "")
		terms = append(terms, fmt.Sprintf("%q", "编号 = "+withoutDashes))
	}

	return s.FindQueryArguments(strings.Join(terms, " OR "))
}
// FindQueryArguments runs a full-text search through the wiki's "api.php"
// (action=query, list=search) restricted to the "同人专辑" category and
// resolves every hit into an album via GetAlbumInformation.
//
// queryArgs is inserted verbatim between parentheses after the category
// filter, so it must already be a valid search expression. The method returns
// nil when the request fails, the server answers with a non-200 status, or the
// response cannot be decoded. Hits for which GetAlbumInformation yields nil
// are skipped.
func (s *Source) FindQueryArguments(queryArgs string) (albums []*metadata.Album) {
	// The package-level base URL literal parses cleanly, so the error is ignored.
	uri, _ := url.Parse(baseURL)
	uri.Path += "api.php"

	query := uri.Query()
	query.Add("action", "query")
	query.Add("format", "json")
	query.Add("list", "search")
	query.Add("srwhat", "text")
	query.Add("srlimit", "500")
	query.Add("srsearch", "incategory:同人专辑 ("+queryArgs+")")
	uri.RawQuery = query.Encode()

	// NOTE(review): the duration is presumably how long the client may serve
	// this response from its cache — confirm against CachingClient.Request.
	response, err := s.client.Request(&http.Request{
		Method: "GET",
		URL:    uri,
	}, time.Hour*24*14)
	if err != nil {
		return nil
	}
	defer response.Body.Close()

	// Same guard as the other request helpers of this source: do not try to
	// decode an error page as a search result.
	if response.StatusCode != http.StatusOK {
		return nil
	}

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return nil
	}

	type SearchResult struct {
		Query struct {
			SearchInfo struct {
				TotalHits int `json:"totalhits"`
			} `json:"searchinfo"`
			Search []struct {
				NameSpace int    `json:"ns"`
				Title     string `json:"title"`
				PageId    int    `json:"pageid"`
				Snippet   string `json:"snippet"`
			} `json:"search"`
		} `json:"query"`
	}

	result := &SearchResult{}
	if err = json.Unmarshal(body, result); err != nil {
		return nil
	}

	for _, hit := range result.Query.Search {
		album := s.GetAlbumInformation(wikitext_parser.NormalizeWikiTitle(hit.Title))
		if album != nil {
			albums = append(albums, album)
		}
	}
	return albums
}
// FindAdvancedQueryArguments runs the wiki's "uask" API action in two steps
// and resolves the returned album titles via GetAlbumInformation.
//
// The first request submits queryArgs and must answer with exactly two
// space-separated fields: a result count and a token. If the count is zero
// (or not a number) the search ends. The second request repeats the
// parameters together with the token and asks for the first 25 results as
// JSON. Any request, status or decoding failure yields nil.
func (s *Source) FindAdvancedQueryArguments(queryArgs string) (albums []*metadata.Album) {
	// NOTE(review): presumably the cache lifetime of both responses —
	// confirm against CachingClient.Request.
	const requestDuration = time.Hour * 24 * 30

	endpoint, _ := url.Parse(baseURL)
	endpoint.Path += "api.php"

	params := endpoint.Query()
	for _, kv := range [][2]string{
		{"action", "uask"},
		{"pre", "专辑"},
		{"sort", ""},
		{"query", queryArgs},
	} {
		params.Add(kv[0], kv[1])
	}
	endpoint.RawQuery = params.Encode()

	// Step one: obtain the number of results and the token.
	countResponse, err := s.client.Request(&http.Request{Method: http.MethodGet, URL: endpoint}, requestDuration)
	if err != nil {
		return nil
	}
	defer countResponse.Body.Close()
	if countResponse.StatusCode != http.StatusOK {
		return nil
	}

	payload, err := ioutil.ReadAll(countResponse.Body)
	if err != nil {
		return nil
	}

	fields := strings.Split(string(payload), " ")
	if len(fields) != 2 {
		return nil
	}
	// A count that fails to parse is left at zero and treated as "no results".
	if hits, _ := strconv.Atoi(fields[0]); hits == 0 {
		return nil
	}

	// Step two: fetch the result page using the token.
	for _, kv := range [][2]string{
		{"token", fields[1]},
		{"order", ""},
		{"limit", "25"},
		{"offset", "0"},
		{"result", "d"},
	} {
		params.Add(kv[0], kv[1])
	}
	endpoint.RawQuery = params.Encode()

	listResponse, err := s.client.Request(&http.Request{Method: http.MethodGet, URL: endpoint}, requestDuration)
	if err != nil {
		return nil
	}
	defer listResponse.Body.Close()
	if listResponse.StatusCode != http.StatusOK {
		return nil
	}

	payload, err = ioutil.ReadAll(listResponse.Body)
	if err != nil {
		return nil
	}

	var page struct {
		From    int    `json:"from"`
		Till    int    `json:"till"`
		Total   int    `json:"total"`
		Symbols string `json:"symbols"`
		Results struct {
			Link []string `json:"link"`
			Text []string `json:"text"`
		} `json:"resu"`
	}
	if json.Unmarshal(payload, &page) != nil {
		return nil
	}

	for _, albumTitle := range page.Results.Text {
		if found := s.GetAlbumInformation(wikitext_parser.NormalizeWikiTitle(albumTitle)); found != nil {
			albums = append(albums, found)
		}
	}
	return albums
}
// GetArticle downloads the raw wikitext of the page named title
// ("index.php?action=raw") and returns it parsed by
// wikitext_parser.ParseWikiText.
//
// An error is returned when the request fails, the server answers with a
// status other than 200, or the body cannot be read. The status check keeps
// error pages from being parsed and returned as if they were article content.
func (s *Source) GetArticle(title string) ([]interface{}, error) {
	// The package-level base URL literal parses cleanly, so the error is ignored.
	uri, _ := url.Parse(baseURL)
	uri.Path += "index.php"

	query := uri.Query()
	query.Add("title", title)
	query.Add("action", "raw")
	uri.RawQuery = query.Encode()

	// NOTE(review): the duration is presumably the cache lifetime of the
	// response — confirm against CachingClient.Request.
	response, err := s.client.Request(&http.Request{
		Method: "GET",
		URL:    uri,
	}, time.Hour*24*60)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching article %q: unexpected status %d", title, response.StatusCode)
	}

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return nil, err
	}
	return wikitext_parser.ParseWikiText(string(body)), nil
}
// GetFileURL resolves the wiki file "File:<title>" to its download URL using
// the API's imageinfo property.
//
// It returns the URL of the first page in the response that carries image
// information, or an empty string when the request fails, the server answers
// with a non-200 status, the response cannot be decoded, or no image
// information is present.
func (s *Source) GetFileURL(title string) string {
	// The package-level base URL literal parses cleanly, so the error is ignored.
	uri, _ := url.Parse(baseURL)
	uri.Path += "api.php"

	query := uri.Query()
	query.Add("titles", "File:"+title)
	query.Add("action", "query")
	query.Add("prop", "imageinfo")
	query.Add("iiprop", "url")
	query.Add("format", "json")
	uri.RawQuery = query.Encode()

	// NOTE(review): the duration is presumably the cache lifetime of the
	// response — confirm against CachingClient.Request.
	response, err := s.client.Request(&http.Request{
		Method: "GET",
		URL:    uri,
	}, time.Hour*24*180)
	if err != nil {
		return ""
	}
	defer response.Body.Close()

	// Same guard as the other request helpers of this source: an error page
	// is not a valid API answer.
	if response.StatusCode != http.StatusOK {
		return ""
	}

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return ""
	}

	type ImageData struct {
		Query struct {
			Pages map[string]struct {
				ImageInfo []struct {
					URL string `json:"url"`
				} `json:"imageinfo"`
			} `json:"pages"`
		} `json:"query"`
	}

	var d ImageData
	if err = json.Unmarshal(body, &d); err != nil {
		return ""
	}

	// Pages is keyed by page id; only one title is requested, so the first
	// entry with image information is the answer.
	for _, page := range d.Query.Pages {
		if len(page.ImageInfo) > 0 {
			return page.ImageInfo[0].URL
		}
	}
	return ""
}
// GetSongLyrics downloads "lyrics/<normalized song name>.lrc" from the lyrics
// host and wraps the content in an LRCLyrics tagged "original", together with
// a "url" identifier pointing at the song's lyrics page on the wiki.
//
// It returns nil when the request fails, the status is not 200, or the body
// cannot be read.
func (s *Source) GetSongLyrics(songName string) *metadata.LRCLyrics {
	normalized := wikitext_parser.NormalizeWikiTitle(songName)

	lrcURL, _ := url.Parse(baseLyricsURL)
	lrcURL.Path += "lyrics/" + normalized + ".lrc"

	// NOTE(review): the duration is presumably the cache lifetime of the
	// response — confirm against CachingClient.Request.
	resp, err := s.client.Request(&http.Request{Method: http.MethodGet, URL: lrcURL}, time.Hour*24*120)
	if err != nil {
		return nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil
	}

	content, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil
	}

	identifiers := []metadata.Name{
		{Kind: "url", Name: baseURL + "歌词:" + normalized},
	}
	//TODO: detect original language
	return metadata.NewLRCLyrics(string(content), "original", identifiers)
}
// getWikiStringOptions builds the options used to flatten parsed wikitext
// into strings for the page named title.
//
// The returned options start from the parser defaults and then set:
//   - PageName to title and Trim to trim;
//   - a string handler that applies NFKD followed by NFC normalization;
//   - a template handler that expands the event templates "CM", "红楼梦" and
//     "例大祭" to an English event name followed by their first parameter,
//     expands PAGENAME/SUBPAGENAME to the page name, and renders any other
//     template as its own name.
//
// Template names are matched case-insensitively (they are upper-cased before
// comparison). An event template whose first parameter is missing or renders
// to nothing produces no output.
func getWikiStringOptions(title string, trim bool) *wikitext_parser.WikiStringValueOptions {
	// English prefixes for the event templates, keyed by upper-cased name.
	eventPrefixes := map[string]string{
		"CM":  "Comiket",
		"红楼梦": "Touhou Kouroumu",
		"例大祭": "Hakurei Shrine Reitaisai",
	}

	opts := &wikitext_parser.WikiStringValueOptions{}
	opts.Default()
	opts.PageName = title
	opts.Trim = trim

	opts.StringHandler = func(value string, opt *wikitext_parser.WikiStringValueOptions) []string {
		return []string{norm.NFC.String(norm.NFKD.String(value))}
	}

	opts.TemplateHandler = func(template *wikitext_parser.Template, opt *wikitext_parser.WikiStringValueOptions) (result []string) {
		name := strings.ToUpper(template.Name)

		if prefix, isEvent := eventPrefixes[name]; isEvent {
			val, ok := template.Parameters["1"]
			if !ok || len(val) == 0 {
				return nil
			}
			// Guard the index: a non-empty parameter may still render to an
			// empty list of strings.
			rendered := wikitext_parser.GetWikiStringValue(val, opt)
			if len(rendered) == 0 {
				return nil
			}
			return []string{prefix + " " + rendered[0]}
		}

		switch name {
		case "PAGENAME", "SUBPAGENAME":
			return []string{opt.PageName}
		default:
			return []string{template.Name}
		}
	}

	return opts
}
// getStringValue is a local shorthand for wikitext_parser.GetWikiStringValue:
// it renders the parsed wikitext nodes in v to strings using opt.
func getStringValue(v []interface{}, opt *wikitext_parser.WikiStringValueOptions) []string {
	rendered := wikitext_parser.GetWikiStringValue(v, opt)
	return rendered
}
// GetAlbumInformation downloads the wiki article named title and converts it
// into an Album.
//
// The article is walked node by node:
//   - the template "同人专辑信息" (doujin album information) creates the album
//     and fills in cover art, release event, name, producers, issuers,
//     catalog numbers, genres and the official page;
//   - each template "专辑人员列表" (album person list) records, per disc, a
//     mapping from staff entry to group that is later used to annotate track
//     roles;
//   - each template "专辑曲目列表" (album track list) adds one disc with its
//     tracks;
//   - after a text node containing "== Staff ==", description lists are read
//     as album-level staff roles until the next track list.
//
// Person and track lists that appear before the album information template
// are ignored. The method returns nil when the article cannot be fetched or
// contains no album information template.
func (s *Source) GetAlbumInformation(title string) *metadata.Album {
	article, err := s.GetArticle(title)
	if err != nil {
		return nil
	}

	// Upper bound on the disc number accepted from a person list, so that a
	// bogus value on the wiki cannot make the mapping table grow without limit.
	const maxStaffDiscs = 256

	var album *metadata.Album
	opts := getWikiStringOptions(title, true)

	// staffMappings[disc] maps a staff entry to the group it belongs to.
	var staffMappings []map[string]string
	listingStaff := false

	// Matches "position(name)" staff entries.
	staffNameGroupRE := regroup.MustCompile(`(?P<position>.+)\((?P<name>.+)\)`)

	zunComposerRole := metadata.Role{
		Kind: "composer",
		Name: []metadata.Name{
			{Kind: "original", Name: "ZUN"},
			{Kind: "url", Name: baseURL + "ZUN"},
		},
	}

	// English event names for the release templates handled below.
	releaseEvents := map[string]string{
		"CM":  "Comiket",
		"红楼梦": "Touhou Kouroumu",
		"例大祭": "Hakurei Shrine Reitaisai",
	}

	// handleStaffMapping adds one role of the given kind to track for every
	// comma-separated entry in val. Nothing is added when no staff mapping
	// exists for discIndex. The role's Group is set when the mapping assigns
	// the entry to a group other than itself.
	handleStaffMapping := func(discIndex int, val []interface{}, kind string, track *metadata.Track, opts *wikitext_parser.WikiStringValueOptions) {
		if len(staffMappings) <= discIndex {
			return
		}
		for _, e := range strings.Split(strings.Join(getStringValue(val, opts), ","), ",") {
			entryValue := strings.TrimSpace(e)
			if len(entryValue) == 0 {
				continue
			}
			role := metadata.Role{
				Kind: kind,
				Name: []metadata.Name{
					{Kind: "original", Name: entryValue},
					{Kind: "url", Name: baseURL + wikitext_parser.NormalizeWikiTitle(entryValue)},
				},
			}
			if groupValue, ok := staffMappings[discIndex][entryValue]; ok && groupValue != entryValue {
				role.Group = groupValue
			}
			track.Roles = append(track.Roles, role)
		}
	}

	for _, v := range article {
		switch node := v.(type) {
		case string:
			if strings.Contains(node, "== Staff ==") {
				listingStaff = true
			}

		case *wikitext_parser.DescriptionList:
			// Staff may be listed before the album template was seen; without
			// an album there is nothing to attach the roles to.
			if !listingStaff || album == nil {
				continue
			}

			roleName := strings.ToLower(strings.Join(getStringValue(node.Name, opts), ""))
			// Fold spelling variants into one role kind; anything else
			// (including "assistant" and "mastering") is kept as written.
			switch roleName {
			case "total produce", "produce", "production":
				roleName = "producer"
			case "illustration", "cover artwork":
				roleName = "illustration"
			case "graphic design", "graphics", "design":
				roleName = "design"
			case "compose":
				roleName = "composer"
			}

			for _, a := range strings.Split(strings.Join(getStringValue(node.Entries, opts), ""), ",") {
				entry := strings.TrimSpace(a)
				role := metadata.Role{
					Kind: roleName,
					Name: []metadata.Name{{Kind: "original", Name: entry}},
				}

				m := &struct {
					Position string `regroup:"position"`
					Name     string `regroup:"name"`
				}{}
				if staffNameGroupRE.MatchToTarget(entry, m) == nil {
					// "position(name)": the part before the parentheses
					// becomes the role name, the part inside the group.
					role.Name = []metadata.Name{{Kind: "original", Name: strings.TrimSpace(m.Position)}}
					role.Group = strings.TrimSpace(m.Name)
				}

				album.Roles = append(album.Roles, role)
			}

		case *wikitext_parser.Template:
			switch node.Name {
			case "专辑人员列表": //Album Person List
				if album == nil {
					continue
				}

				discIndex, err := strconv.Atoi(strings.Join(getStringValue(node.Parameters["碟号"], opts), " ")) //disc number
				discIndex--
				if err != nil {
					// No usable disc number: take the next free slot.
					discIndex = len(staffMappings)
				}
				if discIndex < 0 || discIndex >= maxStaffDiscs {
					continue
				}

				// One slot is added per list, as before; further slots are
				// added only when an explicit disc number lies beyond the
				// end, which previously caused an index-out-of-range panic.
				staffMappings = append(staffMappings, make(map[string]string))
				for len(staffMappings) <= discIndex {
					staffMappings = append(staffMappings, make(map[string]string))
				}

				mapping := staffMappings[discIndex]
				for _, val := range node.Parameters { //nested
					for _, value := range getStringValue(val, opts) {
						parts := strings.Split(value, "=")
						p0 := strings.TrimSpace(parts[0])
						if len(parts) > 1 {
							mapping[p0] = strings.TrimSpace(parts[1])
						} else {
							mapping[p0] = p0
						}
					}
				}

			case "专辑曲目列表": //Album Track List
				if album == nil {
					continue
				}

				listingStaff = false
				discIndex := len(album.Discs)
				disc := metadata.Disc{}

				for _, value := range node.Parameters["嵌套"] { //nested
					tpl, ok := value.(*wikitext_parser.Template)
					if !ok {
						continue
					}

					track := metadata.Track{}
					for k, val := range tpl.Parameters {
						switch k {
						case "时长": //duration
							d := strings.Join(getStringValue(val, opts), "")
							sub := strings.Split(d, ":")
							// Parse failures deliberately leave the component at zero.
							s1, _ := strconv.ParseInt(strings.TrimLeft(sub[0], "0"), 10, 0)
							switch {
							case len(sub) > 2:
								//TODO: values with three or more components are not interpreted; the duration stays zero
							case len(sub) > 1:
								s2, _ := strconv.ParseInt(strings.TrimLeft(sub[1], "0"), 10, 0)
								track.Duration = time.Minute*time.Duration(s1) + time.Second*time.Duration(s2)
							default:
								track.Duration = time.Second * time.Duration(s1)
							}

						case "名称": //title
							trackTitle := strings.Join(getStringValue(val, opts), " ")
							track.Name = append(track.Name, metadata.Name{
								Kind: "original",
								Name: trackTitle,
							})
							track.Lyrics = func() []metadata.Lyrics {
								// Return no lyrics at all rather than a slice
								// holding a nil pointer, which would look
								// non-nil once stored in the element type.
								lrc := s.GetSongLyrics(trackTitle)
								if lrc == nil {
									return nil
								}
								return []metadata.Lyrics{lrc}
							}

						case "原专辑": //original release
							track.Links = append(track.Links, metadata.Link{
								Kind: "original release",
								Name: []metadata.Name{{Kind: "original", Name: strings.Join(getStringValue(val, opts), " ")}},
							})

						case "原名称": //original release title
							track.Links = append(track.Links, metadata.Link{
								Kind: "original release title",
								Name: []metadata.Name{{Kind: "original", Name: strings.Join(getStringValue(val, opts), " ")}},
							})

						case "原曲": //original song
							// Not stored itself; its presence is checked in the "编曲" case.

						case "编曲": //arranger/composer
							if _, hasOriginal := tpl.Parameters["原曲"]; hasOriginal {
								handleStaffMapping(discIndex, val, "arranger", &track, opts)
								track.Roles = append(track.Roles, zunComposerRole) //TODO check
							} else {
								handleStaffMapping(discIndex, val, "composer", &track, opts)
							}

						case "再编曲": //re-arranger
							handleStaffMapping(discIndex, val, "remix", &track, opts)

						case "演唱": //vocals
							handleStaffMapping(discIndex, val, "vocals", &track, opts)

						case "作词": //lyrics
							handleStaffMapping(discIndex, val, "lyrics", &track, opts)
						}
					}
					disc.Tracks = append(disc.Tracks, track)
				}
				album.Discs = append(album.Discs, disc)

			case "同人专辑信息": //Doujin Album Information
				album = &metadata.Album{
					License:                s.GetLicense(),
					SourceUniqueIdentifier: baseURL + title,
					Identifiers: []metadata.Name{
						{
							Kind: "url",
							Name: baseURL + title,
						},
					},
				}

				for k, val := range node.Parameters {
					switch k {
					case "封面": //cover: jpg
						fileName := strings.Join(getStringValue(val, opts), "")
						if !strings.Contains(fileName, ".") {
							// Only an extension was given: derive the file name from the page title.
							fileName = title + "封面." + fileName
						}
						// Resolve once and reuse the result.
						if coverURL := s.GetFileURL(fileName); len(coverURL) > 0 {
							album.Art = append(album.Art, metadata.Name{
								Kind: "front",
								Name: coverURL,
							})
						}

					case "展会": //release
						// Only the first template in the value is considered.
						for _, value := range val {
							tpl, ok := value.(*wikitext_parser.Template)
							if !ok {
								continue
							}
							//TODO
							if eventName, known := releaseEvents[tpl.Name]; known {
								// The edition is used only when the first
								// parameter exists and is plain text.
								if first := tpl.Parameters["1"]; len(first) > 0 {
									if edition, isText := first[0].(string); isText {
										album.Links = append(album.Links, metadata.Link{
											Kind: "release",
											Name: []metadata.Name{
												{Kind: "name", Name: eventName + " " + edition},
											},
										})
									}
								}
							}
							break
						}

					case "名称": //name
						album.Name = append(album.Name, metadata.Name{
							Kind: "original",
							Name: strings.Join(getStringValue(val, opts), " "),
						})

					case "制作方": //produced by
						for _, producer := range strings.Split(strings.Join(getStringValue(val, opts), ","), ",") {
							if strings.TrimSpace(producer) == "" {
								continue
							}
							album.Roles = append(album.Roles, metadata.Role{
								Kind: "producer",
								Name: []metadata.Name{{Kind: "original", Name: producer}, {Kind: "url", Name: baseURL + wikitext_parser.NormalizeWikiTitle(producer)}},
							})
						}

					case "发行方": //issuer
						for _, issuer := range strings.Split(strings.Join(getStringValue(val, opts), ","), ",") {
							if strings.TrimSpace(issuer) == "" {
								continue
							}
							album.Roles = append(album.Roles, metadata.Role{
								Kind: "issuer",
								Name: []metadata.Name{{Kind: "original", Name: issuer}, {Kind: "url", Name: baseURL + wikitext_parser.NormalizeWikiTitle(issuer)}},
							})
						}

					case "编号": //catalog number
						for _, catalog := range strings.Split(strings.Join(getStringValue(val, opts), "+"), "+") {
							album.Identifiers = append(album.Identifiers, metadata.Name{
								Kind: "catalog",
								Name: catalog,
							})
						}

					case "风格类型": //style
						for _, style := range strings.Split(strings.Join(getStringValue(val, opts), ","), ",") {
							if strings.TrimSpace(style) == "" {
								continue
							}
							album.Tags = append(album.Tags, metadata.Name{
								Kind: "genre",
								Name: strings.ToLower(style), //TODO: normalize
							})
						}

					case "官网页面": //official page
						album.Links = append(album.Links, metadata.Link{
							Kind: "official",
							Name: []metadata.Name{{Kind: "url", Name: strings.Join(getStringValue(val, opts), " ")}},
						})
					}
				}
			}
		}
	}

	return album
}
// Test is a manual smoke test: it runs a few live lookups against the wiki
// (by CDDB1 id, by album name and by article title) and logs the results.
//
// The lyrics of the second track of "マジコカタストロフィ" are fetched only
// when that album, disc, track and lyrics callback all exist, so a failed
// lookup logs an empty result instead of panicking.
func (s *Source) Test() {
	albumC := s.FindByCDDB1(0x730dec08)
	albums := s.FindByAlbumNames([]metadata.Name{{Name: "Bayside Beat"}})
	album := s.GetAlbumInformation("Haunted_House")
	album2 := s.GetAlbumInformation("マジコカタストロフィ")

	var lyrics []metadata.Lyrics
	if album2 != nil && len(album2.Discs) > 0 && len(album2.Discs[0].Tracks) > 1 {
		// Lyrics is only set for tracks that have a title.
		if fetchLyrics := album2.Discs[0].Tracks[1].Lyrics; fetchLyrics != nil {
			lyrics = fetchLyrics()
		}
	}

	log.Print(album)
	log.Print(albumC)
	log.Print(album2)
	log.Print(lyrics)
	log.Print(albums)
}