METANOIA/metadata/en.touhouwiki.net/source.go

529 lines
13 KiB
Go

package en_touhouwiki_net
import (
"encoding/json"
"fmt"
"git.gammaspectra.live/S.O.N.G/METANOIA/metadata"
"git.gammaspectra.live/S.O.N.G/METANOIA/utilities"
wikitext_parser "git.gammaspectra.live/S.O.N.G/wikitext-parser"
"github.com/oriser/regroup"
"io/ioutil"
"log"
"net/http"
"net/url"
"strings"
"time"
)
// Endpoints used by this source.
var (
	// baseURL is the root of the public wiki. It is reported by GetURL and is
	// the prefix of the page links and license link built in this file.
	baseURL = "https://en.touhouwiki.net/"

	// baseAPIURL is the root of the mirror service that every request in this
	// file (cddb, search, lyrics, album) is sent to.
	baseAPIURL = "https://touhouwiki-mirror.gammaspectra.live/"
)
// Source is the Touhou Wiki metadata source. Every HTTP request made by its
// methods goes through client.
type Source struct {
	// client performs the HTTP requests; it is created in NewSource.
	client *metadata.CachingClient
}
// NewSource returns a Source whose caching client is created for the
// source's own URL.
//
// NOTE(review): the 100ms duration handed to NewCachingClient is presumably a
// minimum interval between requests — confirm against metadata.CachingClient.
func NewSource() *Source {
	src := new(Source)
	src.client = metadata.NewCachingClient(src.GetURL(), 100*time.Millisecond)
	return src
}
// GetName returns the human-readable name of this source.
func (s *Source) GetName() string {
	const displayName = "Touhou Wiki"
	return displayName
}
// GetURL returns the root URL of the wiki this source is built on.
func (s *Source) GetURL() string {
	siteRoot := baseURL
	return siteRoot
}
// GetLicense describes the license attached to data from this source:
// CC BY-SA 4.0, a link to the wiki's copyright page, and an attribution
// string of the form "<name> (<url>)".
func (s *Source) GetLicense() metadata.License {
	var license metadata.License
	license.Code = metadata.CC_BY_SA_40
	license.URL = baseURL + "wiki/Touhou_Wiki:Copyrights#Content_licensing"
	license.Attribution = s.GetName() + " (" + s.GetURL() + ")"
	return license
}
// FindByTOC looks up albums for a disc table of contents. The CDDB1
// identifier derived from the TOC is passed along with the TOC itself.
func (s *Source) FindByTOC(toc metadata.TOC) []*metadata.Album {
	discID := toc.GetCDDB1()
	return s.QueryCDDB(toc, discID)
}
// FindByCDDB1 looks up albums by CDDB1 identifier only; no TOC is supplied
// to the query.
func (s *Source) FindByCDDB1(cddb metadata.CDDB1) []*metadata.Album {
	found := s.QueryCDDB(nil, cddb)
	return found
}
func (s *Source) QueryCDDB(toc metadata.TOC, cddb metadata.CDDB1) (results []*metadata.Album) {
uri, _ := url.Parse(baseAPIURL)
uri.Path += "cddb"
query := uri.Query()
if len(toc) > 1 {
query.Add("cmd", fmt.Sprintf("cddb query %s", toc.CDDBString()))
} else {
query.Add("cmd", fmt.Sprintf("cddb query %s", cddb.String()))
}
query.Add("hello", "anonymous localhost METANOIA "+utilities.Version)
query.Add("proto", "6")
uri.RawQuery = query.Encode()
response, err := s.client.Request(&http.Request{
Method: "GET",
URL: uri,
}, time.Hour*24*14)
if err != nil {
return nil
}
defer response.Body.Close()
body, err := ioutil.ReadAll(response.Body)
if err != nil {
return nil
}
matchLineRE := regroup.MustCompile(`(?m)^(200 )?(?P<group>[A-Za-z][A-Za-z0-9_]+) (?P<cddb1>[a-f0-9]{8}) (?P<match>.+)$`)
matchURLRE := regroup.MustCompile(`(?m)^EXTD=https://en.touhouwiki.net/index.php\?curid=(?P<album>[0-9]+)$`)
type matchResult struct {
Group string `regroup:"group"`
CDDB1 string `regroup:"cddb1"`
Match string `regroup:"match"`
}
m := &matchResult{}
rets, err := matchLineRE.MatchAllToTarget(string(body), -1, m)
if err == nil {
for _, elem := range rets {
match := elem.(*matchResult)
uri, _ := url.Parse(baseAPIURL)
uri.Path += "cddb"
query := uri.Query()
query.Add("cmd", fmt.Sprintf("cddb read %s %s", match.Group, match.CDDB1))
query.Add("hello", "anonymous localhost METANOIA "+utilities.Version)
query.Add("proto", "6")
uri.RawQuery = query.Encode()
response, err := s.client.Request(&http.Request{
Method: "GET",
URL: uri,
}, time.Hour*24*30)
if err != nil {
continue
}
defer response.Body.Close()
body, err := ioutil.ReadAll(response.Body)
if err != nil {
continue
}
t := &struct {
AlbumId int `regroup:"album"`
}{}
if matchURLRE.MatchToTarget(string(body), t) == nil {
a := s.GetAlbumInformation(t.AlbumId)
if a != nil {
results = append(results, s.GetAlbumInformation(t.AlbumId))
}
}
}
}
return
}
// FindByAlbumNames runs a strict (non-loose) album search for every given
// name and merges the results. An album is kept only the first time its
// SourceUniqueIdentifier is seen.
func (s *Source) FindByAlbumNames(names []metadata.Name) (r []*metadata.Album) {
	for _, name := range names {
	candidates:
		for _, candidate := range s.FindQueryArguments(name.Name, false) {
			for _, known := range r {
				if known.SourceUniqueIdentifier == candidate.SourceUniqueIdentifier {
					// Already collected from an earlier name or result.
					continue candidates
				}
			}
			r = append(r, candidate)
		}
	}
	return r
}
// FindByCatalogNumber runs a strict (non-loose) album search using the
// catalog number as the query text.
func (s *Source) FindByCatalogNumber(catalog metadata.CatalogNumber) (r []*metadata.Album) {
	queryText := string(catalog)
	r = s.FindQueryArguments(queryText, false)
	return r
}
// albumEntry is the JSON shape of one album as decoded from the mirror's
// "search" and "album/<id>" responses.
type albumEntry struct {
	// Id is the wiki page id; it is used to build the "album/<id>" identifier.
	Id int `json:"pageid"`
	// Type is the album type; the value "arrangement" adds a "remix" genre tag.
	Type string `json:"type"`
	// MainTitle is the wiki page title. Entries with an empty title are rejected.
	MainTitle string `json:"pagetitle"`
	// Titles maps a name kind to the album title of that kind.
	Titles map[string]string `json:"titles"`
	// Image is the wiki file name of the cover image; may be empty.
	Image string `json:"image"`
	CatalogNumbers []string `json:"catalognumbers"`
	Genre []string `json:"genre"`
	// TrackCount and Duration are decoded but not read elsewhere in this file.
	TrackCount int `json:"trackcount"`
	Duration int `json:"duration"`
	// ReleaseDate is parsed with the layout "2006-01-02".
	ReleaseDate string `json:"releasedate"`
	ReleaseEvent string `json:"releaseevent"`
	Links []string `json:"links"`
	Artists []artistEntry `json:"artists"`
	Discs []discEntry `json:"discs"`
}
// discEntry is the JSON shape of one disc inside an albumEntry.
type discEntry struct {
	// Name is the disc name; may be empty.
	Name string `json:"name"`
	// Duration and TrackCount are decoded but not read elsewhere in this file.
	Duration int `json:"duration"`
	TrackCount int `json:"trackcount"`
	Tracks []trackEntry `json:"tracks"`
}
// trackEntry is the JSON shape of one track inside a discEntry.
type trackEntry struct {
	// Duration is the track length in seconds.
	Duration int `json:"duration"`
	// MainTitle is decoded but not read elsewhere in this file; track names
	// are taken from Titles.
	MainTitle string `json:"title"`
	// Titles maps a name kind to the track title of that kind.
	Titles map[string]string `json:"titles"`
	Artists []artistEntry `json:"artists"`
	// Lyrics is the name of the lyrics page passed to GetSongLyrics; empty
	// when the track has none.
	Lyrics string `json:"lyrics"`
	Links []string `json:"links"`
	// Original lists the source material of the track. Values starting with
	// "Album: " name an album; all other values are treated as titles.
	Original []string `json:"original"`
}
// artistEntry is the JSON shape of one credit line: a position (role kind)
// and the names credited for it.
type artistEntry struct {
	// Position becomes the Kind of the resulting metadata.Role.
	Position string `json:"position"`
	// Names holds the credited names; a value of the form "X(Y)" is split by
	// staffNameGroupRE into a name and a group.
	Names []string `json:"names"`
}
// FindQueryArguments sends queryArgs to the mirror's "search" endpoint and
// converts every returned entry into a metadata.Album.
//
// loose selects the search type: "loosealbum" when true, "album" otherwise.
// Entries that ParseAlbumInformation rejects are left out. The result is nil
// when the request, the read, or the JSON decoding fails.
func (s *Source) FindQueryArguments(queryArgs string, loose bool) (albums []*metadata.Album) {
	searchType := "album"
	if loose {
		searchType = "loosealbum"
	}

	endpoint, _ := url.Parse(baseAPIURL)
	endpoint.Path += "search"
	params := endpoint.Query()
	params.Add("type", searchType)
	params.Add("query", queryArgs)
	endpoint.RawQuery = params.Encode()

	request := &http.Request{Method: http.MethodGet, URL: endpoint}
	response, err := s.client.Request(request, 14*24*time.Hour)
	if err != nil {
		return nil
	}
	defer response.Body.Close()

	payload, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return nil
	}

	var entries []*albumEntry
	if json.Unmarshal(payload, &entries) != nil {
		return nil
	}

	for i := range entries {
		if parsed := s.ParseAlbumInformation(entries[i]); parsed != nil {
			albums = append(albums, parsed)
		}
	}
	return albums
}
// lyricsEntry is the JSON shape of a lyrics page as decoded from the
// mirror's "lyrics/<title>" response. Only MainTitle and Entries are read by
// GetSongLyrics; the other fields are decoded but unused in this file.
type lyricsEntry struct {
	// MainTitle is the page title; a response with an empty title is rejected.
	MainTitle string `json:"pagetitle"`
	Titles []string `json:"titles"`
	Links []string `json:"links"`
	Duration int `json:"duration"`
	Artists []artistEntry `json:"artists"`
	// Entries holds the lyric lines, one slice per variant.
	Entries struct {
		Kanji []string `json:"kanji"`
		Romaji []string `json:"romaji"`
		English []string `json:"english"`
	} `json:"entries"`
}
// GetSongLyrics fetches the lyrics page named lyricsName from the mirror's
// "lyrics/" endpoint and returns one metadata.TextLyrics per non-empty
// variant, in the order kanji ("original"), romaji, english.
//
// It returns nil when the request fails, the status is not 200 OK, the body
// cannot be read or decoded, or the decoded entry has no page title.
func (s *Source) GetSongLyrics(lyricsName string) (result []*metadata.TextLyrics) {
	uri, err := url.Parse(baseAPIURL)
	if err != nil {
		return nil
	}
	uri.Path += "lyrics/" + wikitext_parser.NormalizeWikiTitle(lyricsName)

	response, err := s.client.Request(&http.Request{
		Method: "GET",
		URL:    uri,
	}, time.Hour*24*120)
	if err != nil {
		return nil
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return nil
	}

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return nil
	}

	// Decode into a value, not a pointer to a pointer: a JSON "null" body
	// would otherwise reset the pointer to nil and the title check below
	// would dereference it.
	var l lyricsEntry
	if err := json.Unmarshal(body, &l); err != nil || l.MainTitle == "" {
		return nil
	}

	// All variants share the link to the wiki's lyrics page.
	identifiers := []metadata.Name{
		{Kind: "url", Name: baseURL + "wiki/" + wikitext_parser.NormalizeWikiTitle("Lyrics: "+l.MainTitle)},
	}

	if len(l.Entries.Kanji) > 0 {
		result = append(result, &metadata.TextLyrics{
			Language:    "original", //TODO: detect original language
			Identifiers: identifiers,
			Entries:     l.Entries.Kanji,
		})
	}
	if len(l.Entries.Romaji) > 0 {
		result = append(result, &metadata.TextLyrics{
			Language:    "romaji",
			Identifiers: identifiers,
			Entries:     l.Entries.Romaji,
		})
	}
	if len(l.Entries.English) > 0 {
		result = append(result, &metadata.TextLyrics{
			Language:    "english",
			Identifiers: identifiers,
			Entries:     l.Entries.English,
		})
	}
	return result
}
// staffNameGroupRE splits a credit of the form "X(Y)". Despite the capture
// names, ParseAlbumInformation stores the "position" capture (X) as the
// role's name and the "name" capture (Y) as the role's group.
var staffNameGroupRE = regroup.MustCompile(`(?P<position>.+)\((?P<name>.+)\)`)
// GetAlbumInformation fetches the album with the given wiki page id from the
// mirror's "album/<id>" endpoint and converts it with ParseAlbumInformation.
//
// It returns nil when the request fails, the body cannot be read or decoded,
// or the decoded entry is rejected by ParseAlbumInformation.
func (s *Source) GetAlbumInformation(id int) *metadata.Album {
	uri, err := url.Parse(baseAPIURL)
	if err != nil {
		return nil
	}
	uri.Path += fmt.Sprintf("album/%d", id)

	response, err := s.client.Request(&http.Request{
		Method: "GET",
		URL:    uri,
	}, time.Hour*24*14)
	if err != nil {
		return nil
	}
	defer response.Body.Close()

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return nil
	}

	// Decode into a value, not a pointer to a pointer: a JSON "null" body
	// would otherwise produce a nil *albumEntry for the parser to dereference.
	var entry albumEntry
	if err := json.Unmarshal(body, &entry); err != nil {
		return nil
	}
	return s.ParseAlbumInformation(&entry)
}
// ParseAlbumInformation converts a decoded API album entry into a
// metadata.Album: identifiers, titles, cover art, tags, catalog numbers,
// links, credits, discs and tracks.
//
// It returns nil when a is nil or has no page title. A release date that
// does not match the layout "2006-01-02" leaves ReleaseDate at its zero
// value.
func (s *Source) ParseAlbumInformation(a *albumEntry) (album *metadata.Album) {
	if a == nil || a.MainTitle == "" {
		return nil
	}

	pageURL := baseURL + "wiki/" + wikitext_parser.NormalizeWikiTitle(a.MainTitle)
	album = &metadata.Album{
		License:                s.GetLicense(),
		SourceUniqueIdentifier: pageURL,
		Identifiers: []metadata.Name{
			{Kind: "url", Name: pageURL},
			{Kind: "url", Name: fmt.Sprintf("%salbum/%d", baseAPIURL, a.Id)},
		},
	}

	if a.Type == "arrangement" {
		album.Tags = append(album.Tags, metadata.Name{Kind: "genre", Name: "remix"})
	}

	// Map iteration order is unspecified, so the order of names is too.
	for kind, title := range a.Titles {
		album.Name = append(album.Name, metadata.Name{Kind: kind, Name: title})
	}

	if len(a.Image) > 0 {
		album.Art = append(album.Art, metadata.Name{
			Kind: "front",
			Name: baseAPIURL + "pages_by_name/File:" + wikitext_parser.NormalizeWikiTitle(a.Image),
		})
	}

	for _, tag := range a.Genre {
		album.Tags = append(album.Tags, metadata.Name{Kind: "genre", Name: tag})
	}

	for _, catno := range a.CatalogNumbers {
		album.Identifiers = append(album.Identifiers, metadata.Name{Kind: "catalog", Name: catno})
	}

	if len(a.ReleaseEvent) > 0 {
		album.Links = append(album.Links, metadata.Link{
			Kind: "release",
			Name: []metadata.Name{{Kind: "name", Name: a.ReleaseEvent}},
		})
	}

	for _, l := range a.Links {
		album.Links = append(album.Links, metadata.Link{
			Kind: "official",
			Name: []metadata.Name{{Kind: "url", Name: l}},
		})
	}

	album.Roles = append(album.Roles, parseArtistRoles(a.Artists)...)

	for _, d := range a.Discs {
		disc := metadata.Disc{}
		if len(d.Name) > 0 {
			disc.Name = []metadata.Name{{Kind: "original", Name: d.Name}}
		}
		for _, t := range d.Tracks {
			disc.Tracks = append(disc.Tracks, s.parseTrack(t))
		}
		album.Discs = append(album.Discs, disc)
	}

	// Best effort: an empty or malformed date keeps the zero time.
	if released, err := time.ParseInLocation("2006-01-02", a.ReleaseDate, time.UTC); err == nil {
		album.ReleaseDate = released
	}
	return album
}

// parseTrack converts one decoded track entry into a metadata.Track. Lyrics
// are not fetched here; the track gets a function that fetches them on call.
func (s *Source) parseTrack(t trackEntry) metadata.Track {
	track := metadata.Track{}

	for kind, title := range t.Titles {
		track.Name = append(track.Name, metadata.Name{Kind: kind, Name: title})
	}

	track.Duration = time.Second * time.Duration(t.Duration)
	track.Roles = append(track.Roles, parseArtistRoles(t.Artists)...)

	for _, o := range t.Original {
		if strings.HasPrefix(o, "Album: ") {
			track.Links = append(track.Links, metadata.Link{
				Kind: "original release album",
				Name: []metadata.Name{{Kind: "original", Name: strings.TrimPrefix(o, "Album: ")}},
			})
		} else {
			track.Links = append(track.Links, metadata.Link{
				Kind: "original release title",
				Name: []metadata.Name{{Kind: "original", Name: o}},
			})
		}
	}

	for _, l := range t.Links {
		track.Links = append(track.Links, metadata.Link{
			Kind: "official",
			Name: []metadata.Name{{Kind: "url", Name: l}},
		})
	}

	if len(t.Lyrics) > 0 {
		lyricsPage := t.Lyrics
		track.Lyrics = func() (result []metadata.Lyrics) {
			for _, l := range s.GetSongLyrics(lyricsPage) {
				result = append(result, l)
			}
			return
		}
	} else {
		track.Lyrics = func() []metadata.Lyrics {
			return nil
		}
	}
	return track
}

// parseArtistRoles converts credit lines into roles, one role per credited
// name. A name of the form "X(Y)" yields a role named X in group Y.
//
// Each name gets its own Role value, so earlier names are not overwritten by
// later ones and a group parsed from one name cannot leak onto the next. A
// credit without any names still yields a single role carrying only its kind.
func parseArtistRoles(artists []artistEntry) (roles []metadata.Role) {
	for _, artist := range artists {
		if len(artist.Names) == 0 {
			roles = append(roles, metadata.Role{Kind: artist.Position})
			continue
		}
		for _, rawName := range artist.Names {
			role := metadata.Role{Kind: artist.Position}
			m := &struct {
				Position string `regroup:"position"`
				Name     string `regroup:"name"`
			}{}
			if staffNameGroupRE.MatchToTarget(strings.TrimSpace(rawName), m) == nil {
				// Is a group
				role.Name = []metadata.Name{{Kind: "original", Name: strings.TrimSpace(m.Position)}}
				role.Group = strings.TrimSpace(m.Name)
			} else {
				role.Name = []metadata.Name{{Kind: "original", Name: rawName}}
			}
			roles = append(roles, role)
		}
	}
	return roles
}
// Test is a manual smoke check: it runs one CDDB lookup, one name search and
// one album fetch against the live service and logs the results.
//
// The lyrics of the fourth track on the first disc are fetched only when the
// album lookup succeeded and the album is large enough, so a network failure
// logs empty results instead of panicking.
func (s *Source) Test() {
	albumC := s.FindByCDDB1(0x730dec08)
	albums := s.FindByAlbumNames([]metadata.Name{{Name: "Bayside Beat"}})
	album := s.GetAlbumInformation(54742)

	var lyrics []metadata.Lyrics
	if album != nil && len(album.Discs) > 0 && len(album.Discs[0].Tracks) > 3 {
		lyrics = album.Discs[0].Tracks[3].Lyrics()
	}

	log.Print(album)
	log.Print(albumC)
	log.Print(lyrics)
	log.Print(albums)
}