// Package thwiki_cc implements a metadata source that queries THBWiki
// (https://thwiki.cc/) for album, track, cover art and lyrics information.
package thwiki_cc

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"math"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"time"

	"git.gammaspectra.live/S.O.N.G/METANOIA/metadata"
	wikitext_parser "git.gammaspectra.live/S.O.N.G/wikitext-parser"
	"github.com/oriser/regroup"
	"golang.org/x/text/unicode/norm"
)

// baseURL is the root of the wiki; API and article paths are appended to it.
var baseURL = "https://thwiki.cc/"

// baseLyricsURL is the root under which "lyrics/<title>.lrc" files are fetched.
var baseLyricsURL = "https://cd.thwiki.cc/"

// tocMatchThreshold is the largest per-track duration difference tolerated
// when matching an album against a TOC.
const tocMatchThreshold = 6 * time.Second

// maxStaffDiscs bounds the disc number accepted from a staff list template so
// that a bogus value on a wiki page cannot trigger an enormous allocation.
const maxStaffDiscs = 1024

// eventNames maps event template names to the English prefix emitted in front
// of the template's first parameter.
var eventNames = map[string]string{
	"CM":  "Comiket",
	"红楼梦": "Touhou Kouroumu",
	"例大祭": "Hakurei Shrine Reitaisai",
}

// Source is a metadata source backed by the wiki at baseURL.
type Source struct {
	client *metadata.CachingClient
}

// NewSource creates a Source whose caching client is configured with the
// source URL and a half-second interval.
func NewSource() *Source {
	s := &Source{}
	s.client = metadata.NewCachingClient(s.GetURL(), time.Second/2)
	return s
}

// GetName returns the display name of this source.
func (s *Source) GetName() string {
	return "THBWiki"
}

// GetURL returns the base URL of this source.
func (s *Source) GetURL() string {
	return baseURL
}

// GetLicense returns the license attached to every album produced by this
// source, including a link to the wiki's copyright section and an attribution
// string of the form "name (url)".
func (s *Source) GetLicense() metadata.License {
	return metadata.License{
		Code:        metadata.CC_BY_NC_SA_30,
		URL:         baseURL + "THBWiki:关于#.E7.89.88.E6.9D.83.E4.BF.A1.E6.81.AF",
		Attribution: fmt.Sprintf("%s (%s)", s.GetName(), s.GetURL()),
	}
}

// FindByTOC looks albums up by the CDDB1 identifier derived from toc and
// drops single-disc results whose track durations disagree with the TOC.
func (s *Source) FindByTOC(toc metadata.TOC) (albums []*metadata.Album) {
	for _, album := range s.FindByCDDB1Group([]metadata.CDDB1{toc.GetCDDB1()}) {
		if matchesTOC(album, toc) {
			albums = append(albums, album)
		}
	}
	return albums
}

// matchesTOC reports whether album is compatible with toc. Albums that do not
// have exactly one disc are always accepted. For single-disc albums, every
// track with a known (non-zero) duration must be within tocMatchThreshold of
// the corresponding TOC track.
func matchesTOC(album *metadata.Album, toc metadata.TOC) bool {
	if len(album.Discs) != 1 {
		return true
	}
	for i, track := range album.Discs[0].Tracks {
		// NOTE(review): GetTrackDuration is called for every album track index;
		// confirm it tolerates indexes beyond the TOC's track count.
		diff := track.Duration - toc.GetTrackDuration(i)
		if diff < 0 {
			diff = -diff
		}
		if track.Duration != 0 && diff > tocMatchThreshold {
			return false
		}
	}
	return true
}

// FindByCDDB1 looks albums up by a single CDDB1 identifier.
func (s *Source) FindByCDDB1(cddb metadata.CDDB1) []*metadata.Album {
	return s.FindByCDDB1Group([]metadata.CDDB1{cddb})
}

// FindByCDDB1Group looks albums up by a group of CDDB1 identifiers, one per
// disc. The advanced query constrains the disc count, the total track count
// and a window around the total duration.
func (s *Source) FindByCDDB1Group(group []metadata.CDDB1) []*metadata.Album {
	const gapLength = time.Second * 2

	tracks := 0
	var duration time.Duration
	discs := len(group)
	secondsRange := 1.

	for _, cddb := range group {
		duration += cddb.GetDuration()
		tracks += cddb.GetTrackNumber()

		// Pre-gap length: half a gap per track is removed from the duration
		// and added to the tolerance window instead.
		offset := (gapLength * time.Duration(cddb.GetTrackNumber())) / 2
		duration -= offset
		secondsRange += offset.Seconds()
	}

	query := []string{
		fmt.Sprintf("G3%d\t%d", discs, discs),
		fmt.Sprintf("H3%d\t%d", tracks, tracks),
		fmt.Sprintf("I3%d\t%d", int(math.Floor(duration.Seconds()-secondsRange)), int(math.Ceil(duration.Seconds()+secondsRange))),
	}

	return s.FindAdvancedQueryArguments(strings.Join(query, "\n") + "\n")
}

// FindByAlbumNames searches for albums whose name ("名称") or translated name
// ("译名") field equals any of the given names. It returns nil when names is
// empty.
func (s *Source) FindByAlbumNames(names []metadata.Name) []*metadata.Album {
	if len(names) == 0 {
		return nil
	}
	terms := make([]string, 0, len(names))
	for _, v := range names {
		terms = append(terms, fmt.Sprintf("%q OR %q", "名称 = "+v.Name, "译名 = "+v.Name))
	}
	return s.FindQueryArguments(strings.Join(terms, " OR "))
}

// FindByCatalogNumber searches for albums by catalog number ("编号"). When the
// number contains dashes, the dash-less spelling is searched as well.
func (s *Source) FindByCatalogNumber(catalog metadata.CatalogNumber) []*metadata.Album {
	srsearch := fmt.Sprintf("%q", "编号 = "+catalog)
	if strings.Contains(string(catalog), "-") {
		srsearch += fmt.Sprintf(" OR %q", "编号 = "+strings.Replace(string(catalog), "-", "", -1))
	}
	return s.FindQueryArguments(srsearch)
}

// buildURL parses base, appends path to its path component and, when query is
// non-nil, replaces its query string with the encoded query.
func buildURL(base, path string, query url.Values) (*url.URL, error) {
	uri, err := url.Parse(base)
	if err != nil {
		return nil, fmt.Errorf("parsing base URL %q: %w", base, err)
	}
	uri.Path += path
	if query != nil {
		uri.RawQuery = query.Encode()
	}
	return uri, nil
}

// fetch performs a GET request for uri through the caching client, passing
// maxAge as the client's second argument, and returns the response body. Any
// status other than 200 OK is reported as an error.
func (s *Source) fetch(uri *url.URL, maxAge time.Duration) ([]byte, error) {
	response, err := s.client.Request(&http.Request{
		Method: "GET",
		URL:    uri,
	}, maxAge)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("GET %s: unexpected status %d", uri, response.StatusCode)
	}
	return ioutil.ReadAll(response.Body)
}

// FindQueryArguments runs a full-text search restricted to the doujin album
// category ("同人专辑") with queryArgs as the search expression, and returns the
// album information of every hit that could be loaded. It returns nil on any
// request or decoding failure.
func (s *Source) FindQueryArguments(queryArgs string) (albums []*metadata.Album) {
	query := url.Values{}
	query.Add("action", "query")
	query.Add("format", "json")
	query.Add("list", "search")
	query.Add("srwhat", "text")
	query.Add("srlimit", "500")
	query.Add("srsearch", "incategory:同人专辑 ("+queryArgs+")")

	uri, err := buildURL(baseURL, "api.php", query)
	if err != nil {
		return nil
	}
	body, err := s.fetch(uri, time.Hour*24*14)
	if err != nil {
		return nil
	}

	type SearchResult struct {
		Query struct {
			SearchInfo struct {
				TotalHits int `json:"totalhits"`
			} `json:"searchinfo"`
			Search []struct {
				NameSpace int    `json:"ns"`
				Title     string `json:"title"`
				PageId    int    `json:"pageid"`
				Snippet   string `json:"snippet"`
			} `json:"search"`
		} `json:"query"`
	}

	result := &SearchResult{}
	if err = json.Unmarshal(body, result); err != nil {
		return nil
	}

	for _, r := range result.Query.Search {
		if album := s.GetAlbumInformation(wikitext_parser.NormalizeWikiTitle(r.Title)); album != nil {
			albums = append(albums, album)
		}
	}
	return albums
}

// FindAdvancedQueryArguments runs the wiki's "uask" album query in two steps:
// the first request returns "<count> <token>", the second uses the token to
// retrieve up to 25 results. It returns the album information of every result
// that could be loaded, or nil on any failure or when there are no results.
func (s *Source) FindAdvancedQueryArguments(queryArgs string) (albums []*metadata.Album) {
	query := url.Values{}
	query.Add("action", "uask")
	query.Add("pre", "专辑")
	query.Add("sort", "")
	query.Add("query", queryArgs)

	uri, err := buildURL(baseURL, "api.php", query)
	if err != nil {
		return nil
	}
	body, err := s.fetch(uri, time.Hour*24*30)
	if err != nil {
		return nil
	}

	split := strings.Split(string(body), " ")
	if len(split) != 2 {
		return nil
	}
	// A count that does not parse is treated the same as zero results.
	numberOfResults, err := strconv.Atoi(split[0])
	if err != nil || numberOfResults == 0 {
		return nil
	}

	query.Add("token", split[1])
	query.Add("order", "")
	query.Add("limit", "25")
	query.Add("offset", "0")
	query.Add("result", "d")
	uri.RawQuery = query.Encode()

	body, err = s.fetch(uri, time.Hour*24*30)
	if err != nil {
		return nil
	}

	type SearchResult struct {
		From    int    `json:"from"`
		Till    int    `json:"till"`
		Total   int    `json:"total"`
		Symbols string `json:"symbols"`
		Results struct {
			Link []string `json:"link"`
			Text []string `json:"text"`
		} `json:"resu"`
	}

	result := &SearchResult{}
	if err = json.Unmarshal(body, result); err != nil {
		return nil
	}

	for _, albumName := range result.Results.Text {
		if album := s.GetAlbumInformation(wikitext_parser.NormalizeWikiTitle(albumName)); album != nil {
			albums = append(albums, album)
		}
	}
	return albums
}

// GetArticle downloads the raw wikitext of the page named title and returns
// its parsed form.
func (s *Source) GetArticle(title string) ([]interface{}, error) {
	query := url.Values{}
	query.Add("title", title)
	query.Add("action", "raw")

	uri, err := buildURL(baseURL, "index.php", query)
	if err != nil {
		return nil, err
	}
	body, err := s.fetch(uri, time.Hour*24*60)
	if err != nil {
		return nil, fmt.Errorf("fetching article %q: %w", title, err)
	}

	return wikitext_parser.ParseWikiText(string(body)), nil
}

// GetFileURL resolves the wiki file "File:<title>" to its download URL. It
// returns an empty string when the lookup fails or the file has no image info.
func (s *Source) GetFileURL(title string) string {
	query := url.Values{}
	query.Add("titles", "File:"+title)
	query.Add("action", "query")
	query.Add("prop", "imageinfo")
	query.Add("iiprop", "url")
	query.Add("format", "json")

	uri, err := buildURL(baseURL, "api.php", query)
	if err != nil {
		return ""
	}
	body, err := s.fetch(uri, time.Hour*24*180)
	if err != nil {
		return ""
	}

	type ImageData struct {
		Query struct {
			Pages map[string]struct {
				ImageInfo []struct {
					URL string `json:"url"`
				} `json:"imageinfo"`
			} `json:"pages"`
		} `json:"query"`
	}

	var d ImageData
	if err = json.Unmarshal(body, &d); err != nil {
		return ""
	}

	for _, page := range d.Query.Pages {
		if len(page.ImageInfo) > 0 {
			return page.ImageInfo[0].URL
		}
	}
	return ""
}

// GetSongLyrics downloads the LRC lyrics file for songName from the lyrics
// host. It returns nil when the file cannot be fetched.
func (s *Source) GetSongLyrics(songName string) *metadata.LRCLyrics {
	normalized := wikitext_parser.NormalizeWikiTitle(songName)

	uri, err := buildURL(baseLyricsURL, "lyrics/"+normalized+".lrc", nil)
	if err != nil {
		return nil
	}
	body, err := s.fetch(uri, time.Hour*24*120)
	if err != nil {
		return nil
	}

	//TODO: detect original language
	return metadata.NewLRCLyrics(string(body), "original", []metadata.Name{
		{Kind: "url", Name: baseURL + "歌词:" + normalized},
	})
}

// getWikiStringOptions builds the options used to flatten parsed wikitext into
// strings for the page named title. Plain strings are Unicode-normalized
// (NFKD followed by NFC). Event templates are rendered as
// "<event name> <first parameter>", PAGENAME/SUBPAGENAME as the page name, and
// any other template as its own name.
func getWikiStringOptions(title string, trim bool) *wikitext_parser.WikiStringValueOptions {
	opts := &wikitext_parser.WikiStringValueOptions{}
	opts.Default()
	opts.PageName = title

	opts.StringHandler = func(value string, opt *wikitext_parser.WikiStringValueOptions) []string {
		return []string{norm.NFC.String(norm.NFKD.String(value))}
	}

	opts.TemplateHandler = func(template *wikitext_parser.Template, opt *wikitext_parser.WikiStringValueOptions) (result []string) {
		name := strings.ToUpper(template.Name)

		if event, ok := eventNames[name]; ok {
			if val, ok := template.Parameters["1"]; ok && len(val) > 0 {
				// The flattened parameter may be empty; indexing it blindly would panic.
				if values := wikitext_parser.GetWikiStringValue(val, opt); len(values) > 0 {
					result = append(result, event+" "+values[0])
				}
			}
			return result
		}

		switch name {
		case "PAGENAME", "SUBPAGENAME":
			result = append(result, opt.PageName)
		default:
			result = append(result, template.Name)
		}
		return result
	}

	opts.Trim = trim
	return opts
}

// getStringValue flattens parsed wikitext nodes into strings using opt.
func getStringValue(v []interface{}, opt *wikitext_parser.WikiStringValueOptions) []string {
	return wikitext_parser.GetWikiStringValue(v, opt)
}

// firstStringParameter returns the first value of the template parameter key
// when it exists and is a plain string.
func firstStringParameter(tpl *wikitext_parser.Template, key string) (string, bool) {
	values := tpl.Parameters[key]
	if len(values) == 0 {
		return "", false
	}
	value, ok := values[0].(string)
	return value, ok
}

// normalizeStaffRole maps the lower-cased heading of a staff list entry to the
// role kind stored on the album. Unknown headings are returned unchanged.
func normalizeStaffRole(name string) string {
	switch name {
	case "total produce", "produce", "production":
		return "producer"
	case "illustration", "cover artwork":
		return "illustration"
	case "graphic design", "graphics", "design":
		return "design"
	case "compose":
		return "composer"
	}
	return name
}

// parseTrackDuration converts "s", "m:s" or "h:m:s" into a duration. Fields
// that do not parse as integers count as zero; any other number of fields
// yields zero.
func parseTrackDuration(value string) time.Duration {
	fields := strings.Split(value, ":")
	numbers := make([]time.Duration, len(fields))
	for i, field := range fields {
		// The error is deliberately ignored: an unparsable field is treated as 0.
		n, _ := strconv.ParseInt(strings.TrimLeft(field, "0"), 10, 0)
		numbers[i] = time.Duration(n)
	}

	switch len(numbers) {
	case 1:
		return time.Second * numbers[0]
	case 2:
		return time.Minute*numbers[0] + time.Second*numbers[1]
	case 3:
		return time.Hour*numbers[0] + time.Minute*numbers[1] + time.Second*numbers[2]
	}
	return 0
}

// zunComposerRole returns the composer role added to arranged tracks that
// name an original song.
func zunComposerRole() metadata.Role {
	return metadata.Role{
		Kind: "composer",
		Name: []metadata.Name{
			{Kind: "original", Name: "ZUN"},
			{Kind: "url", Name: baseURL + "ZUN"},
		},
	}
}

// addStaffMapping records the entries of a staff list template ("专辑人员列表")
// in mappings and returns the updated slice. The target disc is taken from
// the "碟号" (disc number, 1-based) parameter; when that is missing or not a
// number, the next free index is used. Each value of the form "a=b" maps a to
// b; a bare value maps to itself.
func addStaffMapping(mappings []map[string]string, template *wikitext_parser.Template, opts *wikitext_parser.WikiStringValueOptions) []map[string]string {
	discIndex := len(mappings)
	if n, err := strconv.Atoi(strings.Join(getStringValue(template.Parameters["碟号"], opts), " ")); err == nil {
		discIndex = n - 1
	}
	if discIndex < 0 || discIndex >= maxStaffDiscs {
		return mappings
	}

	// One map is appended per template, even when discIndex refers to an
	// existing entry; further maps are added only when the declared disc
	// number lies beyond the end, which would otherwise index out of range.
	mappings = append(mappings, make(map[string]string))
	for len(mappings) <= discIndex {
		mappings = append(mappings, make(map[string]string))
	}

	mapping := mappings[discIndex]
	for _, val := range template.Parameters {
		for _, value := range getStringValue(val, opts) {
			parts := strings.Split(value, "=")
			key := strings.TrimSpace(parts[0])
			if len(parts) > 1 {
				mapping[key] = strings.TrimSpace(parts[1])
			} else {
				mapping[key] = key
			}
		}
	}
	return mappings
}

// appendStaffRoles adds one role of the given kind to track for every
// comma-separated name in val. When mapping associates a name with a
// different value, that value is stored as the role's group. Nothing is added
// when mapping is nil, i.e. when no staff list exists for the track's disc.
func appendStaffRoles(track *metadata.Track, mapping map[string]string, kind string, val []interface{}, opts *wikitext_parser.WikiStringValueOptions) {
	if mapping == nil {
		return
	}
	for _, e := range strings.Split(strings.Join(getStringValue(val, opts), ","), ",") {
		entry := strings.TrimSpace(e)
		if len(entry) == 0 {
			continue
		}
		role := metadata.Role{
			Kind: kind,
			Name: []metadata.Name{
				{Kind: "original", Name: entry},
				{Kind: "url", Name: baseURL + wikitext_parser.NormalizeWikiTitle(entry)},
			},
		}
		if group, ok := mapping[entry]; ok && group != entry {
			role.Group = group
		}
		track.Roles = append(track.Roles, role)
	}
}

// parseTrack converts one nested track template into a track. staff is the
// staff mapping of the track's disc, or nil when there is none.
func (s *Source) parseTrack(tpl *wikitext_parser.Template, opts *wikitext_parser.WikiStringValueOptions, staff map[string]string) metadata.Track {
	track := metadata.Track{}

	for k, val := range tpl.Parameters {
		switch k {
		case "时长": // duration
			track.Duration = parseTrackDuration(strings.Join(getStringValue(val, opts), ""))
		case "名称": // title
			trackTitle := strings.Join(getStringValue(val, opts), " ")
			track.Name = append(track.Name, metadata.Name{
				Kind: "original",
				Name: trackTitle,
			})
			// Lyrics are fetched lazily, only when the caller asks for them.
			// NOTE(review): when no lyrics exist the slice still has one element
			// holding a nil *LRCLyrics; confirm callers handle that.
			track.Lyrics = func() []metadata.Lyrics {
				return []metadata.Lyrics{s.GetSongLyrics(trackTitle)}
			}
		case "原专辑": // original release
			track.Links = append(track.Links, metadata.Link{
				Kind: "original release",
				Name: []metadata.Name{{Kind: "original", Name: strings.Join(getStringValue(val, opts), " ")}},
			})
		case "原名称": // original release title
			track.Links = append(track.Links, metadata.Link{
				Kind: "original release title",
				Name: []metadata.Name{{Kind: "original", Name: strings.Join(getStringValue(val, opts), " ")}},
			})
		case "原曲": // original song: only consulted by the "编曲" case below
		case "编曲": // arranger, or composer when there is no original song
			if _, ok := tpl.Parameters["原曲"]; ok {
				appendStaffRoles(&track, staff, "arranger", val, opts)
				track.Roles = append(track.Roles, zunComposerRole()) //TODO check
			} else {
				appendStaffRoles(&track, staff, "composer", val, opts)
			}
		case "再编曲": // re-arranger
			appendStaffRoles(&track, staff, "remix", val, opts)
		case "演唱": // vocals
			appendStaffRoles(&track, staff, "vocals", val, opts)
		case "作词": // lyrics
			appendStaffRoles(&track, staff, "lyrics", val, opts)
		}
	}

	return track
}

// parseAlbumHeader builds a new album from the doujin album information
// template ("同人专辑信息") found on the page named title.
func (s *Source) parseAlbumHeader(title string, template *wikitext_parser.Template, opts *wikitext_parser.WikiStringValueOptions) *metadata.Album {
	album := &metadata.Album{
		License:                s.GetLicense(),
		SourceUniqueIdentifier: baseURL + title,
		Identifiers: []metadata.Name{
			{
				Kind: "url",
				Name: baseURL + title,
			},
		},
	}

	for k, val := range template.Parameters {
		switch k {
		case "封面": // cover: either a file name or just an extension such as "jpg"
			fileName := strings.Join(getStringValue(val, opts), "")
			if !strings.Contains(fileName, ".") {
				fileName = title + "封面." + fileName
			}
			if f := s.GetFileURL(fileName); len(f) > 0 {
				album.Art = append(album.Art, metadata.Name{
					Kind: "front",
					Name: f,
				})
			}
		case "展会": // release event
			for _, value := range val {
				tpl, ok := value.(*wikitext_parser.Template)
				if !ok {
					continue
				}
				//TODO
				if event, known := eventNames[tpl.Name]; known {
					if edition, ok := firstStringParameter(tpl, "1"); ok {
						album.Links = append(album.Links, metadata.Link{
							Kind: "release",
							Name: []metadata.Name{
								{Kind: "name", Name: event + " " + edition},
							},
						})
					}
				}
				// Only the first template in the value is considered.
				break
			}
		case "名称": // name
			album.Name = append(album.Name, metadata.Name{
				Kind: "original",
				Name: strings.Join(getStringValue(val, opts), " "),
			})
		case "制作方": // produced by
			for _, producer := range strings.Split(strings.Join(getStringValue(val, opts), ","), ",") {
				if strings.TrimSpace(producer) == "" {
					continue
				}
				album.Roles = append(album.Roles, metadata.Role{
					Kind: "producer",
					Name: []metadata.Name{
						{Kind: "original", Name: producer},
						{Kind: "url", Name: baseURL + wikitext_parser.NormalizeWikiTitle(producer)},
					},
				})
			}
		case "发行方": // issuer
			for _, issuer := range strings.Split(strings.Join(getStringValue(val, opts), ","), ",") {
				if strings.TrimSpace(issuer) == "" {
					continue
				}
				album.Roles = append(album.Roles, metadata.Role{
					Kind: "issuer",
					Name: []metadata.Name{
						{Kind: "original", Name: issuer},
						{Kind: "url", Name: baseURL + wikitext_parser.NormalizeWikiTitle(issuer)},
					},
				})
			}
		case "编号": // catalog number
			for _, catalog := range strings.Split(strings.Join(getStringValue(val, opts), "+"), "+") {
				album.Identifiers = append(album.Identifiers, metadata.Name{
					Kind: "catalog",
					Name: catalog,
				})
			}
		case "风格类型": // style
			for _, style := range strings.Split(strings.Join(getStringValue(val, opts), ","), ",") {
				if strings.TrimSpace(style) == "" {
					continue
				}
				album.Tags = append(album.Tags, metadata.Name{
					Kind: "genre",
					Name: strings.ToLower(style), //TODO: normalize
				})
			}
		case "官网页面": // official page
			album.Links = append(album.Links, metadata.Link{
				Kind: "official",
				Name: []metadata.Name{{Kind: "url", Name: strings.Join(getStringValue(val, opts), " ")}},
			})
		}
	}

	return album
}

// GetAlbumInformation loads the wiki page named title and converts it into an
// album. It returns nil when the page cannot be fetched or contains no doujin
// album information template.
//
// The page is processed in document order: the album information template
// creates the album, staff list templates build per-disc name-to-group
// mappings, track list templates each add one disc, and description lists
// following a "== Staff ==" heading add album-level roles.
func (s *Source) GetAlbumInformation(title string) *metadata.Album {
	article, err := s.GetArticle(title)
	if err != nil {
		return nil
	}

	var album *metadata.Album
	var staffMappings []map[string]string
	listingStaff := false
	opts := getWikiStringOptions(title, true)

	// Matches staff entries written as "position(name)". The group names must
	// agree with the regroup struct tags used below.
	staffNameGroupRE := regroup.MustCompile(`(?P<position>.+)\((?P<name>.+)\)`)

	for _, v := range article {
		switch node := v.(type) {
		case string:
			if strings.Contains(node, "== Staff ==") {
				listingStaff = true
			}

		case *wikitext_parser.DescriptionList:
			// Staff entries can only be attached once the album exists.
			if !listingStaff || album == nil {
				continue
			}
			roleName := normalizeStaffRole(strings.ToLower(strings.Join(getStringValue(node.Name, opts), "")))

			for _, a := range strings.Split(strings.Join(getStringValue(node.Entries, opts), ""), ",") {
				entry := strings.TrimSpace(a)
				if entry == "" {
					continue
				}
				role := metadata.Role{
					Kind: roleName,
					Name: []metadata.Name{{Kind: "original", Name: entry}},
				}

				m := &struct {
					Position string `regroup:"position"`
					Name     string `regroup:"name"`
				}{}
				if staffNameGroupRE.MatchToTarget(entry, m) == nil {
					// Is a group: the part before the parentheses becomes the
					// name, the part inside becomes the group.
					role.Name = []metadata.Name{{Kind: "original", Name: strings.TrimSpace(m.Position)}}
					role.Group = strings.TrimSpace(m.Name)
				}

				album.Roles = append(album.Roles, role)
			}

		case *wikitext_parser.Template:
			switch node.Name {
			case "专辑人员列表": // Album Person List
				if album == nil {
					continue
				}
				staffMappings = addStaffMapping(staffMappings, node, opts)

			case "专辑曲目列表": // Album Track List
				if album == nil {
					continue
				}
				listingStaff = false

				discIndex := len(album.Discs)
				var staff map[string]string
				if discIndex < len(staffMappings) {
					staff = staffMappings[discIndex]
				}

				disc := metadata.Disc{}
				for _, value := range node.Parameters["嵌套"] { // nested track templates
					if tpl, ok := value.(*wikitext_parser.Template); ok {
						disc.Tracks = append(disc.Tracks, s.parseTrack(tpl, opts, staff))
					}
				}
				album.Discs = append(album.Discs, disc)

			case "同人专辑信息": // Doujin Album Information
				album = s.parseAlbumHeader(title, node, opts)
			}
		}
	}

	return album
}

// Test runs a handful of live lookups against the wiki and logs the results.
func (s *Source) Test() {
	albumC := s.FindByCDDB1(0x730dec08)
	albums := s.FindByAlbumNames([]metadata.Name{{Name: "Bayside Beat"}})
	album := s.GetAlbumInformation("Haunted_House")
	album2 := s.GetAlbumInformation("マジコカタストロフィ")

	// The second album may be missing or shorter than expected; guard the
	// lookup instead of panicking.
	var lyrics []metadata.Lyrics
	if album2 != nil && len(album2.Discs) > 0 && len(album2.Discs[0].Tracks) > 1 {
		if getLyrics := album2.Discs[0].Tracks[1].Lyrics; getLyrics != nil {
			lyrics = getLyrics()
		}
	}

	log.Print(album)
	log.Print(albumC)
	log.Print(album2)
	log.Print(lyrics)
	log.Print(albums)
}