Added multiple catalog numbers, links, images, release events, and lyrics; handle HTML tags

This commit is contained in:
DataHoarder 2022-02-20 03:49:05 +01:00
parent 730a4e60cb
commit 1c58db383d
3 changed files with 315 additions and 43 deletions

197
server.go
View file

@ -72,17 +72,20 @@ func (t JSONTime) MarshalJSON() ([]byte, error) {
}
type albumEntry struct {
Id int `json:"pageid"`
Type string `json:"type"`
MainTitle string `json:"pagetitle"`
Titles []string `json:"titles"`
CatalogNumber string `json:"catalognumber,omitempty"`
Genre []string `json:"genre,omitempty"`
TrackCount int `json:"trackcount,omitempty"`
Duration int `json:"duration,omitempty"`
ReleaseDate JSONTime `json:"date,omitempty"`
Artists []artistEntry `json:"artists,omitempty"`
Discs []discEntry `json:"discs,omitempty"`
Id int `json:"pageid"`
Type string `json:"type"`
MainTitle string `json:"pagetitle"`
Titles []string `json:"titles"`
Image string `json:"image,omitempty"`
CatalogNumbers []string `json:"catalognumbers,omitempty"`
Genre []string `json:"genre,omitempty"`
TrackCount int `json:"trackcount,omitempty"`
Duration int `json:"duration,omitempty"`
ReleaseDate JSONTime `json:"releasedate,omitempty"`
ReleaseEvent string `json:"releaseevent,omitempty"`
Links []string `json:"links,omitempty"`
Artists []artistEntry `json:"artists,omitempty"`
Discs []discEntry `json:"discs,omitempty"`
}
type discEntry struct {
Name string `json:"name,omitempty"`
@ -93,9 +96,11 @@ type discEntry struct {
type trackEntry struct {
Duration int `json:"duration,omitempty"`
MainTitle string `json:"title,omitempty"`
Original string `json:"original,omitempty"`
Titles []string `json:"titles,omitempty"`
Artists []artistEntry `json:"artists,omitempty"`
Lyrics string `json:"lyrics,omitempty"`
Links []string `json:"links,omitempty"`
Original []string `json:"original,omitempty"`
}
type artistEntry struct {
Position string `json:"position"`
@ -148,11 +153,62 @@ func getArtistEntries(kind string, entries []interface{}) (artists []artistEntry
for _, value := range entries {
if text, ok := value.(string); ok {
switch strings.ToLower(text) {
case "&", "and":
recreateArtist(kind)
if i := strings.Index(text, " & "); i != -1 {
for _, vv := range strings.Split(text, " & ") {
vv = normalizeStringCharacters(vv)
if len(vv) > 0 {
artist.Names = append(artist.Names, vv)
}
recreateArtist(kind)
}
} else if i := strings.Index(text, "& "); i == 0 {
for _, vv := range strings.Split(text, "& ") {
vv = normalizeStringCharacters(vv)
if len(vv) > 0 {
artist.Names = append(artist.Names, vv)
}
recreateArtist(kind)
}
} else if i := strings.Index(text, " &"); i == len(text)-2 {
for _, vv := range strings.Split(text, " &") {
vv = normalizeStringCharacters(vv)
if len(vv) > 0 {
artist.Names = append(artist.Names, vv)
}
recreateArtist(kind)
}
} else if i := strings.Index(text, " and "); i != -1 {
for _, vv := range strings.Split(text, " and ") {
vv = normalizeStringCharacters(vv)
if len(vv) > 0 {
artist.Names = append(artist.Names, vv)
}
recreateArtist(kind)
}
} else if i := strings.Index(text, "and "); i == 0 {
for _, vv := range strings.Split(text, "and ") {
vv = normalizeStringCharacters(vv)
if len(vv) > 0 {
artist.Names = append(artist.Names, vv)
}
recreateArtist(kind)
}
} else if i := strings.Index(text, " and"); i == len(text)-4 {
for _, vv := range strings.Split(text, " and") {
vv = normalizeStringCharacters(vv)
if len(vv) > 0 {
artist.Names = append(artist.Names, vv)
}
recreateArtist(kind)
}
} else {
for _, vv := range strings.Split(text, " and") {
vv = normalizeStringCharacters(vv)
if len(vv) > 0 {
artist.Names = append(artist.Names, vv)
}
}
}
artist.Names = append(artist.Names, normalizeStringCharacters(text))
} else if _, ok := value.(wikiparser.NewLineToken); ok {
recreateArtist(kind)
} else if tpl, ok := value.(*wikiparser.Template); ok {
@ -224,13 +280,23 @@ func getStringValue(pageName string, v []interface{}) (result []string) {
for _, value := range v {
if text, ok := value.(string); ok {
result = append(result, normalizeStringCharacters(text))
text = normalizeStringCharacters(text)
if len(text) > 0 {
result = append(result, text)
}
} else if template, ok := value.(*wikiparser.Template); ok {
if template.IsLink {
output := 0
for _, vv := range template.Parameters {
result = append(result, getStringValue(pageName, vv)...)
for _, vvv := range getStringValue(pageName, vv) {
vvv = strings.TrimSpace(vvv)
if len(vvv) > 0 {
output++
result = append(result, vvv)
}
}
}
if len(result) == 0 {
if output == 0 {
result = append(result, template.Name)
}
} else {
@ -255,6 +321,11 @@ func getStringValue(pageName string, v []interface{}) (result []string) {
result = append(result, template.Name)
}
}
} else if html, ok := value.(*wikiparser.HTML); ok && html.Tag != nil {
str := strings.TrimSpace(html.Tag.String())
if len(str) > 0 {
result = append(result, str)
}
} else if link, ok := value.(*wikiparser.Link); ok {
if len(link.Name) > 0 {
result = append(result, getStringValue(pageName, link.Name)...)
@ -330,7 +401,11 @@ func processIndexDirectory(filePath, indexPath, kind string, wg *sync.WaitGroup)
if val, ok = tpl.Parameters["catalogno"]; ok {
if stringVal = getStringValue(entry.MainTitle, val); len(stringVal) > 0 {
entry.CatalogNumber = stringVal[0]
for _, catno := range stringVal {
if strings.ToLower(catno) != "n/a" {
entry.CatalogNumbers = append(entry.CatalogNumbers, catno)
}
}
}
}
@ -342,6 +417,9 @@ func processIndexDirectory(filePath, indexPath, kind string, wg *sync.WaitGroup)
}
}
if val, ok = tpl.Parameters["image"]; ok && len(val) > 0 {
entry.Image = strings.TrimSpace(strings.Join(getStringValue(entry.MainTitle, val), ""))
}
if val, ok = tpl.Parameters["group"]; ok {
entry.Artists = append(entry.Artists, getArtistEntries("group", val)...)
}
@ -369,6 +447,12 @@ func processIndexDirectory(filePath, indexPath, kind string, wg *sync.WaitGroup)
if val, ok = tpl.Parameters["other"]; ok {
entry.Artists = append(entry.Artists, getArtistEntries("other", val)...)
}
if val, ok = tpl.Parameters["convention"]; ok && len(val) > 0 {
value := getStringValue(entry.MainTitle, val)
if len(value) > 0 {
entry.ReleaseEvent = value[0]
}
}
if val, ok = tpl.Parameters["released"]; ok {
if stringVal = getStringValue(entry.MainTitle, val); len(stringVal) > 0 {
@ -378,6 +462,22 @@ func processIndexDirectory(filePath, indexPath, kind string, wg *sync.WaitGroup)
}
}
if val, ok = tpl.Parameters["website"]; ok {
for _, value := range val {
if linkVal, ok := value.(*wikiparser.Link); ok && linkVal.IsExternal {
entry.Links = append(entry.Links, linkVal.URL)
}
}
}
if val, ok = tpl.Parameters["digital"]; ok {
for _, value := range val {
if linkVal, ok := value.(*wikiparser.Link); ok && linkVal.IsExternal {
entry.Links = append(entry.Links, linkVal.URL)
}
}
}
/*
if val, ok = tpl.Parameters["length"]; ok {
if stringVal = getStringValue(entry.MainTitle, val); len(stringVal) > 0 {
@ -438,8 +538,8 @@ func processIndexDirectory(filePath, indexPath, kind string, wg *sync.WaitGroup)
track := trackEntry{}
if mainTitleValue := getStringValue(entry.MainTitle, trackTpl.Parameters["1"]); len(mainTitleValue) > 0 {
track.MainTitle = mainTitleValue[0]
track.Titles = append(track.Titles, mainTitleValue...)
track.MainTitle = strings.Join(mainTitleValue, "")
track.Titles = append(track.Titles, track.MainTitle)
}
if durations := getStringValue(entry.MainTitle, trackTpl.Parameters["2"]); len(durations) > 0 {
split := strings.Split(durations[0], ":")
@ -459,6 +559,11 @@ func processIndexDirectory(filePath, indexPath, kind string, wg *sync.WaitGroup)
track.Duration = duration
}
if lyrics, ok := trackTpl.Parameters["lyrics"]; ok {
//TODO: parse lyrics
track.Lyrics = wikiparser.NormalizeWikiTitle(getStringValue(entry.MainTitle, lyrics)[0])
}
if len(listVal.Entries) > 1 {
if extraListData, ok := listVal.Entries[1].(*wikiparser.UnorderedList); ok && len(extraListData.Entries) > 0 {
for i, entryValue := range extraListData.Entries {
@ -480,16 +585,31 @@ func processIndexDirectory(filePath, indexPath, kind string, wg *sync.WaitGroup)
}
}
switch keyEntry {
case "original arrangement":
track.Artists = append(track.Artists, getArtistEntries("original arranger", values)...)
case "original title":
track.Original = strings.TrimSpace(strings.Join(getStringValue(entry.MainTitle, values), " "))
case "lyrics", "vocals", "arranger", "composer", "producer", "remix":
track.Original = append(track.Original, strings.TrimSpace(strings.Join(getStringValue(entry.MainTitle, values), " ")))
case "original album":
var clean []string
for _, albumValue := range getStringValue(entry.MainTitle, values) {
albumValue = strings.Trim(albumValue, " \t'\"")
if len(albumValue) > 0 {
clean = append(clean, albumValue)
}
}
track.Original = append(track.Original, "Album: "+strings.TrimSpace(strings.Join(clean, " ")))
case "lyrics", "vocals", "chorus", "arranger", "composer", "producer", "remix":
track.Artists = append(track.Artists, getArtistEntries(keyEntry, values)...)
case "arrangement":
track.Artists = append(track.Artists, getArtistEntries("arranger", values)...)
case "original arrangement":
track.Artists = append(track.Artists, getArtistEntries("original arranger", values)...)
case "composition":
track.Artists = append(track.Artists, getArtistEntries("composer", values)...)
case "promotional video":
for _, value := range values {
if linkVal, ok := value.(*wikiparser.Link); ok && linkVal.IsExternal {
track.Links = append(track.Links, linkVal.URL)
}
}
}
}
@ -529,12 +649,14 @@ func processIndexDirectory(filePath, indexPath, kind string, wg *sync.WaitGroup)
}
}
if len(entry.CatalogNumber) > 0 {
normalized := normalizeSearchTitle(entry.CatalogNumber)
if _, ok := albumTitleLookup[normalized]; !ok {
albumTitleLookup[normalized] = []*albumEntry{entry}
} else {
albumTitleLookup[normalized] = append(albumTitleLookup[normalized], entry)
if len(entry.CatalogNumbers) > 0 {
for _, catno := range entry.CatalogNumbers {
normalized := normalizeSearchTitle(catno)
if _, ok := albumTitleLookup[normalized]; !ok {
albumTitleLookup[normalized] = []*albumEntry{entry}
} else {
albumTitleLookup[normalized] = append(albumTitleLookup[normalized], entry)
}
}
}
for _, title := range entry.Titles {
@ -587,6 +709,7 @@ var normalizeSearchTitleTransformer = transform.Chain(
runes.Remove(runes.In(unicode.Pf)),
runes.Remove(runes.In(unicode.Pi)),
runes.Remove(runes.In(unicode.Pd)),
runes.Remove(runes.In(unicode.Pc)),
runes.Remove(runes.In(unicode.Sc)),
runes.Remove(runes.In(unicode.Sk)),
runes.Remove(runes.In(unicode.Sm)),
@ -602,7 +725,7 @@ func normalizeSearchTitle(title string) (normalized string) {
}
func normalizeStringCharacters(text string) (normalized string) {
normalized = norm.NFKC.String(text)
normalized = strings.TrimSpace(norm.NFKC.String(text))
return
}
@ -691,8 +814,8 @@ func main() {
break
}
}
if len(result.albumEntry.CatalogNumber) > 0 {
out = fmt.Sprintf("%sSoundtrack%d_%d %s [%s] %s / %s", strings.ToUpper(result.albumEntry.Type[0:1])+result.albumEntry.Type[1:], result.albumEntry.Id, result.discIndex, cddb1.String(), result.albumEntry.CatalogNumber, group, result.albumEntry.MainTitle)
if len(result.albumEntry.CatalogNumbers) > result.discIndex {
out = fmt.Sprintf("%sSoundtrack%d_%d %s [%s] %s / %s", strings.ToUpper(result.albumEntry.Type[0:1])+result.albumEntry.Type[1:], result.albumEntry.Id, result.discIndex, cddb1.String(), result.albumEntry.CatalogNumbers[result.discIndex], group, result.albumEntry.MainTitle)
} else {
out = fmt.Sprintf("%sSoundtrack%d_%d %s %s / %s", strings.ToUpper(result.albumEntry.Type[0:1])+result.albumEntry.Type[1:], result.albumEntry.Id, result.discIndex, cddb1.String(), group, result.albumEntry.MainTitle)
}
@ -787,8 +910,8 @@ func main() {
writer.Write([]byte(fmt.Sprintf("DISCID=%s\n", cddb1.String())))
writer.Write([]byte(fmt.Sprintf("DNUM=%d\n", len(entry.Discs))))
writer.Write([]byte(fmt.Sprintf("DINDEX=%d\n", discIndex+1)))
if len(entry.CatalogNumber) > 0 {
writer.Write([]byte(fmt.Sprintf("DTITLE=%s / [%s] %s\n", group, entry.CatalogNumber, entry.MainTitle)))
if len(entry.CatalogNumbers) > discIndex {
writer.Write([]byte(fmt.Sprintf("DTITLE=%s / [%s] %s\n", group, entry.CatalogNumbers[discIndex], entry.MainTitle)))
} else {
writer.Write([]byte(fmt.Sprintf("DTITLE=%s / %s\n", group, entry.MainTitle)))
}

View file

@ -1,6 +1,9 @@
package wikiparser
import "strings"
import (
"golang.org/x/text/unicode/norm"
"strings"
)
func NormalizeWikiTitle(title string) string {
return strings.Replace(title, " ", "_", -1)
@ -11,6 +14,43 @@ type Link struct {
IsExternal bool
Name []interface{}
}
// HTML is the node ParseHTML produces for an HTML tag found in wiki text.
type HTML struct {
// Tag is the first tag opened during the scan; it stays nil when ParseHTML
// never saw a '<'.
Tag *HTMLTag
}
// HTMLTag is one element of the tag tree built by ParseHTML.
type HTMLTag struct {
// Parent is the tag that was open when this one was created; nil for the root.
Parent *HTMLTag
// Name is the tag name as read from the input, or "#text" for a text node.
Name string
// Parameters holds the raw attribute text of a tag (everything from the first
// space up to the closing '>'), or the literal text of a "#text" node.
Parameters string
// Content lists the child tags and text nodes in input order.
Content []*HTMLTag
}
// String renders the tag and its descendants as plain text.
//
// A "#text" node yields its literal text (stored in Parameters). "ref" and
// "script" tags render as the empty string and "br" renders as a newline.
// Every other tag renders as the concatenation of its children; for "del" the
// result is additionally struck through with U+0336 COMBINING LONG STROKE
// OVERLAY.
func (t *HTMLTag) String() (r string) {
	switch t.Name {
	case "#text":
		return t.Parameters
	case "ref", "script": // references and scripts never reach the output
		return ""
	case "br": // new line
		return "\n"
	}

	for _, c := range t.Content {
		r += c.String()
	}

	if t.Name == "del" { // add strikethrough
		decomposed := []rune(norm.NFD.String(r))
		struck := make([]rune, 0, 2*len(decomposed))
		for _, ch := range decomposed {
			// A combining mark applies to the character that precedes it, so
			// the overlay has to follow the rune it is meant to strike.
			struck = append(struck, ch, '\u0336')
		}
		r = norm.NFC.String(string(struck))
	}
	return
}
type NewLineToken struct {
}
@ -52,6 +92,72 @@ func ParseWikiText(text string) (result []interface{}) {
return
}
// ParseHTML reads an HTML tag from text starting at index, which callers
// point at a '<' character.
//
// It returns the index of the first byte after the consumed input together
// with the parsed node. The scan stops at the first '>' that ends a tag with
// no parent (or once every opened tag has been terminated), so for ordinary
// input each call yields a single tag token: "<del>", "</del>" and "<br/>"
// are each returned on their own. A further '<' seen before that '>' is
// attached as a child tag, and text following a child tag becomes a "#text"
// node. If the input ends before a '>' is found, len(text) is returned along
// with whatever was collected so far.
//
// depth is accepted for symmetry with the other Parse* functions and is not
// used.
func ParseHTML(text string, index int, depth int) (i int, html *HTML) {
	html = &HTML{}

	readingTag := false
	readingParameters := false
	isTerminating := false

	var tag *HTMLTag
	tagDepth := 0

	for i = index; i < len(text); i++ {
		c := text[i]

		switch {
		case c == '<':
			newTag := &HTMLTag{Parent: tag}
			if tag != nil {
				tag.Content = append(tag.Content, newTag)
			}
			tag = newTag
			readingTag = true
			readingParameters = false
			isTerminating = false
			if tagDepth == 0 && html.Tag == nil {
				html.Tag = tag
			}
			tagDepth++

		case readingTag && c == '>':
			readingTag = false
			readingParameters = false
			if isTerminating {
				tagDepth--
				tag = tag.Parent
			}
			// tag is nil only when tagDepth has dropped to zero, so the
			// short-circuit keeps tag.Parent from being evaluated on nil.
			if tagDepth == 0 || tag.Parent == nil {
				return i + 1, html
			}

		case readingTag && c == '/' &&
			((!readingParameters && len(tag.Name) == 0) || (i+1 < len(text) && text[i+1] == '>')):
			// Only a slash right after '<' ("</b>") or right before '>'
			// ("<br/>") marks the tag as terminating. Any other slash, such
			// as one inside a URL attribute, is ordinary tag text.
			isTerminating = true

		case readingTag:
			if c == ' ' {
				readingParameters = true
			}
			// Append the raw byte via a substring: string(c) would encode the
			// byte as a code point and corrupt multi-byte UTF-8 sequences.
			if readingParameters {
				tag.Parameters += text[i : i+1]
			} else {
				tag.Name += text[i : i+1]
			}

		case tagDepth > 0:
			if len(tag.Content) == 0 || tag.Content[len(tag.Content)-1].Name != "#text" {
				tag.Content = append(tag.Content, &HTMLTag{
					Parent: tag,
					Name:   "#text",
				})
			}
			tag.Content[len(tag.Content)-1].Parameters += text[i : i+1]
		}
	}

	return
}
func ParseLink(text string, index int, depth int, startCharacter byte) (i int, link *Link) {
var c byte
@ -62,7 +168,7 @@ func ParseLink(text string, index int, depth int, startCharacter byte) (i int, l
t := strings.TrimSpace(text[lastToken:i])
if len(t) > 0 {
if link == nil {
link = &Link{URL: t, IsExternal: startCharacter == '{'}
link = &Link{URL: t, IsExternal: startCharacter == '['}
} else {
link.Name = append(link.Name, t)
}
@ -187,9 +293,27 @@ func ParseTemplate(text string, index int, depth int, startCharacter byte) (i in
}
lastToken = scanIndex
i = scanIndex - 1
} else if c == '<' { //html trigger
addValue()
var html *HTML
var scanIndex int
scanIndex, html = ParseHTML(text, i, depth+1)
if html != nil && template != nil {
if key == "" {
template.AddParameterUnkeyed(html)
} else {
template.AddParameter(key, html)
}
}
lastToken = scanIndex
i = scanIndex - 1
} else if c == '|' {
hasTemplate := template != nil
addValue()
lastToken = i + 1
if hasTemplate {
template.UnkeyedIndex++
}
key = ""
} else if c == '\n' {
addValue()
@ -345,6 +469,16 @@ func ParseUnorderedList(text string, index int, depth int, indent int, startChar
}
lastToken = scanIndex
i = scanIndex - 1
} else if c == '<' { //html trigger
addValue()
var html *HTML
var scanIndex int
scanIndex, html = ParseHTML(text, i, depth+1)
if html != nil {
currentValue = append(currentValue, html)
}
lastToken = scanIndex
i = scanIndex - 1
} else {
processIndent = false
}
@ -426,6 +560,20 @@ func ParseDescriptionList(text string, index int, depth int) (i int, list *Descr
}
lastToken = scanIndex
i = scanIndex - 1
} else if c == '<' { //html trigger
addValue()
var html *HTML
var scanIndex int
scanIndex, html = ParseHTML(text, i, depth+1)
if html != nil {
if !hasKey {
list.Name = append(list.Name, html)
} else {
list.Entries = append(list.Entries, html)
}
}
lastToken = scanIndex
i = scanIndex - 1
}
}

View file

@ -3,9 +3,10 @@ package wikiparser
import "fmt"
type Template struct {
Name string
IsLink bool
Parameters map[string][]interface{}
Name string
IsLink bool
Parameters map[string][]interface{}
UnkeyedIndex int
}
func NewTemplate(name string, isLink bool) *Template {
@ -17,7 +18,7 @@ func NewTemplate(name string, isLink bool) *Template {
}
func (t *Template) AddParameterUnkeyed(value interface{}) {
t.Parameters[fmt.Sprintf("%d", len(t.Parameters))] = []interface{}{value}
t.AddParameter(fmt.Sprintf("%d", t.UnkeyedIndex), value)
}
func (t *Template) AddParameter(key string, value interface{}) {