Better normalization of titles, re-arrange album entries

This commit is contained in:
DataHoarder 2022-02-19 21:18:33 +01:00
parent 255388f491
commit 54688f036b

View file

@ -74,13 +74,13 @@ type albumEntry struct {
Id int `json:"pageid"`
Type string `json:"type"`
MainTitle string `json:"pagetitle"`
Artists []artistEntry `json:"artists,omitempty"`
Titles []string `json:"titles"`
CatalogNumber string `json:"catalognumber,omitempty"`
Discs []discEntry `json:"discs,omitempty"`
TrackCount int `json:"trackcount,omitempty"`
Duration int `json:"duration,omitempty"`
ReleaseDate JSONTime `json:"date,omitempty"`
Artists []artistEntry `json:"artists,omitempty"`
Discs []discEntry `json:"discs,omitempty"`
}
type discEntry struct {
Name string `json:"name,omitempty"`
@ -156,10 +156,12 @@ func getArtistEntries(kind string, entries []interface{}) (artists []artistEntry
} else if tpl, ok := value.(*wikiparser.Template); ok {
if tpl.IsLink {
var result []string
result = append(result, tpl.Name)
for _, vv := range tpl.Parameters {
result = append(result, getStringValue("", vv)...)
}
if len(result) == 0 {
result = append(result, tpl.Name)
}
artist.Names = append(artist.Names, strings.Join(result, " "))
} else {
artist.Names = append(artist.Names, strings.Join(getStringValue("", []interface{}{tpl}), " "))
@ -215,19 +217,6 @@ func getArtistEntries(kind string, entries []interface{}) (artists []artistEntry
return
}
// normalizeStringCharacters applies a fixed list of substring replacements to
// text, one after another in list order, and returns the resulting string.
//
// NOTE(review): the search side of every pair reads as an empty string here.
// The replacement values ", ", "(" and ")" suggest the search strings were
// non-ASCII punctuation (presumably full-width forms) that did not survive in
// this view — confirm against the real file. With an empty search string,
// strings.Replace inserts the replacement before every rune and at the end of
// the string instead of substituting anything.
func normalizeStringCharacters(text string) string {
//TODO: use transform chain
// Each pair is {substring to find, replacement}.
for _, v := range [][2]string{
{"", ", "},
{"", "("},
{"", ")"},
} {
// n = -1 places no limit on the number of replacements.
text = strings.Replace(text, v[0], v[1], -1)
}
return text
}
func getStringValue(pageName string, v []interface{}) (result []string) {
for _, value := range v {
@ -236,10 +225,12 @@ func getStringValue(pageName string, v []interface{}) (result []string) {
result = append(result, normalizeStringCharacters(text))
} else if template, ok := value.(*wikiparser.Template); ok {
if template.IsLink {
result = append(result, template.Name)
for _, vv := range template.Parameters {
result = append(result, getStringValue(pageName, vv)...)
}
if len(result) == 0 {
result = append(result, template.Name)
}
} else {
switch strings.ToUpper(template.Name) {
case "H:TITLE":
@ -524,7 +515,7 @@ func processIndexDirectory(filePath, indexPath, kind string, wg *sync.WaitGroup)
}
if len(entry.CatalogNumber) > 0 {
normalized := normalizeTitle(entry.CatalogNumber)
normalized := normalizeSearchTitle(entry.CatalogNumber)
if _, ok := albumTitleLookup[normalized]; !ok {
albumTitleLookup[normalized] = []*albumEntry{entry}
} else {
@ -532,7 +523,7 @@ func processIndexDirectory(filePath, indexPath, kind string, wg *sync.WaitGroup)
}
}
for _, title := range entry.Titles {
normalized := normalizeTitle(title)
normalized := normalizeSearchTitle(title)
if _, ok := albumTitleLookup[normalized]; !ok {
albumTitleLookup[normalized] = []*albumEntry{entry}
} else {
@ -567,7 +558,7 @@ func processIndex(filePath string) {
wg.Wait()
}
var normalizeTransformer = transform.Chain(
var normalizeSearchTitleTransformer = transform.Chain(
norm.NFKD,
//width.Narrow,
runes.Remove(runes.In(unicode.Cc)),
@ -590,8 +581,14 @@ var normalizeTransformer = transform.Chain(
norm.NFC,
)
func normalizeTitle(title string) (normalized string) {
normalized, _, _ = transform.String(normalizeTransformer, title)
func normalizeSearchTitle(title string) (normalized string) {
normalized, _, _ = transform.String(normalizeSearchTitleTransformer, title)
return
}
// normalizeStringCharacters returns text converted to Unicode normalization
// form NFKC (compatibility decomposition followed by canonical composition).
func normalizeStringCharacters(text string) (normalized string) {
	return norm.NFKC.String(text)
}
@ -789,12 +786,12 @@ func main() {
switch request.URL.Query().Get("type") {
case "album": //search by title or catalog number
entries, ok := albumTitleLookup[normalizeTitle(request.URL.Query().Get("query"))]
entries, ok := albumTitleLookup[normalizeSearchTitle(request.URL.Query().Get("query"))]
if !ok {
writer.Write([]byte("[]"))
} else {
jsonBytes, _ := json.MarshalIndent(entries, "", " ")
jsonBytes, _ := json.Marshal(entries)
writer.Write(jsonBytes)
}
@ -846,7 +843,7 @@ func main() {
}
writer.WriteHeader(http.StatusOK)
byteEntries, _ := json.MarshalIndent(dirEntries, "", " ")
byteEntries, _ := json.Marshal(dirEntries)
writer.Write(byteEntries)
} else {
writer.Header().Set("Content-Type", "text/plain")