wikitext-parser/wikitext.go
2022-02-20 20:19:16 +01:00

157 lines
5 KiB
Go

package wikitext_parser
import (
"strings"
)
// NormalizeWikiTitle returns title with every space character (" ") replaced
// by an underscore ("_"). No other characters are altered.
func NormalizeWikiTitle(title string) string {
	return strings.ReplaceAll(title, " ", "_")
}
// NewLineToken is an empty marker value. GetWikiStringValue hands elements of
// this type (stored by value, not by pointer) to the configured NewLineHandler.
type NewLineToken struct{}
// GetInterfaceSliceStringValueOptions configures how GetWikiStringValue turns a
// slice of parsed wikitext elements into strings. Each handler receives the
// element (where applicable) together with the options value in use.
//
// GetWikiStringValue invokes the handlers without nil checks, so all of them
// must be set before use; Default installs a complete set.
type GetInterfaceSliceStringValueOptions struct {
// PageName is the value the default TemplateHandler emits for the PAGENAME
// and SUBPAGENAME templates.
PageName string
// Trim, when true, makes GetWikiStringValue strip surrounding whitespace from
// every produced string. Empty strings are dropped regardless of this flag.
Trim bool
// StringHandler is called for plain string elements.
StringHandler func(value string, opt *GetInterfaceSliceStringValueOptions) []string
// HTMLHandler is called for *HTML elements whose Tag is non-nil.
HTMLHandler func(value *HTML, opt *GetInterfaceSliceStringValueOptions) []string
// LinkHandler is called for *Link elements.
LinkHandler func(value *Link, opt *GetInterfaceSliceStringValueOptions) []string
// TemplateLinkHandler is called for *Template elements with IsLink set.
TemplateLinkHandler func(value *Template, opt *GetInterfaceSliceStringValueOptions) []string
// TemplateHandler is called for *Template elements with IsLink unset.
TemplateHandler func(value *Template, opt *GetInterfaceSliceStringValueOptions) []string
// UnorderedListHandler is called for *UnorderedList elements.
UnorderedListHandler func(value *UnorderedList, opt *GetInterfaceSliceStringValueOptions) []string
// DescriptionListHandler is called for *DescriptionList elements.
DescriptionListHandler func(value *DescriptionList, opt *GetInterfaceSliceStringValueOptions) []string
// NewLineHandler is called for NewLineToken elements.
NewLineHandler func(opt *GetInterfaceSliceStringValueOptions) []string
}
// Default resets o to the stock configuration: Trim is switched on and every
// handler is replaced with a plain-text rendering for its element type.
// PageName is not modified.
func (o *GetInterfaceSliceStringValueOptions) Default() {
	o.Trim = true

	// Plain strings pass through unchanged.
	o.StringHandler = func(value string, opt *GetInterfaceSliceStringValueOptions) []string {
		return []string{value}
	}

	// HTML elements render as the string form of their tag.
	o.HTMLHandler = func(value *HTML, opt *GetInterfaceSliceStringValueOptions) []string {
		return []string{value.Tag.String()}
	}

	// A new-line token renders as a single "\n".
	o.NewLineHandler = func(opt *GetInterfaceSliceStringValueOptions) []string {
		return []string{"\n"}
	}

	// Links render as their display name, or as the URL when the link has no
	// name elements. The name is emitted exactly once.
	o.LinkHandler = func(value *Link, opt *GetInterfaceSliceStringValueOptions) (result []string) {
		if len(value.Name) > 0 {
			result = append(result, GetWikiStringValue(value.Name, opt)...)
		} else {
			result = append(result, value.URL)
		}
		return result
	}

	// Link-style templates render as the non-blank string values of all their
	// parameters; when there are none, the template name is used instead.
	o.TemplateLinkHandler = func(value *Template, opt *GetInterfaceSliceStringValueOptions) (result []string) {
		for _, param := range value.Parameters {
			for _, s := range GetWikiStringValue(param, opt) {
				if s = strings.TrimSpace(s); len(s) > 0 {
					result = append(result, s)
				}
			}
		}
		if len(result) == 0 {
			result = append(result, value.Name)
		}
		return result
	}

	// PAGENAME and SUBPAGENAME (matched case-insensitively) expand to
	// opt.PageName; every other template renders as its own name.
	o.TemplateHandler = func(value *Template, opt *GetInterfaceSliceStringValueOptions) (result []string) {
		switch strings.ToUpper(value.Name) {
		case "PAGENAME", "SUBPAGENAME":
			result = append(result, opt.PageName)
		default:
			result = append(result, value.Name)
		}
		return result
	}

	// Unordered lists render as the string values of their entries.
	o.UnorderedListHandler = func(value *UnorderedList, opt *GetInterfaceSliceStringValueOptions) []string {
		return GetWikiStringValue(value.Entries, opt)
	}

	// Description lists collapse into one "name: entries" string, with the
	// parts on each side of the colon joined by ", ".
	o.DescriptionListHandler = func(value *DescriptionList, opt *GetInterfaceSliceStringValueOptions) []string {
		return []string{strings.Join(GetWikiStringValue(value.Name, opt), ", ") + ": " + strings.Join(GetWikiStringValue(value.Entries, opt), ", ")}
	}
}
// GetWikiStringValue converts a slice of parsed wikitext elements into strings
// by passing each element to the handler in opts that matches its dynamic type.
// Elements of any other type, and *HTML elements with a nil Tag, are skipped.
//
// The handler outputs are then filtered: when opts.Trim is set each string is
// whitespace-trimmed, and empty strings are always discarded. The returned
// slice is never nil.
func GetWikiStringValue(v []interface{}, opts *GetInterfaceSliceStringValueOptions) (r []string) {
	var collected []string
	for _, item := range v {
		switch node := item.(type) {
		case string:
			collected = append(collected, opts.StringHandler(node, opts)...)
		case *Template:
			if node.IsLink {
				collected = append(collected, opts.TemplateLinkHandler(node, opts)...)
			} else {
				collected = append(collected, opts.TemplateHandler(node, opts)...)
			}
		case *HTML:
			if node.Tag != nil {
				collected = append(collected, opts.HTMLHandler(node, opts)...)
			}
		case NewLineToken:
			collected = append(collected, opts.NewLineHandler(opts)...)
		case *Link:
			collected = append(collected, opts.LinkHandler(node, opts)...)
		case *UnorderedList:
			collected = append(collected, opts.UnorderedListHandler(node, opts)...)
		case *DescriptionList:
			collected = append(collected, opts.DescriptionListHandler(node, opts)...)
		}
	}

	r = make([]string, 0, len(collected))
	for _, s := range collected {
		if opts.Trim {
			s = strings.TrimSpace(s)
		}
		if len(s) == 0 {
			continue
		}
		r = append(r, s)
	}
	return r
}
// ParseWikiText is a small WikiText parser that extracts text, Templates, and
// their arguments/parameters.
//
// The input is scanned left to right for the nearest "{{" or "[[" opener. Text
// in front of an opener is appended to the result as a string (untrimmed)
// unless it consists solely of whitespace. Parsing of the construct itself is
// delegated to ParseTemplate, which receives the position just past the
// two-byte opener and the opening byte, and returns the position to resume
// from; a non-nil template it returns is appended to the result.
func ParseWikiText(text string) (result []interface{}) {
	pos := 0
	for pos < len(text) {
		rest := text[pos:]
		tplOff := strings.Index(rest, "{{")
		linkOff := strings.Index(rest, "[[")

		// No further openers: the remainder is trailing text.
		if tplOff == -1 && linkOff == -1 {
			if len(strings.TrimSpace(rest)) > 0 {
				result = append(result, rest)
			}
			break
		}

		// Pick whichever opener comes first; -1 means "not present".
		next := tplOff
		if next == -1 || (linkOff != -1 && linkOff < next) {
			next = linkOff
		}

		if leading := rest[:next]; len(strings.TrimSpace(leading)) > 0 {
			result = append(result, leading)
		}

		opener := pos + next
		var tpl *Template
		pos, tpl = ParseTemplate(text, opener+2, 0, text[opener])
		if tpl != nil {
			result = append(result, tpl)
		}
	}
	return result
}