wikitext-parser/wikitext.go

214 lines
6.0 KiB
Go

package wikitext_parser
import (
"strings"
)
// NormalizeWikiTitle returns title with every space character replaced by an
// underscore. All other characters are left as they are.
func NormalizeWikiTitle(title string) string {
	words := strings.Split(title, " ")
	return strings.Join(words, "_")
}
// NewLineToken marks a line break in a parsed token list. ParseWikiText appends
// one (by value) for every '\n' it reads, and GetWikiStringValue passes it to
// the configured NewLineHandler.
type NewLineToken struct {
}
// WikiStringValueOptions configures how GetWikiStringValue turns parsed values
// into strings. Each handler receives the options it was invoked with and
// returns zero or more strings. GetWikiStringValue calls the handlers without a
// nil check, so every handler that can be reached must be set; Default installs
// all of them.
type WikiStringValueOptions struct {
// PageName is what the default TemplateHandler emits for PAGENAME and
// SUBPAGENAME templates.
PageName string
// Trim makes GetWikiStringValue strip surrounding whitespace from every
// produced string before empty strings are discarded.
Trim bool
// StringHandler is called for plain string values.
StringHandler func(value string, opt *WikiStringValueOptions) []string
// HTMLHandler is called for *HTML values whose Tag is non-nil.
HTMLHandler func(value *HTML, opt *WikiStringValueOptions) []string
// LinkHandler is called for *Link values.
LinkHandler func(value *Link, opt *WikiStringValueOptions) []string
// TemplateLinkHandler is called for *Template values with IsLink set.
TemplateLinkHandler func(value *Template, opt *WikiStringValueOptions) []string
// TemplateHandler is called for *Template values with IsLink unset.
TemplateHandler func(value *Template, opt *WikiStringValueOptions) []string
// UnorderedListHandler is called for *UnorderedList values.
UnorderedListHandler func(value *UnorderedList, opt *WikiStringValueOptions) []string
// DescriptionListHandler is called for *DescriptionList values.
DescriptionListHandler func(value *DescriptionList, opt *WikiStringValueOptions) []string
// NewLineHandler is called for NewLineToken values.
NewLineHandler func(opt *WikiStringValueOptions) []string
}
// Default enables Trim and installs the built-in handler for every value kind,
// overwriting any handlers that were set before. PageName is left untouched.
func (o *WikiStringValueOptions) Default() {
	o.Trim = true

	// Plain text is passed through unchanged.
	o.StringHandler = func(text string, _ *WikiStringValueOptions) []string {
		return []string{text}
	}

	// An HTML element is represented by the string form of its tag.
	o.HTMLHandler = func(element *HTML, _ *WikiStringValueOptions) []string {
		return []string{element.Tag.String()}
	}

	// A line break becomes a literal newline.
	o.NewLineHandler = func(_ *WikiStringValueOptions) []string {
		return []string{"\n"}
	}

	// A link yields the text of its name when it has one, otherwise its URL.
	o.LinkHandler = func(link *Link, opt *WikiStringValueOptions) []string {
		if len(link.Name) == 0 {
			return []string{link.URL}
		}
		var texts []string
		return append(texts, GetWikiStringValue(link.Name, opt)...)
	}

	// A template link yields the non-blank text of all its parameters and
	// falls back to the template name when the parameters produce nothing.
	o.TemplateLinkHandler = func(tpl *Template, opt *WikiStringValueOptions) []string {
		var texts []string
		for _, param := range tpl.Parameters {
			for _, piece := range GetWikiStringValue(param, opt) {
				if piece = strings.TrimSpace(piece); piece != "" {
					texts = append(texts, piece)
				}
			}
		}
		if len(texts) == 0 {
			texts = append(texts, tpl.Name)
		}
		return texts
	}

	// PAGENAME and SUBPAGENAME (matched case-insensitively) resolve to the
	// configured page name; any other template yields its own name.
	o.TemplateHandler = func(tpl *Template, opt *WikiStringValueOptions) []string {
		upper := strings.ToUpper(tpl.Name)
		if upper == "PAGENAME" || upper == "SUBPAGENAME" {
			return []string{opt.PageName}
		}
		return []string{tpl.Name}
	}

	// An unordered list yields the strings of its entries.
	o.UnorderedListHandler = func(list *UnorderedList, opt *WikiStringValueOptions) []string {
		return GetWikiStringValue(list.Entries, opt)
	}

	// A description list is rendered as a single "name: entries" string with
	// both parts comma-separated.
	o.DescriptionListHandler = func(list *DescriptionList, opt *WikiStringValueOptions) []string {
		term := strings.Join(GetWikiStringValue(list.Name, opt), ", ")
		details := strings.Join(GetWikiStringValue(list.Entries, opt), ", ")
		return []string{term + ": " + details}
	}
}
// GetWikiStringValue converts a list of parsed values into strings by
// dispatching every element to the matching handler in opts:
//
//   - string          -> StringHandler
//   - *Template       -> TemplateLinkHandler when IsLink is set, else TemplateHandler
//   - *HTML           -> HTMLHandler (skipped when Tag is nil)
//   - NewLineToken    -> NewLineHandler
//   - *Link           -> LinkHandler
//   - *UnorderedList  -> UnorderedListHandler
//   - *DescriptionList -> DescriptionListHandler
//
// Elements of any other type are ignored. When opts.Trim is set every produced
// string is stripped of surrounding whitespace; empty strings are always
// dropped. The result is never nil. opts and the handlers that get reached must
// be non-nil.
func GetWikiStringValue(v []interface{}, opts *WikiStringValueOptions) (r []string) {
	var collected []string
	for _, item := range v {
		switch node := item.(type) {
		case string:
			collected = append(collected, opts.StringHandler(node, opts)...)
		case *Template:
			if !node.IsLink {
				collected = append(collected, opts.TemplateHandler(node, opts)...)
			} else {
				collected = append(collected, opts.TemplateLinkHandler(node, opts)...)
			}
		case *HTML:
			// An HTML value without a tag contributes nothing.
			if node.Tag != nil {
				collected = append(collected, opts.HTMLHandler(node, opts)...)
			}
		case NewLineToken:
			collected = append(collected, opts.NewLineHandler(opts)...)
		case *Link:
			collected = append(collected, opts.LinkHandler(node, opts)...)
		case *UnorderedList:
			collected = append(collected, opts.UnorderedListHandler(node, opts)...)
		case *DescriptionList:
			collected = append(collected, opts.DescriptionListHandler(node, opts)...)
		}
	}

	// Second pass: optional trimming, then removal of empty strings.
	r = make([]string, 0, len(collected))
	for _, s := range collected {
		if opts.Trim {
			s = strings.TrimSpace(s)
		}
		if s == "" {
			continue
		}
		r = append(r, s)
	}
	return r
}
//ParseWikiText small WikiText parser that extracts text, Templates, and its arguments/parameters
func ParseWikiText(text string) (result []interface{}) {
index := 0
var i int
var c byte
lastToken := index
addValue := func() int {
if lastToken < len(text) && i-lastToken > 0 {
t := strings.TrimSpace(text[lastToken:i])
if len(t) > 0 {
result = append(result, t)
}
return len(t)
}
return 0
}
afterNewLine := false
for i = index; i < len(text); i++ {
c = text[i]
if (c == '{' && i < len(text)-1 && text[i+1] == '{') || (c == '[' && i < len(text)-1 && text[i+1] == '[') {
addValue()
var tpl *Template
var scanIndex int
scanIndex, tpl = ParseTemplate(text, i+2, 1, c)
if tpl != nil {
result = append(result, tpl)
}
lastToken = scanIndex
i = scanIndex - 1
} else if (c == '{' && i < len(text)-1 && text[i+1] != '{' && text[i+1] != '[') || (c == '[' && i < len(text)-1 && text[i+1] != '[' && text[i+1] != '{') {
addValue()
var link *Link
var scanIndex int
scanIndex, link = ParseLink(text, i+1, 1, c)
if link != nil {
result = append(result, link)
}
lastToken = scanIndex
i = scanIndex - 1
} else if c == '<' { //html trigger
addValue()
var html *HTML
var scanIndex int
scanIndex, html = ParseHTML(text, i, 1)
if html != nil {
result = append(result, html)
}
lastToken = scanIndex
i = scanIndex - 1
} else if c == '\n' {
addValue()
lastToken = i + 1
afterNewLine = true
result = append(result, NewLineToken{})
} else if afterNewLine && (c == '*' || c == '#') {
addValue()
var list *UnorderedList
var scanIndex int
scanIndex, list = ParseUnorderedList(text, i, 1, 1, c)
if list != nil {
result = append(result, list)
}
lastToken = scanIndex
i = scanIndex - 1
} else if afterNewLine && c == ';' {
addValue()
var list *DescriptionList
var scanIndex int
scanIndex, list = ParseDescriptionList(text, i+1, 1)
if list != nil {
result = append(result, list)
}
lastToken = scanIndex
i = scanIndex - 1
} else if afterNewLine && c == ':' {
addValue()
lastToken = i + 1
}
if afterNewLine && c != '\n' && c != ' ' && c != '\t' {
afterNewLine = false
}
}
return
}