// Package wikitext_parser contains a small WikiText parser and helpers that
// flatten the parsed tokens into plain strings.
package wikitext_parser

import (
	"strings"
)

// NormalizeWikiTitle returns title with every space replaced by an underscore.
func NormalizeWikiTitle(title string) string {
	return strings.ReplaceAll(title, " ", "_")
}

// NewLineToken is the marker ParseWikiText emits for every '\n' byte it
// encounters at the top level of its input.
type NewLineToken struct{}

// WikiStringValueOptions configures how GetWikiStringValue converts each kind
// of parsed token into strings. Every handler receives the options it was
// invoked with, so handlers can recurse through GetWikiStringValue.
//
// GetWikiStringValue calls the handlers unconditionally; call Default (or set
// every handler yourself) before using the options.
type WikiStringValueOptions struct {
	// PageName is the value the default TemplateHandler substitutes for the
	// PAGENAME and SUBPAGENAME templates.
	PageName string
	// Trim makes GetWikiStringValue strip surrounding whitespace from every
	// produced string before empty strings are discarded.
	Trim bool

	// StringHandler converts a plain text token.
	StringHandler func(value string, opt *WikiStringValueOptions) []string
	// HTMLHandler converts an *HTML token; it is only called when value.Tag
	// is non-nil.
	HTMLHandler func(value *HTML, opt *WikiStringValueOptions) []string
	// LinkHandler converts a *Link token.
	LinkHandler func(value *Link, opt *WikiStringValueOptions) []string
	// TemplateLinkHandler converts a *Template token whose IsLink flag is set.
	TemplateLinkHandler func(value *Template, opt *WikiStringValueOptions) []string
	// TemplateHandler converts a *Template token whose IsLink flag is not set.
	TemplateHandler func(value *Template, opt *WikiStringValueOptions) []string
	// UnorderedListHandler converts an *UnorderedList token.
	UnorderedListHandler func(value *UnorderedList, opt *WikiStringValueOptions) []string
	// DescriptionListHandler converts a *DescriptionList token.
	DescriptionListHandler func(value *DescriptionList, opt *WikiStringValueOptions) []string
	// NewLineHandler converts a NewLineToken.
	NewLineHandler func(opt *WikiStringValueOptions) []string
}

// Default enables Trim and installs the built-in handler for every token
// kind, overwriting any handlers that were already set. PageName is left
// untouched.
func (o *WikiStringValueOptions) Default() {
	o.Trim = true

	// Plain text is passed through unchanged.
	o.StringHandler = func(value string, opt *WikiStringValueOptions) []string {
		return []string{value}
	}

	// HTML nodes are represented by the string form of their tag.
	o.HTMLHandler = func(value *HTML, opt *WikiStringValueOptions) []string {
		return []string{value.Tag.String()}
	}

	o.NewLineHandler = func(opt *WikiStringValueOptions) []string {
		return []string{"\n"}
	}

	// A link renders as its display name when it has one, else as its URL.
	o.LinkHandler = func(value *Link, opt *WikiStringValueOptions) []string {
		if len(value.Name) > 0 {
			return GetWikiStringValue(value.Name, opt)
		}
		return []string{value.URL}
	}

	// A link-style template renders as the non-blank strings produced by its
	// parameters; when there are none, the template name is used instead.
	o.TemplateLinkHandler = func(value *Template, opt *WikiStringValueOptions) (result []string) {
		for _, param := range value.Parameters {
			for _, s := range GetWikiStringValue(param, opt) {
				if s = strings.TrimSpace(s); len(s) > 0 {
					result = append(result, s)
				}
			}
		}
		if len(result) == 0 {
			result = append(result, value.Name)
		}
		return result
	}

	// PAGENAME / SUBPAGENAME (case-insensitive) expand to opt.PageName; any
	// other template renders as its own name.
	o.TemplateHandler = func(value *Template, opt *WikiStringValueOptions) []string {
		switch strings.ToUpper(value.Name) {
		case "PAGENAME", "SUBPAGENAME":
			return []string{opt.PageName}
		default:
			return []string{value.Name}
		}
	}

	o.UnorderedListHandler = func(value *UnorderedList, opt *WikiStringValueOptions) []string {
		return GetWikiStringValue(value.Entries, opt)
	}

	// A description list collapses into a single "name: entry, entry" string.
	o.DescriptionListHandler = func(value *DescriptionList, opt *WikiStringValueOptions) []string {
		name := strings.Join(GetWikiStringValue(value.Name, opt), ", ")
		entries := strings.Join(GetWikiStringValue(value.Entries, opt), ", ")
		return []string{name + ": " + entries}
	}
}

// GetWikiStringValue converts the parsed tokens in v into strings by
// dispatching each token to the matching handler in opts. Tokens of any other
// type, and *HTML tokens with a nil Tag, are ignored.
//
// When opts.Trim is set each produced string is whitespace-trimmed; empty
// strings are always dropped. The returned slice is never nil.
func GetWikiStringValue(v []interface{}, opts *WikiStringValueOptions) (r []string) {
	var collected []string
	for _, value := range v {
		switch token := value.(type) {
		case string:
			collected = append(collected, opts.StringHandler(token, opts)...)
		case *Template:
			if token.IsLink {
				collected = append(collected, opts.TemplateLinkHandler(token, opts)...)
			} else {
				collected = append(collected, opts.TemplateHandler(token, opts)...)
			}
		case *HTML:
			if token.Tag != nil {
				collected = append(collected, opts.HTMLHandler(token, opts)...)
			}
		case NewLineToken:
			collected = append(collected, opts.NewLineHandler(opts)...)
		case *Link:
			collected = append(collected, opts.LinkHandler(token, opts)...)
		case *UnorderedList:
			collected = append(collected, opts.UnorderedListHandler(token, opts)...)
		case *DescriptionList:
			collected = append(collected, opts.DescriptionListHandler(token, opts)...)
		}
	}

	r = make([]string, 0, len(collected))
	for _, s := range collected {
		if opts.Trim {
			s = strings.TrimSpace(s)
		}
		if len(s) > 0 {
			r = append(r, s)
		}
	}
	return r
}

// ParseWikiText is a small WikiText parser that extracts text, templates and
// their arguments/parameters.
//
// The result is a flat token list that may contain:
//   - string: a whitespace-trimmed, non-empty run of plain text
//   - *Template: from "{{" or "[[" (parsed by ParseTemplate)
//   - *Link: from a single "{" or "[" followed by another byte that is neither
//     '{' nor '[' (parsed by ParseLink)
//   - *HTML: from '<' (parsed by ParseHTML)
//   - NewLineToken: for every '\n'
//   - *UnorderedList: from '*' or '#' as the first non-blank byte after a
//     newline (parsed by ParseUnorderedList)
//   - *DescriptionList: from ';' as the first non-blank byte after a newline
//     (parsed by ParseDescriptionList)
//
// A ':' as the first non-blank byte after a newline is skipped. Sub-parsers
// return the index at which scanning resumes; nil nodes are not added.
// Plain text that follows the last token is included in the result.
func ParseWikiText(text string) (result []interface{}) {
	var (
		i int  // current scan position; shared with flushText
		c byte // byte at position i
	)
	lastToken := 0 // start of the plain-text run that has not been emitted yet

	// flushText emits the pending plain text in text[lastToken:i], trimmed,
	// unless it is empty.
	flushText := func() {
		if lastToken < len(text) && i-lastToken > 0 {
			if t := strings.TrimSpace(text[lastToken:i]); len(t) > 0 {
				result = append(result, t)
			}
		}
	}

	// afterNewLine is true while only spaces/tabs have been seen since the
	// last '\n'; list and indentation markers are only honoured in that state.
	// NOTE(review): it starts out false, so markers on the very first line of
	// text are treated as plain text — confirm this is intended.
	afterNewLine := false

	for i = 0; i < len(text); i++ {
		c = text[i]
		isOpener := (c == '{' || c == '[') && i < len(text)-1

		switch {
		case isOpener && text[i+1] == c:
			// "{{" or "[[": template.
			flushText()
			next, tpl := ParseTemplate(text, i+2, 1, c)
			if tpl != nil {
				result = append(result, tpl)
			}
			lastToken = next
			i = next - 1 // the loop's i++ moves on to next

		case isOpener && text[i+1] != '{' && text[i+1] != '[':
			// Single "{" or "[": link. Mixed pairs ("{[" / "[{") match neither
			// case and stay part of the plain text.
			flushText()
			next, link := ParseLink(text, i+1, 1, c)
			if link != nil {
				result = append(result, link)
			}
			lastToken = next
			i = next - 1

		case c == '<':
			// HTML trigger.
			flushText()
			next, html := ParseHTML(text, i, 1)
			if html != nil {
				result = append(result, html)
			}
			lastToken = next
			i = next - 1

		case c == '\n':
			flushText()
			lastToken = i + 1
			afterNewLine = true
			result = append(result, NewLineToken{})

		case afterNewLine && (c == '*' || c == '#'):
			flushText()
			next, list := ParseUnorderedList(text, i, 1, 1, c)
			if list != nil {
				result = append(result, list)
			}
			lastToken = next
			i = next - 1

		case afterNewLine && c == ';':
			flushText()
			next, list := ParseDescriptionList(text, i+1, 1)
			if list != nil {
				result = append(result, list)
			}
			lastToken = next
			i = next - 1

		case afterNewLine && c == ':':
			// Leading ':' is dropped from the output.
			flushText()
			lastToken = i + 1
		}

		// Any byte other than newline, space or tab ends the line-start state.
		if afterNewLine && c != '\n' && c != ' ' && c != '\t' {
			afterNewLine = false
		}
	}

	// Emit text that follows the last token; without this flush, input that
	// does not end in a token (e.g. no trailing newline) lost its tail.
	flushText()
	return result
}