Clean up code
This commit is contained in:
parent
8995d1673d
commit
6659b4450d
95
parse.go
95
parse.go
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
|
||||
Copyright (C) 2021, Sven Windisch <semantosoph@posteo.de>
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
@ -99,21 +100,15 @@ func (a *Article) doQuotes() {
|
|||
tn := make([]*Token, 0, len(a.Tokens))
|
||||
t := a.Tokens
|
||||
for ; ni < len(t); ni++ {
|
||||
// log.Println(*t[ni])
|
||||
|
||||
if t[ni].TType == "quote" {
|
||||
l++
|
||||
// log.Println(l)
|
||||
}
|
||||
if t[ni].TType != "quote" || ni == len(t)-1 {
|
||||
switch {
|
||||
case l == 0:
|
||||
// log.Println(l)
|
||||
case l == 1:
|
||||
// log.Println(l)
|
||||
tn = append(tn, &Token{TText: "'", TType: "text"})
|
||||
case l == 2:
|
||||
// log.Println(l)
|
||||
switch state {
|
||||
case QS_b:
|
||||
tn = append(tn, &Token{TType: "html", TText: "i"})
|
||||
|
@ -134,7 +129,6 @@ func (a *Article) doQuotes() {
|
|||
state = QS_i
|
||||
}
|
||||
case l == 3, l == 4:
|
||||
// log.Println(l)
|
||||
if l == 4 {
|
||||
tn = append(tn, &Token{TText: "'", TType: "text"})
|
||||
}
|
||||
|
@ -158,7 +152,6 @@ func (a *Article) doQuotes() {
|
|||
state = QS_b
|
||||
}
|
||||
case l >= 5:
|
||||
// log.Println(l)
|
||||
s := ""
|
||||
for i := 5; i < l; i++ {
|
||||
s += "'"
|
||||
|
@ -193,7 +186,6 @@ func (a *Article) doQuotes() {
|
|||
}
|
||||
|
||||
if t[ni].TType == "link" || t[ni].TType == "extlink" || t[ni].TType == "filelink" {
|
||||
// log.Println(l)
|
||||
save = state
|
||||
switch state {
|
||||
case QS_b:
|
||||
|
@ -211,7 +203,6 @@ func (a *Article) doQuotes() {
|
|||
l = 0
|
||||
}
|
||||
if t[ni].TType == "closelink" || t[ni].TType == "closeextlink" || t[ni].TType == "closefilelink" {
|
||||
// log.Println(l)
|
||||
switch state {
|
||||
case QS_b:
|
||||
tn = append(tn, &Token{TType: "html", TText: "/b"})
|
||||
|
@ -230,11 +221,9 @@ func (a *Article) doQuotes() {
|
|||
}
|
||||
|
||||
if t[ni].TType != "quote" && t[ni].TType != "newline" {
|
||||
// log.Println(l)
|
||||
tn = append(tn, t[ni])
|
||||
}
|
||||
if t[ni].TType == "newline" || ni == len(t)-1 {
|
||||
// log.Println(l)
|
||||
switch state {
|
||||
case QS_b:
|
||||
tn = append(tn, &Token{TType: "html", TText: "/b"})
|
||||
|
@ -252,13 +241,11 @@ func (a *Article) doQuotes() {
|
|||
save = QS_none
|
||||
}
|
||||
if t[ni].TType == "newline" {
|
||||
// log.Println(l)
|
||||
tn = append(tn, t[ni])
|
||||
}
|
||||
|
||||
}
|
||||
a.Tokens = tn
|
||||
// a.OldTokens = t
|
||||
}
|
||||
|
||||
//nowiki, wikipre, pre, math, quote, colon, magic, h?, *, #, ;, :, html,
|
||||
|
@ -283,7 +270,6 @@ func (a *Article) internalParse(t []*Token) ([]*ParseNode, error) {
|
|||
lastti := -1
|
||||
for ti < len(t) {
|
||||
if ti == lastti {
|
||||
// fmt.Println(len(t), ti, *t[ti], *t[ti-1], *t[ti+1])
|
||||
return nil, errors.New("parsing issue")
|
||||
}
|
||||
lastti = ti
|
||||
|
@ -512,84 +498,6 @@ func (a *Article) internalParse(t []*Token) ([]*ParseNode, error) {
|
|||
}
|
||||
case "*", "#", ";", ":":
|
||||
ti += 1
|
||||
/* stack := ""
|
||||
si := 0
|
||||
ni := ti
|
||||
ln := &ParseNode{NType: "root", Nodes: make([]*ParseNode, 0, 4)}
|
||||
for {
|
||||
|
||||
this := ""
|
||||
islist := false
|
||||
for ; ni < len(t); ni++ {
|
||||
switch t[ni].TType {
|
||||
case "*", "#", ";", ":":
|
||||
islist = true
|
||||
}
|
||||
if islist {
|
||||
this += t[ni].TType
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
same := 0
|
||||
for i := 0; i < len(this) && i < len(stack); i++ {
|
||||
if this[i] == stack[i] ||
|
||||
(this[i] == ';' && stack[i] == ':') ||
|
||||
(this[i] == ':' && stack[i] == ';') {
|
||||
same++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
n := ln
|
||||
for i := 0; i < same; i++ {
|
||||
n = n.Nodes[len(n.Nodes)-1]
|
||||
n = n.Nodes[len(n.Nodes)-1]
|
||||
}
|
||||
|
||||
for i := same; i < len(this); i++ { //open
|
||||
var nn *ParseNode
|
||||
switch this[i] {
|
||||
case '*':
|
||||
nn = &ParseNode{NType: "html", NSubType: "ul"}
|
||||
case '#':
|
||||
nn = &ParseNode{NType: "html", NSubType: "ol"}
|
||||
case ';':
|
||||
nn = &ParseNode{NType: "html", NSubType: "dl"}
|
||||
case ':':
|
||||
nn = &ParseNode{NType: "html", NSubType: "dl"}
|
||||
}
|
||||
nn.Nodes = make([]*ParseNode, 0, 1)
|
||||
n.Nodes = append(n.Nodes, nn)
|
||||
n = nn
|
||||
if i < len(this)-1 {
|
||||
var elem *ParseNode
|
||||
switch this[len] {
|
||||
case '*', '#':
|
||||
elem = &ParseNode{NType: "html", NSubType: "li"}
|
||||
case ';':
|
||||
elem = &ParseNode{NType: "html", NSubType: "dt"}
|
||||
case ':':
|
||||
elem = &ParseNode{NType: "html", NSubType: "dd"}
|
||||
}
|
||||
elem.Nodes = make([]*ParseNode, 0, 1)
|
||||
n.Nodes = append(n.Nodes, elem)
|
||||
n = elem
|
||||
}
|
||||
}
|
||||
var nitem *ParseNode
|
||||
switch this[len] {
|
||||
case '*', '#':
|
||||
nitem = &ParseNode{NType: "html", NSubType: "li"}
|
||||
case ';':
|
||||
nitem = &ParseNode{NType: "html", NSubType: "dt"}
|
||||
case ':':
|
||||
nitem = &ParseNode{NType: "html", NSubType: "dd"}
|
||||
}
|
||||
n := &ParseNode{NType: "html", NSubType: st}
|
||||
nl = append(nl, n)
|
||||
|
||||
} */
|
||||
case "newline":
|
||||
n := &ParseNode{NType: "text", Contents: "\n"}
|
||||
nl = append(nl, n)
|
||||
|
@ -621,7 +529,6 @@ func (a *Article) internalParse(t []*Token) ([]*ParseNode, error) {
|
|||
}
|
||||
if templateIndex >= len(a.Templates) {
|
||||
return nil, errors.New("Template index out of range")
|
||||
//fmt.Println("Template index out of range", t[ti])
|
||||
} else {
|
||||
n := &ParseNode{NType: t[ti].TType, Contents: a.Templates[templateIndex].Name}
|
||||
nl = append(nl, n)
|
||||
|
|
|
@ -16,10 +16,6 @@ limitations under the License.
|
|||
|
||||
package gowiki
|
||||
|
||||
// "bytes"
|
||||
// "errors"
|
||||
// "fmt"
|
||||
|
||||
func (a *Article) ParseSimple() error {
|
||||
a.Text = a.MediaWiki
|
||||
a.Parsed = true
|
||||
|
|
42
tokenize.go
42
tokenize.go
|
@ -17,11 +17,8 @@ limitations under the License.
|
|||
package gowiki
|
||||
|
||||
import (
|
||||
// "bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
// "html"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
|
@ -160,14 +157,6 @@ func (a *Article) decodeHTMLtag(l string) (int, string, string, bool, bool) {
|
|||
closefound := false
|
||||
tagend := 0
|
||||
tagstart := 0
|
||||
//taking care of comments at preprocessing time
|
||||
/* if strings.HasPrefix(l, "<!--") {
|
||||
i := strings.Index(l[4:], "-->")
|
||||
if i == -1 {
|
||||
return len(l), "!--", l[4:], true, true
|
||||
}
|
||||
return 4 + i + 3, "!--", l[4 : 4+i], true, true
|
||||
} */
|
||||
dhtLoop:
|
||||
for idx, rv := range l {
|
||||
// fmt.Println(string(rv), inquote, string(quote), idx, matchingpos)
|
||||
|
@ -214,7 +203,6 @@ dhtLoop:
|
|||
attr = l[tagend:matchingpos]
|
||||
}
|
||||
return matchingpos + 1, tag, attr, closefound, true
|
||||
// e, tag, attr, closed, ok := decodeHTMLtag(l[pos:end])
|
||||
}
|
||||
|
||||
func matchPrefixes(s string, prefixes []string) bool {
|
||||
|
@ -368,7 +356,6 @@ plLoop2:
|
|||
endpos = idx
|
||||
break plLoop2
|
||||
}
|
||||
|
||||
}
|
||||
case ']':
|
||||
if intLinkOpen && len(l) > idx+1 && l[idx+1] == ']' {
|
||||
|
@ -469,7 +456,6 @@ plLoop:
|
|||
if len(pipepos) == 0 {
|
||||
link = WikiCanonicalForm(l[2:matchingpos])
|
||||
nt = []*Token{&Token{TText: l[2:matchingpos], TType: "text"}}
|
||||
|
||||
} else {
|
||||
link = WikiCanonicalForm(l[2:pipepos[0]])
|
||||
for i := 0; i < len(pipepos)-1; i++ {
|
||||
|
@ -507,12 +493,10 @@ func (a *Article) decodeBehavSwitch(l string) (int, bool) {
|
|||
} else {
|
||||
return len(match), true
|
||||
}
|
||||
// e, ok := decodeMagic(l[pos:end])
|
||||
}
|
||||
|
||||
func (a *Article) parseInlineText(l string, start, end int) ([]*Token, error) {
|
||||
nt := make([]*Token, 0)
|
||||
// fmt.Println("in parseInlineText")
|
||||
|
||||
tStart, tEnd := start, start
|
||||
|
||||
|
@ -544,19 +528,6 @@ func (a *Article) parseInlineText(l string, start, end int) ([]*Token, error) {
|
|||
tStart, tEnd = pos, pos
|
||||
continue
|
||||
}
|
||||
/* case '{':
|
||||
e, tt, ok := a.parseTemplateEtc(l[pos:end])
|
||||
fmt.Println("template:", e, tt, ok)
|
||||
if ok {
|
||||
if len(cs) > 0 {
|
||||
nt = append(nt, &Token{TText: cs, TType: "text"})
|
||||
}
|
||||
nt = append(nt, tt...)
|
||||
pos += e
|
||||
cs = ""
|
||||
continue
|
||||
}
|
||||
cs += string(rv) */
|
||||
case '_':
|
||||
e, ok := a.decodeBehavSwitch(l[pos:end])
|
||||
if ok {
|
||||
|
@ -693,20 +664,18 @@ func (a *Article) Tokenize(mw string, g PageGetter) ([]*Token, error) {
|
|||
for i := range tokens {
|
||||
if tokens[i].TType == "special" {
|
||||
specialcount++
|
||||
t, ok := templatemap[tokens[i].TText] //nowikipremathmap[tokens[i].TText]
|
||||
t, ok := templatemap[tokens[i].TText]
|
||||
if !ok {
|
||||
return nil, errors.New("special not in map")
|
||||
}
|
||||
tokens[i] = t
|
||||
}
|
||||
}
|
||||
// fmt.Println(specialcount, len(nowikipremathmap))
|
||||
// if specialcount != len(nowikipremathmap) {
|
||||
|
||||
if specialcount != len(templatemap) {
|
||||
if DebugLevel > 0 {
|
||||
fmt.Println("[Tokenize] Warning: number of specials in map differs from number found")
|
||||
}
|
||||
// return nil, errors.New("number of specials in map differs from number found")
|
||||
}
|
||||
return tokens, nil
|
||||
}
|
||||
|
@ -770,7 +739,7 @@ func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
|
|||
am = append(am, moc...)
|
||||
am = append(am, mcc...)
|
||||
sort.Sort(ssInt(am))
|
||||
// fmt.Println(am)
|
||||
|
||||
tokens := make(map[string]*Token, len(am))
|
||||
if len(am) == 0 {
|
||||
return mw, tokens
|
||||
|
@ -782,11 +751,10 @@ func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
|
|||
openidx := 0
|
||||
count := 0
|
||||
for i := range am {
|
||||
// fmt.Println("ctype", ctype, "lastclose", lastclose, "count", count, "openidx", openidx, "am[i]", am[i])
|
||||
if (ctype != -1) && (am[i][4] == ctype+1) && (am[openidx][1] <= am[i][0]) {
|
||||
// closing an open one
|
||||
special := fmt.Sprintf("\x07%07d", count)
|
||||
// special := fmt.Sprintf("@%07d", count)
|
||||
|
||||
tokens[special] = &Token{
|
||||
TText: mw[am[openidx][1]:am[i][0]],
|
||||
TType: strings.ToLower(mw[am[openidx][2]:am[openidx][3]]),
|
||||
|
@ -806,7 +774,7 @@ func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
|
|||
if ctype != -1 {
|
||||
//it's open: close it
|
||||
special := fmt.Sprintf("\x07%07d", count)
|
||||
// special := fmt.Sprintf("@%07d", count)
|
||||
|
||||
tokens[special] = &Token{
|
||||
TText: mw[am[openidx][1]:len(mw)],
|
||||
TType: strings.ToLower(mw[am[openidx][2]:am[openidx][3]]),
|
||||
|
|
Reference in a new issue