Clean up code

Sven Windisch 2021-12-16 23:39:56 +01:00
parent 8995d1673d
commit 6659b4450d
4 changed files with 6 additions and 136 deletions
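The deletions below are almost entirely commented-out log.Println / fmt.Println debug lines. The pattern the package keeps instead (visible in the Tokenize hunk, `if DebugLevel > 0`) gates diagnostics behind a package-level switch. A minimal sketch of that idea, assuming a DebugLevel variable as in the code below; the debugf helper itself is hypothetical and not part of gowiki:

package main

import "log"

// DebugLevel mirrors the package-level switch seen in the Tokenize hunk below;
// the debugf helper is only an illustration, not gowiki API.
var DebugLevel = 0

// debugf emits diagnostics only when debugging is enabled, so no
// commented-out print statements need to stay in the parser code.
func debugf(format string, args ...interface{}) {
	if DebugLevel > 0 {
		log.Printf(format, args...)
	}
}

func main() {
	debugf("quote run length: %d", 3) // silent by default
	DebugLevel = 1
	debugf("quote run length: %d", 3) // printed once debugging is on
}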

View file

@@ -1,5 +1,6 @@
/*
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
Copyright (C) 2021, Sven Windisch <semantosoph@posteo.de>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -99,21 +100,15 @@ func (a *Article) doQuotes() {
tn := make([]*Token, 0, len(a.Tokens))
t := a.Tokens
for ; ni < len(t); ni++ {
// log.Println(*t[ni])
if t[ni].TType == "quote" {
l++
// log.Println(l)
}
if t[ni].TType != "quote" || ni == len(t)-1 {
switch {
case l == 0:
// log.Println(l)
case l == 1:
// log.Println(l)
tn = append(tn, &Token{TText: "'", TType: "text"})
case l == 2:
// log.Println(l)
switch state {
case QS_b:
tn = append(tn, &Token{TType: "html", TText: "i"})
@@ -134,7 +129,6 @@ func (a *Article) doQuotes() {
state = QS_i
}
case l == 3, l == 4:
// log.Println(l)
if l == 4 {
tn = append(tn, &Token{TText: "'", TType: "text"})
}
@@ -158,7 +152,6 @@ func (a *Article) doQuotes() {
state = QS_b
}
case l >= 5:
// log.Println(l)
s := ""
for i := 5; i < l; i++ {
s += "'"
@@ -193,7 +186,6 @@ func (a *Article) doQuotes() {
}
if t[ni].TType == "link" || t[ni].TType == "extlink" || t[ni].TType == "filelink" {
// log.Println(l)
save = state
switch state {
case QS_b:
@@ -211,7 +203,6 @@ func (a *Article) doQuotes() {
l = 0
}
if t[ni].TType == "closelink" || t[ni].TType == "closeextlink" || t[ni].TType == "closefilelink" {
// log.Println(l)
switch state {
case QS_b:
tn = append(tn, &Token{TType: "html", TText: "/b"})
@@ -230,11 +221,9 @@ func (a *Article) doQuotes() {
}
if t[ni].TType != "quote" && t[ni].TType != "newline" {
// log.Println(l)
tn = append(tn, t[ni])
}
if t[ni].TType == "newline" || ni == len(t)-1 {
// log.Println(l)
switch state {
case QS_b:
tn = append(tn, &Token{TType: "html", TText: "/b"})
@@ -252,13 +241,11 @@ func (a *Article) doQuotes() {
save = QS_none
}
if t[ni].TType == "newline" {
// log.Println(l)
tn = append(tn, t[ni])
}
}
a.Tokens = tn
// a.OldTokens = t
}
//nowiki, wikipre, pre, math, quote, colon, magic, h?, *, #, ;, :, html,
@@ -283,7 +270,6 @@ func (a *Article) internalParse(t []*Token) ([]*ParseNode, error) {
lastti := -1
for ti < len(t) {
if ti == lastti {
// fmt.Println(len(t), ti, *t[ti], *t[ti-1], *t[ti+1])
return nil, errors.New("parsing issue")
}
lastti = ti
@@ -512,84 +498,6 @@ func (a *Article) internalParse(t []*Token) ([]*ParseNode, error) {
}
case "*", "#", ";", ":":
ti += 1
/* stack := ""
si := 0
ni := ti
ln := &ParseNode{NType: "root", Nodes: make([]*ParseNode, 0, 4)}
for {
this := ""
islist := false
for ; ni < len(t); ni++ {
switch t[ni].TType {
case "*", "#", ";", ":":
islist = true
}
if islist {
this += t[ni].TType
} else {
break
}
}
same := 0
for i := 0; i < len(this) && i < len(stack); i++ {
if this[i] == stack[i] ||
(this[i] == ';' && stack[i] == ':') ||
(this[i] == ':' && stack[i] == ';') {
same++
} else {
break
}
}
n := ln
for i := 0; i < same; i++ {
n = n.Nodes[len(n.Nodes)-1]
n = n.Nodes[len(n.Nodes)-1]
}
for i := same; i < len(this); i++ { //open
var nn *ParseNode
switch this[i] {
case '*':
nn = &ParseNode{NType: "html", NSubType: "ul"}
case '#':
nn = &ParseNode{NType: "html", NSubType: "ol"}
case ';':
nn = &ParseNode{NType: "html", NSubType: "dl"}
case ':':
nn = &ParseNode{NType: "html", NSubType: "dl"}
}
nn.Nodes = make([]*ParseNode, 0, 1)
n.Nodes = append(n.Nodes, nn)
n = nn
if i < len(this)-1 {
var elem *ParseNode
switch this[len] {
case '*', '#':
elem = &ParseNode{NType: "html", NSubType: "li"}
case ';':
elem = &ParseNode{NType: "html", NSubType: "dt"}
case ':':
elem = &ParseNode{NType: "html", NSubType: "dd"}
}
elem.Nodes = make([]*ParseNode, 0, 1)
n.Nodes = append(n.Nodes, elem)
n = elem
}
}
var nitem *ParseNode
switch this[len] {
case '*', '#':
nitem = &ParseNode{NType: "html", NSubType: "li"}
case ';':
nitem = &ParseNode{NType: "html", NSubType: "dt"}
case ':':
nitem = &ParseNode{NType: "html", NSubType: "dd"}
}
n := &ParseNode{NType: "html", NSubType: st}
nl = append(nl, n)
} */
case "newline": case "newline":
n := &ParseNode{NType: "text", Contents: "\n"} n := &ParseNode{NType: "text", Contents: "\n"}
nl = append(nl, n) nl = append(nl, n)
@@ -621,7 +529,6 @@ func (a *Article) internalParse(t []*Token) ([]*ParseNode, error) {
}
if templateIndex >= len(a.Templates) {
return nil, errors.New("Template index out of range")
//fmt.Println("Template index out of range", t[ti])
} else {
n := &ParseNode{NType: t[ti].TType, Contents: a.Templates[templateIndex].Name}
nl = append(nl, n)

View file

@@ -16,10 +16,6 @@ limitations under the License.
package gowiki
// "bytes"
// "errors"
// "fmt"
func (a *Article) ParseSimple() error {
a.Text = a.MediaWiki
a.Parsed = true

View file

@@ -17,11 +17,8 @@ limitations under the License.
package gowiki
import (
// "bytes"
"errors"
"fmt"
// "html"
"regexp"
"sort"
"strings"
@@ -160,14 +157,6 @@ func (a *Article) decodeHTMLtag(l string) (int, string, string, bool, bool) {
closefound := false
tagend := 0
tagstart := 0
//taking care of comments at preprocessing time
/* if strings.HasPrefix(l, "<!--") {
i := strings.Index(l[4:], "-->")
if i == -1 {
return len(l), "!--", l[4:], true, true
}
return 4 + i + 3, "!--", l[4 : 4+i], true, true
} */
dhtLoop:
for idx, rv := range l {
// fmt.Println(string(rv), inquote, string(quote), idx, matchingpos)
@@ -214,7 +203,6 @@ dhtLoop:
attr = l[tagend:matchingpos]
}
return matchingpos + 1, tag, attr, closefound, true
// e, tag, attr, closed, ok := decodeHTMLtag(l[pos:end])
}
func matchPrefixes(s string, prefixes []string) bool {
@@ -368,7 +356,6 @@ plLoop2:
endpos = idx
break plLoop2
}
}
case ']':
if intLinkOpen && len(l) > idx+1 && l[idx+1] == ']' {
@@ -469,7 +456,6 @@ plLoop:
if len(pipepos) == 0 {
link = WikiCanonicalForm(l[2:matchingpos])
nt = []*Token{&Token{TText: l[2:matchingpos], TType: "text"}}
} else {
link = WikiCanonicalForm(l[2:pipepos[0]])
for i := 0; i < len(pipepos)-1; i++ {
@@ -507,12 +493,10 @@ func (a *Article) decodeBehavSwitch(l string) (int, bool) {
} else {
return len(match), true
}
// e, ok := decodeMagic(l[pos:end])
}
func (a *Article) parseInlineText(l string, start, end int) ([]*Token, error) {
nt := make([]*Token, 0)
// fmt.Println("in parseInlineText")
tStart, tEnd := start, start
@@ -544,19 +528,6 @@ func (a *Article) parseInlineText(l string, start, end int) ([]*Token, error) {
tStart, tEnd = pos, pos
continue
}
/* case '{':
e, tt, ok := a.parseTemplateEtc(l[pos:end])
fmt.Println("template:", e, tt, ok)
if ok {
if len(cs) > 0 {
nt = append(nt, &Token{TText: cs, TType: "text"})
}
nt = append(nt, tt...)
pos += e
cs = ""
continue
}
cs += string(rv) */
case '_':
e, ok := a.decodeBehavSwitch(l[pos:end])
if ok {
@@ -693,20 +664,18 @@ func (a *Article) Tokenize(mw string, g PageGetter) ([]*Token, error) {
for i := range tokens {
if tokens[i].TType == "special" {
specialcount++
t, ok := templatemap[tokens[i].TText] //nowikipremathmap[tokens[i].TText]
t, ok := templatemap[tokens[i].TText]
if !ok {
return nil, errors.New("special not in map")
}
tokens[i] = t
}
}
// fmt.Println(specialcount, len(nowikipremathmap))
// if specialcount != len(nowikipremathmap) {
if specialcount != len(templatemap) {
if DebugLevel > 0 {
fmt.Println("[Tokenize] Warning: number of specials in map differs from number found")
}
// return nil, errors.New("number of specials in map differs from number found")
}
return tokens, nil
}
@@ -770,7 +739,7 @@ func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
am = append(am, moc...)
am = append(am, mcc...)
sort.Sort(ssInt(am))
// fmt.Println(am)
tokens := make(map[string]*Token, len(am))
if len(am) == 0 {
return mw, tokens
@@ -782,11 +751,10 @@ func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
openidx := 0
count := 0
for i := range am {
// fmt.Println("ctype", ctype, "lastclose", lastclose, "count", count, "openidx", openidx, "am[i]", am[i])
if (ctype != -1) && (am[i][4] == ctype+1) && (am[openidx][1] <= am[i][0]) {
// closing an open one
special := fmt.Sprintf("\x07%07d", count)
// special := fmt.Sprintf("@%07d", count)
tokens[special] = &Token{
TText: mw[am[openidx][1]:am[i][0]],
TType: strings.ToLower(mw[am[openidx][2]:am[openidx][3]]),
@@ -806,7 +774,7 @@ func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
if ctype != -1 {
//it's open: close it
special := fmt.Sprintf("\x07%07d", count)
// special := fmt.Sprintf("@%07d", count)
tokens[special] = &Token{
TText: mw[am[openidx][1]:len(mw)],
TType: strings.ToLower(mw[am[openidx][2]:am[openidx][3]]),

View file

@@ -17,7 +17,6 @@ limitations under the License.
package gowiki
import (
// "fmt"
"strings"
)