Clean up code
This commit is contained in:
parent
8995d1673d
commit
6659b4450d
95
parse.go
95
parse.go
|
@ -1,5 +1,6 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
|
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
|
||||||
|
Copyright (C) 2021, Sven Windisch <semantosoph@posteo.de>
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
|
@ -99,21 +100,15 @@ func (a *Article) doQuotes() {
|
||||||
tn := make([]*Token, 0, len(a.Tokens))
|
tn := make([]*Token, 0, len(a.Tokens))
|
||||||
t := a.Tokens
|
t := a.Tokens
|
||||||
for ; ni < len(t); ni++ {
|
for ; ni < len(t); ni++ {
|
||||||
// log.Println(*t[ni])
|
|
||||||
|
|
||||||
if t[ni].TType == "quote" {
|
if t[ni].TType == "quote" {
|
||||||
l++
|
l++
|
||||||
// log.Println(l)
|
|
||||||
}
|
}
|
||||||
if t[ni].TType != "quote" || ni == len(t)-1 {
|
if t[ni].TType != "quote" || ni == len(t)-1 {
|
||||||
switch {
|
switch {
|
||||||
case l == 0:
|
case l == 0:
|
||||||
// log.Println(l)
|
|
||||||
case l == 1:
|
case l == 1:
|
||||||
// log.Println(l)
|
|
||||||
tn = append(tn, &Token{TText: "'", TType: "text"})
|
tn = append(tn, &Token{TText: "'", TType: "text"})
|
||||||
case l == 2:
|
case l == 2:
|
||||||
// log.Println(l)
|
|
||||||
switch state {
|
switch state {
|
||||||
case QS_b:
|
case QS_b:
|
||||||
tn = append(tn, &Token{TType: "html", TText: "i"})
|
tn = append(tn, &Token{TType: "html", TText: "i"})
|
||||||
|
@ -134,7 +129,6 @@ func (a *Article) doQuotes() {
|
||||||
state = QS_i
|
state = QS_i
|
||||||
}
|
}
|
||||||
case l == 3, l == 4:
|
case l == 3, l == 4:
|
||||||
// log.Println(l)
|
|
||||||
if l == 4 {
|
if l == 4 {
|
||||||
tn = append(tn, &Token{TText: "'", TType: "text"})
|
tn = append(tn, &Token{TText: "'", TType: "text"})
|
||||||
}
|
}
|
||||||
|
@ -158,7 +152,6 @@ func (a *Article) doQuotes() {
|
||||||
state = QS_b
|
state = QS_b
|
||||||
}
|
}
|
||||||
case l >= 5:
|
case l >= 5:
|
||||||
// log.Println(l)
|
|
||||||
s := ""
|
s := ""
|
||||||
for i := 5; i < l; i++ {
|
for i := 5; i < l; i++ {
|
||||||
s += "'"
|
s += "'"
|
||||||
|
@ -193,7 +186,6 @@ func (a *Article) doQuotes() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if t[ni].TType == "link" || t[ni].TType == "extlink" || t[ni].TType == "filelink" {
|
if t[ni].TType == "link" || t[ni].TType == "extlink" || t[ni].TType == "filelink" {
|
||||||
// log.Println(l)
|
|
||||||
save = state
|
save = state
|
||||||
switch state {
|
switch state {
|
||||||
case QS_b:
|
case QS_b:
|
||||||
|
@ -211,7 +203,6 @@ func (a *Article) doQuotes() {
|
||||||
l = 0
|
l = 0
|
||||||
}
|
}
|
||||||
if t[ni].TType == "closelink" || t[ni].TType == "closeextlink" || t[ni].TType == "closefilelink" {
|
if t[ni].TType == "closelink" || t[ni].TType == "closeextlink" || t[ni].TType == "closefilelink" {
|
||||||
// log.Println(l)
|
|
||||||
switch state {
|
switch state {
|
||||||
case QS_b:
|
case QS_b:
|
||||||
tn = append(tn, &Token{TType: "html", TText: "/b"})
|
tn = append(tn, &Token{TType: "html", TText: "/b"})
|
||||||
|
@ -230,11 +221,9 @@ func (a *Article) doQuotes() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if t[ni].TType != "quote" && t[ni].TType != "newline" {
|
if t[ni].TType != "quote" && t[ni].TType != "newline" {
|
||||||
// log.Println(l)
|
|
||||||
tn = append(tn, t[ni])
|
tn = append(tn, t[ni])
|
||||||
}
|
}
|
||||||
if t[ni].TType == "newline" || ni == len(t)-1 {
|
if t[ni].TType == "newline" || ni == len(t)-1 {
|
||||||
// log.Println(l)
|
|
||||||
switch state {
|
switch state {
|
||||||
case QS_b:
|
case QS_b:
|
||||||
tn = append(tn, &Token{TType: "html", TText: "/b"})
|
tn = append(tn, &Token{TType: "html", TText: "/b"})
|
||||||
|
@ -252,13 +241,11 @@ func (a *Article) doQuotes() {
|
||||||
save = QS_none
|
save = QS_none
|
||||||
}
|
}
|
||||||
if t[ni].TType == "newline" {
|
if t[ni].TType == "newline" {
|
||||||
// log.Println(l)
|
|
||||||
tn = append(tn, t[ni])
|
tn = append(tn, t[ni])
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
a.Tokens = tn
|
a.Tokens = tn
|
||||||
// a.OldTokens = t
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//nowiki, wikipre, pre, math, quote, colon, magic, h?, *, #, ;, :, html,
|
//nowiki, wikipre, pre, math, quote, colon, magic, h?, *, #, ;, :, html,
|
||||||
|
@ -283,7 +270,6 @@ func (a *Article) internalParse(t []*Token) ([]*ParseNode, error) {
|
||||||
lastti := -1
|
lastti := -1
|
||||||
for ti < len(t) {
|
for ti < len(t) {
|
||||||
if ti == lastti {
|
if ti == lastti {
|
||||||
// fmt.Println(len(t), ti, *t[ti], *t[ti-1], *t[ti+1])
|
|
||||||
return nil, errors.New("parsing issue")
|
return nil, errors.New("parsing issue")
|
||||||
}
|
}
|
||||||
lastti = ti
|
lastti = ti
|
||||||
|
@ -512,84 +498,6 @@ func (a *Article) internalParse(t []*Token) ([]*ParseNode, error) {
|
||||||
}
|
}
|
||||||
case "*", "#", ";", ":":
|
case "*", "#", ";", ":":
|
||||||
ti += 1
|
ti += 1
|
||||||
/* stack := ""
|
|
||||||
si := 0
|
|
||||||
ni := ti
|
|
||||||
ln := &ParseNode{NType: "root", Nodes: make([]*ParseNode, 0, 4)}
|
|
||||||
for {
|
|
||||||
|
|
||||||
this := ""
|
|
||||||
islist := false
|
|
||||||
for ; ni < len(t); ni++ {
|
|
||||||
switch t[ni].TType {
|
|
||||||
case "*", "#", ";", ":":
|
|
||||||
islist = true
|
|
||||||
}
|
|
||||||
if islist {
|
|
||||||
this += t[ni].TType
|
|
||||||
} else {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
same := 0
|
|
||||||
for i := 0; i < len(this) && i < len(stack); i++ {
|
|
||||||
if this[i] == stack[i] ||
|
|
||||||
(this[i] == ';' && stack[i] == ':') ||
|
|
||||||
(this[i] == ':' && stack[i] == ';') {
|
|
||||||
same++
|
|
||||||
} else {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
n := ln
|
|
||||||
for i := 0; i < same; i++ {
|
|
||||||
n = n.Nodes[len(n.Nodes)-1]
|
|
||||||
n = n.Nodes[len(n.Nodes)-1]
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := same; i < len(this); i++ { //open
|
|
||||||
var nn *ParseNode
|
|
||||||
switch this[i] {
|
|
||||||
case '*':
|
|
||||||
nn = &ParseNode{NType: "html", NSubType: "ul"}
|
|
||||||
case '#':
|
|
||||||
nn = &ParseNode{NType: "html", NSubType: "ol"}
|
|
||||||
case ';':
|
|
||||||
nn = &ParseNode{NType: "html", NSubType: "dl"}
|
|
||||||
case ':':
|
|
||||||
nn = &ParseNode{NType: "html", NSubType: "dl"}
|
|
||||||
}
|
|
||||||
nn.Nodes = make([]*ParseNode, 0, 1)
|
|
||||||
n.Nodes = append(n.Nodes, nn)
|
|
||||||
n = nn
|
|
||||||
if i < len(this)-1 {
|
|
||||||
var elem *ParseNode
|
|
||||||
switch this[len] {
|
|
||||||
case '*', '#':
|
|
||||||
elem = &ParseNode{NType: "html", NSubType: "li"}
|
|
||||||
case ';':
|
|
||||||
elem = &ParseNode{NType: "html", NSubType: "dt"}
|
|
||||||
case ':':
|
|
||||||
elem = &ParseNode{NType: "html", NSubType: "dd"}
|
|
||||||
}
|
|
||||||
elem.Nodes = make([]*ParseNode, 0, 1)
|
|
||||||
n.Nodes = append(n.Nodes, elem)
|
|
||||||
n = elem
|
|
||||||
}
|
|
||||||
}
|
|
||||||
var nitem *ParseNode
|
|
||||||
switch this[len] {
|
|
||||||
case '*', '#':
|
|
||||||
nitem = &ParseNode{NType: "html", NSubType: "li"}
|
|
||||||
case ';':
|
|
||||||
nitem = &ParseNode{NType: "html", NSubType: "dt"}
|
|
||||||
case ':':
|
|
||||||
nitem = &ParseNode{NType: "html", NSubType: "dd"}
|
|
||||||
}
|
|
||||||
n := &ParseNode{NType: "html", NSubType: st}
|
|
||||||
nl = append(nl, n)
|
|
||||||
|
|
||||||
} */
|
|
||||||
case "newline":
|
case "newline":
|
||||||
n := &ParseNode{NType: "text", Contents: "\n"}
|
n := &ParseNode{NType: "text", Contents: "\n"}
|
||||||
nl = append(nl, n)
|
nl = append(nl, n)
|
||||||
|
@ -621,7 +529,6 @@ func (a *Article) internalParse(t []*Token) ([]*ParseNode, error) {
|
||||||
}
|
}
|
||||||
if templateIndex >= len(a.Templates) {
|
if templateIndex >= len(a.Templates) {
|
||||||
return nil, errors.New("Template index out of range")
|
return nil, errors.New("Template index out of range")
|
||||||
//fmt.Println("Template index out of range", t[ti])
|
|
||||||
} else {
|
} else {
|
||||||
n := &ParseNode{NType: t[ti].TType, Contents: a.Templates[templateIndex].Name}
|
n := &ParseNode{NType: t[ti].TType, Contents: a.Templates[templateIndex].Name}
|
||||||
nl = append(nl, n)
|
nl = append(nl, n)
|
||||||
|
|
|
@ -16,10 +16,6 @@ limitations under the License.
|
||||||
|
|
||||||
package gowiki
|
package gowiki
|
||||||
|
|
||||||
// "bytes"
|
|
||||||
// "errors"
|
|
||||||
// "fmt"
|
|
||||||
|
|
||||||
func (a *Article) ParseSimple() error {
|
func (a *Article) ParseSimple() error {
|
||||||
a.Text = a.MediaWiki
|
a.Text = a.MediaWiki
|
||||||
a.Parsed = true
|
a.Parsed = true
|
||||||
|
|
42
tokenize.go
42
tokenize.go
|
@ -17,11 +17,8 @@ limitations under the License.
|
||||||
package gowiki
|
package gowiki
|
||||||
|
|
||||||
import (
|
import (
|
||||||
// "bytes"
|
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
// "html"
|
|
||||||
"regexp"
|
"regexp"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -160,14 +157,6 @@ func (a *Article) decodeHTMLtag(l string) (int, string, string, bool, bool) {
|
||||||
closefound := false
|
closefound := false
|
||||||
tagend := 0
|
tagend := 0
|
||||||
tagstart := 0
|
tagstart := 0
|
||||||
//taking care of comments at preprocessing time
|
|
||||||
/* if strings.HasPrefix(l, "<!--") {
|
|
||||||
i := strings.Index(l[4:], "-->")
|
|
||||||
if i == -1 {
|
|
||||||
return len(l), "!--", l[4:], true, true
|
|
||||||
}
|
|
||||||
return 4 + i + 3, "!--", l[4 : 4+i], true, true
|
|
||||||
} */
|
|
||||||
dhtLoop:
|
dhtLoop:
|
||||||
for idx, rv := range l {
|
for idx, rv := range l {
|
||||||
// fmt.Println(string(rv), inquote, string(quote), idx, matchingpos)
|
// fmt.Println(string(rv), inquote, string(quote), idx, matchingpos)
|
||||||
|
@ -214,7 +203,6 @@ dhtLoop:
|
||||||
attr = l[tagend:matchingpos]
|
attr = l[tagend:matchingpos]
|
||||||
}
|
}
|
||||||
return matchingpos + 1, tag, attr, closefound, true
|
return matchingpos + 1, tag, attr, closefound, true
|
||||||
// e, tag, attr, closed, ok := decodeHTMLtag(l[pos:end])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchPrefixes(s string, prefixes []string) bool {
|
func matchPrefixes(s string, prefixes []string) bool {
|
||||||
|
@ -368,7 +356,6 @@ plLoop2:
|
||||||
endpos = idx
|
endpos = idx
|
||||||
break plLoop2
|
break plLoop2
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
case ']':
|
case ']':
|
||||||
if intLinkOpen && len(l) > idx+1 && l[idx+1] == ']' {
|
if intLinkOpen && len(l) > idx+1 && l[idx+1] == ']' {
|
||||||
|
@ -469,7 +456,6 @@ plLoop:
|
||||||
if len(pipepos) == 0 {
|
if len(pipepos) == 0 {
|
||||||
link = WikiCanonicalForm(l[2:matchingpos])
|
link = WikiCanonicalForm(l[2:matchingpos])
|
||||||
nt = []*Token{&Token{TText: l[2:matchingpos], TType: "text"}}
|
nt = []*Token{&Token{TText: l[2:matchingpos], TType: "text"}}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
link = WikiCanonicalForm(l[2:pipepos[0]])
|
link = WikiCanonicalForm(l[2:pipepos[0]])
|
||||||
for i := 0; i < len(pipepos)-1; i++ {
|
for i := 0; i < len(pipepos)-1; i++ {
|
||||||
|
@ -507,12 +493,10 @@ func (a *Article) decodeBehavSwitch(l string) (int, bool) {
|
||||||
} else {
|
} else {
|
||||||
return len(match), true
|
return len(match), true
|
||||||
}
|
}
|
||||||
// e, ok := decodeMagic(l[pos:end])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *Article) parseInlineText(l string, start, end int) ([]*Token, error) {
|
func (a *Article) parseInlineText(l string, start, end int) ([]*Token, error) {
|
||||||
nt := make([]*Token, 0)
|
nt := make([]*Token, 0)
|
||||||
// fmt.Println("in parseInlineText")
|
|
||||||
|
|
||||||
tStart, tEnd := start, start
|
tStart, tEnd := start, start
|
||||||
|
|
||||||
|
@ -544,19 +528,6 @@ func (a *Article) parseInlineText(l string, start, end int) ([]*Token, error) {
|
||||||
tStart, tEnd = pos, pos
|
tStart, tEnd = pos, pos
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
/* case '{':
|
|
||||||
e, tt, ok := a.parseTemplateEtc(l[pos:end])
|
|
||||||
fmt.Println("template:", e, tt, ok)
|
|
||||||
if ok {
|
|
||||||
if len(cs) > 0 {
|
|
||||||
nt = append(nt, &Token{TText: cs, TType: "text"})
|
|
||||||
}
|
|
||||||
nt = append(nt, tt...)
|
|
||||||
pos += e
|
|
||||||
cs = ""
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
cs += string(rv) */
|
|
||||||
case '_':
|
case '_':
|
||||||
e, ok := a.decodeBehavSwitch(l[pos:end])
|
e, ok := a.decodeBehavSwitch(l[pos:end])
|
||||||
if ok {
|
if ok {
|
||||||
|
@ -693,20 +664,18 @@ func (a *Article) Tokenize(mw string, g PageGetter) ([]*Token, error) {
|
||||||
for i := range tokens {
|
for i := range tokens {
|
||||||
if tokens[i].TType == "special" {
|
if tokens[i].TType == "special" {
|
||||||
specialcount++
|
specialcount++
|
||||||
t, ok := templatemap[tokens[i].TText] //nowikipremathmap[tokens[i].TText]
|
t, ok := templatemap[tokens[i].TText]
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, errors.New("special not in map")
|
return nil, errors.New("special not in map")
|
||||||
}
|
}
|
||||||
tokens[i] = t
|
tokens[i] = t
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// fmt.Println(specialcount, len(nowikipremathmap))
|
|
||||||
// if specialcount != len(nowikipremathmap) {
|
|
||||||
if specialcount != len(templatemap) {
|
if specialcount != len(templatemap) {
|
||||||
if DebugLevel > 0 {
|
if DebugLevel > 0 {
|
||||||
fmt.Println("[Tokenize] Warning: number of specials in map differs from number found")
|
fmt.Println("[Tokenize] Warning: number of specials in map differs from number found")
|
||||||
}
|
}
|
||||||
// return nil, errors.New("number of specials in map differs from number found")
|
|
||||||
}
|
}
|
||||||
return tokens, nil
|
return tokens, nil
|
||||||
}
|
}
|
||||||
|
@ -770,7 +739,7 @@ func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
|
||||||
am = append(am, moc...)
|
am = append(am, moc...)
|
||||||
am = append(am, mcc...)
|
am = append(am, mcc...)
|
||||||
sort.Sort(ssInt(am))
|
sort.Sort(ssInt(am))
|
||||||
// fmt.Println(am)
|
|
||||||
tokens := make(map[string]*Token, len(am))
|
tokens := make(map[string]*Token, len(am))
|
||||||
if len(am) == 0 {
|
if len(am) == 0 {
|
||||||
return mw, tokens
|
return mw, tokens
|
||||||
|
@ -782,11 +751,10 @@ func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
|
||||||
openidx := 0
|
openidx := 0
|
||||||
count := 0
|
count := 0
|
||||||
for i := range am {
|
for i := range am {
|
||||||
// fmt.Println("ctype", ctype, "lastclose", lastclose, "count", count, "openidx", openidx, "am[i]", am[i])
|
|
||||||
if (ctype != -1) && (am[i][4] == ctype+1) && (am[openidx][1] <= am[i][0]) {
|
if (ctype != -1) && (am[i][4] == ctype+1) && (am[openidx][1] <= am[i][0]) {
|
||||||
// closing an open one
|
// closing an open one
|
||||||
special := fmt.Sprintf("\x07%07d", count)
|
special := fmt.Sprintf("\x07%07d", count)
|
||||||
// special := fmt.Sprintf("@%07d", count)
|
|
||||||
tokens[special] = &Token{
|
tokens[special] = &Token{
|
||||||
TText: mw[am[openidx][1]:am[i][0]],
|
TText: mw[am[openidx][1]:am[i][0]],
|
||||||
TType: strings.ToLower(mw[am[openidx][2]:am[openidx][3]]),
|
TType: strings.ToLower(mw[am[openidx][2]:am[openidx][3]]),
|
||||||
|
@ -806,7 +774,7 @@ func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
|
||||||
if ctype != -1 {
|
if ctype != -1 {
|
||||||
//it's open: close it
|
//it's open: close it
|
||||||
special := fmt.Sprintf("\x07%07d", count)
|
special := fmt.Sprintf("\x07%07d", count)
|
||||||
// special := fmt.Sprintf("@%07d", count)
|
|
||||||
tokens[special] = &Token{
|
tokens[special] = &Token{
|
||||||
TText: mw[am[openidx][1]:len(mw)],
|
TText: mw[am[openidx][1]:len(mw)],
|
||||||
TType: strings.ToLower(mw[am[openidx][2]:am[openidx][3]]),
|
TType: strings.ToLower(mw[am[openidx][2]:am[openidx][3]]),
|
||||||
|
|
Reference in a new issue