Strip refs completely.
This commit is contained in:
parent
0a4a4697ff
commit
cd0be486c3
90
tokenize.go
90
tokenize.go
|
@ -362,11 +362,8 @@ plLoop2:
|
|||
}
|
||||
case '<':
|
||||
if spacepos > 0 {
|
||||
// e, tag, attr, closed, ok := a.decodeHTMLtag(l[idx:len(l)])
|
||||
_, tag, _, _, ok := a.decodeHTMLtag(l[idx:len(l)])
|
||||
// fmt.Println("html tag in ext link. Line:", l, "\n\n", tag, ok)
|
||||
if ok && tag == "/ref" {
|
||||
// fmt.Println("closing link...")
|
||||
matchingpos = idx
|
||||
endpos = idx
|
||||
break plLoop2
|
||||
|
@ -657,7 +654,8 @@ func (a *Article) lineType(l string) string {
|
|||
|
||||
func (a *Article) Tokenize(mw string, g PageGetter) ([]*Token, error) {
|
||||
mwnc := a.stripComments(mw)
|
||||
mw_stripped, nowikipremathmap := a.stripNowikiPreMath(mwnc)
|
||||
mwnr := a.stripRefs(mwnc)
|
||||
mw_stripped, nowikipremathmap := a.stripNowikiPreMath(mwnr)
|
||||
mw_tmpl, templatemap := a.processTemplates(mw_stripped, nowikipremathmap, g)
|
||||
mw_links := a.preprocessLinks(mw_tmpl)
|
||||
|
||||
|
@ -719,6 +717,12 @@ func (a *Article) stripComments(mw string) string {
|
|||
return commentsRe.ReplaceAllLiteralString(mw, "")
|
||||
}
|
||||
|
||||
var refRe = regexp.MustCompile(`(?msU)<ref.*</ref>`)
|
||||
|
||||
func (a *Article) stripRefs(mw string) string {
|
||||
return refRe.ReplaceAllLiteralString(mw, "")
|
||||
}
|
||||
|
||||
var nowikiOpenRe = regexp.MustCompile(`(?i)<\s*(nowiki)\s*[^>/]*>`)
|
||||
var nowikiCloseRe = regexp.MustCompile(`(?i)<(/nowiki)\s*[^>/]*>`)
|
||||
var preOpenRe = regexp.MustCompile(`(?i)<\s*(pre)\s*[^>]*>`)
|
||||
|
@ -740,14 +744,6 @@ func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
|
|||
moc := mathOpenRe.FindAllStringSubmatchIndex(mw, -1)
|
||||
mcc := mathCloseRe.FindAllStringSubmatchIndex(mw, -1)
|
||||
|
||||
/*
|
||||
nwoc = append(nwoc, []int{len(mw) + 1, len(mw) + 1})
|
||||
nwcc = append(nwcc, []int{len(mw) + 1, len(mw) + 1})
|
||||
poc = append(poc, []int{len(mw) + 1, len(mw) + 1})
|
||||
pcc = append(pcc, []int{len(mw) + 1, len(mw) + 1})
|
||||
moc = append(moc, []int{len(mw) + 1, len(mw) + 1})
|
||||
mcc = append(mcc, []int{len(mw) + 1, len(mw) + 1})
|
||||
*/
|
||||
for i := range nwoc {
|
||||
nwoc[i] = append(nwoc[i], 0)
|
||||
}
|
||||
|
@ -844,73 +840,3 @@ func (a *Article) preprocessLinks(s string) string {
|
|||
}
|
||||
return string(mw)
|
||||
}
|
||||
|
||||
//var nowikiOpenRe = regexp.MustCompile(`(?i)<\s*nowiki\s*[^>/]*>`)
|
||||
//var nowikiCloseRe = regexp.MustCompile(`(?i)</nowiki\s*[^>/]*>`)
|
||||
//var nowikiOpenCloseRe = regexp.MustCompile(`(?i)<nowiki\s*[^>]*/>`)
|
||||
/*
|
||||
type WikiParser struct {
|
||||
mw string
|
||||
}
|
||||
|
||||
func NewWikiParser(mw string) *WikiParser {
|
||||
return &WikiParser{mw: mw}
|
||||
}
|
||||
|
||||
func (wp *WikiParser) doNowiki() {
|
||||
openCandidates := nowikiOpenRe.FindAllStringIndex(wp.mw, -1)
|
||||
closeCandidates := nowikiCloseRe.FindAllStringIndex(wp.mw, -1)
|
||||
openCloseCandidates := nowikiOpenCloseRe.FindAllStringIndex(wp.mw, -1)
|
||||
tail := []int{len(wp.mw) + 1, len(wp.mw) + 1}
|
||||
openCandidates = append(openCandidates, tail)
|
||||
closeCandidates = append(closeCandidates, tail)
|
||||
openCloseCandidates = append(openCloseCandidates, tail)
|
||||
oi := 0
|
||||
ci := 0
|
||||
oci := 0
|
||||
inNowiki := false
|
||||
ol = make([][]int, 0, len(openCandidates))
|
||||
cl = make([][]int, 0, len(closeCandidates))
|
||||
ocl = make([][]int, 0, len(openCloseCandidates))
|
||||
for {
|
||||
if oi == len(openCandidates)-1 &&
|
||||
ci == len(closeCandidates)-1 &&
|
||||
oci == len(openCloseCandidates)-1 {
|
||||
break
|
||||
}
|
||||
switch {
|
||||
case openCandidates[oi][0] <= closeCandidates[oi][0] &&
|
||||
openCandidates[oi][0] <= openCloseloseCandidates[oi][0]:
|
||||
if !inNowiki {
|
||||
ol = append(ol.openCandidates[oi])
|
||||
inNowiki = true
|
||||
}
|
||||
oi += 1
|
||||
|
||||
case closeCandidates[oi][0] <= openCandidates[oi][0] &&
|
||||
closeCandidates[oi][0] <= openCloseloseCandidates[oi][0]:
|
||||
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (wp *WikiParser) Parse() {
|
||||
doSGML()
|
||||
doNowiki()
|
||||
doMath()
|
||||
doPre()
|
||||
doBlanks()
|
||||
doHTMLvalidation()
|
||||
doReplaceVariables()
|
||||
doHR()
|
||||
doAllQuotes()
|
||||
doHeadings()
|
||||
doLists()
|
||||
doDates()
|
||||
doExternalLinks()
|
||||
doInternalLinks()
|
||||
doISBN()
|
||||
doRecombine()
|
||||
}
|
||||
*/
|
||||
|
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||
package gowiki
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
@ -64,6 +65,21 @@ func TestExternalLink(t *testing.T) {
|
|||
|
||||
l := a.GetTextLinks()
|
||||
if l[0].Text != "Test" || l[0].Link.PageName != "Https://test.org" {
|
||||
t.Error("Error parsing media link ", l)
|
||||
t.Error("Error parsing ext link ", l)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRefRemoval(t *testing.T) {
|
||||
mw := "Test<ref name=\"testref\">This is a text reference</ref>Test<ref>{{curly ref}}</ref>Test"
|
||||
t.Log(mw)
|
||||
|
||||
a, err := ParseArticle("Test", mw, &DummyPageGetter{})
|
||||
if err != nil {
|
||||
t.Error("Error:", err)
|
||||
}
|
||||
|
||||
l := a.GetText()
|
||||
if strings.TrimSpace(l) != "TestTestTest" {
|
||||
t.Error("Error removing ref ", l)
|
||||
}
|
||||
}
|
||||
|
|
Reference in a new issue