Don't render file links in abstract text
This commit is contained in:
parent
fe16921713
commit
8995d1673d
21
gowiki.go
21
gowiki.go
|
@ -18,14 +18,11 @@ package gowiki
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
// "errors"
|
||||
// "fmt"
|
||||
"html"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// var Debug bool = false
|
||||
var DebugLevel int = 0
|
||||
|
||||
type Article struct {
|
||||
|
@ -37,15 +34,15 @@ type Article struct {
|
|||
AbstractText string
|
||||
Media []WikiLink
|
||||
Tokens []*Token
|
||||
// OldTokens []*Token
|
||||
Root *ParseNode
|
||||
Parsed bool
|
||||
Text string
|
||||
TextLinks []FullWikiLink
|
||||
Templates []*Template
|
||||
Root *ParseNode
|
||||
Parsed bool
|
||||
Text string
|
||||
TextLinks []FullWikiLink
|
||||
Templates []*Template
|
||||
|
||||
// unexported fields
|
||||
gt bool
|
||||
ga bool
|
||||
text *bytes.Buffer
|
||||
nchar int
|
||||
innerParseErrorCount int
|
||||
|
@ -85,8 +82,8 @@ func (a *Article) GetText() string {
|
|||
}
|
||||
|
||||
// GetAbstract returns a.AbstractText, first running a generation step when
// the corresponding "already generated" flag is unset.
// NOTE(review): this hunk shows two guard/generator pairs (a.gt with
// a.genText, and a.ga with a.genAbstract); presumably the a.ga/a.genAbstract
// pair replaces the other one — confirm against the committed file.
func (a *Article) GetAbstract() string {
|
||||
if !a.gt {
|
||||
a.genText()
|
||||
if !a.ga {
|
||||
a.genAbstract()
|
||||
}
|
||||
return a.AbstractText
|
||||
}
|
||||
|
@ -141,7 +138,7 @@ func (namespaces Namespaces) WikiCanonicalFormNamespaceEsc(l string, defaultName
|
|||
ns, ok := namespaces[strings.ToLower(cns)]
|
||||
switch {
|
||||
case ok && len(cns) > 0:
|
||||
namespace = ns //strings.ToUpper(cns[0:1]) + strings.ToLower(cns[1:])
|
||||
namespace = ns
|
||||
case ok:
|
||||
namespace = ""
|
||||
default:
|
||||
|
|
|
@ -18,8 +18,6 @@ package gowiki
|
|||
|
||||
import (
|
||||
"encoding/json"
|
||||
// "os"
|
||||
// "strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
|
41
text.go
41
text.go
|
@ -26,8 +26,10 @@ func (a *Article) appendText(t string) {
|
|||
a.text.WriteString(t)
|
||||
}
|
||||
|
||||
func (a *Article) genTextInternal(root *ParseNode, indent int) {
|
||||
// genTextInternal appends the text of root's child nodes to a.text; fullText selects the complete article text (true) or only the article abstract (false).
|
||||
func (a *Article) genTextInternal(root *ParseNode, fullText bool) {
|
||||
lastwasspace := false
|
||||
lastwasimage := false
|
||||
for _, n := range root.Nodes {
|
||||
var linkStart int
|
||||
var fl FullWikiLink
|
||||
|
@ -43,8 +45,11 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) {
|
|||
case "text":
|
||||
a.appendText(n.Contents)
|
||||
case "image":
|
||||
a.appendText("\n")
|
||||
tappend = "\n"
|
||||
if fullText {
|
||||
a.appendText("\n")
|
||||
tappend = "\n"
|
||||
}
|
||||
lastwasimage = true
|
||||
case "link":
|
||||
isLink = true
|
||||
linkStart = len(a.text.Bytes())
|
||||
|
@ -54,8 +59,8 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) {
|
|||
case "h1", "h2", "h3", "h4", "h5", "h6":
|
||||
a.appendText("\n")
|
||||
tappend = "\n"
|
||||
if len(a.AbstractText) == 0 {
|
||||
a.AbstractText = a.text.String()
|
||||
if !fullText {
|
||||
return
|
||||
}
|
||||
case "br":
|
||||
a.appendText("\n")
|
||||
|
@ -64,7 +69,14 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) {
|
|||
}
|
||||
}
|
||||
if len(n.Nodes) > 0 {
|
||||
a.genTextInternal(n, 0)
|
||||
if lastwasimage {
|
||||
if fullText {
|
||||
a.genTextInternal(n, fullText)
|
||||
}
|
||||
lastwasimage = false
|
||||
} else {
|
||||
a.genTextInternal(n, fullText)
|
||||
}
|
||||
}
|
||||
if isLink {
|
||||
ttmp := a.text.Bytes()
|
||||
|
@ -76,7 +88,6 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) {
|
|||
if n.NType == "space" {
|
||||
lastwasspace = true
|
||||
}
|
||||
// a.Text += tappend
|
||||
a.appendText(tappend)
|
||||
}
|
||||
|
||||
|
@ -87,16 +98,22 @@ func (a *Article) genText() error {
|
|||
a.text = bytes.NewBuffer(make([]byte, 1024*1024, 1024*1024))
|
||||
a.text.Truncate(0)
|
||||
a.nchar = 0
|
||||
a.AbstractText = ""
|
||||
a.genTextInternal(a.Root, 0)
|
||||
a.genTextInternal(a.Root, true)
|
||||
a.Text = string(a.text.Bytes())
|
||||
if len(a.AbstractText) == 0 {
|
||||
a.AbstractText = a.Text
|
||||
}
|
||||
a.gt = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// genAbstract rebuilds the article abstract. It replaces the scratch buffer
// a.text, resets a.nchar, runs genTextInternal over a.Root with fullText set
// to false, stores the buffer contents in a.AbstractText and sets a.ga.
// It always returns nil.
func (a *Article) genAbstract() error {
|
||||
// The buffer is created over a 1 MiB slice and then truncated to length 0,
// so it starts out empty.
a.text = bytes.NewBuffer(make([]byte, 1024*1024, 1024*1024))
|
||||
a.text.Truncate(0)
|
||||
a.nchar = 0
|
||||
a.genTextInternal(a.Root, false)
|
||||
a.AbstractText = string(a.text.Bytes())
|
||||
// Record that the abstract has been generated.
a.ga = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// GenText is the exported entry point for text generation: it calls genText
// and returns its error unchanged.
func (a *Article) GenText() error {
|
||||
return a.genText()
|
||||
}
|
||||
|
|
67
text_test.go
Normal file
67
text_test.go
Normal file
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
Copyright (C) 2021 Sven Windisch <semantosoph@posteo.de>
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package gowiki
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestFulltext parses "Test[[File:Deschd]]Test" and checks that GetText,
// after trimming surrounding whitespace, yields "Test\nFile:Deschd\nTest",
// i.e. the file link is kept in the full text on a line of its own.
// NOTE(review): a parse error is reported with t.Error, which does not stop
// the test, so a is still used afterwards — confirm whether t.Fatal is intended.
func TestFulltext(t *testing.T) {
|
||||
mw := "Test[[File:Deschd]]Test"
|
||||
t.Log(mw)
|
||||
|
||||
a, err := ParseArticle("Test", mw, &DummyPageGetter{})
|
||||
if err != nil {
|
||||
t.Error("Error:", err)
|
||||
}
|
||||
|
||||
l := a.GetText()
|
||||
if strings.TrimSpace(l) != "Test\nFile:Deschd\nTest" {
|
||||
t.Error("Error generating full text ", l)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAbstractNoFile parses "Test[[File:Deschd]]Test" and checks that
// GetAbstract, after trimming surrounding whitespace, yields "TestTest",
// i.e. the file link does not appear in the abstract.
// NOTE(review): a parse error is reported with t.Error, which does not stop
// the test, so a is still used afterwards — confirm whether t.Fatal is intended.
func TestAbstractNoFile(t *testing.T) {
|
||||
mw := "Test[[File:Deschd]]Test"
|
||||
t.Log(mw)
|
||||
|
||||
a, err := ParseArticle("Test", mw, &DummyPageGetter{})
|
||||
if err != nil {
|
||||
t.Error("Error:", err)
|
||||
}
|
||||
|
||||
l := a.GetAbstract()
|
||||
if strings.TrimSpace(l) != "TestTest" {
|
||||
t.Error("Error removing file link ", l)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAbstractNoHeadline parses text made of a lead line, a "== ... =="
// headline and a trailing line, and checks that GetAbstract, after trimming
// surrounding whitespace, yields only the lead line "ThisisAbstract".
// NOTE(review): a parse error is reported with t.Error, which does not stop
// the test, so a is still used afterwards — confirm whether t.Fatal is intended.
func TestAbstractNoHeadline(t *testing.T) {
|
||||
mw := "ThisisAbstract\n== Testheadline ==\n NotanAbstract"
|
||||
t.Log(mw)
|
||||
|
||||
a, err := ParseArticle("Test", mw, &DummyPageGetter{})
|
||||
if err != nil {
|
||||
t.Error("Error:", err)
|
||||
}
|
||||
|
||||
l := a.GetAbstract()
|
||||
if strings.TrimSpace(l) != "ThisisAbstract" {
|
||||
t.Error("Error removing headline ", l)
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Copyright (C) Sven Windisch <semantosoph@posteo.de>
|
||||
Copyright (C) 2021 Sven Windisch <semantosoph@posteo.de>
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
|
Reference in a new issue