Don't render file links in abstract text

This commit is contained in:
Sven Windisch 2021-12-16 23:39:39 +01:00
parent fe16921713
commit 8995d1673d
5 changed files with 106 additions and 27 deletions

View file

@ -18,14 +18,11 @@ package gowiki
import (
"bytes"
// "errors"
// "fmt"
"html"
"regexp"
"strings"
)
// var Debug bool = false
var DebugLevel int = 0
type Article struct {
@ -37,15 +34,15 @@ type Article struct {
AbstractText string
Media []WikiLink
Tokens []*Token
// OldTokens []*Token
Root *ParseNode
Parsed bool
Text string
TextLinks []FullWikiLink
Templates []*Template
Root *ParseNode
Parsed bool
Text string
TextLinks []FullWikiLink
Templates []*Template
// unexported fields
gt bool
ga bool
text *bytes.Buffer
nchar int
innerParseErrorCount int
@ -85,8 +82,8 @@ func (a *Article) GetText() string {
}
func (a *Article) GetAbstract() string {
if !a.gt {
a.genText()
if !a.ga {
a.genAbstract()
}
return a.AbstractText
}
@ -141,7 +138,7 @@ func (namespaces Namespaces) WikiCanonicalFormNamespaceEsc(l string, defaultName
ns, ok := namespaces[strings.ToLower(cns)]
switch {
case ok && len(cns) > 0:
namespace = ns //strings.ToUpper(cns[0:1]) + strings.ToLower(cns[1:])
namespace = ns
case ok:
namespace = ""
default:

View file

@ -18,8 +18,6 @@ package gowiki
import (
"encoding/json"
// "os"
// "strings"
"testing"
)

41
text.go
View file

@ -26,8 +26,10 @@ func (a *Article) appendText(t string) {
a.text.WriteString(t)
}
func (a *Article) genTextInternal(root *ParseNode, indent int) {
// fullText determines whether to generate the complete article text (true) or just the article abstract (false)
func (a *Article) genTextInternal(root *ParseNode, fullText bool) {
lastwasspace := false
lastwasimage := false
for _, n := range root.Nodes {
var linkStart int
var fl FullWikiLink
@ -43,8 +45,11 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) {
case "text":
a.appendText(n.Contents)
case "image":
a.appendText("\n")
tappend = "\n"
if fullText {
a.appendText("\n")
tappend = "\n"
}
lastwasimage = true
case "link":
isLink = true
linkStart = len(a.text.Bytes())
@ -54,8 +59,8 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) {
case "h1", "h2", "h3", "h4", "h5", "h6":
a.appendText("\n")
tappend = "\n"
if len(a.AbstractText) == 0 {
a.AbstractText = a.text.String()
if !fullText {
return
}
case "br":
a.appendText("\n")
@ -64,7 +69,14 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) {
}
}
if len(n.Nodes) > 0 {
a.genTextInternal(n, 0)
if lastwasimage {
if fullText {
a.genTextInternal(n, fullText)
}
lastwasimage = false
} else {
a.genTextInternal(n, fullText)
}
}
if isLink {
ttmp := a.text.Bytes()
@ -76,7 +88,6 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) {
if n.NType == "space" {
lastwasspace = true
}
// a.Text += tappend
a.appendText(tappend)
}
@ -87,16 +98,22 @@ func (a *Article) genText() error {
a.text = bytes.NewBuffer(make([]byte, 1024*1024, 1024*1024))
a.text.Truncate(0)
a.nchar = 0
a.AbstractText = ""
a.genTextInternal(a.Root, 0)
a.genTextInternal(a.Root, true)
a.Text = string(a.text.Bytes())
if len(a.AbstractText) == 0 {
a.AbstractText = a.Text
}
a.gt = true
return nil
}
// genAbstract renders the article abstract and stores it in a.AbstractText.
//
// It replaces a.text (the same buffer field genText writes to) with an
// empty buffer backed by 1 MiB of storage, resets a.nchar, and walks the
// parse tree from a.Root with fullText set to false. The buffer contents
// become the abstract, and a.ga is set so GetAbstract does not regenerate
// it. The returned error is always nil.
func (a *Article) genAbstract() error {
	scratch := make([]byte, 1024*1024)
	a.text = bytes.NewBuffer(scratch[:0])
	a.nchar = 0

	a.genTextInternal(a.Root, false)

	a.AbstractText = a.text.String()
	a.ga = true
	return nil
}
// GenText is the exported entry point for text generation: it calls the
// unexported genText and returns its error unchanged.
func (a *Article) GenText() error {
return a.genText()
}

67
text_test.go Normal file
View file

@ -0,0 +1,67 @@
/*
Copyright (C) 2021 Sven Windisch <semantosoph@posteo.de>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gowiki
import (
"strings"
"testing"
)
// TestFulltext checks that the full article text keeps a file link,
// rendered on its own line between the surrounding text.
func TestFulltext(t *testing.T) {
	mw := "Test[[File:Deschd]]Test"
	t.Log(mw)
	a, err := ParseArticle("Test", mw, &DummyPageGetter{})
	if err != nil {
		// Abort: after a parse error the article must not be used, and
		// calling a method on it could panic instead of failing cleanly.
		t.Fatal("Error:", err)
	}
	l := a.GetText()
	if strings.TrimSpace(l) != "Test\nFile:Deschd\nTest" {
		t.Error("Error generating full text ", l)
	}
}
// TestAbstractNoFile checks that a file link is left out of the abstract,
// so only the surrounding text remains.
func TestAbstractNoFile(t *testing.T) {
	mw := "Test[[File:Deschd]]Test"
	t.Log(mw)
	a, err := ParseArticle("Test", mw, &DummyPageGetter{})
	if err != nil {
		// Abort: after a parse error the article must not be used, and
		// calling a method on it could panic instead of failing cleanly.
		t.Fatal("Error:", err)
	}
	l := a.GetAbstract()
	if strings.TrimSpace(l) != "TestTest" {
		t.Error("Error removing file link ", l)
	}
}
// TestAbstractNoHeadline checks that the abstract stops at the first
// headline: neither the headline nor the text after it is included.
func TestAbstractNoHeadline(t *testing.T) {
	mw := "ThisisAbstract\n== Testheadline ==\n NotanAbstract"
	t.Log(mw)
	a, err := ParseArticle("Test", mw, &DummyPageGetter{})
	if err != nil {
		// Abort: after a parse error the article must not be used, and
		// calling a method on it could panic instead of failing cleanly.
		t.Fatal("Error:", err)
	}
	l := a.GetAbstract()
	if strings.TrimSpace(l) != "ThisisAbstract" {
		t.Error("Error removing headline ", l)
	}
}

View file

@ -1,5 +1,5 @@
/*
Copyright (C) Sven Windisch <semantosoph@posteo.de>
Copyright (C) 2021 Sven Windisch <semantosoph@posteo.de>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.