From 8995d1673d0024e2b4351cbd9d483cedff15773f Mon Sep 17 00:00:00 2001 From: Sven Windisch Date: Thu, 16 Dec 2021 23:39:39 +0100 Subject: [PATCH] Don't render file links in abstract text --- gowiki.go | 21 +++++++-------- gowiki_test.go | 2 -- text.go | 41 ++++++++++++++++++++--------- text_test.go | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ tokenize_test.go | 2 +- 5 files changed, 106 insertions(+), 27 deletions(-) create mode 100644 text_test.go diff --git a/gowiki.go b/gowiki.go index 654f056..60b68ca 100644 --- a/gowiki.go +++ b/gowiki.go @@ -18,14 +18,11 @@ package gowiki import ( "bytes" - // "errors" - // "fmt" "html" "regexp" "strings" ) -// var Debug bool = false var DebugLevel int = 0 type Article struct { @@ -37,15 +34,15 @@ type Article struct { AbstractText string Media []WikiLink Tokens []*Token - // OldTokens []*Token - Root *ParseNode - Parsed bool - Text string - TextLinks []FullWikiLink - Templates []*Template + Root *ParseNode + Parsed bool + Text string + TextLinks []FullWikiLink + Templates []*Template // unexported fields gt bool + ga bool text *bytes.Buffer nchar int innerParseErrorCount int @@ -85,8 +82,8 @@ func (a *Article) GetText() string { } func (a *Article) GetAbstract() string { - if !a.gt { - a.genText() + if !a.ga { + a.genAbstract() } return a.AbstractText } @@ -141,7 +138,7 @@ func (namespaces Namespaces) WikiCanonicalFormNamespaceEsc(l string, defaultName ns, ok := namespaces[strings.ToLower(cns)] switch { case ok && len(cns) > 0: - namespace = ns //strings.ToUpper(cns[0:1]) + strings.ToLower(cns[1:]) + namespace = ns case ok: namespace = "" default: diff --git a/gowiki_test.go b/gowiki_test.go index 69ae2e5..f0b474f 100644 --- a/gowiki_test.go +++ b/gowiki_test.go @@ -18,8 +18,6 @@ package gowiki import ( "encoding/json" - // "os" - // "strings" "testing" ) diff --git a/text.go b/text.go index 7f16961..28e340b 100644 --- a/text.go +++ b/text.go @@ -26,8 +26,10 @@ func (a *Article) appendText(t string) { a.text.WriteString(t) } -func (a *Article) genTextInternal(root *ParseNode, indent int) { +// fullText determines whether to generate the complete article text (true) or just the article abstract (false) +func (a *Article) genTextInternal(root *ParseNode, fullText bool) { lastwasspace := false + lastwasimage := false for _, n := range root.Nodes { var linkStart int var fl FullWikiLink @@ -43,8 +45,11 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) { case "text": a.appendText(n.Contents) case "image": - a.appendText("\n") - tappend = "\n" + if fullText { + a.appendText("\n") + tappend = "\n" + } + lastwasimage = true case "link": isLink = true linkStart = len(a.text.Bytes()) @@ -54,8 +59,8 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) { case "h1", "h2", "h3", "h4", "h5", "h6": a.appendText("\n") tappend = "\n" - if len(a.AbstractText) == 0 { - a.AbstractText = a.text.String() + if !fullText { + return } case "br": a.appendText("\n") @@ -64,7 +69,14 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) { } } if len(n.Nodes) > 0 { - a.genTextInternal(n, 0) + if lastwasimage { + if fullText { + a.genTextInternal(n, fullText) + } + lastwasimage = false + } else { + a.genTextInternal(n, fullText) + } } if isLink { ttmp := a.text.Bytes() @@ -76,7 +88,6 @@ func (a *Article) genTextInternal(root *ParseNode, indent int) { if n.NType == "space" { lastwasspace = true } - // a.Text += tappend a.appendText(tappend) } @@ -87,16 +98,22 @@ func (a *Article) genText() error { a.text = bytes.NewBuffer(make([]byte, 1024*1024, 1024*1024)) a.text.Truncate(0) a.nchar = 0 - a.AbstractText = "" - a.genTextInternal(a.Root, 0) + a.genTextInternal(a.Root, true) a.Text = string(a.text.Bytes()) - if len(a.AbstractText) == 0 { - a.AbstractText = a.Text - } a.gt = true return nil } +func (a *Article) genAbstract() error { + a.text = bytes.NewBuffer(make([]byte, 1024*1024, 1024*1024)) + a.text.Truncate(0) + a.nchar = 0 + a.genTextInternal(a.Root, false) + a.AbstractText = string(a.text.Bytes()) + a.ga = true + return nil +} + func (a *Article) GenText() error { return a.genText() } diff --git a/text_test.go b/text_test.go new file mode 100644 index 0000000..bac1855 --- /dev/null +++ b/text_test.go @@ -0,0 +1,67 @@ +/* +Copyright (C) 2021 Sven Windisch + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gowiki + +import ( + "strings" + "testing" +) + +func TestFulltext(t *testing.T) { + mw := "Test[[File:Deschd]]Test" + t.Log(mw) + + a, err := ParseArticle("Test", mw, &DummyPageGetter{}) + if err != nil { + t.Error("Error:", err) + } + + l := a.GetText() + if strings.TrimSpace(l) != "Test\nFile:Deschd\nTest" { + t.Error("Error generating full text ", l) + } +} + +func TestAbstractNoFile(t *testing.T) { + mw := "Test[[File:Deschd]]Test" + t.Log(mw) + + a, err := ParseArticle("Test", mw, &DummyPageGetter{}) + if err != nil { + t.Error("Error:", err) + } + + l := a.GetAbstract() + if strings.TrimSpace(l) != "TestTest" { + t.Error("Error removing file link ", l) + } +} + +func TestAbstractNoHeadline(t *testing.T) { + mw := "ThisisAbstract\n== Testheadline ==\n NotanAbstract" + t.Log(mw) + + a, err := ParseArticle("Test", mw, &DummyPageGetter{}) + if err != nil { + t.Error("Error:", err) + } + + l := a.GetAbstract() + if strings.TrimSpace(l) != "ThisisAbstract" { + t.Error("Error removing headline ", l) + } +} diff --git a/tokenize_test.go b/tokenize_test.go index 11a7fc4..bb8f393 100644 --- a/tokenize_test.go +++ b/tokenize_test.go @@ -1,5 +1,5 @@ /* -Copyright (C) Sven Windisch +Copyright (C) 2021 Sven Windisch Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.