Adding library code.

This commit is contained in:
Michele M. Franceschini 2018-01-03 16:21:59 -05:00
parent d5cca56718
commit 93aa7513fb
11 changed files with 2921 additions and 1 deletions

202
LICENSE Normal file
View file

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -1,3 +1,3 @@
# gowiki
Gowiki is a golang library to parse mediawiki markup as found in Wikipedia pages
Gowiki is a golang library to parse mediawiki markup as found in Wikipedia pages.

233
gowiki.go Normal file
View file

@ -0,0 +1,233 @@
/*
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gowiki
import (
	"bytes"
	// "errors"
	// "fmt"
	"html"
	"regexp"
	"strings"
	"unicode"
	"unicode/utf8"
)
// var Debug bool = false
var DebugLevel int = 0
type Article struct {
MediaWiki string
Title string
Links []WikiLink
ExtLinks []string
Type string
AbstractText string
Media []WikiLink
Tokens []*Token
// OldTokens []*Token
Root *ParseNode
Parsed bool
Text string
TextLinks []FullWikiLink
Templates []*Template
// unexported fields
gt bool
text *bytes.Buffer
nchar int
innerParseErrorCount int
}
type WikiLink struct {
Namespace string
PageName string
Anchor string
}
type FullWikiLink struct {
Link WikiLink
Text string
Start int // rune offset of beginning
End int // rune offset of end (index of the char after the last)
}
type PageGetter interface {
Get(page WikiLink) (string, error)
}
func NewArticle(title, text string) (*Article, error) {
a := new(Article)
a.Title = title
a.MediaWiki = text
a.Links = make([]WikiLink, 0, 16)
a.Media = make([]WikiLink, 0, 16)
a.TextLinks = make([]FullWikiLink, 0, 16)
a.ExtLinks = make([]string, 0, 16)
return a, nil
}
func (a *Article) GetText() string {
if !a.gt {
a.genText()
}
return a.Text
}
func (a *Article) GetAbstract() string {
if !a.gt {
a.genText()
}
return a.AbstractText
}
func (a *Article) GetLinks() []WikiLink {
return a.Links
}
func (a *Article) GetExternalLinks() []string {
return a.ExtLinks
}
func (a *Article) GetMedia() []WikiLink {
return a.Media
}
func (a *Article) GetTextLinks() []FullWikiLink {
if !a.gt {
a.genText()
}
return a.TextLinks
}
var canoReSpaces = regexp.MustCompile(`[ _]+`)
func WikiCanonicalFormEsc(l string, unescape bool) WikiLink {
return StandardNamespaces.WikiCanonicalFormNamespaceEsc(l, "", unescape)
}
func WikiCanonicalForm(l string) WikiLink {
return StandardNamespaces.WikiCanonicalFormNamespaceEsc(l, "", true)
}
func WikiCanonicalFormNamespace(l string, defaultNamespace string) WikiLink {
return StandardNamespaces.WikiCanonicalFormNamespaceEsc(l, defaultNamespace, true)
}
func (namespaces Namespaces) WikiCanonicalFormNamespaceEsc(l string, defaultNamespace string, unescape bool) WikiLink {
hpos := strings.IndexRune(l, '#')
anchor := ""
if hpos >= 0 {
anchor = l[hpos+1:]
l = l[0:hpos]
}
i := strings.Index(l, ":")
namespace := defaultNamespace
if i >= 0 {
cns := strings.TrimSpace(canoReSpaces.ReplaceAllString(l[:i], " "))
if unescape {
cns = html.UnescapeString(cns)
}
ns, ok := namespaces[strings.ToLower(cns)]
switch {
case ok && len(cns) > 0:
namespace = ns //strings.ToUpper(cns[0:1]) + strings.ToLower(cns[1:])
case ok:
namespace = ""
default:
i = -1
}
}
article := strings.TrimSpace(canoReSpaces.ReplaceAllString(l[i+1:], " "))
anchor = canoReSpaces.ReplaceAllString(anchor, " ")
if unescape {
article = html.UnescapeString(article)
anchor = html.UnescapeString(anchor)
}
if len(article) > 0 {
article = strings.ToUpper(article[0:1]) + article[1:]
}
return WikiLink{Namespace: namespace, PageName: article, Anchor: anchor}
}
func (wl *WikiLink) FullPagename() string {
if len(wl.Namespace) == 0 {
return wl.PageName
}
return wl.Namespace + ":" + wl.PageName
}
func (wl *WikiLink) FullPagenameAnchor() string {
ns := ""
if len(wl.Namespace) != 0 {
ns = wl.Namespace + ":"
}
an := ""
if len(wl.Anchor) != 0 {
an = "#" + wl.Anchor
}
return ns + wl.PageName + an
}
func (wl *WikiLink) IsImplicitSelfLink() bool {
return len(wl.PageName) == 0
}
func (wl *WikiLink) HasAnchor() bool {
return len(wl.Anchor) != 0
}
func (wl *WikiLink) GetAnchor() string {
return wl.Anchor
}
type Namespaces map[string]string
var StandardNamespaces Namespaces = map[string]string{
"media": "Media",
"special": "Special",
"talk": "Talk",
"user": "User",
"user talk": "User talk",
"wikipedia": "Wikipedia",
"wikipedia talk": "Wikipedia talk",
"file": "File",
"file talk": "File talk",
"mediawiki": "MediaWiki",
"mediawiki talk": "MediaWiki talk",
"template": "Template",
"template talk": "Template talk",
"help": "Help",
"help talk": "Help talk",
"category": "Category",
"category talk": "Category talk",
"portal": "Portal",
"portal talk": "Portal talk",
"book": "Book",
"book talk": "Book talk",
"draft": "Draft",
"draft talk": "Draft talk",
"education program": "Education Program",
"education program talk": "Education Program talk",
"timedtext": "TimedText",
"timedtext talk": "TimedText talk",
"module": "Module",
"module talk": "Module talk",
"topic": "Topic",
}
type DummyPageGetter struct{}
func (g *DummyPageGetter) Get(wl WikiLink) (string, error) {
return "", nil
}

46
gowiki_test.go Normal file
View file

@ -0,0 +1,46 @@
/*
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gowiki
import (
"encoding/json"
// "os"
// "strings"
"testing"
)
func TestParseArticle(t *testing.T) {
mw := "* ''[[The Album (ABBA album)|''The Album'']]'' (1977)"
t.Log(mw)
a, err := ParseArticle("Test", mw, &DummyPageGetter{})
if err != nil {
t.Error("Error:", err)
}
b, err := json.MarshalIndent(a.Tokens, "", "\t")
if err != nil {
t.Error("Error:", err)
}
t.Log("Tokens\n")
t.Log(string(b))
}
func TestWikiCanonicalFormNamespaceEsc(t *testing.T) {
wl := StandardNamespaces.WikiCanonicalFormNamespaceEsc("WiKIpEdia:pagename#section", "", true)
if wl.Namespace != "Wikipedia" || wl.PageName != "Pagename" || wl.Anchor != "section" {
t.Error("Error: wikilink not parsed correctly", wl)
}
}

636
parse.go Normal file
View file

@ -0,0 +1,636 @@
/*
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gowiki
import (
"errors"
"fmt"
"html"
"log"
"strconv"
"strings"
)
const maxInnerParseErrorCount = 100
type ParseNode struct {
NType string
NSubType string
Link WikiLink
Contents string
Flags int
Nodes []*ParseNode
}
func (a *Article) PrintParseTree() {
a.printParseTree(a.Root, 0)
}
func (a *Article) printParseTree(root *ParseNode, depth int) {
if depth > 20 {
return
}
spaces := "......................................"
min := len(spaces)
if depth < len(spaces) {
min = depth
}
if depth < 0 {
min = 0
}
prefix := spaces[0:min]
for _, n := range root.Nodes {
fmt.Printf("%s NType: %10s NSubType: %10s Contents: %16s Flags: %d\n", prefix, n.NType, n.NSubType, n.Contents, n.Flags)
if len(n.Nodes) > 0 {
a.printParseTree(n, depth+1)
}
}
}
const (
TClosed int = 1 << iota
)
const (
QS_none int = iota
QS_i
QS_b
QS_ib
QS_bi
)
func ParseArticle(title, text string, g PageGetter) (*Article, error) {
a, err := NewArticle(title, text)
if err != nil {
return nil, err
}
a.Tokens, err = a.Tokenize(a.MediaWiki, g)
if err != nil {
return a, err
}
err = a.parse()
if err != nil {
return a, err
}
a.gt = false
return a, nil
}
func (a *Article) doQuotes() {
log.SetFlags(log.Lshortfile) // | log.Ldate | log.Ltime)
state := QS_none
save := QS_none
l := 0
ni := 0
tn := make([]*Token, 0, len(a.Tokens))
t := a.Tokens
for ; ni < len(t); ni++ {
// log.Println(*t[ni])
if t[ni].TType == "quote" {
l++
// log.Println(l)
}
if t[ni].TType != "quote" || ni == len(t)-1 {
switch {
case l == 0:
// log.Println(l)
case l == 1:
// log.Println(l)
tn = append(tn, &Token{TText: "'", TType: "text"})
case l == 2:
// log.Println(l)
switch state {
case QS_b:
tn = append(tn, &Token{TType: "html", TText: "i"})
state = QS_bi
case QS_i:
tn = append(tn, &Token{TType: "html", TText: "/i"})
state = QS_none
case QS_bi:
tn = append(tn, &Token{TType: "html", TText: "/i"})
state = QS_b
case QS_ib:
tn = append(tn, &Token{TType: "html", TText: "/b"})
tn = append(tn, &Token{TType: "html", TText: "/i"})
tn = append(tn, &Token{TType: "html", TText: "b"})
state = QS_b
case QS_none:
tn = append(tn, &Token{TType: "html", TText: "i"})
state = QS_i
}
case l == 3, l == 4:
// log.Println(l)
if l == 4 {
tn = append(tn, &Token{TText: "'", TType: "text"})
}
switch state {
case QS_b:
tn = append(tn, &Token{TType: "html", TText: "/b"})
state = QS_none
case QS_i:
tn = append(tn, &Token{TType: "html", TText: "b"})
state = QS_ib
case QS_ib:
tn = append(tn, &Token{TType: "html", TText: "/b"})
state = QS_i
case QS_bi:
tn = append(tn, &Token{TType: "html", TText: "/i"})
tn = append(tn, &Token{TType: "html", TText: "/b"})
tn = append(tn, &Token{TType: "html", TText: "i"})
state = QS_i
case QS_none:
tn = append(tn, &Token{TType: "html", TText: "b"})
state = QS_b
}
case l >= 5:
// log.Println(l)
s := ""
for i := 5; i < l; i++ {
s += "'"
}
if len(s) > 0 {
tn = append(tn, &Token{TText: s, TType: "text"})
}
switch state {
case QS_b:
tn = append(tn, &Token{TType: "html", TText: "/b"})
tn = append(tn, &Token{TType: "html", TText: "i"})
state = QS_i
case QS_i:
tn = append(tn, &Token{TType: "html", TText: "/i"})
tn = append(tn, &Token{TType: "html", TText: "b"})
state = QS_b
case QS_ib:
tn = append(tn, &Token{TType: "html", TText: "/b"})
tn = append(tn, &Token{TType: "html", TText: "/i"})
state = QS_none
case QS_bi:
tn = append(tn, &Token{TType: "html", TText: "/i"})
tn = append(tn, &Token{TType: "html", TText: "/b"})
state = QS_none
case QS_none:
tn = append(tn, &Token{TType: "html", TText: "b"})
tn = append(tn, &Token{TType: "html", TText: "i"})
state = QS_bi
}
}
l = 0
}
if t[ni].TType == "link" || t[ni].TType == "extlink" || t[ni].TType == "filelink" {
// log.Println(l)
save = state
switch state {
case QS_b:
tn = append(tn, &Token{TType: "html", TText: "/b"})
case QS_i:
tn = append(tn, &Token{TType: "html", TText: "/i"})
case QS_ib:
tn = append(tn, &Token{TType: "html", TText: "/b"})
tn = append(tn, &Token{TType: "html", TText: "/i"})
case QS_bi:
tn = append(tn, &Token{TType: "html", TText: "/i"})
tn = append(tn, &Token{TType: "html", TText: "/b"})
}
state = QS_none
l = 0
}
if t[ni].TType == "closelink" || t[ni].TType == "closeextlink" || t[ni].TType == "closefilelink" {
// log.Println(l)
switch state {
case QS_b:
tn = append(tn, &Token{TType: "html", TText: "/b"})
case QS_i:
tn = append(tn, &Token{TType: "html", TText: "/i"})
case QS_ib:
tn = append(tn, &Token{TType: "html", TText: "/b"})
tn = append(tn, &Token{TType: "html", TText: "/i"})
case QS_bi:
tn = append(tn, &Token{TType: "html", TText: "/i"})
tn = append(tn, &Token{TType: "html", TText: "/b"})
}
state = save
save = QS_none
l = 0
}
if t[ni].TType != "quote" && t[ni].TType != "newline" {
// log.Println(l)
tn = append(tn, t[ni])
}
if t[ni].TType == "newline" || ni == len(t)-1 {
// log.Println(l)
switch state {
case QS_b:
tn = append(tn, &Token{TType: "html", TText: "/b"})
case QS_i:
tn = append(tn, &Token{TType: "html", TText: "/i"})
case QS_ib:
tn = append(tn, &Token{TType: "html", TText: "/b"})
tn = append(tn, &Token{TType: "html", TText: "/i"})
case QS_bi:
tn = append(tn, &Token{TType: "html", TText: "/i"})
tn = append(tn, &Token{TType: "html", TText: "/b"})
}
state = QS_none
l = 0
save = QS_none
}
if t[ni].TType == "newline" {
// log.Println(l)
tn = append(tn, t[ni])
}
}
a.Tokens = tn
// a.OldTokens = t
}
//nowiki, wikipre, pre, math, quote, colon, magic, h?, *, #, ;, :, html,
func (a *Article) parse() error {
a.doQuotes()
nodes, err := a.internalParse(a.Tokens)
if err != nil {
return err
}
root := &ParseNode{NType: "root", Nodes: nodes}
a.Root = root
a.Parsed = true
return nil
}
func isImage(t *Token) bool {
return strings.ToLower(t.TLink.Namespace) == "file"
}
func (a *Article) internalParse(t []*Token) ([]*ParseNode, error) {
ti := 0
nl := make([]*ParseNode, 0, 0)
lastti := -1
for ti < len(t) {
if ti == lastti {
// fmt.Println(len(t), ti, *t[ti], *t[ti-1], *t[ti+1])
return nil, errors.New("parsing issue")
}
lastti = ti
switch t[ti].TType {
case "nowiki":
n := &ParseNode{NType: "text", NSubType: "nowiki", Contents: html.UnescapeString(t[ti].TText)}
nl = append(nl, n)
ti++
/* case "curlyblock":
n := &ParseNode{NType: "curly", Contents: t[ti].TText}
nl = append(nl, n)
ti++ */
case "text":
n := &ParseNode{NType: "text", Contents: html.UnescapeString(t[ti].TText)}
nl = append(nl, n)
ti++
case "math":
n := &ParseNode{NType: "math", Contents: t[ti].TText}
nl = append(nl, n)
ti++
case "pre":
n2 := &ParseNode{NType: "text", NSubType: "pre", Contents: html.UnescapeString(t[ti].TText)}
n1 := &ParseNode{NType: "html", NSubType: "pre", Contents: t[ti].TAttr, Nodes: []*ParseNode{n2}}
nl = append(nl, n1)
ti++
case "nop":
ti++
case "wikipre":
closebefore := len(t)
ni := ti + 1
for ; ni < len(t)-1; ni++ {
if t[ni].TType == "newline" {
if t[ni+1].TType == "wikipre" {
t[ni+1].TType = "nop"
} else {
closebefore = ni
break
}
}
}
if closebefore <= ni+1 {
n := &ParseNode{NType: "html", NSubType: "pre"}
nl = append(nl, n)
ti++
} else {
nodes, err := a.internalParse(t[ti+1 : closebefore])
if err != nil {
return nil, err
}
n := &ParseNode{NType: "html", NSubType: "pre", Nodes: nodes}
nl = append(nl, n)
ti = closebefore
}
case "extlink":
ni := ti + 1
for ; ni < len(t); ni++ {
if t[ni].TType == "closeextlink" {
break
}
}
if ni == len(t) {
return nil, errors.New("Unmatched external link token for link: " + t[ti].TText)
}
n := &ParseNode{NType: "extlink", NSubType: "", Contents: t[ti].TText}
a.ExtLinks = append(a.ExtLinks, t[ti].TText)
if ni > ti+1 {
nodes, err := a.internalParse(t[ti+1 : ni])
if err != nil {
return nil, err
}
n.Nodes = nodes
}
nl = append(nl, n)
ti = ni + 1
case "closeextlink":
return nil, errors.New("Unmatched close external link token")
case "hrule":
n := &ParseNode{NType: "html", NSubType: "hr"}
nl = append(nl, n)
ti++
case "magic":
n := &ParseNode{NType: "magic", Contents: t[ti].TText}
nl = append(nl, n)
ti++
case "colon":
n := &ParseNode{NType: "text", Contents: ":"}
nl = append(nl, n)
ti++
case "space":
n := &ParseNode{NType: "space", Contents: " "}
nl = append(nl, n)
ti++
case "blank":
n := &ParseNode{NType: "break"}
nl = append(nl, n)
ti++
case "redirect":
ni := ti + 1
for ; ni < len(t); ni++ {
if t[ni].TType == "newline" {
break
}
if t[ni].TType == "link" {
break
}
}
if ni == len(t) || t[ni].TType == "newline" {
n := &ParseNode{NType: "text", Contents: html.UnescapeString(t[ti].TText)}
nl = append(nl, n)
ti++
} else {
n := &ParseNode{NType: "redirect", Link: t[ni].TLink, NSubType: t[ni].TAttr}
nl = append(nl, n)
ti++
}
case "link":
ni := ti + 1
nopen := 1
for ; ni < len(t); ni++ {
switch t[ni].TType {
case "link":
nopen++
case "closelink":
nopen--
}
if nopen == 0 {
break
}
}
if ni == len(t) {
return nil, errors.New("Unmatched link token for link: " + t[ti].TLink.PageName + " namespace: " + t[ti].TLink.Namespace)
}
var n *ParseNode
n = &ParseNode{NType: "link", Link: t[ti].TLink}
a.Links = append(a.Links, t[ti].TLink)
if ni > ti+1 {
nodes, err := a.internalParse(t[ti+1 : ni])
if err != nil {
return nil, err
}
n.Nodes = nodes
}
nl = append(nl, n)
ti = ni + 1
case "filelink":
ni := ti + 1
nopen := 1
for ; ni < len(t); ni++ {
switch t[ni].TType {
case "filelink":
nopen++
case "closefilelink":
nopen--
}
if nopen == 0 {
break
}
}
if ni == len(t) {
return nil, errors.New("Unmatched filelink token for filelink: " + t[ti].TLink.PageName + " namespace: " + t[ti].TLink.Namespace)
}
var n *ParseNode
n = &ParseNode{NType: "image", Link: t[ti].TLink}
a.Media = append(a.Media, t[ti].TLink)
if ni > ti+1 {
nodes, err := a.internalParse(t[ti+1 : ni])
if err != nil {
return nil, err
}
n.Nodes = nodes
}
nl = append(nl, n)
ti = ni + 1
case "closelink":
return nil, errors.New("Unmatched close link token")
case "closefilelink":
return nil, errors.New("Unmatched close file link token")
case "html":
tag := strings.ToLower(t[ti].TText)
if tag[0] == '/' {
ti++
continue
}
n := &ParseNode{NType: "html", NSubType: tag, Contents: t[ti].TAttr}
if t[ti].TClosed == true {
flags := TClosed
n.Flags = flags
nl = append(nl, n)
ti++
continue
}
ni := ti + 1
nopen := 1
for ; ni < len(t); ni++ {
if t[ni].TType == "html" {
ntag := strings.ToLower(t[ni].TText)
switch ntag {
case tag:
nopen++
case "/" + tag:
nopen--
}
if nopen == 0 {
break
}
}
}
if ni > ti+1 {
nodes, err := a.internalParse(t[ti+1 : ni])
if err != nil {
a.innerParseErrorCount++
if a.innerParseErrorCount >= maxInnerParseErrorCount {
return nil, err
}
ti++
continue
}
n.Nodes = nodes
}
nl = append(nl, n)
ti = ni + 1
if ti > len(t) {
ti = len(t)
}
case "*", "#", ";", ":":
ti += 1
/* stack := ""
si := 0
ni := ti
ln := &ParseNode{NType: "root", Nodes: make([]*ParseNode, 0, 4)}
for {
this := ""
islist := false
for ; ni < len(t); ni++ {
switch t[ni].TType {
case "*", "#", ";", ":":
islist = true
}
if islist {
this += t[ni].TType
} else {
break
}
}
same := 0
for i := 0; i < len(this) && i < len(stack); i++ {
if this[i] == stack[i] ||
(this[i] == ';' && stack[i] == ':') ||
(this[i] == ':' && stack[i] == ';') {
same++
} else {
break
}
}
n := ln
for i := 0; i < same; i++ {
n = n.Nodes[len(n.Nodes)-1]
n = n.Nodes[len(n.Nodes)-1]
}
for i := same; i < len(this); i++ { //open
var nn *ParseNode
switch this[i] {
case '*':
nn = &ParseNode{NType: "html", NSubType: "ul"}
case '#':
nn = &ParseNode{NType: "html", NSubType: "ol"}
case ';':
nn = &ParseNode{NType: "html", NSubType: "dl"}
case ':':
nn = &ParseNode{NType: "html", NSubType: "dl"}
}
nn.Nodes = make([]*ParseNode, 0, 1)
n.Nodes = append(n.Nodes, nn)
n = nn
if i < len(this)-1 {
var elem *ParseNode
switch this[len] {
case '*', '#':
elem = &ParseNode{NType: "html", NSubType: "li"}
case ';':
elem = &ParseNode{NType: "html", NSubType: "dt"}
case ':':
elem = &ParseNode{NType: "html", NSubType: "dd"}
}
elem.Nodes = make([]*ParseNode, 0, 1)
n.Nodes = append(n.Nodes, elem)
n = elem
}
}
var nitem *ParseNode
switch this[len] {
case '*', '#':
nitem = &ParseNode{NType: "html", NSubType: "li"}
case ';':
nitem = &ParseNode{NType: "html", NSubType: "dt"}
case ':':
nitem = &ParseNode{NType: "html", NSubType: "dd"}
}
n := &ParseNode{NType: "html", NSubType: st}
nl = append(nl, n)
} */
case "newline":
n := &ParseNode{NType: "text", Contents: "\n"}
nl = append(nl, n)
ti++
case "h1", "h2", "h3", "h4", "h5", "h6":
ni := ti + 1
for ; ni < len(t); ni++ {
if t[ni].TType == "newline" {
break
}
}
if ni == len(t) {
return nil, errors.New("No newline after heading")
}
n := &ParseNode{NType: "html", NSubType: t[ti].TType}
if ni > ti+1 {
nodes, err := a.internalParse(t[ti+1 : ni])
if err != nil {
return nil, err
}
n.Nodes = nodes
}
nl = append(nl, n)
ti = ni + 1
case "tb", "te":
templateIndex, err := strconv.Atoi(t[ti].TText)
if err != nil {
return nil, errors.New("Malformed tb token")
}
if templateIndex >= len(a.Templates) {
return nil, errors.New("Template index out of range")
//fmt.Println("Template index out of range", t[ti])
} else {
n := &ParseNode{NType: t[ti].TType, Contents: a.Templates[templateIndex].Name}
nl = append(nl, n)
}
ti++
default:
return nil, errors.New("Unrecognized token type: " + t[ti].TType)
}
}
return nl, nil
}

39
redirect.go Normal file
View file

@ -0,0 +1,39 @@
/*
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gowiki
import "strings"
func (a *Article) checkRedirect(mw string) (bool, *WikiLink) {
if len(mw) < 9 || strings.ToLower(mw[0:9]) != "#redirect" {
return false, nil
}
idx := strings.Index(mw, "\n")
if idx < 0 {
idx = len(mw)
}
nnt, err := a.parseInlineText(mw, 9, idx)
if err != nil {
return false, nil
}
for _, t := range nnt {
if t.TType == "link" {
return true, &t.TLink
}
}
return false, nil
}

27
simple.go Normal file
View file

@ -0,0 +1,27 @@
/*
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gowiki
// "bytes"
// "errors"
// "fmt"
// ParseSimple marks the article as parsed, copying the raw MediaWiki
// markup into Text without any processing. It always returns nil.
func (a *Article) ParseSimple() error {
	a.Text, a.Parsed = a.MediaWiki, true
	return nil
}

660
template.go Normal file
View file

@ -0,0 +1,660 @@
/*
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gowiki
import (
"fmt"
"os"
"regexp"
"sort"
"strings"
)
// Template records one template invocation found in the article's source.
type Template struct {
	Typ        string            `json:"type"` //magic,normal,ext,param
	Name       string            `json:"name"`
	Attr       string            `json:"attr"` //text after the ':' in magic templates
	Parameters map[string]string `json:"parameters"`
}
// parseTemplateEtc is a stub; template handling is actually performed by
// processTemplates/findTemplates. It always returns nil.
func (a *Article) parseTemplateEtc(l string) []Template {
	return nil
}
// streak describes one run of consecutive '{' or '}' characters found by
// findCurlyStreaks; the matcher in findTemplates consumes braces from it.
type streak struct {
	opening bool // true for a '{' run, false for a '}' run
	length  int  // number of braces still unconsumed in the run
	b       int  // start byte offset in the source
	e       int  // end byte offset (exclusive)
}
// template is the parse-time representation of one {{...}} or {{{...}}}
// span, with any templates nested directly inside it as children.
type template struct {
	b        int         // byte offset of the opening braces
	e        int         // byte offset just past the closing braces
	isparam  bool        // true for a triple-brace {{{param}}} substitution
	children []*template // directly nested templates
	rt       string      // rendered replacement text, set by renderInnerTemplates
	rendered bool        // whether rt has been computed
}
// byStart implements sort.Interface, ordering templates by start offset.
type byStart []*template

func (a byStart) Len() int           { return len(a) }
func (a byStart) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a byStart) Less(i, j int) bool { return a[i].b < a[j].b }

// templateStreaksRe is an unused regexp-based alternative to
// findCurlyStreaks, kept for reference (see the commented call in
// findTemplates).
var templateStreaksRe = regexp.MustCompile(`(?:\{\{+)|(?:\}\}+)`)
// findCurlyStreaks scans mw and returns the half-open byte ranges [b, e)
// of every run of two or more consecutive '{' characters and of two or
// more consecutive '}' characters. Single braces and runs of any other
// character are ignored.
func findCurlyStreaks(mw string) [][]int {
	streaks := [][]int{}
	runStart := 0
	var runChar rune = '.' // sentinel: no run in progress
	// record appends the current run when it is a brace run of length >= 2.
	record := func(end int) {
		if end-runStart > 1 && (runChar == '{' || runChar == '}') {
			streaks = append(streaks, []int{runStart, end})
		}
	}
	for i, r := range mw {
		if r == runChar {
			continue
		}
		record(i)
		runStart, runChar = i, r
	}
	// Close out a trailing run of braces.
	record(len(mw))
	return streaks
}
// findTemplates locates every balanced {{...}}/{{{...}}} span in mw and
// returns the top-level ones, with nested templates attached as children.
// Brace runs are paired innermost-first: each closing streak is matched
// against the nearest unconsumed opening streak to its left.
func findTemplates(mw string) []*template {
	// tsl := templateStreaksRe.FindAllStringSubmatchIndex(mw, -1)
	tsl := findCurlyStreaks(mw)
	// fmt.Println(tsl)
	streaks := make([]streak, 0, len(tsl))
	for _, pair := range tsl {
		streaks = append(streaks, streak{
			opening: (mw[pair[0]] == '{'),
			length:  pair[1] - pair[0],
			b:       pair[0],
			e:       pair[1],
		})
	}
	// fmt.Println(streaks)
	tl := make([]*template, 0, 8)
	i := 0
	for i < len(streaks) {
		if !streaks[i].opening && streaks[i].length > 1 { // found a closing set: search for the opening
			found := false
			for j := i - 1; j >= 0; j-- {
				if streaks[j].opening && streaks[j].length > 1 {
					found = true
					// Three or more braces on both sides form a {{{param}}}
					// substitution (width 3); otherwise a template (width 2).
					n := 2
					isparam := false
					if streaks[i].length > 2 && streaks[j].length > 2 {
						n = 3
						isparam = true
					}
					tl = append(tl, &template{
						isparam: isparam,
						b:       streaks[j].e - n,
						e:       streaks[i].b + n,
					})
					// Consume the matched braces from both streaks so the
					// remainder can still pair with outer templates.
					streaks[i].length -= n
					streaks[i].b += n
					streaks[j].length -= n
					streaks[j].e -= n
					break
				}
			}
			if found {
				// The same closing streak may also close an enclosing
				// template; retry it before advancing.
				continue
			}
		}
		i++
	}
	sort.Sort(byStart(tl))
	/* fmt.Println("Templates found:")
	for i := range tl {
		fmt.Println(tl[i])
	} */
	// Partition: top-level templates go to out; every other template is
	// attached to the nearest template that encloses it.
	out := make([]*template, 0, 4)
	cur_end := 0
	for i := range tl {
		tl[i].children = []*template{}
		if tl[i].b >= cur_end {
			cur_end = tl[i].e
			out = append(out, tl[i])
		} else {
			for j := i - 1; j >= 0; j-- {
				if tl[j].e > tl[i].e {
					tl[j].children = append(tl[j].children, tl[i])
					break
				}
			}
		}
	}
	/* fmt.Println("Templates out:")
	for i := range out {
		fmt.Println(out[i])
	}*/
	/* fmt.Println("Templates found:")
	for i := range tl {
		fmt.Println(mw[tl[i].b:tl[i].e])
	}
	*/
	return out
}
// findTemplateParamPos scans the template span t of mw and returns, for
// each top-level parameter, the byte position of its leading '|' and, for
// named parameters, additionally the position of the first '='. Pipes that
// fall inside nested child templates or inside [[...]] links are ignored.
func findTemplateParamPos(mw string, t *template) [][]int { //first is position of pipe, second is position of first equal
	out := make([][]int, 0, 1)
	inChildTemplate := false
	inlink := false
	lastopen := false
	lastclosed := false
	for i, rv := range mw[t.b:t.e] {
		inChildTemplate = false
		open := false
		closed := false
		// Skip characters that fall inside a nested template span.
		for _, ct := range t.children {
			if i+t.b >= ct.b && i+t.b < ct.e {
				inChildTemplate = true
				break
			}
		}
		if !inChildTemplate {
			switch {
			case rv == '[':
				// Two consecutive '[' open a wiki link.
				if lastopen {
					inlink = true
				}
				open = true
			case rv == ']':
				// Two consecutive ']' close it.
				if lastclosed {
					inlink = false
				}
				closed = true
			case rv == '|' && !inlink:
				out = append(out, []int{i + t.b})
			case rv == '=' && len(out) > 0 && len(out[len(out)-1]) == 1 && !inlink:
				// First '=' after a pipe marks that parameter as named.
				out[len(out)-1] = append(out[len(out)-1], i+t.b)
			}
		}
		lastopen = open
		lastclosed = closed
	}
	return out
}
/*func (a *Article) processTemplates(mw string, tokens map[string]*Token) (string, map[string]*Token) {
mlt := findTemplates(mw)
last := 0
out := make([]byte, 0, len(mw))
// tokens := make(map[string]*Token, len(mlt))
for i, t := range mlt {
sb := fmt.Sprintf("\x07tb%05d", i)
se := fmt.Sprintf("\x07te%05d", i)
out = append(out, []byte(mw[last:t.b])...)
out = append(out, []byte(sb+a.renderTemplate(mw, t)+se)...)
last = t.e
tokens[sb] = &Token{
TText: fmt.Sprintf("%d", i),
TType: "tb",
}
tokens[se] = &Token{
TText: fmt.Sprintf("%d", i),
TType: "te",
}
}
out = append(out, []byte(mw[last:])...)
return string(out), tokens
} */
// processTemplates replaces every top-level template in mws with its
// rendered text bracketed by \x07tbNNNNN / \x07teNNNNN marker strings,
// records each template on the article, and registers the markers in the
// token map so the tokenizer can later substitute real tb/te tokens.
func (a *Article) processTemplates(mws string, tokens map[string]*Token, g PageGetter) (string, map[string]*Token) {
	//strip nowiki noinclude etc here
	// mws := a.stripComments(mw)
	// mws = a.stripNoinclude(mws)
	// fmt.Println(mws)
	mlt := findTemplates(mws)
	last := 0
	out := make([]byte, 0, len(mws))
	for i, t := range mlt {
		// fmt.Println("Process templates:", *t)
		sb := fmt.Sprintf("\x07tb%05d", i)
		se := fmt.Sprintf("\x07te%05d", i)
		// Render the template (and its children); t.rt holds the result.
		tn, pm := a.renderInnerTemplates(mws, t, nil, g, 0)
		a.addTemplate(tn, pm)
		out = append(out, []byte(mws[last:t.b])...)
		out = append(out, []byte(sb+t.rt+se)...)
		last = t.e
		tokens[sb] = &Token{
			TText: fmt.Sprintf("%d", i),
			TType: "tb",
		}
		tokens[se] = &Token{
			TText: fmt.Sprintf("%d", i),
			TType: "te",
		}
	}
	out = append(out, []byte(mws[last:])...)
	//unstrip here
	return string(out), tokens
}
// addTemplate records a template invocation named tn with parameter map pm
// on the article, classifying it via detectTemplateType.
func (a *Article) addTemplate(tn string, pm map[string]string) {
	base, attr, typ, _ := detectTemplateType(tn)
	a.Templates = append(a.Templates, &Template{
		Typ:        typ,
		Name:       base,
		Attr:       attr,
		Parameters: pm,
	})
}
// renderTemplate parses the template span t of mw into a name and a
// parameter map (positional parameters are keyed "1", "2", ...), records a
// Template on the article, and returns the replacement text (empty for
// parameter substitutions; also empty for the current magic/normal stubs).
func (a *Article) renderTemplate(mw string, t *template) string {
	pp := findTemplateParamPos(mw, t)
	// n is the brace width: 2 for {{...}}, 3 for {{{...}}}.
	n := 2
	if t.isparam {
		n = 3
	}
	var tn string
	if len(pp) > 0 {
		tn = fmt.Sprint(strings.TrimSpace(mw[t.b+n : pp[0][0]]))
	} else {
		tn = fmt.Sprint(strings.TrimSpace(mw[t.b+n : t.e-n]))
	}
	pm := make(map[string]string, len(pp))
	// Sentinel entry so the loop can slice up to the closing braces.
	pp = append(pp, []int{t.e - n})
	for i := 0; i < len(pp)-1; i++ {
		var name string
		var param string
		if len(pp[i]) > 1 { //named param
			name = fmt.Sprint(strings.TrimSpace(mw[pp[i][0]+1 : pp[i][1]]))
			param = fmt.Sprint(strings.TrimSpace(mw[pp[i][1]+1 : pp[i+1][0]]))
		} else {
			// Positional parameter: 1-based numeric key.
			name = fmt.Sprint(i + 1)
			param = fmt.Sprint(strings.TrimSpace(mw[pp[i][0]+1 : pp[i+1][0]]))
		}
		pm[name] = param
	}
	outT := Template{Parameters: pm}
	base, attr, typ, text := detectTemplateType(tn)
	switch {
	case t.isparam:
		outT.Typ = "param"
		outT.Name = tn
		text = ""
	default:
		outT.Typ = typ
		outT.Name = base
		outT.Attr = attr
	}
	a.Templates = append(a.Templates, &outT)
	return text
}
// detectTemplateType splits a raw template name of the form "base:attr"
// and classifies it. It returns (base, attr, "magic", "") when base is a
// key of MagicMap, and (tn, "", "normal", "") otherwise; the fourth result
// is the replacement text, currently always empty.
func detectTemplateType(tn string) (string, string, string, string) {
	base, attr := tn, ""
	// A colon at position 0 is not treated as a separator.
	if sep := strings.Index(tn, ":"); sep > 0 {
		base = strings.TrimSpace(tn[:sep])
		attr = strings.TrimSpace(tn[sep+1:])
	}
	if _, isMagic := MagicMap[base]; isMagic {
		return base, attr, "magic", ""
	}
	return tn, "", "normal", ""
}
// TemplateRenderer renders a magic template given its name, its source
// text, and its parameter map.
type TemplateRenderer func(name, mw string, params map[string]string) string

// MagicMap lists the magic words recognized by detectTemplateType; the
// renderer values are currently unused (nil).
var MagicMap map[string]TemplateRenderer = map[string]TemplateRenderer{
	"DISPLAYTITLE": nil,
}
// noHashFunctionsMap lists MediaWiki parser functions that are invoked
// without a leading '#'; templateType classifies these names as "magic".
var noHashFunctionsMap map[string]bool = map[string]bool{
	"displaytitle":     true,
	"formatdate":       true,
	"int":              true,
	"namespace":        true,
	"pagesinnamespace": true,
	"speciale":         true,
	"special":          true,
	"tag":              true,
	"anchorencode": true, "basepagenamee": true, "basepagename": true, "canonicalurle": true,
	"canonicalurl": true, "cascadingsources": true, "defaultsort": true, "filepath": true,
	"formatnum": true, "fullpagenamee": true, "fullpagename": true, "fullurle": true,
	"fullurl": true, "gender": true, "grammar": true, "language": true,
	"lcfirst": true, "lc": true, "localurle": true, "localurl": true,
	"namespacee": true, "namespacenumber": true, "nse": true, "ns": true,
	"numberingroup": true, "numberofactiveusers": true, "numberofadmins": true, "numberofarticles": true,
	"numberofedits": true, "numberoffiles": true, "numberofpages": true, "numberofusers": true,
	"numberofviews": true, "padleft": true, "padright": true, "pageid": true,
	"pagenamee": true, "pagename": true, "pagesincategory": true, "pagesize": true,
	"plural": true, "protectionlevel": true, "revisionday2": true, "revisionday": true,
	"revisionid": true, "revisionmonth1": true, "revisionmonth": true, "revisiontimestamp": true,
	"revisionuser": true, "revisionyear": true, "rootpagenamee": true, "rootpagename": true,
	"subjectpagenamee": true, "subjectpagename": true, "subjectspacee": true, "subjectspace": true,
	"subpagenamee": true, "subpagename": true, "talkpagenamee": true, "talkpagename": true,
	"talkspacee": true, "talkspace": true, "ucfirst": true, "uc": true,
	"urlencode": true,
}
// variablesMap lists MediaWiki built-in variables ({{CURRENTDAY}} etc.);
// templateType classifies these names as "magic".
var variablesMap map[string]bool = map[string]bool{
	"articlepath":         true,
	"basepagenamee":       true,
	"basepagename":        true,
	"cascadingsources":    true,
	"contentlanguage":     true,
	"currentday2":         true,
	"currentdayname":      true,
	"currentday":          true,
	"currentdow":          true,
	"currenthour":         true,
	"currentmonth1":       true,
	"currentmonthabbrev":  true,
	"currentmonthnamegen": true,
	"currentmonthname":    true,
	"currentmonth":        true,
	"currenttimestamp":    true,
	"currenttime":         true,
	"currentversion":      true,
	"currentweek":         true,
	"currentyear":         true,
	"directionmark":       true,
	"fullpagenamee":       true,
	"fullpagename":        true,
	"localday2":           true,
	"localdayname":        true,
	"localday":            true,
	"localdow":            true,
	"localhour":           true,
	"localmonth1":         true,
	"localmonthabbrev":    true,
	"localmonthnamegen":   true,
	"localmonthname":      true,
	"localmonth":          true,
	"localtimestamp":      true,
	"localtime":           true,
	"localweek":           true,
	"localyear":           true,
	"namespacee":          true,
	"namespacenumber":     true,
	"namespace":           true,
	"numberofactiveusers": true,
	"numberofadmins":      true,
	"numberofarticles":    true,
	"numberofedits":       true,
	"numberoffiles":       true,
	"numberofpages":       true,
	"numberofusers":       true,
	"numberofviews":       true,
	"pageid":              true,
	"pagenamee":           true,
	"pagename":            true,
	"revisionday2":        true,
	"revisionday":         true,
	"revisionid":          true,
	"revisionmonth1":      true,
	"revisionmonth":       true,
	"revisionsize":        true,
	"revisiontimestamp":   true,
	"revisionuser":        true,
	"revisionyear":        true,
	"rootpagenamee":       true,
	"rootpagename":        true,
	"scriptpath":          true,
	"servername":          true,
	"server":              true,
	"sitename":            true,
	"stylepath":           true,
	"subjectpagenamee":    true,
	"subjectpagename":     true,
	"subjectspacee":       true,
	"subjectspace":        true,
	"subpagenamee":        true,
	"subpagename":         true,
	"talkpagenamee":       true,
	"talkpagename":        true,
	"talkspacee":          true,
	"talkspace":           true,
}
// renderTemplateMagic is a stub: magic words currently render to "".
func (a *Article) renderTemplateMagic(name string, params map[string]string) string {
	return ""
}
// renderTemplateExt is a stub: "#"-prefixed parser-extension templates
// currently render to "".
func (a *Article) renderTemplateExt(name string, params map[string]string) string {
	return ""
}
// renderTemplateRecursive expands the template called name with the given
// (already-expanded) parameters. Magic and ext templates are dispatched to
// their dedicated (stub) renderers; normal templates are fetched through g
// from the Template namespace and transcluded. Expansion stops with ""
// beyond recursion depth 4 or when the template page cannot be retrieved.
func (a *Article) renderTemplateRecursive(name string, params map[string]string, g PageGetter, depth int) string {
	if depth > 4 {
		return ""
	}
	// Name and parameters have already been substituted, so they are
	// guaranteed not to contain any template.
	// Establish the type of template.
	switch templateType(name) {
	case "magic":
		return a.renderTemplateMagic(name, params)
	case "ext":
		return a.renderTemplateExt(name, params)
	}
	// case "normal": fetch the template page and transclude it, expanding
	// any templates it contains in turn.
	mw, err := g.Get(WikiCanonicalFormNamespace(name, "Template"))
	if err != nil {
		fmt.Fprintln(os.Stderr, "Title:", a.Title, " Error retrieving:", name, " ->", err)
		return ""
	}
	return a.TranscludeTemplatesRecursive(mw, params, g, depth)
}
// TranscludeTemplatesRecursive expands the template page source mw with
// the given parameter map: it follows up to 4 redirects to reach the real
// template page, strips comment/noinclude sections, renders every template
// found in the remaining text, and returns the expanded wikitext ("" on
// failure). depth is forwarded to renderTemplateRecursive's recursion cap.
func (a *Article) TranscludeTemplatesRecursive(mw string, params map[string]string, g PageGetter, depth int) string {
	var mws string
	followed := 0
	for {
		if followed > 4 {
			return "" // too many redirect hops; give up
		}
		//strip nowiki noinclude etc here
		// BUG FIX: assign the outer mws here. The previous `mws := ...`
		// declared a loop-local variable that shadowed the outer one, so
		// the code after the loop always operated on an empty string.
		mws = a.stripComments(mw)
		isRedirect, redirect := a.checkRedirect(mws)
		if !isRedirect {
			break
		}
		var err error
		mw, err = g.Get(*redirect)
		if err != nil {
			return ""
		}
		followed++
	}
	mws = a.stripNoinclude(mws)
	// fmt.Println(ds[depth], "TranscludeTemplatesRecursive", mws)
	mlt := findTemplates(mws)
	last := 0
	out := make([]byte, 0, len(mws))
	for _, t := range mlt {
		// Render each top-level template in place; t.rt holds the result.
		a.renderInnerTemplates(mws, t, params, g, depth)
		out = append(out, []byte(mws[last:t.b])...)
		out = append(out, []byte(t.rt)...)
		last = t.e
	}
	out = append(out, []byte(mws[last:])...)
	//unstrip here
	return string(out)
}
var ds []string = []string{" ", " ", " ", " ", " ", " "}
// renderInnerTemplates renders template t into t.rt, recursively rendering
// its children first. It returns the template's name and parameter map,
// or ("", nil) for a {{{param}}} substitution. params supplies values for
// parameter substitutions; g fetches template pages; depth bounds the
// recursion in renderTemplateRecursive.
func (a *Article) renderInnerTemplates(mws string, t *template, params map[string]string, g PageGetter, depth int) (string, map[string]string) {
	// render inner templates first
	// fmt.Println(ds[depth], *t, "\n", ds[depth], "Template:\n", ds[depth], mws[t.b:t.e])
	for _, it := range t.children {
		if !it.rendered {
			a.renderInnerTemplates(mws, it, params, g, depth)
		}
	}
	// fmt.Println(ds[depth], "Working on", mws[t.b:t.e])
	pp := findTemplateParamPos(mws, t) //position of the pipes for this template
	// fmt.Println(ds[depth], "pp:", pp)
	// n is the brace width: 2 for {{...}}, 3 for {{{...}}}.
	n := 2
	if t.isparam {
		n = 3
	}
	// Sentinel entry pointing at the closing braces.
	pp = append(pp, []int{t.e - n})
	var mw string
	var tb int
	// var te int
	if len(t.children) == 0 {
		// No nested templates: work directly on the original source.
		// fmt.Println(ds[depth], "No nested templates in", mws[t.b:t.e])
		mw = mws
		tb = t.b
		// te = t.e
	} else {
		// Build mw as a copy of this template's source with each child
		// replaced by its rendered text (child.rt), shifting the recorded
		// pipe/equal positions in pp to match the rewritten string.
		// fmt.Println(ds[depth], "Nested templates: fixing pp")
		//substitute the strings and update pp
		tci := 0
		ioff := t.children[tci].b
		tb = 0
		mw = mws[t.b:ioff]
		// fmt.Println(*t)
		ooff := -t.b // running offset between mws positions and mw positions
		ppi0 := 0
		ppi1 := 0
		for ppi0 < len(pp) {
			// fmt.Println(mws)
			// fmt.Println(len(mws), tci, ioff, ooff, ppi0, ppi1, pp)
			if pp[ppi0][ppi1] <= ioff {
				// Position precedes the next child: just translate it.
				pp[ppi0][ppi1] += ooff
				ppi1++
				if ppi1 >= len(pp[ppi0]) {
					ppi0++
					ppi1 = 0
				}
			} else {
				// Splice in the child's rendered text and advance past it.
				mw += t.children[tci].rt
				ooff += len(t.children[tci].rt) - (t.children[tci].e - t.children[tci].b)
				teoff := t.children[tci].e
				tci++
				if tci >= len(t.children) {
					ioff = t.e
				} else {
					ioff = t.children[tci].b
				}
				// fmt.Println(ds[depth], tci, teoff, ioff)
				mw += mws[teoff:ioff]
			}
		}
		// te = len(mw)
	}
	// fmt.Println("len(mw):", len(mw), "mw:", mw, "\npp:", pp)
	var tn string
	if len(pp) > 1 {
		tn = fmt.Sprint(strings.TrimSpace(mw[tb+n : pp[0][0]]))
	} else {
		tn = fmt.Sprint(strings.TrimSpace(mw[tb+n : pp[len(pp)-1][0]]))
	}
	t.rendered = true
	if t.isparam { //it's a parameter substitution
		text, ok := params[tn]
		if ok {
			t.rt = text
			return "", nil
		}
		if len(pp) == 1 { //no default
			// Unknown parameter without default: leave the literal form.
			t.rt = "{{{" + tn + "}}}"
			return "", nil
		}
		// Use the default: the text between the first pipe and the close.
		t.rt = mw[pp[0][0]+1 : pp[len(pp)-1][0]]
		return "", nil
	}
	pm := make(map[string]string, len(pp))
	for i := 0; i < len(pp)-1; i++ {
		var name string
		var param string
		if len(pp[i]) > 1 { //named param
			name = fmt.Sprint(strings.TrimSpace(mw[pp[i][0]+1 : pp[i][1]]))
			param = fmt.Sprint(strings.TrimSpace(mw[pp[i][1]+1 : pp[i+1][0]]))
		} else {
			// Positional parameter: 1-based numeric key.
			name = fmt.Sprint(i + 1)
			param = fmt.Sprint(strings.TrimSpace(mw[pp[i][0]+1 : pp[i+1][0]]))
		}
		pm[name] = param
	}
	t.rt = a.renderTemplateRecursive(tn, pm, g, depth+1)
	return tn, pm
}
// templateType classifies a template name as "magic" (a known parser
// function or built-in variable), "ext" (a '#'-prefixed parser extension),
// or "normal" (an ordinary transcluded template).
func templateType(tn string) string {
	trimmed := strings.TrimSpace(tn)
	base := trimmed
	// The part before the first colon (a colon at index 0 of the raw name
	// does not count) is the candidate magic-word name.
	if sep := strings.Index(tn, ":"); sep > 0 {
		base = strings.TrimSpace(tn[:sep])
	}
	base = strings.ToLower(base)
	if _, ok := noHashFunctionsMap[base]; ok {
		return "magic"
	}
	if _, ok := variablesMap[base]; ok {
		return "magic"
	}
	if strings.HasPrefix(trimmed, "#") {
		return "ext"
	}
	return "normal"
}
// noincludeRe matches <noinclude> sections (tolerating a missing close
// tag at end of input); includeonlyRe captures <includeonly> contents.
var noincludeRe = regexp.MustCompile(`(?isU)<noinclude>.*(?:</noinclude>|\z)`)
var includeonlyRe = regexp.MustCompile(`(?isU)<includeonly>(.*)(?:</includeonly>|\z)`)
// stripNoinclude prepares template source for transclusion: it deletes
// <noinclude> sections entirely and, when <includeonly> sections exist,
// keeps only their concatenated contents.
func (a *Article) stripNoinclude(mw string) string {
	withoutNoinclude := noincludeRe.ReplaceAllLiteralString(mw, "")
	matches := includeonlyRe.FindAllStringSubmatch(withoutNoinclude, -1)
	if len(matches) == 0 {
		return withoutNoinclude
	}
	parts := make([]string, 0, len(matches))
	for _, m := range matches {
		parts = append(parts, m[1])
	}
	return strings.Join(parts, "")
}

102
text.go Normal file
View file

@ -0,0 +1,102 @@
/*
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gowiki
import (
"bytes"
"unicode/utf8"
)
// appendText appends t to the plain-text buffer while keeping the rune
// count a.nchar in sync (link offsets are measured in runes, not bytes).
func (a *Article) appendText(t string) {
	a.nchar += utf8.RuneCountInString(t)
	a.text.WriteString(t)
}
// genTextInternal walks the parse tree depth-first, appending each node's
// plain-text rendition to a.text. It collapses runs of space nodes into a
// single space, records the rune span and display text of every link in
// a.TextLinks, and snapshots a.AbstractText the first time a heading is
// reached. The indent parameter is currently unused.
func (a *Article) genTextInternal(root *ParseNode, indent int) {
	lastwasspace := false
	for _, n := range root.Nodes {
		var linkStart int
		var fl FullWikiLink
		isLink := false
		tappend := "" // text appended after this node's children
		switch n.NType {
		case "break":
			a.appendText("\n")
		case "space":
			// Collapse consecutive space nodes into one output space.
			if !lastwasspace {
				a.appendText(" ")
			}
		case "text":
			a.appendText(n.Contents)
		case "image":
			a.appendText("\n")
			tappend = "\n"
		case "link":
			isLink = true
			linkStart = len(a.text.Bytes())
			fl = FullWikiLink{Link: n.Link, Start: a.nchar}
		case "html":
			switch n.NSubType {
			case "h1", "h2", "h3", "h4", "h5", "h6":
				a.appendText("\n")
				tappend = "\n"
				// The abstract is everything before the first heading.
				if len(a.AbstractText) == 0 {
					a.AbstractText = a.text.String()
				}
			case "br":
				a.appendText("\n")
			case "ref":
				a.appendText(" ")
			}
		}
		if len(n.Nodes) > 0 {
			a.genTextInternal(n, 0)
		}
		if isLink {
			// The link's display text is whatever was appended since linkStart.
			ttmp := a.text.Bytes()
			fl.End = a.nchar
			fl.Text = string(ttmp[linkStart:])
			a.TextLinks = append(a.TextLinks, fl)
		}
		lastwasspace = false
		if n.NType == "space" {
			lastwasspace = true
		}
		// a.Text += tappend
		a.appendText(tappend)
	}
	return
}
// genText renders the parse tree rooted at a.Root into plain text, filling
// a.Text, a.AbstractText (the text before the first heading, or the whole
// text when there is none) and a.TextLinks, then marks generation done.
// It always returns nil.
func (a *Article) genText() error {
	// Start from an empty buffer with a large capacity. The previous code
	// allocated a zero-filled 1 MiB slice and immediately Truncate(0)'d
	// it, paying for the zeroing without ever using the contents.
	a.text = bytes.NewBuffer(make([]byte, 0, 1024*1024))
	a.nchar = 0
	a.AbstractText = ""
	a.genTextInternal(a.Root, 0)
	a.Text = a.text.String()
	if len(a.AbstractText) == 0 {
		// No heading was seen: the abstract is the whole text.
		a.AbstractText = a.Text
	}
	a.gt = true
	return nil
}
// GenText generates the plain-text rendition of the parsed article; it is
// the exported wrapper around genText.
func (a *Article) GenText() error {
	return a.genText()
}

916
tokenize.go Normal file
View file

@ -0,0 +1,916 @@
/*
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gowiki
import (
// "bytes"
"errors"
"fmt"
// "html"
"regexp"
"sort"
"strings"
"unicode"
"unicode/utf8"
)
// Token is one lexical unit produced by Tokenize: a run of text, a markup
// marker (space, quote, colon, newline, heading, list item, ...), an HTML
// tag, a link, or a template begin/end marker.
type Token struct {
	TText   string   `json:"tText,omitempty"`
	TType   string   `json:"tType,omitempty"`
	TAttr   string   `json:"tAttr,omitempty"`
	TLink   WikiLink `json:"tLink,omitempty"`
	TClosed bool     `json:"tClosed,omitempty"`
	TPipes  []string `json:"tPipes,omitempty"`
}
// parseRedirectLine tokenizes a "#redirect ..." line: a leading redirect
// token followed by the inline tokens of everything after the keyword
// (the first 9 bytes).
func (a *Article) parseRedirectLine(l string) ([]*Token, error) {
	rest, err := a.parseInlineText(l, 9, len(l))
	if err != nil {
		return nil, err
	}
	return append([]*Token{{TType: "redirect"}}, rest...), nil
}
// parseWikiPreLine tokenizes a leading-space preformatted line: a wikipre
// token followed by the inline tokens of the text after the first byte.
func (a *Article) parseWikiPreLine(l string) ([]*Token, error) {
	rest, err := a.parseInlineText(l, 1, len(l))
	if err != nil {
		return nil, err
	}
	return append([]*Token{{TType: "wikipre"}}, rest...), nil
}
// parseHRuler tokenizes a "----" horizontal-rule line. Any text after the
// run of dashes is tokenized as inline content.
func (a *Article) parseHRuler(l string) ([]*Token, error) {
	textStart := 0
	for i, r := range l {
		if r == '-' {
			continue
		}
		textStart = i
		break
	}
	tokens := []*Token{{TType: "hrule"}}
	// textStart stays 0 when the whole line is dashes.
	if textStart == 0 {
		return tokens, nil
	}
	rest, err := a.parseInlineText(l, textStart, len(l))
	if err != nil {
		return nil, err
	}
	return append(tokens, rest...), nil
}
// parseHeadingLine tokenizes a "== heading ==" line. Matching '=' pairs
// are stripped from both ends to determine the heading level (capped at
// 6); it emits an hN token followed by the inline tokens of the enclosed
// text.
func (a *Article) parseHeadingLine(l string) ([]*Token, error) {
	pf := 0 // will become the index past the leading '=' run (the level)
	pl := 0 // index of the first trailing '='
	for i, rv := range l {
		if rv == '=' {
			pl = i
		}
	}
	// Consume '=' characters pairwise from both ends while they match.
	for {
		pf++
		if pf == pl || l[pf] != '=' {
			pf--
			break
		}
		pl--
		if pf == pl || l[pl] != '=' {
			pl++
			pf--
			break
		}
	}
	pf++
	// Beyond h6, the surplus '=' stay part of the heading text.
	if pf > 6 {
		diff := pf - 6
		pf -= diff
		pl += diff
	}
	nt := make([]*Token, 0, 2)
	nt = append(nt, &Token{TType: fmt.Sprintf("h%d", pf)})
	nnt, err := a.parseInlineText(l, pf, pl)
	if err != nil {
		return nil, err
	}
	nt = append(nt, nnt...)
	return nt, nil
}
// parseListLine tokenizes a list line: one token per leading list marker
// (';', ':', '*', '#'), followed by the inline tokens of the remaining
// text.
func (a *Article) parseListLine(l string) ([]*Token, error) {
	tokens := make([]*Token, 0, 2)
	pos := 0
markers:
	for pos < len(l) {
		switch l[pos] {
		case ';', ':', '*', '#':
			tokens = append(tokens, &Token{TType: l[pos : pos+1]})
			pos++
		default:
			break markers
		}
	}
	if pos == len(l) {
		return tokens, nil
	}
	rest, err := a.parseInlineText(l, pos, len(l))
	if err != nil {
		return nil, err
	}
	return append(tokens, rest...), nil
}
// parseTableLine is a stub: table markup is recognized by lineType but its
// contents are currently dropped (no tokens are produced).
func (a *Article) parseTableLine(l string) ([]*Token, error) {
	nt := make([]*Token, 0, 0)
	return nt, nil
}
// isValidHTMLtag is a stub that accepts every tag name; it is the hook
// for future whitelisting of HTML tags.
func isValidHTMLtag(tag string) bool {
	return true
}
// decodeHTMLtag tries to read an HTML tag at the start of l (l begins at
// the '<'). It returns the number of bytes consumed through the closing
// '>', the tag name, the raw attribute text, whether a '/' immediately
// precedes the '>' (self-closing), and whether a tag was recognized at
// all. A '>' inside a quoted attribute value does not terminate the tag.
func (a *Article) decodeHTMLtag(l string) (int, string, string, bool, bool) {
	matchingpos := 0
	inquote := false
	lastbackslash := false
	quote := '#' // current quote delimiter when inquote
	closefound := false
	tagend := 0
	tagstart := 0
	//taking care of comments at preprocessing time
	/* if strings.HasPrefix(l, "<!--") {
		i := strings.Index(l[4:], "-->")
		if i == -1 {
			return len(l), "!--", l[4:], true, true
		}
		return 4 + i + 3, "!--", l[4 : 4+i], true, true
	} */
dhtLoop:
	for idx, rv := range l {
		// fmt.Println(string(rv), inquote, string(quote), idx, matchingpos)
		switch rv {
		case '>':
			if !inquote {
				matchingpos = idx
				break dhtLoop
			}
		case '\'', '"':
			switch {
			case inquote && quote == rv && !lastbackslash:
				inquote = false
			case !inquote:
				inquote = true
				quote = rv
			}
		case ' ', '\t', '\r':
		case '/':
			closefound = true
		}
		lastbackslash = (rv == '\\')
		// First non-space rune starts the tag name.
		if !unicode.IsSpace(rv) && tagstart == 0 {
			tagstart = idx
		}
		// closefound only survives if nothing but '/' and spaces follow.
		if rv != '/' && !unicode.IsSpace(rv) {
			closefound = false
		}
		// First space after the name ends it; the rest is attribute text.
		if unicode.IsSpace(rv) && tagstart != 0 && tagend == 0 {
			tagend = idx
		}
	}
	if matchingpos == 0 || tagstart == 0 {
		return 0, "", "", false, false
	}
	var tag string
	var attr string
	if tagend == 0 {
		tag = l[tagstart:matchingpos]
		attr = ""
	} else {
		tag = l[tagstart:tagend]
		attr = l[tagend:matchingpos]
	}
	return matchingpos + 1, tag, attr, closefound, true
	// e, tag, attr, closed, ok := decodeHTMLtag(l[pos:end])
}
// matchPrefixes reports whether s starts with any of the given prefixes,
// compared case-insensitively.
func matchPrefixes(s string, prefixes []string) bool {
	for _, p := range prefixes {
		if len(s) < len(p) {
			continue
		}
		if strings.EqualFold(s[:len(p)], p) {
			return true
		}
	}
	return false
}
// extlinkre is an earlier regexp-based external-link test, kept for
// reference; the prefix match below is used instead.
var extlinkre = regexp.MustCompile(`^(http:)|(ftp:)|()//[^\s]+`)

// isExtLink reports whether l looks like an external link target
// (http://, ftp://, or protocol-relative //), case-insensitively.
func isExtLink(l string) bool {
	// return extlinkre.MatchString(l)
	return matchPrefixes(l, []string{"http://", "ftp://", "//"})
}
// filelinkre is an earlier regexp-based test, kept for reference; the
// prefix match below is used instead.
var filelinkre = regexp.MustCompile(`(?i)^\[\[(?:image:)|(?:media:)|(?:file:)`)

// possibleFileLink reports whether l opens an image/media/file link,
// compared case-insensitively.
func possibleFileLink(l string) bool {
	// return filelinkre.MatchString(l)
	return matchPrefixes(l, []string{"[[image:", "[[media:", "[[file:"})
}
// parseLink dispatches a '['-initiated construct to the file, internal,
// or external link parser. It returns the number of bytes consumed, the
// resulting tokens, and whether a link was recognized. Inputs shorter
// than 5 bytes cannot form a link and are rejected outright.
func (a *Article) parseLink(l string) (int, []*Token, bool) {
	switch {
	case len(l) < 5:
		return 0, nil, false
	case l[1] != '[':
		return a.parseExternalLink(l)
	case possibleFileLink(l):
		return a.parseFileLink(l)
	default:
		return a.parseInternalLink(l)
	}
}
// parseInternalLink tries to read a [[target]] or [[target|text]] link at
// the start of l. It also captures a "link trail" of letters immediately
// after the ]] (MediaWiki folds these into the display text). It returns
// the bytes consumed, the tokens (link, optional display tokens,
// closelink), and whether a link was recognized.
func (a *Article) parseInternalLink(l string) (int, []*Token, bool) {
	// possible internal link
	pipepos := 0
	closed := false
	matchingpos := 0
	linktrail := 0 // index of the last trailing letter after ]]
	//plLoop:
	for idx, rv := range l {
		if idx < 2 {
			continue
		}
		if matchingpos == 0 {
			switch rv {
			case '\x07': //prevent special tags in internal link
				if pipepos == 0 { //only in the link portion
					return 0, nil, false
				}
			case '[':
				// A nested "[[" (or a third leading '[') aborts the match.
				if idx == 2 || len(l) > idx+1 && l[idx+1] == '[' {
					return 0, nil, false
				}
			case ']':
				if len(l) > idx+1 && l[idx+1] == ']' {
					matchingpos = idx
				}
			case '|':
				if pipepos == 0 {
					pipepos = idx
				}
			default:
			}
			continue
		}
		// After matchingpos is set: skip the second ']', then gather the
		// letter trail.
		if !closed {
			closed = true
			continue
		}
		if unicode.IsLetter(rv) {
			linktrail = idx
			continue
		}
		break
	}
	if !closed {
		return 0, nil, false
	}
	var link WikiLink
	var nt []*Token = nil
	var err error = nil
	if pipepos == 0 {
		// No pipe: the target doubles as the display text.
		innerstring := l[2:matchingpos]
		if linktrail != 0 {
			innerstring += l[matchingpos+2 : linktrail+1]
		}
		link = WikiCanonicalForm(l[2:matchingpos])
		nt = []*Token{&Token{TText: innerstring, TType: "text"}}
	} else {
		innerstring := l[pipepos+1 : matchingpos]
		if linktrail != 0 {
			innerstring += l[matchingpos+2 : linktrail+1]
		}
		link = WikiCanonicalForm(l[2:pipepos])
		if pipepos+1 < matchingpos {
			nt, err = a.parseInlineText(innerstring, 0, len(innerstring))
			if err != nil {
				return 0, nil, false
			}
		}
	}
	tokens := make([]*Token, 0, 2)
	tokens = append(tokens, &Token{TLink: link, TType: "link"})
	if nt != nil {
		tokens = append(tokens, nt...)
	}
	tokens = append(tokens, &Token{TType: "closelink"})
	if linktrail != 0 {
		return linktrail + 1, tokens, true
	}
	return matchingpos + 2, tokens, true
}
// parseExternalLink tries to read a [url] or [url text] external link at
// the start of l. The link may be closed by ']' or, when a label is
// present, implicitly by a </ref> tag. [[...]] spans inside the label are
// skipped. It returns the bytes consumed, the tokens (extlink, optional
// label tokens, closeextlink), and whether a link was recognized.
func (a *Article) parseExternalLink(l string) (int, []*Token, bool) {
	// possible external link
	spacepos := 0    // position of the space separating url from label
	matchingpos := 0 // position of the terminator
	endpos := 0      // bytes consumed (terminator may be excluded for </ref>)
	intLinkOpen := false
	skipNext := false
plLoop2:
	for idx, rv := range l {
		if idx < 1 {
			continue
		}
		if skipNext {
			skipNext = false
			continue
		}
		switch rv {
		case '\x07':
			// Special markers are only allowed in the label portion.
			if spacepos == 0 {
				return 0, nil, false
			}
		case '[':
			if len(l) > idx+1 && l[idx+1] == '[' {
				intLinkOpen = true
			}
		case ' ':
			if spacepos == 0 {
				spacepos = idx
			}
		case '<':
			if spacepos > 0 {
				// e, tag, attr, closed, ok := a.decodeHTMLtag(l[idx:len(l)])
				_, tag, _, _, ok := a.decodeHTMLtag(l[idx:len(l)])
				// fmt.Println("html tag in ext link. Line:", l, "\n\n", tag, ok)
				if ok && tag == "/ref" {
					// fmt.Println("closing link...")
					matchingpos = idx
					endpos = idx
					break plLoop2
				}
			}
		case ']':
			if intLinkOpen && len(l) > idx+1 && l[idx+1] == ']' {
				intLinkOpen = false
				skipNext = true
				continue
			}
			matchingpos = idx
			endpos = idx + 1
			break plLoop2
		}
	}
	if matchingpos == 0 {
		return 0, nil, false
	}
	var link string
	var nt []*Token = nil
	var err error = nil
	if spacepos == 0 {
		link = l[1:matchingpos]
		if !isExtLink(link) {
			return 0, nil, false
		}
	} else {
		link = l[1:spacepos]
		if !isExtLink(link) {
			return 0, nil, false
		}
		if spacepos+1 < matchingpos {
			nt, err = a.parseInlineText(l, spacepos+1, matchingpos)
			if err != nil {
				return 0, nil, false
			}
		}
	}
	tokens := make([]*Token, 0, 2)
	tokens = append(tokens, &Token{TText: link, TType: "extlink"})
	if nt != nil {
		tokens = append(tokens, nt...)
	}
	tokens = append(tokens, &Token{TType: "closeextlink"})
	return endpos, tokens, true
}
// parseFileLink tries to read an [[image:...]]/[[media:...]]/[[file:...]]
// link at the start of l. Intermediate pipe-separated options are
// collected into TPipes; the segment after the last pipe is tokenized as
// the caption. Nested [[...]] spans are skipped while scanning. It
// returns the bytes consumed, the tokens (filelink, optional caption
// tokens, closefilelink), and whether a link was recognized.
func (a *Article) parseFileLink(l string) (int, []*Token, bool) {
	// possible internal link
	pipepos := make([]int, 0, 0)
	closed := false
	matchingpos := 0
	intLinkOpen := false
	skipNext := false
plLoop:
	for idx, rv := range l {
		if idx < 2 {
			continue
		}
		if skipNext {
			skipNext = false
			continue
		}
		switch rv {
		case '\x07': //prevent special tags in internal link
			if len(pipepos) == 0 { //only in the link portion
				return 0, nil, false
			}
		case '[':
			if len(l) > idx+1 && l[idx+1] == '[' {
				intLinkOpen = true
				skipNext = true
				continue
			}
		case ']':
			if len(l) > idx+1 && l[idx+1] == ']' {
				if intLinkOpen {
					intLinkOpen = false
					skipNext = true
					continue
				}
				matchingpos = idx
				closed = true
				break plLoop
			}
		case '|':
			// Pipes inside a nested link belong to that link, not ours.
			if !intLinkOpen {
				pipepos = append(pipepos, idx)
			}
		default:
		}
	}
	if !closed {
		return 0, nil, false
	}
	var link WikiLink
	var pipes = make([]string, 0, 0)
	var nt []*Token = nil
	var err error = nil
	if len(pipepos) == 0 {
		link = WikiCanonicalForm(l[2:matchingpos])
		nt = []*Token{&Token{TText: l[2:matchingpos], TType: "text"}}
	} else {
		link = WikiCanonicalForm(l[2:pipepos[0]])
		for i := 0; i < len(pipepos)-1; i++ {
			pipes = append(pipes, l[pipepos[i]+1:pipepos[i+1]])
		}
		if pipepos[len(pipepos)-1]+1 < matchingpos {
			nt, err = a.parseInlineText(l, pipepos[len(pipepos)-1]+1, matchingpos)
			if err != nil {
				return 0, nil, false
			}
		}
	}
	tokens := make([]*Token, 0, 2)
	tokens = append(tokens, &Token{TLink: link, TType: "filelink", TPipes: pipes})
	if nt != nil {
		tokens = append(tokens, nt...)
	}
	tokens = append(tokens, &Token{TType: "closefilelink"})
	return matchingpos + 2, tokens, true
}
// min returns the smaller of two ints.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
var behavswitchre = regexp.MustCompile(`^__[A-Z]+__`)
// decodeBehavSwitch matches a behavior switch (e.g. __NOTOC__) at the
// start of l, returning the matched length and whether one was found.
func (a *Article) decodeBehavSwitch(l string) (int, bool) {
	if m := behavswitchre.FindString(l); len(m) > 0 {
		return len(m), true
	}
	return 0, false
	// e, ok := decodeMagic(l[pos:end])
}
// parseInlineText tokenizes l[start:end] into text runs interleaved with
// html, link, magic, space, quote, colon and special (\x07-marker)
// tokens. Plain text is accumulated between tStart and tEnd and flushed
// as a single text token whenever a non-text construct is recognized.
func (a *Article) parseInlineText(l string, start, end int) ([]*Token, error) {
	nt := make([]*Token, 0)
	// fmt.Println("in parseInlineText")
	tStart, tEnd := start, start
	for pos := start; pos < end; {
		rv, rune_len := utf8.DecodeRuneInString(l[pos:end])
		switch rv {
		case '<':
			e, tag, attr, closed, ok := a.decodeHTMLtag(l[pos:end])
			if ok {
				pos += e
				if isValidHTMLtag(tag) {
					// Flush pending text, then emit the tag token.
					if tEnd > tStart {
						nt = append(nt, &Token{TText: l[tStart:tEnd], TType: "text"})
					}
					nt = append(nt, &Token{TType: "html", TText: tag, TAttr: attr, TClosed: closed})
					tStart = pos
				}
				tEnd = pos
				continue
			}
		case '[':
			e, lt, ok := a.parseLink(l[pos:end])
			if ok {
				if tEnd > tStart {
					nt = append(nt, &Token{TText: l[tStart:tEnd], TType: "text"})
				}
				nt = append(nt, lt...)
				pos += e
				tStart, tEnd = pos, pos
				continue
			}
		/* case '{':
		e, tt, ok := a.parseTemplateEtc(l[pos:end])
		fmt.Println("template:", e, tt, ok)
		if ok {
			if len(cs) > 0 {
				nt = append(nt, &Token{TText: cs, TType: "text"})
			}
			nt = append(nt, tt...)
			pos += e
			cs = ""
			continue
		}
		cs += string(rv) */
		case '_':
			e, ok := a.decodeBehavSwitch(l[pos:end])
			if ok {
				if tEnd > tStart {
					nt = append(nt, &Token{TText: l[tStart:tEnd], TType: "text"})
				}
				nt = append(nt, &Token{TType: "magic", TAttr: l[pos : pos+e]})
				pos += e
				tStart, tEnd = pos, pos
				continue
			}
		case ' ', '\t', '\r':
			if tEnd > tStart {
				nt = append(nt, &Token{TText: l[tStart:tEnd], TType: "text"})
			}
			nt = append(nt, &Token{TType: "space"})
			tStart = pos + rune_len
		case '\'':
			if tEnd > tStart {
				nt = append(nt, &Token{TText: l[tStart:tEnd], TType: "text"})
			}
			nt = append(nt, &Token{TType: "quote"})
			tStart = pos + rune_len
		case ':':
			if tEnd > tStart {
				nt = append(nt, &Token{TText: l[tStart:tEnd], TType: "text"})
			}
			nt = append(nt, &Token{TType: "colon"})
			tStart = pos + rune_len
		case '\x07':
			// case '@':
			// \x07 markers are fixed-width 8-byte tags (e.g. "\x07tb00042").
			if tEnd > tStart {
				nt = append(nt, &Token{TText: l[tStart:tEnd], TType: "text"})
			}
			nt = append(nt, &Token{TType: "special", TText: l[pos : pos+8]})
			pos += 8
			tStart, tEnd = pos, pos
			continue
		}
		pos += rune_len
		tEnd = pos
	}
	// Flush any trailing text.
	if tEnd > tStart {
		nt = append(nt, &Token{TText: l[tStart:tEnd], TType: "text"})
	}
	return nt, nil
}
// isHeading reports whether l is a heading line: it must start with '='
// and, ignoring trailing whitespace, end with '='. The first two runes
// are exempt from the trailing check, so "=" and "==" alone do not
// qualify. Assumes l is non-empty (guaranteed by lineType's blank check).
func (a *Article) isHeading(l string) bool {
	if l[0] != '=' {
		return false
	}
	done := 0
	lastEqual := false
	for _, rv := range l {
		done++
		if done > 2 {
			if unicode.IsSpace(rv) {
				// Trailing spaces after the final '=' are tolerated.
				continue
			}
			if rv == '=' {
				lastEqual = true
				continue
			}
			lastEqual = false
		}
	}
	return lastEqual
}
// isTable reports whether l starts a wiki-table construct: "{|", "|}",
// "|+", "|-", or any non-empty line beginning with '|' or '!'.
func (a *Article) isTable(l string) bool {
	if len(l) > 1 {
		switch l[0:2] {
		case "{|", "|}", "|+", "|-":
			return true
		}
	}
	if len(l) > 0 {
		return l[0] == '|' || l[0] == '!'
	}
	return false
}
// lineType classifies a line of wikitext. The checks are ordered so that
// more specific prefixes (redirect, hr) win over generic ones (list,
// table, wikipre); anything unmatched is "normal".
func (a *Article) lineType(l string) string {
	if len(l) == 0 {
		return "blank"
	}
	if len(l) > 8 && strings.ToLower(l[0:9]) == "#redirect" {
		return "redirect"
	}
	if len(l) > 3 && l[0:4] == "----" {
		return "hr"
	}
	if a.isHeading(l) {
		return "heading"
	}
	switch l[0] {
	case ';', ':', '*', '#':
		return "list"
	}
	if a.isTable(l) {
		return "table"
	}
	if l[0] == ' ' {
		return "wikipre"
	}
	return "normal"
}
// Tokenize converts raw wikitext mw into a flat token stream.
//
// Preprocessing pipeline (each stage consumes the previous stage's
// output): stripComments removes HTML comments; stripNowikiPreMath
// replaces <nowiki>/<pre>/<math> regions with "\x07%07d" placeholders
// and a side map; processTemplates expands templates, fetching pages
// through g, and returns its own placeholder map; preprocessLinks
// folds multi-line piped links onto single lines. The result is split
// into lines, each line classified by lineType and handed to the
// matching parser, and finally every "special" placeholder token is
// swapped for the real token stored in templatemap.
func (a *Article) Tokenize(mw string, g PageGetter) ([]*Token, error) {
	mwnc := a.stripComments(mw)
	mw_stripped, nowikipremathmap := a.stripNowikiPreMath(mwnc)
	mw_tmpl, templatemap := a.processTemplates(mw_stripped, nowikipremathmap, g)
	mw_links := a.preprocessLinks(mw_tmpl)
	lines := strings.Split(mw_links, "\n")
	tokens := make([]*Token, 0, 16)
	for _, l := range lines {
		var nt []*Token
		var err error = nil
		// Dispatch the line to the parser matching its classification.
		lt := a.lineType(l)
		switch lt {
		case "normal":
			nt, err = a.parseInlineText(l, 0, len(l))
		case "redirect":
			nt, err = a.parseRedirectLine(l)
		case "hr":
			nt, err = a.parseHRuler(l)
		case "heading":
			nt, err = a.parseHeadingLine(l)
		case "list":
			nt, err = a.parseListLine(l)
		case "table":
			nt, err = a.parseTableLine(l)
		case "wikipre":
			nt, err = a.parseWikiPreLine(l)
		case "blank":
			nt = []*Token{&Token{TType: "blank"}}
		}
		if err != nil {
			return nil, err
		}
		// Every source line contributes a trailing newline token.
		nt = append(nt, &Token{TType: "newline"})
		tokens = append(tokens, nt...)
	}
	// Resolve placeholder tokens. The lookup goes through templatemap
	// only; NOTE(review): processTemplates receives nowikipremathmap,
	// so it presumably folds those entries into templatemap — confirm
	// (see the commented-out nowikipremathmap lookup below).
	specialcount := 0
	for i := range tokens {
		if tokens[i].TType == "special" {
			specialcount++
			t, ok := templatemap[tokens[i].TText] //nowikipremathmap[tokens[i].TText]
			if !ok {
				return nil, errors.New("special not in map")
			}
			tokens[i] = t
		}
	}
	// fmt.Println(specialcount, len(nowikipremathmap))
	// if specialcount != len(nowikipremathmap) {
	if specialcount != len(templatemap) {
		// A count mismatch only produces a warning; the hard error
		// below was deliberately disabled.
		if DebugLevel > 0 {
			fmt.Println("[Tokenize] Warning: number of specials in map differs from number found")
		}
		// return nil, errors.New("number of specials in map differs from number found")
	}
	return tokens, nil
}
// commentsRe matches an HTML comment "<!-- ... -->", including one
// left unterminated at end of input (the \z alternative). Flags:
// (i) case-insensitive, (s) '.' matches newline, (U) ungreedy.
var commentsRe = regexp.MustCompile(`(?isU)<!--.*(?:-->|\z)`)

// stripComments deletes every HTML comment from the wikitext.
func (a *Article) stripComments(mw string) string {
	return commentsRe.ReplaceAllLiteralString(mw, "")
}
// Open/close tag patterns for the three verbatim region kinds handled
// by stripNowikiPreMath; the single capture group in each pattern is
// the tag name (used as the resulting token's TType).
// NOTE(review): the nowiki patterns exclude '/' from the attribute
// part ([^>/]*), so a self-closing <nowiki/> matches neither pattern,
// while the pre/math patterns allow '/' — confirm the asymmetry is
// intentional.
var nowikiOpenRe = regexp.MustCompile(`(?i)<\s*(nowiki)\s*[^>/]*>`)
var nowikiCloseRe = regexp.MustCompile(`(?i)<(/nowiki)\s*[^>/]*>`)
var preOpenRe = regexp.MustCompile(`(?i)<\s*(pre)\s*[^>]*>`)
var preCloseRe = regexp.MustCompile(`(?i)<(/pre)\s*[^>]*>`)
var mathOpenRe = regexp.MustCompile(`(?i)<\s*(math)\s*[^>]*>`)
var mathCloseRe = regexp.MustCompile(`(?i)<(/math)\s*[^>]*>`)
// ssInt implements sort.Interface for [][]int, ordering the slices by
// their first element. stripNowikiPreMath uses it to sort regexp
// match-index records by match start offset.
type ssInt [][]int

func (a ssInt) Len() int { return len(a) }
func (a ssInt) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ssInt) Less(i, j int) bool { return a[i][0] < a[j][0] }
// stripNowikiPreMath cuts every <nowiki>...</nowiki>, <pre>...</pre>
// and <math>...</math> region out of mw. Each region is replaced in
// the returned text by an 8-byte placeholder ("\x07" plus a 7-digit
// counter) and recorded in the returned map as a Token whose TType is
// the lowercased tag name, TAttr the opening tag's attribute text and
// TText the verbatim content between the tags. A tag left open at end
// of input is implicitly closed at EOF.
func (a *Article) stripNowikiPreMath(mw string) (string, map[string]*Token) {
	// Each submatch-index entry is [matchStart, matchEnd,
	// tagNameStart, tagNameEnd] (one capture group: the tag name).
	nwoc := nowikiOpenRe.FindAllStringSubmatchIndex(mw, -1)
	nwcc := nowikiCloseRe.FindAllStringSubmatchIndex(mw, -1)
	poc := preOpenRe.FindAllStringSubmatchIndex(mw, -1)
	pcc := preCloseRe.FindAllStringSubmatchIndex(mw, -1)
	moc := mathOpenRe.FindAllStringSubmatchIndex(mw, -1)
	mcc := mathCloseRe.FindAllStringSubmatchIndex(mw, -1)
	/*
		nwoc = append(nwoc, []int{len(mw) + 1, len(mw) + 1})
		nwcc = append(nwcc, []int{len(mw) + 1, len(mw) + 1})
		poc = append(poc, []int{len(mw) + 1, len(mw) + 1})
		pcc = append(pcc, []int{len(mw) + 1, len(mw) + 1})
		moc = append(moc, []int{len(mw) + 1, len(mw) + 1})
		mcc = append(mcc, []int{len(mw) + 1, len(mw) + 1})
	*/
	// Tag every match with a type code as an extra 5th element: even
	// codes are openers, odd codes closers, and a closer's code is
	// always its opener's code + 1 (the pairing rule checked below).
	for i := range nwoc {
		nwoc[i] = append(nwoc[i], 0)
	}
	for i := range nwcc {
		nwcc[i] = append(nwcc[i], 1)
	}
	for i := range poc {
		poc[i] = append(poc[i], 2)
	}
	for i := range pcc {
		pcc[i] = append(pcc[i], 3)
	}
	for i := range moc {
		moc[i] = append(moc[i], 4)
	}
	for i := range mcc {
		mcc[i] = append(mcc[i], 5)
	}
	// Merge all matches into one list ordered by start offset.
	am := make([][]int, 0, len(nwoc)+len(nwcc)+len(poc)+len(pcc)+len(moc)+len(mcc))
	am = append(am, nwoc...)
	am = append(am, nwcc...)
	am = append(am, poc...)
	am = append(am, pcc...)
	am = append(am, moc...)
	am = append(am, mcc...)
	sort.Sort(ssInt(am))
	// fmt.Println(am)
	tokens := make(map[string]*Token, len(am))
	if len(am) == 0 {
		return mw, tokens
	}
	// Single pass pairing openers with closers.
	//   ctype:     type code of the currently open tag (-1 when none)
	//   openidx:   index in am of that opener
	//   lastclose: offset just past the last consumed tag
	//   count:     number of placeholders emitted so far
	ctype := -1
	out := ""
	lastclose := 0
	openidx := 0
	count := 0
	for i := range am {
		// fmt.Println("ctype", ctype, "lastclose", lastclose, "count", count, "openidx", openidx, "am[i]", am[i])
		if (ctype != -1) && (am[i][4] == ctype+1) && (am[openidx][1] <= am[i][0]) {
			// closing an open one: emit a placeholder and map it to a
			// token built from the opener/closer pair.
			special := fmt.Sprintf("\x07%07d", count)
			// special := fmt.Sprintf("@%07d", count)
			tokens[special] = &Token{
				TText: mw[am[openidx][1]:am[i][0]],                        // content between the tags
				TType: strings.ToLower(mw[am[openidx][2]:am[openidx][3]]), // tag name
				TAttr: mw[am[openidx][3] : am[openidx][1]-1],              // attributes, trailing '>' dropped
			}
			out += special
			ctype = -1
			lastclose = am[i][1]
			count++
		} else if (ctype == -1) && (am[i][4]&1 == 0) && (lastclose <= am[i][0]) {
			// open a new one: flush the plain text seen since the last
			// consumed tag, then remember this opener.
			out += mw[lastclose:am[i][0]]
			ctype = am[i][4]
			openidx = i
		}
	}
	if ctype != -1 {
		//it's open: close it — the unterminated region runs to EOF.
		special := fmt.Sprintf("\x07%07d", count)
		// special := fmt.Sprintf("@%07d", count)
		tokens[special] = &Token{
			TText: mw[am[openidx][1]:len(mw)],
			TType: strings.ToLower(mw[am[openidx][2]:am[openidx][3]]),
			TAttr: mw[am[openidx][3] : am[openidx][1]-1],
		}
		out += special
		ctype = -1
		count++
	} else {
		// Nothing left open: emit the remaining plain text.
		out += mw[lastclose:]
	}
	return out, tokens
}
// multiLineLinksRe matches a piped wiki link "[[target|label]]" whose
// label part may span several lines (the target part may not).
var multiLineLinksRe = regexp.MustCompile(`(?sm)\[\[[^\n|]*\|.*?\]\]`)

/* TODO: add preprocessing as in Parser.php:pstPass2() to enable pipe tricks
 */

// preprocessLinks folds every multi-line piped link onto one line by
// turning each newline inside the link into a space, so that the
// line-oriented tokenizer sees the whole link at once.
func (a *Article) preprocessLinks(s string) string {
	buf := []byte(s)
	for _, span := range multiLineLinksRe.FindAllIndex(buf, -1) {
		// A '\n' byte never occurs inside a multi-byte UTF-8 rune, so
		// a plain byte scan over the matched span is safe.
		for i := span[0]; i < span[1]; i++ {
			if buf[i] == '\n' {
				buf[i] = ' '
			}
		}
	}
	return string(buf)
}
//var nowikiOpenRe = regexp.MustCompile(`(?i)<\s*nowiki\s*[^>/]*>`)
//var nowikiCloseRe = regexp.MustCompile(`(?i)</nowiki\s*[^>/]*>`)
//var nowikiOpenCloseRe = regexp.MustCompile(`(?i)<nowiki\s*[^>]*/>`)
/*
type WikiParser struct {
mw string
}
func NewWikiParser(mw string) *WikiParser {
return &WikiParser{mw: mw}
}
func (wp *WikiParser) doNowiki() {
openCandidates := nowikiOpenRe.FindAllStringIndex(wp.mw, -1)
closeCandidates := nowikiCloseRe.FindAllStringIndex(wp.mw, -1)
openCloseCandidates := nowikiOpenCloseRe.FindAllStringIndex(wp.mw, -1)
tail := []int{len(wp.mw) + 1, len(wp.mw) + 1}
openCandidates = append(openCandidates, tail)
closeCandidates = append(closeCandidates, tail)
openCloseCandidates = append(openCloseCandidates, tail)
oi := 0
ci := 0
oci := 0
inNowiki := false
ol = make([][]int, 0, len(openCandidates))
cl = make([][]int, 0, len(closeCandidates))
ocl = make([][]int, 0, len(openCloseCandidates))
for {
if oi == len(openCandidates)-1 &&
ci == len(closeCandidates)-1 &&
oci == len(openCloseCandidates)-1 {
break
}
switch {
case openCandidates[oi][0] <= closeCandidates[oi][0] &&
openCandidates[oi][0] <= openCloseloseCandidates[oi][0]:
if !inNowiki {
ol = append(ol.openCandidates[oi])
inNowiki = true
}
oi += 1
case closeCandidates[oi][0] <= openCandidates[oi][0] &&
closeCandidates[oi][0] <= openCloseloseCandidates[oi][0]:
default:
}
}
}
func (wp *WikiParser) Parse() {
doSGML()
doNowiki()
doMath()
doPre()
doBlanks()
doHTMLvalidation()
doReplaceVariables()
doHR()
doAllQuotes()
doHeadings()
doLists()
doDates()
doExternalLinks()
doInternalLinks()
doISBN()
doRecombine()
}
*/

59
utils.go Normal file
View file

@ -0,0 +1,59 @@
/*
Copyright (C) IBM Corporation 2015, Michele Franceschini <franceschini@us.ibm.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gowiki
import (
// "fmt"
"strings"
)
// CheckRedirect reports whether the article is a redirect page. It
// scans only the leading tokens for a "redirect" marker followed by a
// "link" token; on success it returns true and the redirect target,
// otherwise (false, nil).
func (a *Article) CheckRedirect() (bool, *WikiLink) {
	sawRedirect := false
	for i, tok := range a.Tokens {
		// A redirect directive must sit at the very top of the
		// article, so give up after the first few tokens.
		if i > 10 {
			break
		}
		if tok.TType == "redirect" {
			sawRedirect = true
		} else if tok.TType == "link" && sawRedirect {
			return true, &tok.TLink
		}
	}
	return false, nil
}
// CheckDisambiguation reports whether the article is a disambiguation
// page, detected by the presence of a known disambiguation template
// (any name containing "disambig", or one of a short fixed list).
func (a *Article) CheckDisambiguation() bool {
	for _, t := range a.Templates {
		if t.Typ != "normal" {
			continue
		}
		name := strings.ToLower(t.Name)
		if strings.Contains(name, "disambig") {
			return true
		}
		switch name {
		case "dab", "geodis", "hndis", "hndis-cleanup", "numberdis":
			return true
		}
	}
	return false
}