Diffstat (limited to 'parser/pageparser/pagelexer_intro.go')
-rw-r--r--  parser/pageparser/pagelexer_intro.go  195
1 file changed, 195 insertions(+), 0 deletions(-)
diff --git a/parser/pageparser/pagelexer_intro.go b/parser/pageparser/pagelexer_intro.go
new file mode 100644
index 000000000..539e6cfaa
--- /dev/null
+++ b/parser/pageparser/pagelexer_intro.go
@@ -0,0 +1,195 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.).
+// The implementation is heavily inspired by Rob Pike's talk "Lexical Scanning in Go".
+// It's on YouTube, Google it!
+// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
+package pageparser
+
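+// lexIntroSection scans the very start of the content file. It skips a leading
+// byte order mark and any blank lines, then dispatches to the front matter
+// lexer matching the first significant character: '+' (TOML), '-' (YAML),
+// '{' (JSON) or '#' (Org mode). An opening HTML comment is treated as possibly
+// commented-out front matter; any other content ends the intro section.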
+func lexIntroSection(l *pageLexer) stateFunc {
+	l.summaryDivider = summaryDivider
+
+LOOP:
+	for {
+		r := l.next()
+		if r == eof {
+			break
+		}
+
+		switch {
+		case r == '+':
+			return l.lexFrontMatterSection(TypeFrontMatterTOML, r, "TOML", delimTOML)
+		case r == '-':
+			return l.lexFrontMatterSection(TypeFrontMatterYAML, r, "YAML", delimYAML)
+		case r == '{':
+			return lexFrontMatterJSON
+		case r == '#':
+			return lexFrontMatterOrgMode
+		case r == byteOrderMark:
+			l.emit(TypeIgnore)
+		case !isSpace(r) && !isEndOfLine(r):
+			if r == '<' {
+				l.backup()
+				if l.hasPrefix(htmlCommentStart) {
+					// This may be commented out front matter, which should
+					// still be read.
+					l.consumeToNextLine()
+					l.isInHTMLComment = true
+					l.emit(TypeIgnore)
+					continue LOOP
+				} else {
+					return l.errorf("plain HTML documents not supported")
+				}
+			}
+			break LOOP
+		}
+	}
+
+	// Now move on to the shortcodes.
+	return lexMainSection
+}
+
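+// lexEndFromtMatterHTMLComment consumes everything up to and including the
+// closing "-->" of the comment wrapping the front matter and emits it as
+// TypeIgnore, failing if the comment is never closed.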
+func lexEndFromtMatterHTMLComment(l *pageLexer) stateFunc {
+	l.isInHTMLComment = false
+	right := l.index(htmlCommentEnd)
+	if right == -1 {
+		return l.errorf("starting HTML comment with no end")
+	}
+	l.pos += right + len(htmlCommentEnd)
+	l.emit(TypeIgnore)
+
+	// Now move on to the shortcodes.
+	return lexMainSection
+}
+
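+// lexFrontMatterJSON reads a JSON object by tracking brace depth, ignoring
+// braces inside quoted strings and skipping escaped characters, then emits it
+// (plus any trailing newline) as TypeFrontMatterJSON.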
+func lexFrontMatterJSON(l *pageLexer) stateFunc {
+	// Include the left delimiter
+	l.backup()
+
+	var (
+		inQuote bool
+		level   int
+	)
+
+	for {
+
+		r := l.next()
+
+		switch {
+		case r == eof:
+			return l.errorf("unexpected EOF parsing JSON front matter")
+		case r == '{':
+			if !inQuote {
+				level++
+			}
+		case r == '}':
+			if !inQuote {
+				level--
+			}
+		case r == '"':
+			inQuote = !inQuote
+		case r == '\\':
+			// This may be an escaped quote. Make sure it's not marked as a
+			// real one.
+			l.next()
+		}
+
+		if level == 0 {
+			break
+		}
+	}
+
+	l.consumeCRLF()
+	l.emit(TypeFrontMatterJSON)
+
+	return lexMainSection
+}
+
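+// lexFrontMatterOrgMode switches the summary divider to the Org mode variant,
+// then reads consecutive "#+" keyword lines (e.g. #+TITLE:) and emits them as
+// TypeFrontMatterORG.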
+func lexFrontMatterOrgMode(l *pageLexer) stateFunc {
+	/*
+		#+TITLE: Test File For chaseadamsio/goorgeous
+		#+AUTHOR: Chase Adams
+		#+DESCRIPTION: Just another golang parser for org content!
+	*/
+
+	l.summaryDivider = summaryDividerOrg
+
+	l.backup()
+
+	if !l.hasPrefix(delimOrg) {
+		return lexMainSection
+	}
+
+	// Read lines until we no longer see a #+ prefix
+LOOP:
+	for {
+
+		r := l.next()
+
+		switch {
+		case r == '\n':
+			if !l.hasPrefix(delimOrg) {
+				break LOOP
+			}
+		case r == eof:
+			break LOOP
+
+		}
+	}
+
+	l.emit(TypeFrontMatterORG)
+
+	return lexMainSection
+
+}
+
+// Handle YAML or TOML front matter.
+func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string, delim []byte) stateFunc {
+
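+	// The first delimiter rune was already consumed by lexIntroSection; the
+	// remaining two must match it.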
+	for i := 0; i < 2; i++ {
+		if r := l.next(); r != delimr {
+			return l.errorf("invalid %s delimiter", name)
+		}
+	}
+
+	// Let front matter start at line 1
+	wasEndOfLine := l.consumeCRLF()
+	// We don't care about the delimiters.
+	l.ignore()
+
+	var r rune
+
+	for {
+		if !wasEndOfLine {
+			r = l.next()
+			if r == eof {
+				return l.errorf("EOF looking for end %s front matter delimiter", name)
+			}
+		}
+
+		if wasEndOfLine || isEndOfLine(r) {
+			if l.hasPrefix(delim) {
+				l.emit(tp)
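+				// Skip over the closing delimiter; "---" and "+++" are both three bytes.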
+				l.pos += 3
+				l.consumeCRLF()
+				l.ignore()
+				break
+			}
+		}
+
+		wasEndOfLine = false
+	}
+
+	return lexMainSection
+}