1 files changed, 195 insertions, 0 deletions
diff --git a/parser/pageparser/pagelexer_intro.go b/parser/pageparser/pagelexer_intro.go
new file mode 100644
index 000000000..539e6cfaa
--- /dev/null
+++ b/parser/pageparser/pagelexer_intro.go
@@ -0,0 +1,195 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.) in Hugo.
+// This implementation is highly inspired by the great talk given by Rob Pike called "Lexical Scanning in Go".
+// It's on YouTube, Google it!
+// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
+package pageparser
+
+// lexIntroSection is the lexer state for the intro section of a page. It skips
+// leading whitespace, line endings and byte order marks, then picks the next
+// state from the first other rune: '+' (TOML), '-' (YAML), '{' (JSON) or '#'
+// (Org mode) front matter. A line opening an HTML comment is skipped, any other
+// '<' is an error, and every other rune, or end of input, leads to lexMainSection.
+func lexIntroSection(l *pageLexer) stateFunc {
+	l.summaryDivider = summaryDivider
+
+LOOP:
+	for {
+		switch r := l.next(); {
+		case r == eof:
+			break LOOP
+		case r == '+':
+			return l.lexFrontMatterSection(TypeFrontMatterTOML, r, "TOML", delimTOML)
+		case r == '-':
+			return l.lexFrontMatterSection(TypeFrontMatterYAML, r, "YAML", delimYAML)
+		case r == '{':
+			return lexFrontMatterJSON
+		case r == '#':
+			return lexFrontMatterOrgMode
+		case r == byteOrderMark:
+			l.emit(TypeIgnore)
+		case isSpace(r), isEndOfLine(r):
+			// Nothing to do for whitespace and line endings, keep reading.
+		case r != '<':
+			// Regular content, so there is no front matter to lex.
+			break LOOP
+		default:
+			l.backup()
+			if !l.hasPrefix(htmlCommentStart) {
+				return l.errorf("plain HTML documents not supported")
+			}
+			// This may be commented out front matter, which should still be read.
+			l.consumeToNextLine()
+			l.isInHTMLComment = true
+			l.emit(TypeIgnore)
+		}
+	}
+
+	return lexMainSection
+}
+
+// lexEndFromtMatterHTMLComment clears the HTML comment flag, advances past the
+// next htmlCommentEnd and emits a TypeIgnore item; a missing end is an error.
+func lexEndFromtMatterHTMLComment(l *pageLexer) stateFunc {
+	l.isInHTMLComment = false
+	offset := l.index(htmlCommentEnd)
+	if offset == -1 {
+		return l.errorf("starting HTML comment with no end")
+	}
+	l.pos = l.pos + offset + len(htmlCommentEnd)
+	l.emit(TypeIgnore)
+	return lexMainSection
+}
+
+func lexFrontMatterJSON(l *pageLexer) stateFunc {
+	// Include the left delimiter
+	l.backup()
+
+	var (
+		inQuote bool
+		level   int
+	)
+
+	for {
+
+		r := l.next()
+
+		switch {
+		case r == eof:
+			return l.errorf("unexpected EOF parsing JSON front matter")
+		case r == '{':
+			if !inQuote {
+				level++
+			}
+		case r == '}':
+			if !inQuote {
+				level--
+			}
+		case r == '"':
+			inQuote = !inQuote
+		case r == '\\':
+			// This may be an escaped quote. Make sure it's not marked as a
+			// real one.
+			l.next()
+		}
+
+		if level == 0 {
+			break
+		}
+	}
+
+	l.consumeCRLF()
+	l.emit(TypeFrontMatterJSON)
+
+	return lexMainSection
+}
+
+// lexFrontMatterOrgMode lexes Org mode front matter, a run of lines that each
+// start with delimOrg, for example:
+//
+//	#+TITLE: Test File For chaseadamsio/goorgeous
+//	#+AUTHOR: Chase Adams
+//	#+DESCRIPTION: Just another golang parser for org content!
+//
+// It also switches the summary divider to summaryDividerOrg. If the input
+// does not start with delimOrg, nothing is emitted here. Either way lexing
+// continues with lexMainSection.
+func lexFrontMatterOrgMode(l *pageLexer) stateFunc {
+	l.summaryDivider = summaryDividerOrg
+
+	// Step back so that the '#' read by the previous state is included.
+	l.backup()
+
+	if !l.hasPrefix(delimOrg) {
+		return lexMainSection
+	}
+
+	// Read on, line by line, until a line no longer has the #+ prefix or the
+	// input ends.
+	for {
+		r := l.next()
+		if r == eof {
+			break
+		}
+		if r == '\n' && !l.hasPrefix(delimOrg) {
+			// The line that follows is not front matter.
+			break
+		}
+	}
+
+	l.emit(TypeFrontMatterORG)
+
+	return lexMainSection
+}
+
+// lexFrontMatterSection handles YAML or TOML front matter. The caller has
+// already read the first delimr of the opening delimiter; the next two runes
+// must match it. Everything up to a line starting with delim is emitted as tp;
+// the delimiters themselves are ignored. name is used in error messages, which
+// report a malformed opening delimiter or a missing closing one.
+func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string, delim []byte) stateFunc {
+	for i := 0; i < 2; i++ {
+		if r := l.next(); r != delimr {
+			return l.errorf("invalid %s delimiter", name)
+		}
+	}
+
+	// Let front matter start at line 1
+	wasEndOfLine := l.consumeCRLF()
+	// We don't care about the delimiters.
+	l.ignore()
+
+	var r rune
+	for {
+		if !wasEndOfLine {
+			r = l.next()
+			if r == eof {
+				return l.errorf("EOF looking for end %s front matter delimiter", name)
+			}
+		}
+		// The closing delimiter must be at the start of a line.
+		if (wasEndOfLine || isEndOfLine(r)) && l.hasPrefix(delim) {
+			l.emit(tp)
+			// Skip the delimiter itself, whatever its length.
+			l.pos += len(delim)
+			l.consumeCRLF()
+			l.ignore()
+			break
+		}
+		wasEndOfLine = false
+	}
+
+	return lexMainSection
+}