diff options
Diffstat (limited to 'hugofs/files/classifier.go')
-rw-r--r-- | hugofs/files/classifier.go | 203 |
1 files changed, 203 insertions, 0 deletions
diff --git a/hugofs/files/classifier.go b/hugofs/files/classifier.go new file mode 100644 index 000000000..5e26bbac0 --- /dev/null +++ b/hugofs/files/classifier.go @@ -0,0 +1,203 @@ +// Copyright 2019 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package files + +import ( + "bufio" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" + "unicode" + + "github.com/spf13/afero" +) + +var ( + // This should be the only list of valid extensions for content files. + contentFileExtensions = []string{ + "html", "htm", + "mdown", "markdown", "md", + "asciidoc", "adoc", "ad", + "rest", "rst", + "mmark", + "org", + "pandoc", "pdc"} + + contentFileExtensionsSet map[string]bool + + htmlFileExtensions = []string{ + "html", "htm"} + + htmlFileExtensionsSet map[string]bool +) + +func init() { + contentFileExtensionsSet = make(map[string]bool) + for _, ext := range contentFileExtensions { + contentFileExtensionsSet[ext] = true + } + htmlFileExtensionsSet = make(map[string]bool) + for _, ext := range htmlFileExtensions { + htmlFileExtensionsSet[ext] = true + } +} + +func IsContentFile(filename string) bool { + return contentFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")] +} + +func IsHTMLFile(filename string) bool { + return htmlFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")] +} + +func IsContentExt(ext string) bool { + return contentFileExtensionsSet[ext] +} + +type ContentClass string + +const ( + ContentClassLeaf ContentClass = "leaf" + ContentClassBranch ContentClass = "branch" + ContentClassFile ContentClass = "zfile" // Sort below + ContentClassContent ContentClass = "zcontent" +) + +func (c ContentClass) IsBundle() bool { + return c == ContentClassLeaf || c == ContentClassBranch +} + +func ClassifyContentFile(filename string, open func() (afero.File, error)) ContentClass { + if !IsContentFile(filename) { + return ContentClassFile + } + + if IsHTMLFile(filename) { + // We need to look inside the file. If the first non-whitespace + // character is a "<", then we treat it as a regular file. + // Eearlier we created pages for these files, but that had all sorts + // of troubles, and isn't what it says in the documentation. + // See https://github.com/gohugoio/hugo/issues/7030 + if open == nil { + panic(fmt.Sprintf("no file opener provided for %q", filename)) + } + + f, err := open() + if err != nil { + return ContentClassFile + } + ishtml := isHTMLContent(f) + f.Close() + if ishtml { + return ContentClassFile + } + + } + + if strings.HasPrefix(filename, "_index.") { + return ContentClassBranch + } + + if strings.HasPrefix(filename, "index.") { + return ContentClassLeaf + } + + return ContentClassContent +} + +var htmlComment = []rune{'<', '!', '-', '-'} + +func isHTMLContent(r io.Reader) bool { + br := bufio.NewReader(r) + i := 0 + for { + c, _, err := br.ReadRune() + if err != nil { + break + } + + if i > 0 { + if i >= len(htmlComment) { + return false + } + + if c != htmlComment[i] { + return true + } + + i++ + continue + } + + if !unicode.IsSpace(c) { + if i == 0 && c != '<' { + return false + } + i++ + } + } + return true +} + +const ( + ComponentFolderArchetypes = "archetypes" + ComponentFolderStatic = "static" + ComponentFolderLayouts = "layouts" + ComponentFolderContent = "content" + ComponentFolderData = "data" + ComponentFolderAssets = "assets" + ComponentFolderI18n = "i18n" + + FolderResources = "resources" +) + +var ( + ComponentFolders = []string{ + ComponentFolderArchetypes, + ComponentFolderStatic, + ComponentFolderLayouts, + ComponentFolderContent, + ComponentFolderData, + ComponentFolderAssets, + ComponentFolderI18n, + } + + componentFoldersSet = make(map[string]bool) +) + +func init() { + sort.Strings(ComponentFolders) + for _, f := range ComponentFolders { + componentFoldersSet[f] = true + } +} + +// ResolveComponentFolder returns "content" from "content/blog/foo.md" etc. +func ResolveComponentFolder(filename string) string { + filename = strings.TrimPrefix(filename, string(os.PathSeparator)) + for _, cf := range ComponentFolders { + if strings.HasPrefix(filename, cf) { + return cf + } + } + + return "" +} + +func IsComponentFolder(name string) bool { + return componentFoldersSet[name] +} |