media: Also consider extension in FromContent

As used in `resources.GetRemote`.

This will now reject image content with a text file extension and text content with an image file extension.
This commit is contained in:
Bjørn Erik Pedersen
2021-12-21 10:35:33 +01:00
parent ce04011096
commit 6779117f72
7 changed files with 85 additions and 43 deletions

View File

@@ -28,6 +28,8 @@ import (
"github.com/mitchellh/mapstructure"
)
var zero Type
const (
defaultDelimiter = "."
)
@@ -64,16 +66,14 @@ type SuffixInfo struct {
// FromContent resolves the Type primarily using http.DetectContentType.
// If http.DetectContentType resolves to application/octet-stream, a zero Type is returned.
// If http.DetectContentType resolves to text/plain or application/xml, we try to get more specific using types and ext.
func FromContent(types Types, ext string, content []byte) Type {
ext = strings.TrimPrefix(ext, ".")
func FromContent(types Types, extensionHints []string, content []byte) Type {
t := strings.Split(http.DetectContentType(content), ";")[0]
var m Type
if t == "application/octet-stream" {
return m
return zero
}
var found bool
m, found = types.GetByType(t)
m, found := types.GetByType(t)
if !found {
if t == "text/xml" {
// This is how it's configured in Hugo by default.
@@ -81,19 +81,36 @@ func FromContent(types Types, ext string, content []byte) Type {
}
}
if !found || ext == "" {
return m
if !found {
return zero
}
if m.Type() == "text/plain" || m.Type() == "application/xml" {
// http.DetectContentType isn't brilliant when it comes to common text formats, so we need to do better.
// For now we say that if it's detected to be a text format and the extension/content type in header reports
// it to be a text format, then we use that.
mm, _, found := types.GetFirstBySuffix(ext)
if found && mm.IsText() {
return mm
var mm Type
for _, extension := range extensionHints {
extension = strings.TrimPrefix(extension, ".")
mm, _, found = types.GetFirstBySuffix(extension)
if found {
break
}
}
if found {
if m == mm {
return m
}
if m.IsText() && mm.IsText() {
// http.DetectContentType isn't brilliant when it comes to common text formats, so we need to do better.
// For now we say that if it's detected to be a text format and the extension/content type in header reports
// it to be a text format, then we use that.
return mm
}
// E.g. an image with a *.js extension.
return zero
}
return m
}

View File

@@ -15,7 +15,6 @@ package media
import (
"encoding/json"
"fmt"
"io/ioutil"
"path/filepath"
"sort"
@@ -194,15 +193,39 @@ func TestFromContent(t *testing.T) {
content, err := ioutil.ReadFile(filename)
c.Assert(err, qt.IsNil)
ext := strings.TrimPrefix(paths.Ext(filename), ".")
fmt.Println("=>", ext)
var exts []string
if ext == "jpg" {
exts = append(exts, "foo", "bar", "jpg")
} else {
exts = []string{ext}
}
expected, _, found := mtypes.GetFirstBySuffix(ext)
c.Assert(found, qt.IsTrue)
got := FromContent(mtypes, ext, content)
got := FromContent(mtypes, exts, content)
c.Assert(got, qt.Equals, expected)
})
}
}
// TestFromContentFakes runs FromContent against every file matching
// ./testdata/fake.* and asserts that the zero Type is returned for each,
// using the file's own extension (without the leading dot) as the only hint.
// The fixtures appear to be files whose extension disagrees with their
// content (e.g. an image saved with a .js extension).
func TestFromContentFakes(t *testing.T) {
	c := qt.New(t)

	fakes, err := filepath.Glob("./testdata/fake.*")
	c.Assert(err, qt.IsNil)

	defaults := DefaultTypes

	for _, fake := range fakes {
		c.Run(filepath.Base(fake), func(c *qt.C) {
			data, readErr := ioutil.ReadFile(fake)
			c.Assert(readErr, qt.IsNil)

			// Only the extension is offered as a hint; the content must not match it.
			hint := strings.TrimPrefix(paths.Ext(fake), ".")
			c.Assert(FromContent(defaults, []string{hint}, data), qt.Equals, zero)
		})
	}
}
func TestDecodeTypes(t *testing.T) {
c := qt.New(t)

BIN
media/testdata/fake.js vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

3
media/testdata/fake.png vendored Normal file
View File

@@ -0,0 +1,3 @@
function foo() {
return "foo";
}

BIN
media/testdata/resource.jpe vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB