Switch EXIF library

Closes #10855
Closes #8586
Closes #8996
This commit is contained in:
Bjørn Erik Pedersen
2024-07-07 12:54:30 +02:00
parent a28bed0817
commit 72ff937e11
12 changed files with 297 additions and 194 deletions

View File

@@ -14,25 +14,18 @@
package exif
import (
"bytes"
"fmt"
"io"
"math"
"math/big"
"regexp"
"strconv"
"strings"
"time"
"unicode"
"unicode/utf8"
"github.com/bep/imagemeta"
"github.com/bep/logg"
"github.com/bep/tmc"
_exif "github.com/rwcarlsen/goexif/exif"
"github.com/rwcarlsen/goexif/tiff"
)
const exifTimeLayout = "2006:01:02 15:04:05"
// ExifInfo holds the decoded Exif data for an Image.
type ExifInfo struct {
// GPS latitude in degrees.
@@ -53,6 +46,15 @@ type Decoder struct {
excludeFieldsrRe *regexp.Regexp
noDate bool
noLatLong bool
warnl logg.LevelLogger
}
// shouldInclude reports whether the name s passes the include filter.
// When includeFieldsRe is nil, every name passes.
func (d *Decoder) shouldInclude(s string) bool {
	if d.includeFieldsRe == nil {
		return true
	}
	return d.includeFieldsRe.MatchString(s)
}
// shouldExclude reports whether the name s matches the exclude filter.
// When excludeFieldsrRe is nil, nothing is excluded.
func (d *Decoder) shouldExclude(s string) bool {
	if d.excludeFieldsrRe == nil {
		return false
	}
	return d.excludeFieldsrRe.MatchString(s)
}
func IncludeFields(expression string) func(*Decoder) error {
@@ -91,6 +93,13 @@ func WithDateDisabled(disabled bool) func(*Decoder) error {
}
}
// WithWarnLogger returns a Decoder option that stores warnl as the decoder's
// warn logger. The returned option always returns a nil error.
func WithWarnLogger(warnl logg.LevelLogger) func(*Decoder) error {
	apply := func(dec *Decoder) error {
		dec.warnl = warnl
		return nil
	}
	return apply
}
func compileRegexp(expression string) (*regexp.Regexp, error) {
expression = strings.TrimSpace(expression)
if expression == "" {
@@ -115,148 +124,222 @@ func NewDecoder(options ...func(*Decoder) error) (*Decoder, error) {
return d, nil
}
func (d *Decoder) Decode(r io.Reader) (ex *ExifInfo, err error) {
// isTimeTag reports whether the tag name contains "Time" (case-sensitive).
var isTimeTag = func(tag string) bool {
	return strings.Contains(tag, "Time")
}

// isGPSTag reports whether the tag name starts with "GPS" (case-sensitive).
var isGPSTag = func(tag string) bool {
	return strings.HasPrefix(tag, "GPS")
}
// Filename is only used for logging.
func (d *Decoder) Decode(filename string, format imagemeta.ImageFormat, r io.Reader) (ex *ExifInfo, err error) {
defer func() {
if r := recover(); r != nil {
err = fmt.Errorf("exif failed: %v", r)
}
}()
var x *_exif.Exif
x, err = _exif.Decode(r)
if err != nil {
if err.Error() == "EOF" {
// Found no Exif
return nil, nil
}
return
var tagInfos imagemeta.Tags
handleTag := func(ti imagemeta.TagInfo) error {
tagInfos.Add(ti)
return nil
}
// shouldInclude decides, per tag, whether the tag should be handled.
shouldInclude := func(ti imagemeta.TagInfo) bool {
	if ti.Source == imagemeta.EXIF {
		switch {
		case !d.noDate && isTimeTag(ti.Tag):
			// The time tags are needed to calculate the date.
			return true
		case !d.noLatLong && isGPSTag(ti.Tag):
			// The GPS tags are needed to calculate the lat/long.
			return true
		case !strings.HasPrefix(ti.Namespace, "IFD0"):
			// Drop thumbnail tags.
			return false
		}
	}

	// Everything else is subject to the exclude filter first, then the
	// include filter.
	return !d.shouldExclude(ti.Tag) && d.shouldInclude(ti.Tag)
}
// warnf forwards warnings to the configured warn logger, prefixed with the
// quoted filename. It stays nil when no warn logger is set.
var warnf func(string, ...any)
if d.warnl != nil {
	// There should be very few warnings (fingers crossed!),
	// but this will typically be unrecognized formats.
	// To be able to possibly get rid of these warnings,
	// we need to know what images are causing them.

	// The prefix ends up inside the format string handed to Logf, so any
	// '%' in the filename must be escaped; otherwise it would be read as a
	// formatting verb and garble the message and its arguments.
	prefix := strings.ReplaceAll(fmt.Sprintf("%q", filename), "%", "%%")
	warnf = func(msg string, args ...any) {
		// NOTE(review): the trailing ": " is kept from the original
		// format ("%q: %s: ") — confirm it is intentional.
		d.warnl.Logf(prefix+": "+msg+": ", args...)
	}
}
err = imagemeta.Decode(
imagemeta.Options{
R: r.(io.ReadSeeker),
ImageFormat: format,
ShouldHandleTag: shouldInclude,
HandleTag: handleTag,
Sources: imagemeta.EXIF, // For now. TODO(bep)
Warnf: warnf,
},
)
var tm time.Time
var lat, long float64
if !d.noDate {
tm, _ = x.DateTime()
tm, _ = tagInfos.GetDateTime()
}
if !d.noLatLong {
lat, long, _ = x.LatLong()
if math.IsNaN(lat) {
lat = 0
}
if math.IsNaN(long) {
long = 0
}
lat, long, _ = tagInfos.GetLatLong()
}
walker := &exifWalker{x: x, vals: make(map[string]any), includeMatcher: d.includeFieldsRe, excludeMatcher: d.excludeFieldsrRe}
if err = x.Walk(walker); err != nil {
return
tags := make(map[string]any)
for k, v := range tagInfos.All() {
if d.shouldExclude(k) {
continue
}
if !d.shouldInclude(k) {
continue
}
tags[k] = v.Value
}
ex = &ExifInfo{Lat: lat, Long: long, Date: tm, Tags: walker.vals}
ex = &ExifInfo{Lat: lat, Long: long, Date: tm, Tags: tags}
return
}
// decodeTag converts the TIFF tag t of field f into a Go value.
//
// String and undefined values are passed through nullString; when the field
// name contains "DateTime" and the string parses via tryParseDate, the parsed
// time is returned instead. Tags of format tiff.OtherVal yield the string
// "unknown". Rational, float and integer tags are collected element by
// element: when t.Count is 1 and exactly one element was collected, that
// element is returned, otherwise the slice is returned.
// The returned error is always nil.
func decodeTag(x *_exif.Exif, f _exif.FieldName, t *tiff.Tag) (any, error) {
	switch t.Format() {
	case tiff.OtherVal:
		return "unknown", nil
	case tiff.StringVal, tiff.UndefVal:
		text := nullString(t.Val)
		if !strings.Contains(string(f), "DateTime") {
			return text, nil
		}
		if parsed, err := tryParseDate(x, text); err == nil {
			return parsed, nil
		}
		// Not a parseable date; fall back to the plain string.
		return text, nil
	}

	var values []any
	for idx := 0; idx < int(t.Count); idx++ {
		switch t.Format() {
		case tiff.IntVal:
			n, _ := t.Int(idx)
			values = append(values, n)
		case tiff.FloatVal:
			fl, _ := t.Float(idx)
			values = append(values, fl)
		case tiff.RatVal:
			num, den, _ := t.Rat2(idx)
			ratio := big.NewRat(num, den)
			// Whole numbers and values greater than 1 are stored as
			// float64; everything else keeps its exact *big.Rat form.
			if !ratio.IsInt() && ratio.Cmp(big.NewRat(1, 1)) != 1 {
				values = append(values, ratio)
				continue
			}
			asFloat, _ := ratio.Float64()
			values = append(values, asFloat)
		}
	}

	if t.Count == 1 && len(values) == 1 {
		return values[0], nil
	}
	return values, nil
}
// tryParseDate parses s with exifTimeLayout after trimming trailing NUL
// bytes. The location returned by x.TimeZone is used when it is non-nil,
// otherwise time.Local.
//
// Code borrowed from exif.DateTime and adjusted.
func tryParseDate(x *_exif.Exif, s string) (time.Time, error) {
	// TODO(bep): look for timezone offset, GPS time, etc.
	loc, _ := x.TimeZone()
	if loc == nil {
		loc = time.Local
	}

	trimmed := strings.TrimRight(s, "\x00")
	return time.ParseInLocation(exifTimeLayout, trimmed, loc)
}
// exifWalker collects decoded tag values, keyed by field name, as its Walk
// method is called for each field.
type exifWalker struct {
// x is handed to decodeTag for every walked field.
x *_exif.Exif
// vals receives the decoded values, keyed by field name.
vals map[string]any
// includeMatcher, when non-nil, limits collection to matching field names.
includeMatcher *regexp.Regexp
// excludeMatcher, when non-nil, skips matching field names.
excludeMatcher *regexp.Regexp
}
// Walk decodes tag for field f and stores the result in e.vals under the
// field name. Fields that match excludeMatcher, or that fail to match a
// non-nil includeMatcher, are skipped and nil is returned. An error from
// decodeTag is returned as-is and nothing is stored.
func (e *exifWalker) Walk(f _exif.FieldName, tag *tiff.Tag) error {
	key := string(f)

	excluded := e.excludeMatcher != nil && e.excludeMatcher.MatchString(key)
	if excluded {
		return nil
	}

	included := e.includeMatcher == nil || e.includeMatcher.MatchString(key)
	if !included {
		return nil
	}

	decoded, err := decodeTag(e.x, f, tag)
	if err == nil {
		e.vals[key] = decoded
	}
	return err
}
// nullString decodes in as UTF-8 and returns it with every rune for which
// unicode.IsGraphic reports false (NUL bytes, tabs, newlines, ...) removed.
func nullString(in []byte) string {
	var out bytes.Buffer
	for pos := 0; pos < len(in); {
		r, width := utf8.DecodeRune(in[pos:])
		pos += width
		if !unicode.IsGraphic(r) {
			continue
		}
		out.WriteRune(r)
	}
	return out.String()
}
var tcodec *tmc.Codec
func init() {
// newIntadapter returns a tmc.Adapter for the integer type of target.
// Values are written as base-10 strings and parsed back into the same
// concrete integer type as target.
newIntadapter := func(target any) tmc.Adapter {
	// bitSize is the size handed to strconv; 0 stands for the
	// platform-sized int and uint. isSigned selects ParseInt over ParseUint.
	var bitSize int
	var isSigned bool

	switch target.(type) {
	case int:
		bitSize = 0
		isSigned = true
	case int8:
		bitSize = 8
		isSigned = true
	case int16:
		bitSize = 16
		isSigned = true
	case int32:
		bitSize = 32
		isSigned = true
	case int64:
		bitSize = 64
		isSigned = true
	case uint:
		bitSize = 0
	case uint8:
		bitSize = 8
	case uint16:
		bitSize = 16
	case uint32:
		bitSize = 32
	case uint64:
		bitSize = 64
	}

	// intFromString parses s and returns it as the concrete type of target.
	intFromString := func(s string) (any, error) {
		if bitSize == 0 {
			if _, ok := target.(uint); ok {
				// strconv.Atoi would return an int here and reject
				// values above the int range; parse as unsigned so the
				// result has the adapter's own type.
				u, err := strconv.ParseUint(s, 10, 0)
				if err != nil {
					return 0, err
				}
				return uint(u), nil
			}
			return strconv.Atoi(s)
		}

		if isSigned {
			i, err := strconv.ParseInt(s, 10, bitSize)
			if err != nil {
				return 0, err
			}
			switch target.(type) {
			case int8:
				return int8(i), nil
			case int16:
				return int16(i), nil
			case int32:
				return int32(i), nil
			case int64:
				return i, nil
			}
			return 0, fmt.Errorf("unsupported target type %T", target)
		}

		u, err := strconv.ParseUint(s, 10, bitSize)
		if err != nil {
			return 0, err
		}
		switch target.(type) {
		case uint8:
			return uint8(u), nil
		case uint16:
			return uint16(u), nil
		case uint32:
			return uint32(u), nil
		case uint64:
			return u, nil
		}

		return 0, fmt.Errorf("unsupported target type %T", target)
	}

	// intToString renders any integer value in base 10.
	intToString := func(v any) (string, error) {
		return fmt.Sprintf("%d", v), nil
	}

	return tmc.NewAdapter(target, intFromString, intToString)
}
ru, _ := imagemeta.NewRat[uint32](1, 2)
ri, _ := imagemeta.NewRat[int32](1, 2)
tmcAdapters := []tmc.Adapter{
tmc.NewAdapter(ru, nil, nil),
tmc.NewAdapter(ri, nil, nil),
newIntadapter(int(1)),
newIntadapter(int8(1)),
newIntadapter(int16(1)),
newIntadapter(int32(1)),
newIntadapter(int64(1)),
newIntadapter(uint(1)),
newIntadapter(uint8(1)),
newIntadapter(uint16(1)),
newIntadapter(uint32(1)),
newIntadapter(uint64(1)),
}
tmcAdapters = append(tmc.DefaultTypeAdapters, tmcAdapters...)
var err error
tcodec, err = tmc.New()
tcodec, err = tmc.New(tmc.WithTypeAdapters(tmcAdapters))
if err != nil {
panic(err)
}

View File

@@ -15,13 +15,12 @@ package exif
import (
"encoding/json"
"math/big"
"os"
"path/filepath"
"testing"
"time"
"github.com/gohugoio/hugo/htesting/hqt"
"github.com/bep/imagemeta"
"github.com/google/go-cmp/cmp"
qt "github.com/frankban/quicktest"
@@ -35,11 +34,12 @@ func TestExif(t *testing.T) {
d, err := NewDecoder(IncludeFields("Lens|Date"))
c.Assert(err, qt.IsNil)
x, err := d.Decode(f)
x, err := d.Decode("", imagemeta.JPEG, f)
c.Assert(err, qt.IsNil)
c.Assert(x.Date.Format("2006-01-02"), qt.Equals, "2017-10-27")
// Malaga: https://goo.gl/taazZy
c.Assert(x.Lat, qt.Equals, float64(36.59744166666667))
c.Assert(x.Long, qt.Equals, float64(-4.50846))
@@ -49,9 +49,9 @@ func TestExif(t *testing.T) {
c.Assert(ok, qt.Equals, true)
c.Assert(lensModel, qt.Equals, "smc PENTAX-DA* 16-50mm F2.8 ED AL [IF] SDM")
v, found = x.Tags["DateTime"]
v, found = x.Tags["ModifyDate"]
c.Assert(found, qt.Equals, true)
c.Assert(v, hqt.IsSameType, time.Time{})
c.Assert(v, qt.Equals, "2017:11:23 09:56:54")
// Verify that it survives a round-trip to JSON and back.
data, err := json.Marshal(x)
@@ -72,8 +72,8 @@ func TestExifPNG(t *testing.T) {
d, err := NewDecoder()
c.Assert(err, qt.IsNil)
_, err = d.Decode(f)
c.Assert(err, qt.Not(qt.IsNil))
_, err = d.Decode("", imagemeta.PNG, f)
c.Assert(err, qt.IsNil)
}
func TestIssue8079(t *testing.T) {
@@ -85,28 +85,11 @@ func TestIssue8079(t *testing.T) {
d, err := NewDecoder()
c.Assert(err, qt.IsNil)
x, err := d.Decode(f)
x, err := d.Decode("", imagemeta.JPEG, f)
c.Assert(err, qt.IsNil)
c.Assert(x.Tags["ImageDescription"], qt.Equals, "Città del Vaticano #nanoblock #vatican #vaticancity")
}
// TestNullString checks that nullString leaves graphic characters, including
// multi-byte ones, untouched.
func TestNullString(t *testing.T) {
	c := qt.New(t)

	cases := []struct {
		in     string
		expect string
	}{
		{"foo", "foo"},
		{"\x20", "\x20"},
		{"\xc4\x81", "\xc4\x81"}, // \u0101
		// U+0160 (Š). NOTE(review): this case was labelled "non-breaking
		// space", which is U+00A0 — confirm the intended code point.
		{"\u0160", "\u0160"},
	}

	for _, tc := range cases {
		got := nullString([]byte(tc.in))
		c.Assert(got, qt.Equals, tc.expect)
	}
}
func BenchmarkDecodeExif(b *testing.B) {
c := qt.New(b)
f, err := os.Open(filepath.FromSlash("../../testdata/sunset.jpg"))
@@ -118,7 +101,7 @@ func BenchmarkDecodeExif(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err = d.Decode(f)
_, err = d.Decode("", imagemeta.JPEG, f)
c.Assert(err, qt.IsNil)
f.Seek(0, 0)
}
@@ -126,8 +109,13 @@ func BenchmarkDecodeExif(b *testing.B) {
var eq = qt.CmpEquals(
cmp.Comparer(
func(v1, v2 *big.Rat) bool {
return v1.RatString() == v2.RatString()
func(v1, v2 imagemeta.Rat[uint32]) bool {
return v1.String() == v2.String()
},
),
cmp.Comparer(
func(v1, v2 imagemeta.Rat[int32]) bool {
return v1.String() == v2.String()
},
),
cmp.Comparer(func(v1, v2 time.Time) bool {
@@ -138,14 +126,15 @@ var eq = qt.CmpEquals(
func TestIssue10738(t *testing.T) {
c := qt.New(t)
testFunc := func(path, include string) any {
testFunc := func(c *qt.C, path, include string) any {
c.Helper()
f, err := os.Open(filepath.FromSlash(path))
c.Assert(err, qt.IsNil)
defer f.Close()
d, err := NewDecoder(IncludeFields(include))
c.Assert(err, qt.IsNil)
x, err := d.Decode(f)
x, err := d.Decode("", imagemeta.JPEG, f)
c.Assert(err, qt.IsNil)
// Verify that it survives a round-trip to JSON and back.
@@ -194,7 +183,7 @@ func TestIssue10738(t *testing.T) {
include: "Lens|Date|ExposureTime",
}, want{
10,
0,
1,
},
},
{
@@ -221,7 +210,7 @@ func TestIssue10738(t *testing.T) {
include: "Lens|Date|ExposureTime",
}, want{
1,
0,
1,
},
},
{
@@ -266,7 +255,7 @@ func TestIssue10738(t *testing.T) {
include: "Lens|Date|ExposureTime",
}, want{
30,
0,
1,
},
},
{
@@ -293,19 +282,21 @@ func TestIssue10738(t *testing.T) {
include: "Lens|Date|ExposureTime",
}, want{
4,
0,
1,
},
},
}
for _, tt := range tests {
c.Run(tt.name, func(c *qt.C) {
got := testFunc(tt.args.path, tt.args.include)
got := testFunc(c, tt.args.path, tt.args.include)
switch v := got.(type) {
case float64:
c.Assert(v, qt.Equals, float64(tt.want.vN))
case *big.Rat:
c.Assert(v, eq, big.NewRat(tt.want.vN, tt.want.vD))
case imagemeta.Rat[uint32]:
r, err := imagemeta.NewRat[uint32](uint32(tt.want.vN), uint32(tt.want.vD))
c.Assert(err, qt.IsNil)
c.Assert(v, eq, r)
default:
c.Fatalf("unexpected type: %T", got)
}