diff --git a/src/platform/gba/packer/libimagequant/blur.c b/src/platform/gba/packer/libimagequant/blur.c
new file mode 100644
index 0000000..7f0a716
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/blur.c
@@ -0,0 +1,132 @@
+/*
+© 2011-2015 by Kornel Lesiński.
+
+This file is part of libimagequant.
+
+libimagequant is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+libimagequant is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with libimagequant. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "blur.h"
+
+/* 1D box-blur pass used by liq_blur().
+ Blurs image horizontally (width 2*size+1) and writes it transposed to dst (called twice gives 2d blur). NOTE(review): the running sum below always holds 2*size samples and is divided by size*2, so the effective window looks like 2*size rather than 2*size+1 - confirm intent.
+ src is read as `height` rows of `width` bytes, dst is written as `width` rows of `height` bytes. Requires width >= 2*size (liq_blur() guarantees it). */
+static void transposing_1d_blur(unsigned char *restrict src, unsigned char *restrict dst, unsigned int width, unsigned int height, const unsigned int size)
+{
+ assert(size > 0);
+
+ for(unsigned int j=0; j < height; j++) {
+ unsigned char *restrict row = src + j*width;
+
+ // accumulate sum for pixels outside line
+ unsigned int sum;
+ sum = row[0]*size; // the `size` virtual pixels left of the row are treated as copies of row[0]
+ for(unsigned int i=0; i < size; i++) {
+ sum += row[i];
+ }
+
+ // blur with left side outside line
+ for(unsigned int i=0; i < size; i++) {
+ sum -= row[0]; // the sample leaving the window is one of the virtual copies of row[0]
+ sum += row[i+size];
+
+ dst[i*height + j] = sum / (size*2); // transposed write: column i of src becomes row i of dst
+ }
+
+ for(unsigned int i=size; i < width-size; i++) { // middle part: every sample added or removed is a real pixel
+ sum -= row[i-size];
+ sum += row[i+size];
+
+ dst[i*height + j] = sum / (size*2);
+ }
+
+ // blur with right side outside line
+ for(unsigned int i=width-size; i < width; i++) {
+ sum -= row[i-size];
+ sum += row[width-1]; // pixels past the end are treated as copies of the last pixel
+
+ dst[i*height + j] = sum / (size*2);
+ }
+ }
+}
+
+/**
+ * Dilation with a plus-shaped kernel: each output pixel is the maximum of the source pixel and its direct neighbours (blur + lighten)
+ */
+LIQ_PRIVATE void liq_max3(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height)
+{
+    unsigned char *out = dst;
+    for(unsigned int y=0; y < height; y++) {
+        // rows above/below are clamped to the image, so border rows reuse an existing row
+        const unsigned char *const line = src + y*width;
+        const unsigned char *const above = src + (y > 1 ? y-1 : 0)*width;
+        const unsigned char *const below = src + MIN(height-1,y+1)*width;
+
+        // sliding left/centre/right window; before the loop both centre and right hold the first pixel
+        unsigned char left, centre = line[0], right = line[0];
+        for(unsigned int x=0; x < width-1; x++) {
+            left = centre;
+            centre = right;
+            right = line[x+1];
+            const unsigned char horizontal = MAX(left,right);
+            const unsigned char vertical = MAX(below[x],above[x]);
+            *out++ = MAX(centre,MAX(horizontal,vertical));
+        }
+        // last column has no right neighbour: `right` is the pixel itself and `centre` its left neighbour
+        *out++ = MAX(MAX(centre,right),MAX(below[width-1],above[width-1]));
+    }
+}
+
+/**
+ * Erosion with a plus-shaped kernel: each output pixel is the minimum of the source pixel and its direct neighbours (blur + darken)
+ */
+LIQ_PRIVATE void liq_min3(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height)
+{
+    unsigned char *out = dst;
+    for(unsigned int y=0; y < height; y++) {
+        // rows above/below are clamped to the image, so border rows reuse an existing row
+        const unsigned char *const line = src + y*width;
+        const unsigned char *const above = src + (y > 1 ? y-1 : 0)*width;
+        const unsigned char *const below = src + MIN(height-1,y+1)*width;
+
+        // sliding left/centre/right window; before the loop both centre and right hold the first pixel
+        unsigned char left, centre = line[0], right = line[0];
+        for(unsigned int x=0; x < width-1; x++) {
+            left = centre;
+            centre = right;
+            right = line[x+1];
+            const unsigned char horizontal = MIN(left,right);
+            const unsigned char vertical = MIN(below[x],above[x]);
+            *out++ = MIN(centre,MIN(horizontal,vertical));
+        }
+        // last column has no right neighbour: `right` is the pixel itself and `centre` its left neighbour
+        *out++ = MIN(MIN(centre,right),MIN(below[width-1],above[width-1]));
+    }
+}
+
+/*
+ Box-blurs src into dst with radius `size`, using tmp as scratch space (each pass touches width*height bytes of its buffers).
+ Images narrower or shorter than 2*size+1 are skipped entirely: neither tmp nor dst is written in that case.
+ */
+LIQ_PRIVATE void liq_blur(unsigned char *src, unsigned char *tmp, unsigned char *dst, unsigned int width, unsigned int height, unsigned int size)
+{
+    assert(size > 0);
+    const unsigned int min_side = 2*size+1;
+    if (width >= min_side && height >= min_side) {
+        transposing_1d_blur(src, tmp, width, height, size); // first pass, result lands transposed in tmp
+        transposing_1d_blur(tmp, dst, height, width, size); // second pass blurs the other axis and transposes back
+    }
+}
diff --git a/src/platform/gba/packer/libimagequant/blur.h b/src/platform/gba/packer/libimagequant/blur.h
new file mode 100644
index 0000000..1e77819
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/blur.h
@@ -0,0 +1,8 @@
+#ifndef BLUR_H
+#define BLUR_H
+// Declarations for blur.c. This header includes nothing itself, so LIQ_PRIVATE must already be defined by the including file.
+LIQ_PRIVATE void liq_blur(unsigned char *src, unsigned char *tmp, unsigned char *dst, unsigned int width, unsigned int height, unsigned int size); // box blur with radius `size`; tmp is scratch space
+LIQ_PRIVATE void liq_max3(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height); // per-pixel maximum of a pixel and its direct neighbours
+LIQ_PRIVATE void liq_min3(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height); // per-pixel minimum of a pixel and its direct neighbours
+
+#endif
diff --git a/src/platform/gba/packer/libimagequant/kmeans.c b/src/platform/gba/packer/libimagequant/kmeans.c
new file mode 100644
index 0000000..005be65
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/kmeans.c
@@ -0,0 +1,106 @@
+/*
+** © 2011-2016 by Kornel Lesiński.
+** See COPYRIGHT file for license.
+*/
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "kmeans.h"
+#include "nearest.h"
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _OPENMP
+#include <omp.h>
+#else
+#define omp_get_max_threads() 1
+#define omp_get_thread_num() 0
+#endif
+
+/* K-Means iteration: new palette color is computed from weighted average of colors that map to that palette entry.
+ * kmeans_init() clears the accumulators: every thread owns map->colors slots followed by KMEANS_CACHE_LINE_GAP padding slots.
+ */
+LIQ_PRIVATE void kmeans_init(const colormap *map, const unsigned int max_threads, kmeans_state average_color[])
+{
+    memset(average_color, 0, max_threads*(KMEANS_CACHE_LINE_GAP+map->colors)*sizeof(kmeans_state));
+}
+
+LIQ_PRIVATE void kmeans_update_color(const f_pixel acolor, const float value, const colormap *map, unsigned int match, const unsigned int thread, kmeans_state average_color[])
+{
+    // Adds one color, weighted by `value`, to the running sums of palette entry `match` in this thread's slice
+    match += thread * (KMEANS_CACHE_LINE_GAP+map->colors);
+    kmeans_state *const sums = &average_color[match];
+    sums->a += acolor.a * value; sums->r += acolor.r * value;
+    sums->g += acolor.g * value; sums->b += acolor.b * value;
+    sums->total += value;
+}
+
+LIQ_PRIVATE void kmeans_finalize(colormap *map, const unsigned int max_threads, const kmeans_state average_color[])
+{
+    const size_t slots_per_thread = KMEANS_CACHE_LINE_GAP+map->colors;
+    for (unsigned int i=0; i < map->colors; i++) {
+        // Sum the partial results that every thread collected for palette entry i
+        double sum_a=0, sum_r=0, sum_g=0, sum_b=0, weight=0;
+        for(unsigned int t=0; t < max_threads; t++) {
+            const unsigned int offset = slots_per_thread * t + i;
+            sum_a += average_color[offset].a;
+            sum_r += average_color[offset].r;
+            sum_g += average_color[offset].g;
+            sum_b += average_color[offset].b;
+            weight += average_color[offset].total;
+        }
+
+        if (map->palette[i].fixed) continue; // fixed entries keep both their color and their popularity
+
+        map->palette[i].popularity = weight;
+        if (weight) {
+            map->palette[i].acolor = (f_pixel){
+                .a = sum_a / weight,
+                .r = sum_r / weight,
+                .g = sum_g / weight,
+                .b = sum_b / weight,
+            };
+        } else {
+            // Nothing mapped to this entry: borrow channels from palette entries picked with rand()
+            const unsigned int donor = (i + rand()%7);
+            map->palette[i].acolor.a = map->palette[donor%map->colors].acolor.a;
+            map->palette[i].acolor.r = map->palette[donor%map->colors].acolor.r;
+            map->palette[i].acolor.g = map->palette[(donor+1)%map->colors].acolor.g;
+            map->palette[i].acolor.b = map->palette[(donor+2)%map->colors].acolor.b;
+        }
+    }
+}
+
+LIQ_PRIVATE double kmeans_do_iteration(histogram *hist, colormap *const map, kmeans_callback callback) // one K-Means pass over the histogram: updates `map` in place, returns the weighted average diff
+{
+ const unsigned int max_threads = omp_get_max_threads(); // 1 when built without OpenMP (see fallback macro at the top of the file)
+ LIQ_ARRAY(kmeans_state, average_color, (KMEANS_CACHE_LINE_GAP+map->colors) * max_threads); // one slice of accumulators per thread
+ kmeans_init(map, max_threads, average_color);
+ struct nearest_map *const n = nearest_init(map); // NOTE(review): presumably a lookup structure for nearest palette entry; defined in nearest.h
+ hist_item *const achv = hist->achv;
+ const int hist_size = hist->size;
+ // total_diff sums diff * perceptual_weight over all histogram entries (OpenMP reduction variable)
+ double total_diff=0;
+#if __GNUC__ >= 9 || __clang__
+ #pragma omp parallel for if (hist_size > 2000) \
+ schedule(static) default(none) shared(achv,average_color,callback,hist_size,map,n) reduction(+:total_diff)
+#else
+ #pragma omp parallel for if (hist_size > 2000) \
+ schedule(static) default(none) shared(average_color,callback) reduction(+:total_diff)
+#endif
+ for(int j=0; j < hist_size; j++) {
+ float diff;
+ unsigned int match = nearest_search(n, &achv[j].acolor, achv[j].tmp.likely_colormap_index, &diff); // previous match is passed in, presumably as a search hint
+ achv[j].tmp.likely_colormap_index = match; // remember the match for the next iteration
+ total_diff += diff * achv[j].perceptual_weight;
+
+ if (callback) callback(&achv[j], diff); // optional per-item hook; it runs inside the parallel loop
+
+ kmeans_update_color(achv[j].acolor, achv[j].perceptual_weight, map, match, omp_get_thread_num(), average_color);
+ }
+
+ nearest_free(n);
+ kmeans_finalize(map, max_threads, average_color); // merges per-thread sums and rewrites the non-fixed palette entries
+
+ return total_diff / hist->total_perceptual_weight;
+}
diff --git a/src/platform/gba/packer/libimagequant/kmeans.h b/src/platform/gba/packer/libimagequant/kmeans.h
new file mode 100644
index 0000000..c51d7bb
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/kmeans.h
@@ -0,0 +1,19 @@
+
+#ifndef KMEANS_H
+#define KMEANS_H
+
+// Spread memory touched by different threads at least 64B apart which I assume is the cache line size. This should avoid memory write contention.
+#define KMEANS_CACHE_LINE_GAP ((64+sizeof(kmeans_state)-1)/sizeof(kmeans_state)) /* number of kmeans_state slots needed to span 64 bytes, rounded up */
+// Accumulator for one palette entry: weighted channel sums plus the total weight (see kmeans_update_color)
+typedef struct {
+ double a, r, g, b, total;
+} kmeans_state;
+// Optional hook invoked by kmeans_do_iteration for every histogram item with the diff reported by nearest_search
+typedef void (*kmeans_callback)(hist_item *item, float diff);
+// The state arrays below hold (KMEANS_CACHE_LINE_GAP + map->colors) * max_threads elements
+LIQ_PRIVATE void kmeans_init(const colormap *map, const unsigned int max_threads, kmeans_state state[]);
+LIQ_PRIVATE void kmeans_update_color(const f_pixel acolor, const float value, const colormap *map, unsigned int match, const unsigned int thread, kmeans_state average_color[]);
+LIQ_PRIVATE void kmeans_finalize(colormap *map, const unsigned int max_threads, const kmeans_state state[]);
+LIQ_PRIVATE double kmeans_do_iteration(histogram *hist, colormap *const map, kmeans_callback callback);
+
+#endif
diff --git a/src/platform/gba/packer/libimagequant/libimagequant.c b/src/platform/gba/packer/libimagequant/libimagequant.c
new file mode 100644
index 0000000..a1bfafb
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/libimagequant.c
@@ -0,0 +1,2123 @@
+/*
+** © 2009-2018 by Kornel Lesiński.
+** © 1989, 1991 by Jef Poskanzer.
+** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider.
+**
+** See COPYRIGHT file for license.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <limits.h>
+
+#if !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) && !(defined(_MSC_VER) && _MSC_VER >= 1800)
+#error "This program requires C99, e.g. -std=c99 switch in GCC or it requires MSVC 18.0 or higher."
+#error "Ignore torrent of syntax errors that may follow. It's only because compiler is set to use too old C version."
+#endif
+
+#ifdef _OPENMP
+#include <omp.h>
+#define LIQ_TEMP_ROW_WIDTH(img_width) (((img_width) | 15) + 1) /* keep alignment & leave space between rows to avoid cache line contention */
+#else
+#define LIQ_TEMP_ROW_WIDTH(img_width) (img_width)
+#define omp_get_max_threads() 1
+#define omp_get_thread_num() 0
+#endif
+
+#include "libimagequant.h"
+
+#include "pam.h"
+#include "mediancut.h"
+#include "nearest.h"
+#include "blur.h"
+#include "kmeans.h"
+
+#define LIQ_HIGH_MEMORY_LIMIT (1<<26) /* avoid allocating buffers larger than 64MB */
+
+// each structure has a pointer as a unique identifier that allows type checking at run time (pointers are compared by address, the string contents are never inspected)
+static const char liq_attr_magic[] = "liq_attr";
+static const char liq_image_magic[] = "liq_image";
+static const char liq_result_magic[] = "liq_result";
+static const char liq_histogram_magic[] = "liq_histogram";
+static const char liq_remapping_result_magic[] = "liq_remapping_result";
+static const char liq_freed_magic[] = "free"; // written into magic_header on destroy so that use-after-free can be detected
+#define CHECK_STRUCT_TYPE(attr, kind) liq_crash_if_invalid_handle_pointer_given((const liq_attr*)attr, kind ## _magic) /* true when the handle is non-NULL and carries the magic of `kind`; relies on magic_header being the first member */
+#define CHECK_USER_POINTER(ptr) liq_crash_if_invalid_pointer_given(ptr) /* false for NULL, otherwise probes one byte of *ptr and returns true */
+
+struct liq_attr { // quantization settings handle, created by liq_attr_create_with_allocator()
+ const char *magic_header; // liq_attr_magic while valid, liq_freed_magic after liq_attr_destroy(); must stay the first member because CHECK_STRUCT_TYPE reads it through a liq_attr* cast
+ void* (*malloc)(size_t); // allocator pair chosen at creation; images created from this attr copy it
+ void (*free)(void*);
+
+ double target_mse, max_mse, kmeans_iteration_limit; // target_mse/max_mse are set by liq_set_quality(); kmeans_iteration_limit by liq_set_speed()
+ unsigned int max_colors, max_histogram_entries; // max_colors: 2..256 via liq_set_max_colors(); max_histogram_entries is derived from speed
+ unsigned int min_posterization_output /* user setting */, min_posterization_input /* speed setting */;
+ unsigned int kmeans_iterations, feedback_loop_trials; // both derived from speed in liq_set_speed()
+ bool last_index_transparent, use_contrast_maps;
+ unsigned char use_dither_map; // 0 = off, 1 = on, 2 = "always" (see liq_set_speed())
+ unsigned char speed; // 1..10 as passed to liq_set_speed()
+
+ unsigned char progress_stage1, progress_stage2, progress_stage3; // set together by liq_set_speed(); the three values add up to 100
+ liq_progress_callback_function *progress_callback; // a zero return value makes liq_progress() report true
+ void *progress_callback_user_info;
+
+ liq_log_callback_function *log_callback; // receives messages from liq_verbose_printf()/verbose_print(); may be NULL
+ void *log_callback_user_info;
+ liq_log_flush_callback_function *log_flush_callback; // invoked before the log callback is replaced and on destroy; may be NULL
+ void *log_flush_callback_user_info;
+};
+
+struct liq_image { // image handle, created by liq_image_create_internal()
+ const char *magic_header; // liq_image_magic while valid; must stay the first member (see CHECK_STRUCT_TYPE)
+ void* (*malloc)(size_t); // copied from the liq_attr used to create the image
+ void (*free)(void*);
+
+ f_pixel *f_pixels; // NOTE(review): presumably the cache of converted pixels that low-memory mode avoids - not written in the code shown here
+ rgba_pixel **rows; // one pointer per image row, or NULL when the image is backed by row_callback
+ double gamma; // 0.45455 when the caller passed 0
+ unsigned int width, height;
+ unsigned char *importance_map, *edges, *dither_map; // importance_map holds width*height bytes (liq_image_set_importance_map); edges/dither_map are not touched in the code shown here
+ rgba_pixel *pixels, *temp_row; // pixels: start of the bitmap when LIQ_OWN_PIXELS is set; temp_row: per-thread scratch rows, allocated only when rows is NULL
+ f_pixel *temp_f_row; // per-thread scratch rows allocated by liq_image_use_low_memory()
+ liq_image_get_rgba_row_callback *row_callback;
+ void *row_callback_user_info;
+ liq_image *background; // set by liq_image_set_background(), which destroys any previous background
+ f_pixel fixed_colors[256];
+ unsigned short fixed_colors_count; // number of used entries in fixed_colors (at most 256)
+ bool free_pixels, free_rows, free_rows_internal; // ownership flags; free_rows_internal marks a row table allocated by liq_image_create_rgba()
+};
+
+typedef struct liq_remapping_result { // owned by a liq_result (its `remapping` member); released with liq_remapping_result_destroy()
+ const char *magic_header; // must stay the first member (see CHECK_STRUCT_TYPE)
+ void* (*malloc)(size_t);
+ void (*free)(void*);
+
+ unsigned char *pixels;
+ colormap *palette;
+ liq_progress_callback_function *progress_callback; // consulted by liq_remap_progress(); may be NULL
+ void *progress_callback_user_info;
+
+ liq_palette int_palette;
+ double gamma, palette_error;
+ float dither_level;
+ unsigned char use_dither_map;
+ unsigned char progress_stage1;
+} liq_remapping_result;
+
+struct liq_result { // quantization result handle
+ const char *magic_header; // must stay the first member (see CHECK_STRUCT_TYPE)
+ void* (*malloc)(size_t);
+ void (*free)(void*);
+
+ liq_remapping_result *remapping; // destroyed and reset to NULL whenever liq_set_output_gamma() changes the gamma
+ colormap *palette;
+ liq_progress_callback_function *progress_callback; // set by liq_result_set_progress_callback()
+ void *progress_callback_user_info;
+
+ liq_palette int_palette;
+ float dither_level;
+ double gamma, palette_error; // gamma is set by liq_set_output_gamma(), which only accepts values strictly between 0 and 1
+ int min_posterization_output;
+ unsigned char use_dither_map;
+};
+
+struct liq_histogram { // histogram handle
+ const char *magic_header; // must stay the first member (see CHECK_STRUCT_TYPE)
+ void* (*malloc)(size_t);
+ void (*free)(void*);
+
+ struct acolorhash_table *acht;
+ double gamma;
+ f_pixel fixed_colors[256]; // appended to by liq_histogram_add_fixed_color_f()
+ unsigned short fixed_colors_count; // number of used entries in fixed_colors (at most 256)
+ unsigned short ignorebits;
+ bool had_image_added;
+};
+
+static void contrast_maps(liq_image *image) LIQ_NONNULL;
+static liq_error finalize_histogram(liq_histogram *input_hist, liq_attr *options, histogram **hist_output) LIQ_NONNULL;
+static const rgba_pixel *liq_image_get_row_rgba(liq_image *input_image, unsigned int row) LIQ_NONNULL;
+static bool liq_image_get_row_f_init(liq_image *img) LIQ_NONNULL;
+static const f_pixel *liq_image_get_row_f(liq_image *input_image, unsigned int row) LIQ_NONNULL;
+static void liq_remapping_result_destroy(liq_remapping_result *result) LIQ_NONNULL;
+static liq_error pngquant_quantize(histogram *hist, const liq_attr *options, const int fixed_colors_count, const f_pixel fixed_colors[], const double gamma, bool fixed_result_colors, liq_result **) LIQ_NONNULL;
+static liq_error liq_histogram_quantize_internal(liq_histogram *input_hist, liq_attr *attr, bool fixed_result_colors, liq_result **result_output) LIQ_NONNULL;
+
+// Formats a printf-style message and hands it to the log callback of `context`; does nothing when no callback is set.
+LIQ_NONNULL static void liq_verbose_printf(const liq_attr *context, const char *fmt, ...)
+{
+    if (!context->log_callback) return;
+    va_list va;
+    va_start(va, fmt);
+    const int required_space = vsnprintf(NULL, 0, fmt, va)+1; // +\0
+    va_end(va);
+    if (required_space < 1) return; // vsnprintf failed (negative result): no message rather than a zero-sized buffer with garbage
+
+    LIQ_ARRAY(char, buf, required_space);
+    va_start(va, fmt); // the argument list was consumed by the measuring pass, so restart it
+    vsnprintf(buf, required_space, fmt, va);
+    va_end(va);
+    context->log_callback(context, buf, context->log_callback_user_info);
+}
+
+LIQ_NONNULL inline static void verbose_print(const liq_attr *attr, const char *msg)
+{
+    // Unformatted counterpart of liq_verbose_printf(): msg is forwarded to the log callback as is
+    liq_log_callback_function *const sink = attr->log_callback;
+    if (!sink) return;
+    sink(attr, msg, attr->log_callback_user_info);
+}
+
+LIQ_NONNULL static void liq_verbose_printf_flush(liq_attr *attr)
+{
+    // Invokes the log flush callback if one is registered; otherwise does nothing
+    liq_log_flush_callback_function *const flush = attr->log_flush_callback;
+    if (!flush) return;
+    flush(attr, attr->log_flush_callback_user_info);
+}
+
+LIQ_NONNULL static bool liq_progress(const liq_attr *attr, const float percent) // reports progress; returns true only when a callback exists and it returned zero
+{
+ return attr->progress_callback && !attr->progress_callback(percent, attr->progress_callback_user_info); // no callback registered => always false
+}
+
+LIQ_NONNULL static bool liq_remap_progress(const liq_remapping_result *quant, const float percent) // same contract as liq_progress(), using the remapping result's callback
+{
+ return quant->progress_callback && !quant->progress_callback(percent, quant->progress_callback_user_info); // no callback registered => always false
+}
+
+#if USE_SSE
+inline static bool is_sse_available() // run-time CPU feature check; liq_attr_create_with_allocator() refuses to create a handle when this returns false
+{
+#if (defined(__x86_64__) || defined(__amd64) || defined(_WIN64))
+ return true; // 64-bit x86 targets: no CPUID query is made
+#elif _MSC_VER
+ int info[4];
+ __cpuid(info, 1);
+ /* bool is implemented as a built-in type of size 1 in MSVC */
+ return info[3] & (1<<26) ? true : false; // NOTE(review): this branch tests EDX bit 26 while the branch below tests bit 25 - confirm which feature bit is intended
+#else
+ int a,b,c,d;
+ cpuid(1, a, b, c, d); // `cpuid` is a helper defined outside this chunk
+ return d & (1<<25); // edx bit 25 is set when SSE is present
+#endif
+}
+#endif
+
+/* Handle validation lives in its own non-inlined function so that it is clearly visible in a backtrace when a user-supplied handle points to invalid memory */
+NEVER_INLINE LIQ_EXPORT bool liq_crash_if_invalid_handle_pointer_given(const liq_attr *user_supplied_pointer, const char *const expected_magic_header);
+LIQ_EXPORT bool liq_crash_if_invalid_handle_pointer_given(const liq_attr *user_supplied_pointer, const char *const expected_magic_header)
+{
+    if (!user_supplied_pointer) return false;
+
+    const char *const found_magic = user_supplied_pointer->magic_header;
+    if (found_magic != liq_freed_magic) {
+        return found_magic == expected_magic_header; // magic values are compared by address, not by content
+    }
+
+    // The handle has already been destroyed. This is a programmer error, not a recoverable condition:
+    // the program cannot safely continue once memory has been used after it's been freed.
+    // abort() is nasty, but a security vulnerability may be worse.
+    fprintf(stderr, "%s used after being freed", expected_magic_header);
+
+    abort();
+}
+
+NEVER_INLINE LIQ_EXPORT bool liq_crash_if_invalid_pointer_given(const void *pointer);
+LIQ_EXPORT bool liq_crash_if_invalid_pointer_given(const void *pointer) // returns false for NULL, true for any pointer whose first byte can be read
+{
+ if (!pointer) {
+ return false;
+ }
+ // Force a read from the given (potentially invalid) memory location in order to check early whether this crashes the program or not.
+ // It doesn't matter what value is read, the code here is just to shut the compiler up about unused read.
+ char test_access = *((volatile char *)pointer); // volatile keeps the compiler from dropping the probe read
+ return test_access || true; // always true; the expression merely consumes test_access
+}
+
+LIQ_NONNULL static void liq_log_error(const liq_attr *attr, const char *msg)
+{
+    // Errors are reported through the log callback; an invalid attr handle silently drops the message
+    if (CHECK_STRUCT_TYPE(attr, liq_attr)) liq_verbose_printf(attr, " error: %s", msg);
+}
+
+static double quality_to_mse(long quality)
+{
+    // The two end points are exact: quality 0 tolerates any error, quality 100 tolerates none
+    switch (quality) {
+        case 0: return MAX_DIFF;
+        case 100: return 0;
+    }
+    // curve fudged to be roughly similar to quality of libjpeg
+    // except lowest 10 for really low number of colors
+    const double q = quality;
+    const double low_quality_boost = MAX(0,0.016/(0.001+q) - 0.001);
+    const double curve = 2.5/pow(210.0 + q, 1.2) * (100.1-q)/100.0;
+    return low_quality_boost + curve;
+}
+
+static unsigned int mse_to_quality(double mse)
+{
+    // Walk down from 100 until the error budget of that quality covers `mse`; 0 when none does
+    int quality = 100;
+    while (quality > 0 && !(mse <= quality_to_mse(quality) + 0.000001)) { // + epsilon for floating point errors
+        quality--;
+    }
+    return quality;
+}
+
+/** Internal MSE is a sum of all channels with pixels in 0..1 range, while other software gives per-RGB-channel MSE for 0..255 range; this converts the former to the latter. */
+static double mse_to_standard_mse(double mse) {
+    const double scaled_up = mse * 65536.0;
+    return scaled_up / 6.0;
+}
+
+// Stores the quality range as error limits; requires 0 <= minimum <= target <= 100.
+LIQ_EXPORT LIQ_NONNULL liq_error liq_set_quality(liq_attr* attr, int minimum, int target)
+{
+    if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER;
+    if (minimum < 0 || minimum > target || target > 100) return LIQ_VALUE_OUT_OF_RANGE;
+    attr->max_mse = quality_to_mse(minimum); // lowest acceptable quality = largest tolerated error
+    attr->target_mse = quality_to_mse(target);
+    return LIQ_OK;
+}
+
+LIQ_EXPORT LIQ_NONNULL int liq_get_min_quality(const liq_attr *attr)
+{
+    // The minimum quality is stored as max_mse; -1 signals an invalid handle
+    return CHECK_STRUCT_TYPE(attr, liq_attr) ? (int)mse_to_quality(attr->max_mse) : -1;
+}
+
+LIQ_EXPORT LIQ_NONNULL int liq_get_max_quality(const liq_attr *attr)
+{
+    // The target quality is stored as target_mse; -1 signals an invalid handle
+    return CHECK_STRUCT_TYPE(attr, liq_attr) ? (int)mse_to_quality(attr->target_mse) : -1;
+}
+
+
+LIQ_EXPORT LIQ_NONNULL liq_error liq_set_max_colors(liq_attr* attr, int colors)
+{
+    if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER;
+    const bool acceptable = (2 <= colors && colors <= 256); // the limit must be between 2 and 256 colors
+    if (!acceptable) return LIQ_VALUE_OUT_OF_RANGE;
+    attr->max_colors = colors;
+    return LIQ_OK;
+}
+
+LIQ_EXPORT LIQ_NONNULL int liq_get_max_colors(const liq_attr *attr)
+{
+    // Returns the configured color limit, or -1 for an invalid handle
+    const bool valid = CHECK_STRUCT_TYPE(attr, liq_attr);
+    return valid ? (int)attr->max_colors : -1;
+}
+
+LIQ_EXPORT LIQ_NONNULL liq_error liq_set_min_posterization(liq_attr *attr, int bits)
+{
+    if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER;
+    const bool acceptable = (0 <= bits && bits <= 4); // only 0..4 bits are accepted
+    if (!acceptable) return LIQ_VALUE_OUT_OF_RANGE;
+    attr->min_posterization_output = bits;
+    return LIQ_OK;
+}
+
+LIQ_EXPORT LIQ_NONNULL int liq_get_min_posterization(const liq_attr *attr)
+{
+    // Returns the user-set posterization bits, or -1 for an invalid handle
+    const bool valid = CHECK_STRUCT_TYPE(attr, liq_attr);
+    return valid ? (int)attr->min_posterization_output : -1;
+}
+
+LIQ_EXPORT LIQ_NONNULL liq_error liq_set_speed(liq_attr* attr, int speed) // derives every speed-dependent setting of `attr` from a single value in 1..10
+{
+ if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER;
+ if (speed < 1 || speed > 10) return LIQ_VALUE_OUT_OF_RANGE;
+ // Lower speed values produce more K-Means iterations, more feedback trials and a larger histogram limit
+ unsigned int iterations = MAX(8-speed, 0);
+ iterations += iterations * iterations/2; // 31 at speed 1, 0 at speed 8 and above
+ attr->kmeans_iterations = iterations;
+ attr->kmeans_iteration_limit = 1.0/(double)(1<<(23-speed));
+ attr->feedback_loop_trials = MAX(56-9*speed, 0); // 47 at speed 1, 0 from speed 7 upwards
+
+ attr->max_histogram_entries = (1<<17) + (1<<18)*(10-speed);
+ attr->min_posterization_input = (speed >= 8) ? 1 : 0;
+ attr->use_dither_map = (speed <= (omp_get_max_threads() > 1 ? 7 : 5)); // parallelized dither map might speed up floyd remapping
+ if (attr->use_dither_map && speed < 3) {
+ attr->use_dither_map = 2; // always
+ }
+ attr->use_contrast_maps = (speed <= 7) || attr->use_dither_map; // must come after use_dither_map has been decided
+ attr->speed = speed;
+ // Split of the 0..100 progress range between the three stages; stage2 receives whatever stage1 and stage3 leave
+ attr->progress_stage1 = attr->use_contrast_maps ? 20 : 8;
+ if (attr->feedback_loop_trials < 2) {
+ attr->progress_stage1 += 30;
+ }
+ attr->progress_stage3 = 50 / (1+speed);
+ attr->progress_stage2 = 100 - attr->progress_stage1 - attr->progress_stage3;
+ return LIQ_OK;
+}
+
+LIQ_EXPORT LIQ_NONNULL int liq_get_speed(const liq_attr *attr)
+{
+    // Returns the value last passed to liq_set_speed(), or -1 for an invalid handle
+    const bool valid = CHECK_STRUCT_TYPE(attr, liq_attr);
+    return valid ? (int)attr->speed : -1;
+}
+
+LIQ_EXPORT LIQ_NONNULL liq_error liq_set_output_gamma(liq_result* res, double gamma)
+{
+    if (!CHECK_STRUCT_TYPE(res, liq_result)) return LIQ_INVALID_POINTER;
+    if (gamma <= 0) return LIQ_VALUE_OUT_OF_RANGE;
+    if (gamma >= 1.0) return LIQ_VALUE_OUT_OF_RANGE;
+
+    // Any existing remapping result is discarded before the new gamma is stored
+    liq_remapping_result *const stale = res->remapping;
+    res->remapping = NULL;
+    if (stale) liq_remapping_result_destroy(stale);
+    res->gamma = gamma;
+    return LIQ_OK;
+}
+
+LIQ_EXPORT LIQ_NONNULL liq_error liq_set_min_opacity(liq_attr* attr, int min) // no-op: both arguments are ignored and nothing is stored
+{
+ return LIQ_OK; // always succeeds, even without validating `attr`
+}
+
+LIQ_EXPORT LIQ_NONNULL int liq_get_min_opacity(const liq_attr *attr) // counterpart of the no-op liq_set_min_opacity(); `attr` is not read
+{
+ return 0; // constant result regardless of the handle
+}
+
+LIQ_EXPORT LIQ_NONNULL void liq_set_last_index_transparent(liq_attr* attr, int is_last)
+{
+    // Any non-zero is_last turns the flag on; invalid handles are ignored
+    if (CHECK_STRUCT_TYPE(attr, liq_attr)) {
+        attr->last_index_transparent = (is_last != 0);
+    }
+}
+
+LIQ_EXPORT void liq_attr_set_progress_callback(liq_attr *attr, liq_progress_callback_function *callback, void *user_info)
+{
+    // Stores the progress callback together with its user data; invalid handles are ignored
+    if (CHECK_STRUCT_TYPE(attr, liq_attr)) {
+        attr->progress_callback_user_info = user_info;
+        attr->progress_callback = callback;
+    }
+}
+
+LIQ_EXPORT void liq_result_set_progress_callback(liq_result *result, liq_progress_callback_function *callback, void *user_info)
+{
+    // Stores the progress callback together with its user data; invalid handles are ignored
+    if (CHECK_STRUCT_TYPE(result, liq_result)) {
+        result->progress_callback_user_info = user_info;
+        result->progress_callback = callback;
+    }
+}
+
+LIQ_EXPORT void liq_set_log_callback(liq_attr *attr, liq_log_callback_function *callback, void* user_info)
+{
+    if (CHECK_STRUCT_TYPE(attr, liq_attr)) {
+        liq_verbose_printf_flush(attr); // flush callback runs before the log callback is swapped
+        attr->log_callback_user_info = user_info;
+        attr->log_callback = callback;
+    }
+}
+
+LIQ_EXPORT void liq_set_log_flush_callback(liq_attr *attr, liq_log_flush_callback_function *callback, void* user_info)
+{
+    // Stores the flush callback together with its user data; invalid handles are ignored
+    if (CHECK_STRUCT_TYPE(attr, liq_attr)) {
+        attr->log_flush_callback_user_info = user_info;
+        attr->log_flush_callback = callback;
+    }
+}
+
+LIQ_EXPORT liq_attr* liq_attr_create(void) // explicit (void): an empty parameter list is not a prototype before C23
+{
+    return liq_attr_create_with_allocator(NULL, NULL); // NULL/NULL selects the built-in 16-byte-aligned allocator
+}
+
+// Releases an attr handle; handles that fail validation are left untouched.
+LIQ_EXPORT LIQ_NONNULL void liq_attr_destroy(liq_attr *attr)
+{
+    if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return;
+
+    liq_verbose_printf_flush(attr);
+    // Mark the handle as freed first so liq_crash_if_invalid_handle_pointer_given() can detect use-after-free
+    void (*const release)(void*) = attr->free;
+    attr->magic_header = liq_freed_magic;
+    release(attr);
+}
+
+// Duplicates an attr handle using the original's allocator; returns NULL for an invalid handle or failed allocation.
+LIQ_EXPORT LIQ_NONNULL liq_attr* liq_attr_copy(const liq_attr *orig)
+{
+    if (!CHECK_STRUCT_TYPE(orig, liq_attr)) return NULL;
+
+    liq_attr *const duplicate = orig->malloc(sizeof(*duplicate));
+    if (duplicate) {
+        *duplicate = *orig; // plain struct copy: callbacks and their user_info pointers are shared with the original
+    }
+    return duplicate;
+}
+
+// Allocates `size` bytes aligned to 16 bytes; release with liq_aligned_free(). Returns NULL on failure.
+static void *liq_aligned_malloc(size_t size)
+{
+    if (size > (size_t)-1 - 16) return NULL; // size + 16 below would wrap around and under-allocate
+    unsigned char *ptr = malloc(size + 16);
+    if (!ptr) return NULL;
+
+    uintptr_t offset = 16 - ((uintptr_t)ptr & 15); // always 1..16, which also reserves 1 byte for ptr[-1]
+    ptr += offset;
+    assert(0 == (((uintptr_t)ptr) & 15));
+    ptr[-1] = offset ^ 0x59; // store how much pointer was shifted to get the original for free()
+    return ptr;
+}
+
+LIQ_NONNULL static void liq_aligned_free(void *inptr)
+{
+    // The byte just before the aligned block holds (shift ^ 0x59), as written by liq_aligned_malloc()
+    unsigned char *const aligned = inptr;
+    const size_t shift = aligned[-1] ^ 0x59;
+    assert(shift > 0 && shift <= 16);
+    free(aligned - shift);
+}
+
+LIQ_EXPORT liq_attr* liq_attr_create_with_allocator(void* (*custom_malloc)(size_t), void (*custom_free)(void*)) // creates an attr handle with default settings; returns NULL on failure
+{
+#if USE_SSE
+ if (!is_sse_available()) { // builds with USE_SSE refuse to run on CPUs where the feature check fails
+ return NULL;
+ }
+#endif
+ if (!custom_malloc && !custom_free) { // neither given: fall back to the built-in aligned allocator
+ custom_malloc = liq_aligned_malloc;
+ custom_free = liq_aligned_free;
+ } else if (!custom_malloc != !custom_free) {
+ return NULL; // either specify both or none
+ }
+
+ liq_attr *attr = custom_malloc(sizeof(liq_attr));
+ if (!attr) return NULL;
+ *attr = (liq_attr) { // members not listed here are zero-initialized by the compound literal
+ .magic_header = liq_attr_magic,
+ .malloc = custom_malloc,
+ .free = custom_free,
+ .max_colors = 256,
+ .last_index_transparent = false, // puts transparent color at last index. This is workaround for blu-ray subtitles.
+ .target_mse = 0,
+ .max_mse = MAX_DIFF,
+ };
+ liq_set_speed(attr, 4); // fills in all speed-derived settings with the default speed of 4
+ return attr;
+}
+
+// Appends a color to the image's fixed-color table (at most 256 entries).
+LIQ_EXPORT LIQ_NONNULL liq_error liq_image_add_fixed_color(liq_image *img, liq_color color)
+{
+    if (!CHECK_STRUCT_TYPE(img, liq_image)) return LIQ_INVALID_POINTER;
+    if (img->fixed_colors_count >= 256) return LIQ_UNSUPPORTED; // table is full
+
+    // The color is converted with a lookup table built for the image's own gamma
+    float gamma_lut[256];
+    to_f_set_gamma(gamma_lut, img->gamma);
+    const rgba_pixel rgba = { .r = color.r, .g = color.g, .b = color.b, .a = color.a };
+    const unsigned int slot = img->fixed_colors_count;
+    img->fixed_colors[slot] = rgba_to_f(gamma_lut, rgba);
+    img->fixed_colors_count = slot + 1;
+    return LIQ_OK;
+}
+
+LIQ_NONNULL static liq_error liq_histogram_add_fixed_color_f(liq_histogram *hist, f_pixel color)
+{
+    if (hist->fixed_colors_count >= 256) return LIQ_UNSUPPORTED; // all 256 slots are taken
+    const unsigned int slot = hist->fixed_colors_count++;
+    hist->fixed_colors[slot] = color;
+    return LIQ_OK;
+}
+
+// Converts an RGBA color with the given gamma and appends it to the histogram's fixed-color table.
+LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_add_fixed_color(liq_histogram *hist, liq_color color, double gamma)
+{
+    if (!CHECK_STRUCT_TYPE(hist, liq_histogram)) return LIQ_INVALID_POINTER;
+
+    double effective_gamma = 0.45455; // default used when the caller passes 0
+    if (gamma) effective_gamma = gamma;
+    float gamma_lut[256];
+    to_f_set_gamma(gamma_lut, effective_gamma);
+
+    const rgba_pixel rgba = { .r = color.r, .g = color.g, .b = color.b, .a = color.a };
+    const f_pixel px = rgba_to_f(gamma_lut, rgba);
+    return liq_histogram_add_fixed_color_f(hist, px);
+}
+
+LIQ_NONNULL static bool liq_image_use_low_memory(liq_image *img)
+{
+    f_pixel *const per_thread_rows = img->malloc(sizeof(f_pixel) * LIQ_TEMP_ROW_WIDTH(img->width) * omp_get_max_threads());
+    return (img->temp_f_row = per_thread_rows) != NULL; // one temporary f_pixel row per thread; false when the allocation failed
+}
+
+LIQ_NONNULL static bool liq_image_should_use_low_memory(liq_image *img, const bool low_memory_hint) // true when width*height f_pixels would exceed the memory budget
+{
+ return (size_t)img->width * (size_t)img->height > (low_memory_hint ? LIQ_HIGH_MEMORY_LIMIT/8 : LIQ_HIGH_MEMORY_LIMIT) / sizeof(f_pixel); // Watch out for integer overflow; the hint shrinks the budget to one eighth
+}
+
+// Allocates and initializes a liq_image backed either by `rows` or by `row_callback`; returns NULL on any failure, releasing whatever it had allocated.
+static liq_image *liq_image_create_internal(const liq_attr *attr, rgba_pixel* rows[], liq_image_get_rgba_row_callback *row_callback, void *row_callback_user_info, int width, int height, double gamma)
+{
+    if (gamma < 0 || gamma > 1.0) {
+        liq_log_error(attr, "gamma must be >= 0 and <= 1 (try 1/gamma instead)");
+        return NULL;
+    }
+    if (!rows && !row_callback) {
+        liq_log_error(attr, "missing row data");
+        return NULL;
+    }
+    liq_image *img = attr->malloc(sizeof(liq_image));
+    if (!img) return NULL;
+    *img = (liq_image){
+        .magic_header = liq_image_magic,
+        .malloc = attr->malloc,
+        .free = attr->free,
+        .width = width, .height = height,
+        .gamma = gamma ? gamma : 0.45455, // 0 selects the default gamma
+        .rows = rows,
+        .row_callback = row_callback,
+        .row_callback_user_info = row_callback_user_info,
+    };
+    if (!rows) { // callback-backed image: allocate one temporary RGBA row per thread
+        img->temp_row = attr->malloc(sizeof(img->temp_row[0]) * LIQ_TEMP_ROW_WIDTH(width) * omp_get_max_threads());
+        if (!img->temp_row) goto fail;
+    }
+    // if image is huge or converted pixels are not likely to be reused then don't cache converted pixels
+    if (liq_image_should_use_low_memory(img, !img->temp_row && !attr->use_contrast_maps && !attr->use_dither_map)) {
+        verbose_print(attr, " conserving memory");
+        if (!liq_image_use_low_memory(img)) goto fail;
+    }
+    return img;
+fail: // release the partial allocations so that a failed creation does not leak img or temp_row
+    if (img->temp_row) attr->free(img->temp_row);
+    attr->free(img);
+    return NULL;
+}
+
+// Lets the image take ownership of the row table and/or the pixel data of a rows-backed image.
+LIQ_EXPORT LIQ_NONNULL liq_error liq_image_set_memory_ownership(liq_image *img, int ownership_flags)
+{
+    if (!CHECK_STRUCT_TYPE(img, liq_image)) return LIQ_INVALID_POINTER;
+    const int known_flags = LIQ_OWN_ROWS|LIQ_OWN_PIXELS;
+    if (!img->rows || ownership_flags == 0 || (ownership_flags & ~known_flags) != 0) return LIQ_VALUE_OUT_OF_RANGE;
+
+    if (ownership_flags & LIQ_OWN_ROWS) {
+        if (img->free_rows_internal) return LIQ_VALUE_OUT_OF_RANGE; // the row table was allocated by the library itself
+        img->free_rows = true;
+    }
+
+    if (ownership_flags & LIQ_OWN_PIXELS) {
+        img->free_pixels = true;
+        if (!img->pixels) {
+            // for simplicity of this API there's no explicit bitmap argument,
+            // so the row with the lowest address is assumed to be at the start of the bitmap
+            rgba_pixel *lowest = img->rows[0];
+            for(unsigned int y=1; y < img->height; y++) {
+                if (img->rows[y] < lowest) lowest = img->rows[y];
+            }
+            img->pixels = lowest;
+        }
+    }
+    return LIQ_OK;
+}
+
+LIQ_NONNULL static void liq_image_free_maps(liq_image *input_image);
+LIQ_NONNULL static void liq_image_free_importance_map(liq_image *input_image);
+
+LIQ_EXPORT LIQ_NONNULL liq_error liq_image_set_importance_map(liq_image *img, unsigned char importance_map[], size_t buffer_size, enum liq_ownership ownership) {
+    if (!CHECK_STRUCT_TYPE(img, liq_image)) return LIQ_INVALID_POINTER;
+    if (!CHECK_USER_POINTER(importance_map)) return LIQ_INVALID_POINTER;
+
+    const size_t pixel_count = (size_t)img->width * (size_t)img->height;
+    if (buffer_size < pixel_count) return LIQ_BUFFER_TOO_SMALL; // the map needs one byte per pixel
+
+    unsigned char *new_map = importance_map;
+    switch (ownership) {
+        case LIQ_COPY_PIXELS:
+            new_map = img->malloc(pixel_count);
+            if (!new_map) return LIQ_OUT_OF_MEMORY;
+            memcpy(new_map, importance_map, pixel_count);
+            break;
+        case LIQ_OWN_PIXELS:
+            break; // the caller's buffer is adopted as is
+        default:
+            return LIQ_UNSUPPORTED;
+    }
+
+    // Only now that nothing can fail anymore is the previous map released and replaced
+    liq_image_free_importance_map(img);
+    img->importance_map = new_map;
+    return LIQ_OK;
+}
+
+LIQ_EXPORT LIQ_NONNULL liq_error liq_image_set_background(liq_image *img, liq_image *background) // attaches `background` to `img`; it must have the same dimensions
+{
+ if (!CHECK_STRUCT_TYPE(img, liq_image)) return LIQ_INVALID_POINTER;
+ if (!CHECK_STRUCT_TYPE(background, liq_image)) return LIQ_INVALID_POINTER;
+
+ if (background->background) { // a background that itself has a background is rejected
+ return LIQ_UNSUPPORTED;
+ }
+ if (img->width != background->width || img->height != background->height) {
+ return LIQ_BUFFER_TOO_SMALL; // this error code is returned for any size mismatch, larger or smaller
+ }
+
+ if (img->background) {
+ liq_image_destroy(img->background); // the previously attached background is destroyed here
+ }
+
+ img->background = background;
+ liq_image_free_maps(img); // Force them to be re-analyzed with the background
+
+ return LIQ_OK;
+}
+
+LIQ_NONNULL static bool check_image_size(const liq_attr *attr, const int width, const int height) // validates the attr handle and the image dimensions; logs the reason when a dimension is rejected
+{
+ if (!CHECK_STRUCT_TYPE(attr, liq_attr)) {
+ return false;
+ }
+
+ if (width <= 0 || height <= 0) {
+ liq_log_error(attr, "width and height must be > 0");
+ return false;
+ }
+ // Limits are phrased as divisions so that the checks themselves cannot overflow: width*height*sizeof(rgba_pixel), 16*width*sizeof(f_pixel) and height*sizeof(size_t) must each fit in INT_MAX
+ if (width > INT_MAX/sizeof(rgba_pixel)/height || width > INT_MAX/16/sizeof(f_pixel) || height > INT_MAX/sizeof(size_t)) {
+ liq_log_error(attr, "image too large");
+ return false;
+ }
+ return true;
+}
+
+LIQ_EXPORT liq_image *liq_image_create_custom(const liq_attr *attr, liq_image_get_rgba_row_callback *row_callback, void* user_info, int width, int height, double gamma)
+{
+    // No row table is passed (rows == NULL), so the image is backed by row_callback only
+    const bool size_ok = check_image_size(attr, width, height);
+    if (!size_ok) return NULL;
+    return liq_image_create_internal(attr, NULL, row_callback, user_info, width, height, gamma);
+}
+
+/* Creates an image from an array of caller-provided row pointers; every row pointer is checked before use. */
+LIQ_EXPORT liq_image *liq_image_create_rgba_rows(const liq_attr *attr, void *const rows[], int width, int height, double gamma)
+{
+    if (!check_image_size(attr, width, height)) {
+        return NULL;
+    }
+
+    int y = 0;
+    while (y < height) {
+        const bool row_ok = CHECK_USER_POINTER(rows+y) && CHECK_USER_POINTER(rows[y]);
+        if (!row_ok) {
+            liq_log_error(attr, "invalid row pointers");
+            return NULL;
+        }
+        y++;
+    }
+
+    return liq_image_create_internal(attr, (rgba_pixel**)rows, NULL, NULL, width, height, gamma);
+}
+
+/*
+ * Creates an image from one contiguous RGBA bitmap. A row-pointer table is
+ * allocated with attr->malloc and marked so the image frees it later.
+ */
+LIQ_EXPORT LIQ_NONNULL liq_image *liq_image_create_rgba(const liq_attr *attr, const void* bitmap, int width, int height, double gamma)
+{
+    if (!check_image_size(attr, width, height)) {
+        return NULL;
+    }
+    if (!CHECK_USER_POINTER(bitmap)) {
+        liq_log_error(attr, "invalid bitmap pointer");
+        return NULL;
+    }
+
+    rgba_pixel **row_table = attr->malloc(sizeof(row_table[0])*height);
+    if (row_table == NULL) {
+        return NULL;
+    }
+
+    // each table entry points at the first pixel of its row inside the bitmap
+    rgba_pixel *row_start = (rgba_pixel *const)bitmap;
+    for (int y = 0; y < height; y++, row_start += width) {
+        row_table[y] = row_start;
+    }
+
+    liq_image *const created = liq_image_create_internal(attr, row_table, NULL, NULL, width, height, gamma);
+    if (created == NULL) {
+        attr->free(row_table);
+        return NULL;
+    }
+
+    created->free_rows = true;
+    created->free_rows_internal = true;
+    return created;
+}
+
+/* Thin, never-inlined trampoline through which the user's row callback is invoked. */
+NEVER_INLINE LIQ_EXPORT void liq_executing_user_callback(liq_image_get_rgba_row_callback *callback, liq_color *temp_row, int row, int width, void *user_info);
+LIQ_EXPORT void liq_executing_user_callback(liq_image_get_rgba_row_callback *callback, liq_color *temp_row, int row, int width, void *user_info)
+{
+    assert(callback);
+    assert(temp_row);
+
+    (*callback)(temp_row, row, width, user_info);
+}
+
+/* True when RGBA data can be obtained: either row pointers exist, or a row callback together with its temporary row buffer. */
+LIQ_NONNULL inline static bool liq_image_has_rgba_pixels(const liq_image *img)
+{
+    if (!CHECK_STRUCT_TYPE(img, liq_image)) {
+        return false;
+    }
+    if (img->rows) {
+        return true;
+    }
+    return img->temp_row && img->row_callback;
+}
+
+/* True when the image has row pointers that can be read directly (as opposed to going through the row callback). */
+LIQ_NONNULL inline static bool liq_image_can_use_rgba_rows(const liq_image *img)
+{
+    assert(liq_image_has_rgba_pixels(img));
+    const bool has_row_pointers = img->rows;
+    return has_row_pointers;
+}
+
+/*
+ * Returns the RGBA pixels of `row`. Row pointers are returned directly when
+ * available; otherwise the row is produced into this thread's slice of
+ * img->temp_row and that slice is returned.
+ */
+LIQ_NONNULL static const rgba_pixel *liq_image_get_row_rgba(liq_image *img, unsigned int row)
+{
+    if (liq_image_can_use_rgba_rows(img)) {
+        return img->rows[row];
+    }
+
+    assert(img->temp_row);
+    rgba_pixel *const scratch = img->temp_row + LIQ_TEMP_ROW_WIDTH(img->width) * omp_get_thread_num();
+    if (!img->rows) {
+        liq_executing_user_callback(img->row_callback, (liq_color*)scratch, row, img->width, img->row_callback_user_info);
+    } else {
+        memcpy(scratch, img->rows[row], img->width * sizeof(scratch[0]));
+    }
+
+    return scratch;
+}
+
+/* Converts one RGBA row of `img` to float pixels via rgba_to_f() and the supplied gamma lookup table. */
+LIQ_NONNULL static void convert_row_to_f(liq_image *img, f_pixel *row_f_pixels, const unsigned int row, const float gamma_lut[])
+{
+    assert(row_f_pixels);
+    assert(!USE_SSE || 0 == ((uintptr_t)row_f_pixels & 15));
+
+    const rgba_pixel *const source = liq_image_get_row_rgba(img, row);
+
+    unsigned int x = 0;
+    while (x < img->width) {
+        row_f_pixels[x] = rgba_to_f(gamma_lut, source[x]);
+        x++;
+    }
+}
+
+/*
+ * Prepares the image for liq_image_get_row_f(). Unless low-memory mode is
+ * requested, a full float copy of the image is allocated and filled; if that
+ * buffer is not available the result of liq_image_use_low_memory() is returned.
+ * Must be called from thread 0.
+ */
+LIQ_NONNULL static bool liq_image_get_row_f_init(liq_image *img)
+{
+    assert(omp_get_thread_num() == 0);
+
+    if (img->f_pixels) {
+        return true; // already converted
+    }
+
+    const bool low_memory = liq_image_should_use_low_memory(img, false);
+    if (!low_memory) {
+        img->f_pixels = img->malloc(sizeof(img->f_pixels[0]) * img->width * img->height);
+    }
+    if (!img->f_pixels) {
+        return liq_image_use_low_memory(img);
+    }
+
+    if (!liq_image_has_rgba_pixels(img)) {
+        return false;
+    }
+
+    float gamma_lut[256];
+    to_f_set_gamma(gamma_lut, img->gamma);
+
+    unsigned int y = 0;
+    while (y < img->height) {
+        convert_row_to_f(img, &img->f_pixels[y*img->width], y, gamma_lut);
+        y++;
+    }
+    return true;
+}
+
+/*
+ * Returns float pixels of `row`: a pointer into the cached f_pixels buffer when
+ * it exists, otherwise the row converted on the fly into this thread's slice
+ * of img->temp_f_row.
+ */
+LIQ_NONNULL static const f_pixel *liq_image_get_row_f(liq_image *img, unsigned int row)
+{
+    if (img->f_pixels) {
+        return img->f_pixels + img->width * row;
+    }
+
+    assert(img->temp_f_row); // init should have done that
+
+    float gamma_lut[256];
+    to_f_set_gamma(gamma_lut, img->gamma);
+
+    f_pixel *const scratch = img->temp_f_row + LIQ_TEMP_ROW_WIDTH(img->width) * omp_get_thread_num();
+    convert_row_to_f(img, scratch, row, gamma_lut);
+    return scratch;
+}
+
+/* Returns the image width, or -1 when the handle is not a valid liq_image. */
+LIQ_EXPORT LIQ_NONNULL int liq_image_get_width(const liq_image *input_image)
+{
+    if (CHECK_STRUCT_TYPE(input_image, liq_image)) {
+        return input_image->width;
+    }
+    return -1;
+}
+
+/* Returns the image height, or -1 when the handle is not a valid liq_image. */
+LIQ_EXPORT LIQ_NONNULL int liq_image_get_height(const liq_image *input_image)
+{
+    if (CHECK_STRUCT_TYPE(input_image, liq_image)) {
+        return input_image->height;
+    }
+    return -1;
+}
+
+typedef void free_func(void*);
+
+/*
+ * Chooses the deallocator for the image's pixel/row buffers: plain free() when
+ * the rows were not allocated internally and the image uses liq_aligned_free,
+ * otherwise the image's own free function.
+ */
+LIQ_NONNULL static free_func *get_default_free_func(liq_image *img)
+{
+    // When default allocator is used then user-supplied pointers must be freed with free()
+    const bool user_buffers_with_default_allocator = !img->free_rows_internal && img->free == liq_aligned_free;
+    if (user_buffers_with_default_allocator) {
+        return free;
+    }
+    return img->free;
+}
+
+/* Releases the RGBA pixel buffer and the row-pointer table, each only if the image is flagged as owning it. */
+LIQ_NONNULL static void liq_image_free_rgba_source(liq_image *input_image)
+{
+    const bool owns_pixels = input_image->free_pixels && input_image->pixels;
+    if (owns_pixels) {
+        free_func *const release = get_default_free_func(input_image);
+        release(input_image->pixels);
+        input_image->pixels = NULL;
+    }
+
+    const bool owns_rows = input_image->free_rows && input_image->rows;
+    if (owns_rows) {
+        free_func *const release = get_default_free_func(input_image);
+        release(input_image->rows);
+        input_image->rows = NULL;
+    }
+}
+
+/* Frees the importance map, if any, and clears the pointer. */
+LIQ_NONNULL static void liq_image_free_importance_map(liq_image *input_image) {
+    if (!input_image->importance_map) {
+        return;
+    }
+    input_image->free(input_image->importance_map);
+    input_image->importance_map = NULL;
+}
+
+/* Frees the importance, edge and dither maps and clears their pointers. */
+LIQ_NONNULL static void liq_image_free_maps(liq_image *input_image) {
+    liq_image_free_importance_map(input_image);
+
+    if (input_image->edges != NULL) {
+        input_image->free(input_image->edges);
+        input_image->edges = NULL;
+    }
+
+    if (input_image->dither_map != NULL) {
+        input_image->free(input_image->dither_map);
+        input_image->dither_map = NULL;
+    }
+}
+
+/*
+ * Destroys an image: owned RGBA source, maps, float/temporary buffers and the
+ * attached background image are released, then the handle itself.
+ * Invalid handles are ignored.
+ */
+LIQ_EXPORT LIQ_NONNULL void liq_image_destroy(liq_image *input_image)
+{
+    if (!CHECK_STRUCT_TYPE(input_image, liq_image)) {
+        return;
+    }
+
+    liq_image_free_rgba_source(input_image);
+    liq_image_free_maps(input_image);
+
+    if (input_image->f_pixels != NULL) input_image->free(input_image->f_pixels);
+    if (input_image->temp_row != NULL) input_image->free(input_image->temp_row);
+    if (input_image->temp_f_row != NULL) input_image->free(input_image->temp_f_row);
+
+    // the background is destroyed together with the image it is attached to
+    if (input_image->background != NULL) {
+        liq_image_destroy(input_image->background);
+    }
+
+    // mark the handle as freed before the memory is handed back
+    input_image->magic_header = liq_freed_magic;
+    input_image->free(input_image);
+}
+
+/*
+ * Allocates an empty histogram using the allocator of `attr`.
+ * Returns NULL for an invalid attr handle or when allocation fails.
+ */
+LIQ_EXPORT liq_histogram* liq_histogram_create(const liq_attr* attr)
+{
+    if (!CHECK_STRUCT_TYPE(attr, liq_attr)) {
+        return NULL;
+    }
+
+    liq_histogram *const created = attr->malloc(sizeof(liq_histogram));
+    if (created == NULL) {
+        return NULL;
+    }
+
+    *created = (liq_histogram) {
+        .magic_header = liq_histogram_magic,
+        .malloc = attr->malloc,
+        .free = attr->free,
+
+        // the larger of the two posterization settings
+        .ignorebits = MAX(attr->min_posterization_output, attr->min_posterization_input),
+    };
+    return created;
+}
+
+/* Destroys a histogram and its color hash; invalid handles are ignored. */
+LIQ_EXPORT LIQ_NONNULL void liq_histogram_destroy(liq_histogram *hist)
+{
+    if (!CHECK_STRUCT_TYPE(hist, liq_histogram)) {
+        return;
+    }
+
+    // mark the handle as freed before releasing anything
+    hist->magic_header = liq_freed_magic;
+    pam_freeacolorhash(hist->acht);
+    hist->free(hist);
+}
+
+LIQ_EXPORT LIQ_NONNULL liq_result *liq_quantize_image(liq_attr *attr, liq_image *img)
+{
+ liq_result *res;
+ if (LIQ_OK != liq_image_quantize(img, attr, &res)) {
+ return NULL;
+ }
+ return res;
+}
+
+/*
+ * Quantizes a single image: a temporary histogram is created, the image is
+ * added to it, the histogram is quantized into *result_output, and the
+ * histogram is destroyed again.
+ */
+LIQ_EXPORT LIQ_NONNULL liq_error liq_image_quantize(liq_image *const img, liq_attr *const attr, liq_result **result_output)
+{
+    if (!CHECK_STRUCT_TYPE(attr, liq_attr)) {
+        return LIQ_INVALID_POINTER;
+    }
+    if (!liq_image_has_rgba_pixels(img)) {
+        return LIQ_UNSUPPORTED;
+    }
+
+    liq_histogram *const temp_hist = liq_histogram_create(attr);
+    if (temp_hist == NULL) {
+        return LIQ_OUT_OF_MEMORY;
+    }
+
+    liq_error status = liq_histogram_add_image(temp_hist, attr, img);
+    if (status == LIQ_OK) {
+        status = liq_histogram_quantize_internal(temp_hist, attr, false, result_output);
+    }
+
+    // the histogram is only needed for the duration of this call
+    liq_histogram_destroy(temp_hist);
+    return status;
+}
+
+/* Public entry point: quantizes a histogram with fixed_result_colors enabled. */
+LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_quantize(liq_histogram *input_hist, liq_attr *attr, liq_result **result_output) {
+    const bool fixed_result_colors = true;
+    return liq_histogram_quantize_internal(input_hist, attr, fixed_result_colors, result_output);
+}
+
+/*
+ * Finalizes the histogram and runs pngquant_quantize() on it.
+ * *result_output is reset to NULL before anything else can fail.
+ */
+LIQ_NONNULL static liq_error liq_histogram_quantize_internal(liq_histogram *input_hist, liq_attr *attr, bool fixed_result_colors, liq_result **result_output)
+{
+    if (!CHECK_USER_POINTER(result_output)) {
+        return LIQ_INVALID_POINTER;
+    }
+    *result_output = NULL;
+
+    if (!CHECK_STRUCT_TYPE(attr, liq_attr)) {
+        return LIQ_INVALID_POINTER;
+    }
+    if (!CHECK_STRUCT_TYPE(input_hist, liq_histogram)) {
+        return LIQ_INVALID_POINTER;
+    }
+
+    if (liq_progress(attr, 0)) {
+        return LIQ_ABORTED;
+    }
+
+    histogram *finalized;
+    const liq_error finalize_status = finalize_histogram(input_hist, attr, &finalized);
+    if (finalize_status != LIQ_OK) {
+        return finalize_status;
+    }
+
+    const liq_error quantize_status = pngquant_quantize(finalized, attr, input_hist->fixed_colors_count, input_hist->fixed_colors, input_hist->gamma, fixed_result_colors, result_output);
+    pam_freeacolorhist(finalized);
+
+    return quantize_status;
+}
+
+/*
+ * Sets the dithering level (0..1) of a result. Any existing remapping is
+ * discarded first, even when the new level is then rejected as out of range.
+ */
+LIQ_EXPORT LIQ_NONNULL liq_error liq_set_dithering_level(liq_result *res, float dither_level)
+{
+    if (!CHECK_STRUCT_TYPE(res, liq_result)) {
+        return LIQ_INVALID_POINTER;
+    }
+
+    if (res->remapping != NULL) {
+        liq_remapping_result_destroy(res->remapping);
+        res->remapping = NULL;
+    }
+
+    const bool out_of_range = dither_level < 0 || dither_level > 1.0f;
+    if (out_of_range) {
+        return LIQ_VALUE_OUT_OF_RANGE;
+    }
+
+    res->dither_level = dither_level;
+    return LIQ_OK;
+}
+
+/*
+ * Creates a remapping result that copies the settings of `result` and holds
+ * its own duplicate of the palette. Returns NULL on an invalid handle or
+ * allocation failure.
+ */
+LIQ_NONNULL static liq_remapping_result *liq_remapping_result_create(liq_result *result)
+{
+    if (!CHECK_STRUCT_TYPE(result, liq_result)) {
+        return NULL;
+    }
+
+    liq_remapping_result *const remapping = result->malloc(sizeof(liq_remapping_result));
+    if (remapping == NULL) {
+        return NULL;
+    }
+
+    *remapping = (liq_remapping_result) {
+        .magic_header = liq_remapping_result_magic,
+        .malloc = result->malloc,
+        .free = result->free,
+        .dither_level = result->dither_level,
+        .use_dither_map = result->use_dither_map,
+        .palette_error = result->palette_error,
+        .gamma = result->gamma,
+        .palette = pam_duplicate_colormap(result->palette),
+        .progress_callback = result->progress_callback,
+        .progress_callback_user_info = result->progress_callback_user_info,
+        // progress starts at 20 when a dither map is in use, at 0 otherwise
+        .progress_stage1 = result->use_dither_map ? 20 : 0,
+    };
+    return remapping;
+}
+
+/* Returns the gamma stored in the result, or -1 for an invalid handle. */
+LIQ_EXPORT LIQ_NONNULL double liq_get_output_gamma(const liq_result *result)
+{
+    if (CHECK_STRUCT_TYPE(result, liq_result)) {
+        return result->gamma;
+    }
+    return -1;
+}
+
+/* Frees a remapping result together with its palette and pixel buffer; invalid handles are ignored. */
+LIQ_NONNULL static void liq_remapping_result_destroy(liq_remapping_result *result)
+{
+    if (!CHECK_STRUCT_TYPE(result, liq_remapping_result)) {
+        return;
+    }
+
+    if (result->palette != NULL) {
+        pam_freecolormap(result->palette);
+    }
+    if (result->pixels != NULL) {
+        result->free(result->pixels);
+    }
+
+    result->magic_header = liq_freed_magic;
+    result->free(result);
+}
+
+/*
+ * Destroys a quantization result: the integer palettes are zeroed, the
+ * remapping (if any) and the palette are freed, then the handle itself.
+ */
+LIQ_EXPORT LIQ_NONNULL void liq_result_destroy(liq_result *res)
+{
+    if (!CHECK_STRUCT_TYPE(res, liq_result)) {
+        return;
+    }
+
+    memset(&res->int_palette, 0, sizeof(liq_palette));
+
+    liq_remapping_result *const remapping = res->remapping;
+    if (remapping != NULL) {
+        memset(&remapping->int_palette, 0, sizeof(liq_palette));
+        liq_remapping_result_destroy(remapping);
+    }
+
+    pam_freecolormap(res->palette);
+
+    res->magic_header = liq_freed_magic;
+    res->free(res);
+}
+
+
+LIQ_EXPORT LIQ_NONNULL double liq_get_quantization_error(const liq_result *result) {
+ if (!CHECK_STRUCT_TYPE(result, liq_result)) return -1;
+
+ if (result->palette_error >= 0) {
+ return mse_to_standard_mse(result->palette_error);
+ }
+
+ return -1;
+}
+
+LIQ_EXPORT LIQ_NONNULL double liq_get_remapping_error(const liq_result *result) {
+ if (!CHECK_STRUCT_TYPE(result, liq_result)) return -1;
+
+ if (result->remapping && result->remapping->palette_error >= 0) {
+ return mse_to_standard_mse(result->remapping->palette_error);
+ }
+
+ return -1;
+}
+
+LIQ_EXPORT LIQ_NONNULL int liq_get_quantization_quality(const liq_result *result) {
+ if (!CHECK_STRUCT_TYPE(result, liq_result)) return -1;
+
+ if (result->palette_error >= 0) {
+ return mse_to_quality(result->palette_error);
+ }
+
+ return -1;
+}
+
+LIQ_EXPORT LIQ_NONNULL int liq_get_remapping_quality(const liq_result *result) {
+ if (!CHECK_STRUCT_TYPE(result, liq_result)) return -1;
+
+ if (result->remapping && result->remapping->palette_error >= 0) {
+ return mse_to_quality(result->remapping->palette_error);
+ }
+
+ return -1;
+}
+
+/*
+ * qsort() comparator ordering colormap items by descending popularity.
+ * Returns a negative value when ch1 is more popular, a positive value when ch2
+ * is more popular, and 0 when both are equally popular.
+ */
+LIQ_NONNULL static int compare_popularity(const void *ch1, const void *ch2)
+{
+    const float v1 = ((const colormap_item*)ch1)->popularity;
+    const float v2 = ((const colormap_item*)ch2)->popularity;
+    // qsort() needs a consistent ordering: claiming "greater" for both (a,b) and (b,a)
+    // when the keys are equal is not one, so ties must compare as 0
+    return (v1 < v2) - (v1 > v2);
+}
+
+/* Sorts `nelem` palette entries starting at index `start` by descending popularity; does nothing for an empty range. */
+LIQ_NONNULL static void sort_palette_qsort(colormap *map, int start, int nelem)
+{
+    if (nelem) {
+        qsort(map->palette + start, nelem, sizeof(map->palette[0]), compare_popularity);
+    }
+}
+
+/* Exchanges palette entries (a) and (b) of (map). Expands to a braced block. */
+#define SWAP_PALETTE(map, a,b) { \
+    const colormap_item swapped_entry = (map)->palette[(a)]; \
+    (map)->palette[(a)] = (map)->palette[(b)]; \
+    (map)->palette[(b)] = swapped_entry; }
+
+/*
+ * Reorders the palette in place.
+ *
+ * With options->last_index_transparent set and a (nearly) fully transparent
+ * entry present, that entry is moved to the last index and all other entries
+ * are sorted by popularity.
+ *
+ * Otherwise only the leading run of non-fixed colors is touched: entries that
+ * are not fully opaque are moved to the front, then the transparent and
+ * opaque groups are sorted by popularity separately.
+ */
+LIQ_NONNULL static void sort_palette(colormap *map, const liq_attr *options)
+{
+    /*
+    ** Step 3.5 [GRR]: remap the palette colors so that all entries with
+    ** the maximal alpha value (i.e., fully opaque) are at the end and can
+    ** therefore be omitted from the tRNS chunk.
+    */
+    if (options->last_index_transparent) {
+        unsigned int candidate = 0;
+        while (candidate < map->colors && !(map->palette[candidate].acolor.a < 1.f/256.f)) {
+            candidate++;
+        }
+
+        if (candidate < map->colors) {
+            const unsigned int last_index = map->colors-1;
+
+            SWAP_PALETTE(map, last_index, candidate);
+
+            /* colors sorted by popularity make pngs slightly more compressible */
+            sort_palette_qsort(map, 0, map->colors-1);
+            return;
+        }
+    }
+
+    // fixed colors stay where they are; only the entries before the first fixed one are reordered
+    unsigned int non_fixed_colors = 0;
+    while (non_fixed_colors < map->colors && !map->palette[non_fixed_colors].fixed) {
+        non_fixed_colors++;
+    }
+
+    /* move transparent colors to the beginning to shrink trns chunk */
+    unsigned int num_transparent = 0;
+    for (unsigned int pos = 0; pos < non_fixed_colors; pos++) {
+        if (!(map->palette[pos].acolor.a < 255.f/256.f)) {
+            continue;
+        }
+        // current transparent color is swapped with earlier opaque one
+        if (pos != num_transparent) {
+            SWAP_PALETTE(map, num_transparent, pos);
+            pos--;
+        }
+        num_transparent++;
+    }
+
+    liq_verbose_printf(options, " eliminated opaque tRNS-chunk entries...%d entr%s transparent", num_transparent, (num_transparent == 1)? "y" : "ies");
+
+    /* colors sorted by popularity make pngs slightly more compressible
+     * opaque and transparent are sorted separately
+     */
+    sort_palette_qsort(map, 0, num_transparent);
+    sort_palette_qsort(map, num_transparent, non_fixed_colors - num_transparent);
+
+    const bool enough_entries = non_fixed_colors > 9 && map->colors > 16;
+    if (enough_entries) {
+        SWAP_PALETTE(map, 7, 1); // slightly improves compression
+        SWAP_PALETTE(map, 8, 2);
+        SWAP_PALETTE(map, 9, 3);
+    }
+}
+
+/*
+ * Posterizes one 8-bit channel value: the lowest `bits` bits are cleared and
+ * refilled with the value's own most significant bits, so 0 stays 0 and 255
+ * stays 255. `bits` is expected to be in the range 0..8.
+ */
+inline static unsigned int posterize_channel(unsigned int color, unsigned int bits)
+{
+    return (color & ~((1u<<bits)-1)) | (color >> (8-bits));
+}
+
+/*
+ * Fills `dest` with the 8-bit, posterized version of every color in `map`.
+ * The float colors in `map` are overwritten with the rounded values as well.
+ * Non-fixed entries whose alpha rounds to zero get a fixed placeholder RGB.
+ */
+LIQ_NONNULL static void set_rounded_palette(liq_palette *const dest, colormap *const map, const double gamma, unsigned int posterize)
+{
+    float gamma_lut[256];
+    to_f_set_gamma(gamma_lut, gamma);
+
+    dest->count = map->colors;
+
+    unsigned int idx = 0;
+    while (idx < map->colors) {
+        rgba_pixel rounded = f_to_rgb(gamma, map->palette[idx].acolor);
+
+        rounded.r = posterize_channel(rounded.r, posterize);
+        rounded.g = posterize_channel(rounded.g, posterize);
+        rounded.b = posterize_channel(rounded.b, posterize);
+        rounded.a = posterize_channel(rounded.a, posterize);
+
+        /* saves rounding error introduced by to_rgb, which makes remapping & dithering more accurate */
+        map->palette[idx].acolor = rgba_to_f(gamma_lut, rounded);
+
+        const bool invisible_and_free = !rounded.a && !map->palette[idx].fixed;
+        if (invisible_and_free) {
+            rounded.r = 71;
+            rounded.g = 112;
+            rounded.b = 76;
+        }
+
+        dest->entries[idx] = (liq_color){.r=rounded.r,.g=rounded.g,.b=rounded.b,.a=rounded.a};
+        idx++;
+    }
+}
+
+/*
+ * Returns the integer palette of a result. The remapping's palette is
+ * preferred when it has entries; otherwise the result's own palette is
+ * generated on first use and returned. NULL for an invalid handle.
+ */
+LIQ_EXPORT LIQ_NONNULL const liq_palette *liq_get_palette(liq_result *result)
+{
+    if (!CHECK_STRUCT_TYPE(result, liq_result)) {
+        return NULL;
+    }
+
+    const bool remapped_palette_ready = result->remapping && result->remapping->int_palette.count;
+    if (remapped_palette_ready) {
+        return &result->remapping->int_palette;
+    }
+
+    liq_palette *const own_palette = &result->int_palette;
+    if (!own_palette->count) {
+        set_rounded_palette(own_palette, result->palette, result->gamma, result->min_posterization_output);
+    }
+    return own_palette;
+}
+
+/*
+ * Remaps every pixel of input_image to the palette index found by
+ * nearest_search() and stores it in output_pixels[row][col], without dithering.
+ *
+ * When the image has a background and the palette entry nearest to transparent
+ * black has alpha below 1/256, a pixel is written as that transparent index
+ * instead if the background pixel differs from the matched color by no more
+ * than the match's own diff.
+ *
+ * Every pixel is also passed to kmeans_update_color(), and kmeans_finalize()
+ * is called on `map` afterwards.
+ *
+ * Returns the sum of the diffs reported by nearest_search() divided by the
+ * number of pixels, or -1 if float pixels could not be prepared.
+ */ LIQ_NONNULL static float remap_to_palette(liq_image *const input_image, unsigned char *const *const output_pixels, colormap *const map)
+{
+ const int rows = input_image->height;
+ const unsigned int cols = input_image->width;
+ double remapping_error=0;
+
+ if (!liq_image_get_row_f_init(input_image)) {
+ return -1;
+ }
+ if (input_image->background && !liq_image_get_row_f_init(input_image->background)) {
+ return -1;
+ }
+
+ const colormap_item *acolormap = map->palette;
+
+ struct nearest_map *const n = nearest_init(map);
+ const int transparent_index = input_image->background ? nearest_search(n, &(f_pixel){0,0,0,0}, 0, NULL) : 0; // only looked up when a background is attached
+
+
+ const unsigned int max_threads = omp_get_max_threads();
+ LIQ_ARRAY(kmeans_state, average_color, (KMEANS_CACHE_LINE_GAP+map->colors) * max_threads);
+ kmeans_init(map, max_threads, average_color);
+
+#if __GNUC__ >= 9 || __clang__
+ #pragma omp parallel for if (rows*cols > 3000) \
+ schedule(static) default(none) shared(acolormap,average_color,cols,input_image,map,n,output_pixels,rows,transparent_index) reduction(+:remapping_error)
+#else
+ #pragma omp parallel for if (rows*cols > 3000) \
+ schedule(static) default(none) shared(acolormap) shared(average_color) reduction(+:remapping_error)
+#endif
+ for(int row = 0; row < rows; ++row) {
+ const f_pixel *const row_pixels = liq_image_get_row_f(input_image, row);
+ const f_pixel *const bg_pixels = input_image->background && acolormap[transparent_index].acolor.a < 1.f/256.f ? liq_image_get_row_f(input_image->background, row) : NULL; // NULL disables the background substitution below
+
+ unsigned int last_match=0; // previous match is passed to nearest_search() as the starting guess
+ for(unsigned int col = 0; col < cols; ++col) {
+ float diff;
+ last_match = nearest_search(n, &row_pixels[col], last_match, &diff);
+ if (bg_pixels && colordifference(bg_pixels[col], acolormap[last_match].acolor) <= diff) {
+ last_match = transparent_index;
+ }
+ output_pixels[row][col] = last_match;
+
+ remapping_error += diff;
+ kmeans_update_color(row_pixels[col], 1.0, map, last_match, omp_get_thread_num(), average_color);
+ }
+ }
+
+ kmeans_finalize(map, max_threads, average_color);
+
+ nearest_free(n);
+
+ return remapping_error / (input_image->width * input_image->height); // mean diff per pixel
+}
+
+/*
+ * Returns `px` with the diffused error `thiserr`, scaled by `dither_level`,
+ * added to it.
+ *
+ * The RGB correction is scaled down by a common `ratio` so that a channel that
+ * would pass max_overflow/max_underflow is limited to that bound; alpha is
+ * clamped to [0, 1] independently of `ratio`.
+ * If the squared magnitude of the scaled error exceeds max_dither_error the
+ * correction is further reduced to 80%; if it is below 2/256/256, `px` is
+ * returned unchanged.
+ */ inline static f_pixel get_dithered_pixel(const float dither_level, const float max_dither_error, const f_pixel thiserr, const f_pixel px)
+{
+ /* Use Floyd-Steinberg errors to adjust actual color. */
+ const float sr = thiserr.r * dither_level,
+ sg = thiserr.g * dither_level,
+ sb = thiserr.b * dither_level,
+ sa = thiserr.a * dither_level;
+
+ float ratio = 1.0;
+ const float max_overflow = 1.1f;
+ const float max_underflow = -0.1f;
+
+ // allowing some overflow prevents undithered bands caused by clamping of all channels
+ if (px.r + sr > max_overflow) ratio = MIN(ratio, (max_overflow -px.r)/sr);
+ else { if (px.r + sr < max_underflow) ratio = MIN(ratio, (max_underflow-px.r)/sr); }
+ if (px.g + sg > max_overflow) ratio = MIN(ratio, (max_overflow -px.g)/sg);
+ else { if (px.g + sg < max_underflow) ratio = MIN(ratio, (max_underflow-px.g)/sg); }
+ if (px.b + sb > max_overflow) ratio = MIN(ratio, (max_overflow -px.b)/sb);
+ else { if (px.b + sb < max_underflow) ratio = MIN(ratio, (max_underflow-px.b)/sb); }
+
+ float a = px.a + sa;
+ if (a > 1.f) { a = 1.f; }
+ else if (a < 0) { a = 0; }
+
+ // If dithering error is crazy high, don't propagate it that much
+ // This prevents crazy green pixels popping out of the blue (or red or black! ;)
+ const float dither_error = sr*sr + sg*sg + sb*sb + sa*sa;
+ if (dither_error > max_dither_error) {
+ ratio *= 0.8f;
+ } else if (dither_error < 2.f/256.f/256.f) {
+ // don't dither areas that don't have noticeable error — makes file smaller
+ return px;
+ }
+
+ return (f_pixel) {
+ .r=px.r + sr * ratio,
+ .g=px.g + sg * ratio,
+ .b=px.b + sb * ratio,
+ .a=a,
+ };
+}
+
+/**
+ Uses edge/noise map to apply dithering only to flat areas. Dithering on edges creates jagged lines, and noisy areas are "naturally" dithered.
+
+ If output_image_is_remapped is true, only pixels noticeably changed by error diffusion will be written to output image.
+ In that case the existing contents of output_pixels are also used as the starting guess for each nearest_search() call.
+
+ Rows are processed in alternating directions. The quantization error of each pixel is distributed to the next pixel in
+ the current row (7/16) and to three pixels of the following row (1/16, 5/16, 3/16).
+
+ Returns false if float pixels cannot be prepared, if the error buffers cannot be allocated,
+ or if liq_remap_progress() returns non-zero; returns true otherwise.
+ */
+LIQ_NONNULL static bool remap_to_palette_floyd(liq_image *input_image, unsigned char *const output_pixels[], liq_remapping_result *quant, const float max_dither_error, const bool output_image_is_remapped)
+{
+ const int rows = input_image->height, cols = input_image->width;
+ const unsigned char *dither_map = quant->use_dither_map ? (input_image->dither_map ? input_image->dither_map : input_image->edges) : NULL; // falls back to the edges map when no dither map exists
+
+ const colormap *map = quant->palette;
+ const colormap_item *acolormap = map->palette;
+
+ if (!liq_image_get_row_f_init(input_image)) {
+ return false;
+ }
+ if (input_image->background && !liq_image_get_row_f_init(input_image->background)) {
+ return false;
+ }
+
+ /* Initialize Floyd-Steinberg error vectors. */
+ const size_t errwidth = cols+2;
+ f_pixel *restrict thiserr = input_image->malloc(errwidth * sizeof(thiserr[0]) * 2); // +2 saves from checking out of bounds access
+ if (!thiserr) return false;
+ f_pixel *restrict nexterr = thiserr + errwidth; // second half of the same allocation
+ memset(thiserr, 0, errwidth * sizeof(thiserr[0]));
+
+ bool ok = true;
+ struct nearest_map *const n = nearest_init(map);
+ const int transparent_index = input_image->background ? nearest_search(n, &(f_pixel){0,0,0,0}, 0, NULL) : 0;
+
+ // response to this value is non-linear and without it any value < 0.8 would give almost no dithering
+ float base_dithering_level = quant->dither_level;
+ base_dithering_level = 1.f - (1.f-base_dithering_level)*(1.f-base_dithering_level);
+
+ if (dither_map) {
+ base_dithering_level *= 1.f/255.f; // convert byte to float
+ }
+ base_dithering_level *= 15.f/16.f; // prevent small errors from accumulating
+
+ int fs_direction = 1;
+ unsigned int last_match=0;
+ for (int row = 0; row < rows; ++row) {
+ if (liq_remap_progress(quant, quant->progress_stage1 + row * (100.f - quant->progress_stage1) / rows)) {
+ ok = false;
+ break;
+ }
+
+ memset(nexterr, 0, errwidth * sizeof(nexterr[0]));
+
+ int col = (fs_direction > 0) ? 0 : (cols - 1);
+ const f_pixel *const row_pixels = liq_image_get_row_f(input_image, row);
+ const f_pixel *const bg_pixels = input_image->background && acolormap[transparent_index].acolor.a < 1.f/256.f ? liq_image_get_row_f(input_image->background, row) : NULL;
+
+ do {
+ float dither_level = base_dithering_level;
+ if (dither_map) {
+ dither_level *= dither_map[row*cols + col];
+ }
+
+ const f_pixel spx = get_dithered_pixel(dither_level, max_dither_error, thiserr[col + 1], row_pixels[col]); // error for column `col` is stored at index col + 1
+
+ const unsigned int guessed_match = output_image_is_remapped ? output_pixels[row][col] : last_match;
+ float diff;
+ last_match = nearest_search(n, &spx, guessed_match, &diff);
+ f_pixel output_px = acolormap[last_match].acolor;
+ if (bg_pixels && colordifference(bg_pixels[col], output_px) <= diff) {
+ output_px = bg_pixels[col];
+ output_pixels[row][col] = transparent_index;
+ } else {
+ output_pixels[row][col] = last_match;
+ }
+
+ f_pixel err = {
+ .r = (spx.r - output_px.r),
+ .g = (spx.g - output_px.g),
+ .b = (spx.b - output_px.b),
+ .a = (spx.a - output_px.a),
+ };
+
+ // If dithering error is crazy high, don't propagate it that much
+ // This prevents crazy green pixels popping out of the blue (or red or black! ;)
+ if (err.r*err.r + err.g*err.g + err.b*err.b + err.a*err.a > max_dither_error) {
+ err.r *= 0.75f;
+ err.g *= 0.75f;
+ err.b *= 0.75f;
+ err.a *= 0.75f;
+ }
+
+ /* Propagate Floyd-Steinberg error terms. */
+ if (fs_direction > 0) {
+ thiserr[col + 2].a += err.a * (7.f/16.f);
+ thiserr[col + 2].r += err.r * (7.f/16.f);
+ thiserr[col + 2].g += err.g * (7.f/16.f);
+ thiserr[col + 2].b += err.b * (7.f/16.f);
+
+ nexterr[col + 2].a = err.a * (1.f/16.f);
+ nexterr[col + 2].r = err.r * (1.f/16.f);
+ nexterr[col + 2].g = err.g * (1.f/16.f);
+ nexterr[col + 2].b = err.b * (1.f/16.f);
+
+ nexterr[col + 1].a += err.a * (5.f/16.f);
+ nexterr[col + 1].r += err.r * (5.f/16.f);
+ nexterr[col + 1].g += err.g * (5.f/16.f);
+ nexterr[col + 1].b += err.b * (5.f/16.f);
+
+ nexterr[col ].a += err.a * (3.f/16.f);
+ nexterr[col ].r += err.r * (3.f/16.f);
+ nexterr[col ].g += err.g * (3.f/16.f);
+ nexterr[col ].b += err.b * (3.f/16.f);
+
+ } else {
+ thiserr[col ].a += err.a * (7.f/16.f);
+ thiserr[col ].r += err.r * (7.f/16.f);
+ thiserr[col ].g += err.g * (7.f/16.f);
+ thiserr[col ].b += err.b * (7.f/16.f);
+
+ nexterr[col ].a = err.a * (1.f/16.f);
+ nexterr[col ].r = err.r * (1.f/16.f);
+ nexterr[col ].g = err.g * (1.f/16.f);
+ nexterr[col ].b = err.b * (1.f/16.f);
+
+ nexterr[col + 1].a += err.a * (5.f/16.f);
+ nexterr[col + 1].r += err.r * (5.f/16.f);
+ nexterr[col + 1].g += err.g * (5.f/16.f);
+ nexterr[col + 1].b += err.b * (5.f/16.f);
+
+ nexterr[col + 2].a += err.a * (3.f/16.f);
+ nexterr[col + 2].r += err.r * (3.f/16.f);
+ nexterr[col + 2].g += err.g * (3.f/16.f);
+ nexterr[col + 2].b += err.b * (3.f/16.f);
+ }
+
+ // remapping is done in zig-zag
+ col += fs_direction;
+ if (fs_direction > 0) {
+ if (col >= cols) break;
+ } else {
+ if (col < 0) break;
+ }
+ } while(1);
+
+ f_pixel *const temperr = thiserr; // the next row's errors become the current ones
+ thiserr = nexterr;
+ nexterr = temperr;
+ fs_direction = -fs_direction;
+ }
+
+ input_image->free(MIN(thiserr, nexterr)); // MIN because pointers were swapped
+ nearest_free(n);
+
+ return ok;
+}
+
+/*
+ * Drops histogram entries that are (almost) identical to one of the fixed colors.
+ * Fixed colors are always included in the palette, so it would be wasteful to
+ * duplicate them in the palette generated from the histogram.
+ */
+LIQ_NONNULL static void remove_fixed_colors_from_histogram(histogram *hist, const int fixed_colors_count, const f_pixel fixed_colors[], const float target_mse)
+{
+    const float max_difference = MAX(target_mse/2.f, 2.f/256.f/256.f);
+    if (!fixed_colors_count) {
+        return;
+    }
+
+    int entry = 0;
+    while (entry < hist->size) {
+        bool removed = false;
+        for (unsigned int f = 0; f < fixed_colors_count; f++) {
+            if (colordifference(hist->achv[entry].acolor, fixed_colors[f]) < max_difference) {
+                // remove color from histogram by overwriting with the last entry
+                hist->achv[entry] = hist->achv[--hist->size];
+                removed = true;
+                break;
+            }
+        }
+        // after a removal the same slot holds a different entry and must be examined again
+        if (!removed) {
+            entry++;
+        }
+    }
+}
+
+/**
+ * Adds `num_entries` pre-counted colors to the histogram.
+ *
+ * `gamma` must be in the range [0, 1); 0 selects the default of 0.45455.
+ * `num_entries` must be in the range 1..(1<<30).
+ * Entries with zero alpha are all stored as the same fully transparent color.
+ *
+ * Returns LIQ_INVALID_POINTER for invalid handles/pointers,
+ * LIQ_VALUE_OUT_OF_RANGE for out-of-range gamma or count,
+ * LIQ_UNSUPPORTED if an image was already added with ignorebits > 0,
+ * LIQ_OUT_OF_MEMORY if the hash cannot be allocated or extended, and LIQ_OK otherwise.
+ */
+LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_add_colors(liq_histogram *input_hist, const liq_attr *options, const liq_histogram_entry entries[], int num_entries, double gamma)
+{
+    if (!CHECK_STRUCT_TYPE(options, liq_attr)) return LIQ_INVALID_POINTER;
+    if (!CHECK_STRUCT_TYPE(input_hist, liq_histogram)) return LIQ_INVALID_POINTER;
+    if (!CHECK_USER_POINTER(entries)) return LIQ_INVALID_POINTER;
+    if (gamma < 0 || gamma >= 1.0) return LIQ_VALUE_OUT_OF_RANGE;
+    if (num_entries <= 0 || num_entries > 1<<30) return LIQ_VALUE_OUT_OF_RANGE;
+
+    if (input_hist->ignorebits > 0 && input_hist->had_image_added) {
+        return LIQ_UNSUPPORTED;
+    }
+    input_hist->ignorebits = 0;
+
+    input_hist->had_image_added = true;
+    input_hist->gamma = gamma ? gamma : 0.45455;
+
+    if (!input_hist->acht) {
+        // num_entries*num_entries overflows int (undefined behaviour) for counts above 46340,
+        // so the size estimate is computed in unsigned arithmetic and saturated instead.
+        const unsigned int count = (unsigned int)num_entries;
+        const unsigned int surface_estimate = count > UINT_MAX / count ? UINT_MAX : count * count;
+
+        input_hist->acht = pam_allocacolorhash(~0, surface_estimate, 0, options->malloc, options->free);
+        if (!input_hist->acht) {
+            return LIQ_OUT_OF_MEMORY;
+        }
+    }
+    // Fake image size. It's only for hash size estimates.
+    if (!input_hist->acht->cols) {
+        input_hist->acht->cols = num_entries;
+    }
+    input_hist->acht->rows += num_entries;
+
+    const unsigned int hash_size = input_hist->acht->hash_size;
+    for(int i=0; i < num_entries; i++) {
+        const rgba_pixel rgba = {
+            .r = entries[i].color.r,
+            .g = entries[i].color.g,
+            .b = entries[i].color.b,
+            .a = entries[i].color.a,
+        };
+        union rgba_as_int px = {rgba};
+        unsigned int hash;
+        if (px.rgba.a) {
+            hash = px.l % hash_size;
+        } else {
+            // every fully transparent color shares one bucket and one key
+            hash=0; px.l=0;
+        }
+        if (!pam_add_to_hash(input_hist->acht, hash, entries[i].count, px, i, num_entries)) {
+            return LIQ_OUT_OF_MEMORY;
+        }
+    }
+
+    return LIQ_OK;
+}
+
+/*
+ * Adds the colors of `input_image` to `input_hist`.
+ *
+ * contrast_maps() is run first when the image has no importance map and
+ * options->use_contrast_maps is set. The image's gamma and fixed colors are
+ * copied into the histogram, then the pixels are hashed: all rows in a single
+ * call when row pointers are available, otherwise one row at a time.
+ * When hashing reports failure, ignorebits is incremented, the hash is thrown
+ * away and the whole image is hashed again from scratch.
+ *
+ * Returns LIQ_INVALID_POINTER for invalid handles, LIQ_ABORTED when
+ * liq_progress() returns non-zero, LIQ_OUT_OF_MEMORY when the hash cannot be
+ * allocated, the error of liq_histogram_add_fixed_color_f() if it fails, and
+ * LIQ_OK otherwise. On success the image's importance map is freed.
+ */ LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_add_image(liq_histogram *input_hist, const liq_attr *options, liq_image *input_image)
+{
+ if (!CHECK_STRUCT_TYPE(options, liq_attr)) return LIQ_INVALID_POINTER;
+ if (!CHECK_STRUCT_TYPE(input_hist, liq_histogram)) return LIQ_INVALID_POINTER;
+ if (!CHECK_STRUCT_TYPE(input_image, liq_image)) return LIQ_INVALID_POINTER;
+
+ const unsigned int cols = input_image->width, rows = input_image->height;
+
+ if (!input_image->importance_map && options->use_contrast_maps) {
+ contrast_maps(input_image);
+ }
+
+ input_hist->gamma = input_image->gamma;
+
+ for(int i = 0; i < input_image->fixed_colors_count; i++) {
+ liq_error res = liq_histogram_add_fixed_color_f(input_hist, input_image->fixed_colors[i]);
+ if (res != LIQ_OK) {
+ return res;
+ }
+ }
+
+ /*
+ ** Step 2: attempt to make a histogram of the colors, unclustered.
+ ** If at first we don't succeed, increase ignorebits to increase color
+ ** coherence and try again.
+ */
+
+ if (liq_progress(options, options->progress_stage1 * 0.4f)) {
+ return LIQ_ABORTED;
+ }
+
+ const bool all_rows_at_once = liq_image_can_use_rgba_rows(input_image);
+
+ // Usual solution is to start from scratch when limit is exceeded, but that's not possible if it's not
+ // the first image added
+ const unsigned int max_histogram_entries = input_hist->had_image_added ? ~0 : options->max_histogram_entries;
+ do {
+ if (!input_hist->acht) {
+ input_hist->acht = pam_allocacolorhash(max_histogram_entries, rows*cols, input_hist->ignorebits, options->malloc, options->free);
+ }
+ if (!input_hist->acht) return LIQ_OUT_OF_MEMORY;
+
+ // histogram uses noise contrast map for importance. Color accuracy in noisy areas is not very important.
+ // noise map does not include edges to avoid ruining anti-aliasing
+ for(unsigned int row=0; row < rows; row++) {
+ bool added_ok;
+ if (all_rows_at_once) {
+ added_ok = pam_computeacolorhash(input_hist->acht, (const rgba_pixel *const *)input_image->rows, cols, rows, input_image->importance_map);
+ if (added_ok) break; // the single call covered every row
+ } else {
+ const rgba_pixel* rows_p[1] = { liq_image_get_row_rgba(input_image, row) };
+ added_ok = pam_computeacolorhash(input_hist->acht, rows_p, cols, 1, input_image->importance_map ? &input_image->importance_map[row * cols] : NULL);
+ }
+ if (!added_ok) {
+ input_hist->ignorebits++;
+ liq_verbose_printf(options, " too many colors! Scaling colors to improve clustering... %d", input_hist->ignorebits);
+ pam_freeacolorhash(input_hist->acht);
+ input_hist->acht = NULL; // makes the enclosing do-while run another pass
+ if (liq_progress(options, options->progress_stage1 * 0.6f)) return LIQ_ABORTED;
+ break;
+ }
+ }
+ } while(!input_hist->acht);
+
+ input_hist->had_image_added = true;
+
+ liq_image_free_importance_map(input_image);
+
+ if (input_image->free_pixels && input_image->f_pixels) {
+ liq_image_free_rgba_source(input_image); // now can free the RGBA source if copy has been made in f_pixels
+ }
+
+ return LIQ_OK;
+}
+
+/*
+ * Converts the accumulated color hash of `input_hist` into a histogram
+ * (*hist_output) with the fixed colors removed. The hash is consumed: it is
+ * freed and reset whether or not the conversion succeeds.
+ */
+LIQ_NONNULL static liq_error finalize_histogram(liq_histogram *input_hist, liq_attr *options, histogram **hist_output)
+{
+    if (liq_progress(options, options->progress_stage1 * 0.9f)) {
+        return LIQ_ABORTED;
+    }
+
+    if (input_hist->acht == NULL) {
+        return LIQ_BITMAP_NOT_AVAILABLE;
+    }
+
+    histogram *const converted = pam_acolorhashtoacolorhist(input_hist->acht, input_hist->gamma, options->malloc, options->free);
+
+    pam_freeacolorhash(input_hist->acht);
+    input_hist->acht = NULL;
+
+    if (converted == NULL) {
+        return LIQ_OUT_OF_MEMORY;
+    }
+
+    liq_verbose_printf(options, " made histogram...%d colors found", converted->size);
+    remove_fixed_colors_from_histogram(converted, input_hist->fixed_colors_count, input_hist->fixed_colors, options->target_mse);
+
+    *hist_output = converted;
+    return LIQ_OK;
+}
+
+/**
+ Builds two maps:
+ importance_map - approximation of areas with high-frequency noise, except straight edges. 1=flat, 0=noisy.
+ edges - noise map including all edges
+
+ Both maps hold one byte per pixel (cols*rows bytes each).
+ Nothing is done when either dimension is below 4 pixels or 3*cols*rows exceeds LIQ_HIGH_MEMORY_LIMIT.
+ Buffers already attached to the image (importance_map, edges) are reused; missing ones come from image->malloc.
+ If any buffer is missing or liq_image_get_row_f_init() fails, all three buffers are freed and
+ image->importance_map and image->edges are left NULL.
+ */
+LIQ_NONNULL static void contrast_maps(liq_image *image)
+{
+ const unsigned int cols = image->width, rows = image->height;
+ if (cols < 4 || rows < 4 || (3*cols*rows) > LIQ_HIGH_MEMORY_LIMIT) {
+ return;
+ }
+
+ unsigned char *restrict noise = image->importance_map ? image->importance_map : image->malloc(cols*rows);
+ image->importance_map = NULL; // the buffer is held locally until the maps are complete
+ unsigned char *restrict edges = image->edges ? image->edges : image->malloc(cols*rows);
+ image->edges = NULL;
+
+ unsigned char *restrict tmp = image->malloc(cols*rows); // scratch buffer for the min/max/blur passes below
+
+ if (!noise || !edges || !tmp || !liq_image_get_row_f_init(image)) {
+ image->free(noise);
+ image->free(edges);
+ image->free(tmp);
+ return;
+ }
+
+ const f_pixel *curr_row, *prev_row, *next_row;
+ curr_row = prev_row = next_row = liq_image_get_row_f(image, 0); // the first row acts as its own "row above"
+
+ for (unsigned int j=0; j < rows; j++) {
+ prev_row = curr_row;
+ curr_row = next_row;
+ next_row = liq_image_get_row_f(image, MIN(rows-1,j+1)); // the last row acts as its own "row below"
+
+ f_pixel prev, curr = curr_row[0], next=curr;
+ for (unsigned int i=0; i < cols; i++) {
+ prev=curr;
+ curr=next;
+ next = curr_row[MIN(cols-1,i+1)]; // clamped at the right border
+
+ // contrast is difference between pixels neighbouring horizontally and vertically
+ const float a = fabsf(prev.a+next.a - curr.a*2.f),
+ r = fabsf(prev.r+next.r - curr.r*2.f),
+ g = fabsf(prev.g+next.g - curr.g*2.f),
+ b = fabsf(prev.b+next.b - curr.b*2.f);
+
+ const f_pixel prevl = prev_row[i];
+ const f_pixel nextl = next_row[i];
+
+ const float a1 = fabsf(prevl.a+nextl.a - curr.a*2.f),
+ r1 = fabsf(prevl.r+nextl.r - curr.r*2.f),
+ g1 = fabsf(prevl.g+nextl.g - curr.g*2.f),
+ b1 = fabsf(prevl.b+nextl.b - curr.b*2.f);
+
+ const float horiz = MAX(MAX(a,r),MAX(g,b)); // strongest horizontal contrast over the four channels
+ const float vert = MAX(MAX(a1,r1),MAX(g1,b1)); // strongest vertical contrast over the four channels
+ const float edge = MAX(horiz,vert);
+ float z = edge - fabsf(horiz-vert)*.5f;
+ z = 1.f - MAX(z,MIN(horiz,vert));
+ z *= z; // noise is amplified
+ z *= z;
+ // 85 is about 1/3rd of weight (not 0, because noisy pixels still need to be included, just not as precisely).
+ const unsigned int z_int = 85 + (unsigned int)(z * 171.f);
+ noise[j*cols+i] = MIN(z_int, 255);
+ const int e_int = 255 - (int)(edge * 256.f);
+ edges[j*cols+i] = e_int > 0 ? MIN(e_int, 255) : 0; // clamped to 0..255
+ }
+ }
+
+ // noise areas are shrunk and then expanded to remove thin edges from the map
+ liq_max3(noise, tmp, cols, rows);
+ liq_max3(tmp, noise, cols, rows);
+
+ liq_blur(noise, tmp, noise, cols, rows, 3);
+
+ liq_max3(noise, tmp, cols, rows);
+
+ liq_min3(tmp, noise, cols, rows);
+ liq_min3(noise, tmp, cols, rows);
+ liq_min3(tmp, noise, cols, rows);
+
+ liq_min3(edges, tmp, cols, rows);
+ liq_max3(tmp, edges, cols, rows);
+ for(unsigned int i=0; i < cols*rows; i++) edges[i] = MIN(noise[i], edges[i]); // an edge value never exceeds the noise value of the same pixel
+
+ image->free(tmp);
+
+ image->importance_map = noise; // hand both finished maps back to the image
+ image->edges = edges;
+}
+
+/**
+ * Rewrites the image's edge map in place, based on runs of neighboring pixels
+ * that were remapped to the same palette entry, and installs it as the dither map.
+ *
+ * For efficiency/simplicity only horizontal runs of identical pixels are tracked,
+ * with a peek at the pixel directly above and below each run member.
+ * Full 2d algorithm doesn't improve it significantly.
+ * Correct flood fill doesn't have visually good properties.
+ *
+ * row_pointers holds the already remapped (palette index) rows; map is the palette
+ * those indexes refer to. On return input_image->dither_map owns the buffer that
+ * used to be input_image->edges, and input_image->edges is NULL.
+ */
+LIQ_NONNULL static void update_dither_map(liq_image *input_image, unsigned char *const *const row_pointers, colormap *map)
+{
+    const unsigned int width = input_image->width;
+    const unsigned int height = input_image->height;
+    unsigned char *const edges = input_image->edges;
+
+    for(unsigned int row=0; row < height; row++) {
+        const unsigned char *const this_row = row_pointers[row];
+        const unsigned char *const row_above = (row > 0) ? row_pointers[row-1] : NULL;
+        const unsigned char *const row_below = (row < height-1) ? row_pointers[row+1] : NULL;
+
+        unsigned char lastpixel = this_row[0];
+        unsigned int run_start = 0;
+
+        for(unsigned int col=1; col < width; col++) {
+            const unsigned char px = this_row[col];
+
+            // Transparency may or may not create an edge. When there's an explicit background set, assume no edge.
+            if (input_image->background && map->palette[px].acolor.a < 1.f/256.f) {
+                continue;
+            }
+
+            // the run is only closed when the color changes or the row ends
+            if (px == lastpixel && col != width-1) {
+                continue;
+            }
+
+            // 10 points per pixel in the run, 15 more for each matching pixel above/below
+            int neighbor_count = 10 * (col-run_start);
+            for(unsigned int i=run_start; i < col; i++) {
+                if (row_above && row_above[i] == lastpixel) neighbor_count += 15;
+                if (row_below && row_below[i] == lastpixel) neighbor_count += 15;
+            }
+
+            // rescale every edge value of the run (including the closing column)
+            for(; run_start <= col; run_start++) {
+                const int e = edges[row*width + run_start];
+                edges[row*width + run_start] = (e+128) * (255.f/(255+128)) * (1.f - 20.f / (20 + neighbor_count));
+            }
+            lastpixel = px;
+        }
+    }
+
+    input_image->dither_map = input_image->edges;
+    input_image->edges = NULL;
+}
+
+/**
+ * Returns a palette that contains the colors of `palette` followed by the fixed colors.
+ *
+ * Palette can be NULL, in which case it creates a new palette from scratch.
+ * When there are no fixed colors the input palette is returned untouched.
+ * Otherwise a new palette of at most max_colors entries is built: colors copied from
+ * `palette` are limited so that the fixed colors still fit, and every fixed color is
+ * marked as fixed. The input palette is freed whenever a new one is attempted.
+ *
+ * Returns NULL if the new palette cannot be allocated (the input palette is freed
+ * in that case too, because callers overwrite their pointer with the return value).
+ */
+static colormap *add_fixed_colors_to_palette(colormap *palette, const int max_colors, const f_pixel fixed_colors[], const int fixed_colors_count, void* (*malloc)(size_t), void (*free)(void*))
+{
+    if (!fixed_colors_count) return palette;
+
+    colormap *newpal = pam_colormap(MIN(max_colors, (palette ? palette->colors : 0) + fixed_colors_count), malloc, free);
+    if (!newpal) {
+        if (palette) pam_freecolormap(palette);
+        return NULL;
+    }
+
+    unsigned int i=0;
+    if (palette && fixed_colors_count < max_colors) {
+        // leave room for the fixed colors at the end
+        unsigned int palette_max = MIN(palette->colors, max_colors - fixed_colors_count);
+        for(; i < palette_max; i++) {
+            newpal->palette[i] = palette->palette[i];
+        }
+    }
+    for(int j=0; j < MIN(max_colors, fixed_colors_count); j++) {
+        newpal->palette[i++] = (colormap_item){
+            .acolor = fixed_colors[j],
+            .fixed = true,
+        };
+    }
+    if (palette) pam_freecolormap(palette);
+    return newpal;
+}
+
+/**
+ * Callback handed to kmeans_do_iteration() by find_best_palette().
+ * Replaces the item's adjusted weight with the sum of its perceptual and adjusted
+ * weights, scaled by sqrt(1 + diff).
+ */
+LIQ_NONNULL static void adjust_histogram_callback(hist_item *item, float diff)
+{
+    item->adjusted_weight = (sqrtf(1.f+diff)) * (item->perceptual_weight+item->adjusted_weight);
+}
+
+/**
+ Repeats mediancut with different histogram weights to find palette with minimum error.
+
+ feedback_loop_trials controls how long the search will take. < 0 skips the iteration.
+
+ Returns the best palette found, or NULL when no palette could be created.
+ *palette_error_p is written only when the feedback loop has run; when the first
+ palette is returned directly (no trials requested, empty histogram, or the fixed
+ colors already fill the palette) it is left untouched.
+ */
+static colormap *find_best_palette(histogram *hist, const liq_attr *options, const double max_mse, const f_pixel fixed_colors[], const unsigned int fixed_colors_count, double *palette_error_p)
+{
+    unsigned int max_colors = options->max_colors;
+
+    // if output is posterized it doesn't make sense to aim for perfect colors, so increase target_mse
+    // at this point actual gamma is not set, so very conservative posterization estimate is used
+    const double target_mse = MIN(max_mse, MAX(options->target_mse, pow((1<<options->min_posterization_output)/1024.0, 2)));
+    int feedback_loop_trials = options->feedback_loop_trials;
+    // large histograms make every trial slower, so fewer trials are made
+    if (hist->size > 5000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;}
+    if (hist->size > 25000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;}
+    if (hist->size > 50000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;}
+    if (hist->size > 100000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;}
+    colormap *acolormap = NULL;
+    double least_error = MAX_DIFF;
+    double target_mse_overshoot = feedback_loop_trials>0 ? 1.05 : 1.0;
+    const float total_trials = (float)(feedback_loop_trials>0?feedback_loop_trials:1);
+    int fails_in_a_row=0;
+
+    do {
+        colormap *newmap;
+        if (hist->size && fixed_colors_count < max_colors) {
+            newmap = mediancut(hist, max_colors-fixed_colors_count, target_mse * target_mse_overshoot, MAX(MAX(45.0/65536.0, target_mse), least_error)*1.2,
+                               options->malloc, options->free);
+        } else {
+            feedback_loop_trials = 0;
+            newmap = NULL;
+        }
+        newmap = add_fixed_colors_to_palette(newmap, max_colors, fixed_colors, fixed_colors_count, options->malloc, options->free);
+        if (!newmap) {
+            // don't leak the best palette kept from an earlier trial
+            if (acolormap) pam_freecolormap(acolormap);
+            return NULL;
+        }
+
+        if (feedback_loop_trials <= 0) {
+            return newmap;
+        }
+
+        // after palette has been created, total error (MSE) is calculated to keep the best palette
+        // at the same time K-Means iteration is done to improve the palette
+        // and histogram weights are adjusted based on remapping error to give more weight to poorly matched colors
+
+        const bool first_run_of_target_mse = !acolormap && target_mse > 0;
+        double total_error = kmeans_do_iteration(hist, newmap, first_run_of_target_mse ? NULL : adjust_histogram_callback);
+
+        // goal is to increase quality or to reduce number of colors used if quality is good enough
+        if (!acolormap || total_error < least_error || (total_error <= target_mse && newmap->colors < max_colors)) {
+            if (acolormap) pam_freecolormap(acolormap);
+            acolormap = newmap;
+
+            if (total_error < target_mse && total_error > 0) {
+                // K-Means iteration improves quality above what mediancut aims for
+                // this compensates for it, making mediancut aim for worse
+                target_mse_overshoot = MIN(target_mse_overshoot*1.25, target_mse/total_error);
+            }
+
+            least_error = total_error;
+
+            // if number of colors could be reduced, try to keep it that way
+            // but allow extra color as a bit of wiggle room in case quality can be improved too
+            max_colors = MIN(newmap->colors+1, max_colors);
+
+            feedback_loop_trials -= 1; // asymptotic improvement could make it go on forever
+            fails_in_a_row = 0;
+        } else {
+            fails_in_a_row++;
+            target_mse_overshoot = 1.0;
+
+            // if error is really bad, it's unlikely to improve, so end sooner
+            feedback_loop_trials -= 5 + fails_in_a_row;
+            pam_freecolormap(newmap);
+        }
+
+        float fraction_done = 1.f-MAX(0.f, feedback_loop_trials/total_trials);
+        if (liq_progress(options, options->progress_stage1 + fraction_done * options->progress_stage2)) break;
+        liq_verbose_printf(options, " selecting colors...%d%%", (int)(100.f * fraction_done));
+    }
+    while(feedback_loop_trials > 0);
+
+    *palette_error_p = least_error;
+    return acolormap;
+}
+
+/**
+ * Creates a palette that contains every histogram color, in histogram order.
+ * Each entry's popularity is taken from the histogram item's perceptual weight.
+ *
+ * Returns NULL for an empty histogram or when the palette cannot be allocated.
+ */
+static colormap *histogram_to_palette(const histogram *hist, const liq_attr *options) {
+    if (!hist->size) {
+        return NULL;
+    }
+    colormap *acolormap = pam_colormap(hist->size, options->malloc, options->free);
+    if (!acolormap) {
+        return NULL;
+    }
+    for(unsigned int i=0; i < hist->size; i++) {
+        acolormap->palette[i].acolor = hist->achv[i].acolor;
+        acolormap->palette[i].popularity = hist->achv[i].perceptual_weight;
+    }
+    return acolormap;
+}
+
+/**
+ * Builds the final palette for a histogram and wraps it in a newly allocated liq_result.
+ *
+ * When the histogram colors plus the fixed colors already fit in options->max_colors and
+ * options->target_mse is 0, the histogram colors are used directly. Otherwise
+ * find_best_palette() searches for a palette, which is then refined by K-Means iterations.
+ *
+ * Returns LIQ_OK and stores the result in *result_output, or:
+ *   LIQ_ABORTED            - liq_progress() returned non-zero,
+ *   LIQ_VALUE_OUT_OF_RANGE - no palette could be created,
+ *   LIQ_QUALITY_TOO_LOW    - the palette error exceeds the allowed maximum,
+ *   LIQ_OUT_OF_MEMORY      - the result structure could not be allocated.
+ * A palette that was already created is freed on every failure path.
+ */
+LIQ_NONNULL static liq_error pngquant_quantize(histogram *hist, const liq_attr *options, const int fixed_colors_count, const f_pixel fixed_colors[], const double gamma, bool fixed_result_colors, liq_result **result_output)
+{
+    colormap *acolormap;
+    double palette_error = -1;
+
+    assert((verbose_print(options, "SLOW debug checks enabled. Recompile with NDEBUG for normal operation."),1));
+
+    const bool few_input_colors = hist->size+fixed_colors_count <= options->max_colors;
+
+    if (liq_progress(options, options->progress_stage1)) return LIQ_ABORTED;
+
+    // If image has few colors to begin with (and no quality degradation is required)
+    // then it's possible to skip quantization entirely
+    if (few_input_colors && options->target_mse == 0) {
+        acolormap = add_fixed_colors_to_palette(histogram_to_palette(hist, options), options->max_colors, fixed_colors, fixed_colors_count, options->malloc, options->free);
+        if (!acolormap) {
+            // empty histogram without fixed colors, or the palette could not be allocated
+            return LIQ_VALUE_OUT_OF_RANGE;
+        }
+        palette_error = 0;
+    } else {
+        const double max_mse = options->max_mse * (few_input_colors ? 0.33 : 1.0); // when degrading image that's already paletted, require much higher improvement, since pal2pal often looks bad and there's little gain
+        acolormap = find_best_palette(hist, options, max_mse, fixed_colors, fixed_colors_count, &palette_error);
+        if (!acolormap) {
+            return LIQ_VALUE_OUT_OF_RANGE;
+        }
+
+        // K-Means iteration approaches local minimum for the palette
+        double iteration_limit = options->kmeans_iteration_limit;
+        unsigned int iterations = options->kmeans_iterations;
+
+        if (!iterations && palette_error < 0 && max_mse < MAX_DIFF) iterations = 1; // otherwise total error is never calculated and MSE limit won't work
+
+        if (iterations) {
+            // likely_colormap_index (used and set in kmeans_do_iteration) can't point to index outside colormap
+            if (acolormap->colors < 256) for(unsigned int j=0; j < hist->size; j++) {
+                if (hist->achv[j].tmp.likely_colormap_index >= acolormap->colors) {
+                    hist->achv[j].tmp.likely_colormap_index = 0; // actual value doesn't matter, as the guess is out of date anyway
+                }
+            }
+
+            // large histograms make every iteration slower, so fewer iterations are made
+            if (hist->size > 5000) {iterations = (iterations*3 + 3)/4;}
+            if (hist->size > 25000) {iterations = (iterations*3 + 3)/4;}
+            if (hist->size > 50000) {iterations = (iterations*3 + 3)/4;}
+            if (hist->size > 100000) {iterations = (iterations*3 + 3)/4; iteration_limit *= 2;}
+
+            verbose_print(options, " moving colormap towards local minimum");
+
+            double previous_palette_error = MAX_DIFF;
+
+            for(unsigned int i=0; i < iterations; i++) {
+                palette_error = kmeans_do_iteration(hist, acolormap, NULL);
+
+                if (liq_progress(options, options->progress_stage1 + options->progress_stage2 + (i * options->progress_stage3 * 0.9f) / iterations)) {
+                    break;
+                }
+
+                if (fabs(previous_palette_error-palette_error) < iteration_limit) {
+                    break;
+                }
+
+                if (palette_error > max_mse*1.5) { // probably hopeless
+                    if (palette_error > max_mse*3.0) break; // definitely hopeless
+                    i++; // spend the iteration budget twice as fast
+                }
+
+                previous_palette_error = palette_error;
+            }
+        }
+
+        if (palette_error > max_mse) {
+            liq_verbose_printf(options, " image degradation MSE=%.3f (Q=%d) exceeded limit of %.3f (%d)",
+                               mse_to_standard_mse(palette_error), mse_to_quality(palette_error),
+                               mse_to_standard_mse(max_mse), mse_to_quality(max_mse));
+            pam_freecolormap(acolormap);
+            return LIQ_QUALITY_TOO_LOW;
+        }
+    }
+
+    if (liq_progress(options, options->progress_stage1 + options->progress_stage2 + options->progress_stage3 * 0.95f)) {
+        pam_freecolormap(acolormap);
+        return LIQ_ABORTED;
+    }
+
+    sort_palette(acolormap, options);
+
+    // If palette was created from a multi-image histogram,
+    // then it shouldn't be optimized for one image during remapping
+    if (fixed_result_colors) {
+        for(unsigned int i=0; i < acolormap->colors; i++) {
+            acolormap->palette[i].fixed = true;
+        }
+    }
+
+    liq_result *result = options->malloc(sizeof(liq_result));
+    if (!result) {
+        pam_freecolormap(acolormap); // nothing else owns the palette yet
+        return LIQ_OUT_OF_MEMORY;
+    }
+    *result = (liq_result){
+        .magic_header = liq_result_magic,
+        .malloc = options->malloc,
+        .free = options->free,
+        .palette = acolormap,
+        .palette_error = palette_error,
+        .use_dither_map = options->use_dither_map,
+        .gamma = gamma,
+        .min_posterization_output = options->min_posterization_output,
+    };
+    *result_output = result;
+    return LIQ_OK;
+}
+
+/**
+ * Remaps input_image into a single contiguous buffer of palette indexes.
+ *
+ * The buffer is treated as height rows of width bytes each, so buffer_size must be at
+ * least width*height; otherwise LIQ_BUFFER_TOO_SMALL is returned. LIQ_INVALID_POINTER is
+ * returned when result, input_image or buffer fails its pointer check. The remapping itself
+ * is done by liq_write_remapped_image_rows(), whose result is returned.
+ */
+LIQ_EXPORT LIQ_NONNULL liq_error liq_write_remapped_image(liq_result *result, liq_image *input_image, void *buffer, size_t buffer_size)
+{
+    if (!CHECK_STRUCT_TYPE(result, liq_result)) {
+        return LIQ_INVALID_POINTER;
+    }
+    if (!CHECK_STRUCT_TYPE(input_image, liq_image)) {
+        return LIQ_INVALID_POINTER;
+    }
+    if (!CHECK_USER_POINTER(buffer)) {
+        return LIQ_INVALID_POINTER;
+    }
+
+    const size_t required_size = (size_t)input_image->width * (size_t)input_image->height;
+    if (buffer_size < required_size) {
+        return LIQ_BUFFER_TOO_SMALL;
+    }
+
+    LIQ_ARRAY(unsigned char *, rows, input_image->height);
+    unsigned char *buffer_bytes = buffer;
+    for(unsigned int i=0; i < input_image->height; i++) {
+        // offset is computed in size_t, like required_size above, so it cannot wrap in unsigned int
+        rows[i] = &buffer_bytes[(size_t)input_image->width * i];
+    }
+    return liq_write_remapped_image_rows(result, input_image, rows);
+}
+
+LIQ_EXPORT LIQ_NONNULL liq_error liq_write_remapped_image_rows(liq_result *quant, liq_image *input_image, unsigned char **row_pointers)
+{ /* Remaps input_image to the palette of quant, writing palette indexes through row_pointers
+   * (one pointer per image row).
+   * Returns LIQ_INVALID_POINTER when quant, input_image or any row pointer fails its check,
+   * LIQ_OUT_OF_MEMORY when the remapping result cannot be created, LIQ_ABORTED when a progress
+   * check returns non-zero or remap_to_palette_floyd() returns false, and LIQ_OK otherwise.
+   * Any remapping result left on quant by a previous call is destroyed and replaced.
+   */
+ if (!CHECK_STRUCT_TYPE(quant, liq_result)) return LIQ_INVALID_POINTER;
+ if (!CHECK_STRUCT_TYPE(input_image, liq_image)) return LIQ_INVALID_POINTER;
+ for(unsigned int i=0; i < input_image->height; i++) {
+ if (!CHECK_USER_POINTER(row_pointers+i) || !CHECK_USER_POINTER(row_pointers[i])) return LIQ_INVALID_POINTER;
+ }
+
+ if (quant->remapping) {
+ liq_remapping_result_destroy(quant->remapping);
+ }
+ liq_remapping_result *const result = quant->remapping = liq_remapping_result_create(quant);
+ if (!result) return LIQ_OUT_OF_MEMORY;
+
+ if (!input_image->edges && !input_image->dither_map && quant->use_dither_map) { // maps are only built when neither exists yet
+ contrast_maps(input_image);
+ }
+
+ if (liq_remap_progress(result, result->progress_stage1 * 0.25f)) {
+ return LIQ_ABORTED;
+ }
+
+ /*
+ ** Step 4: map the colors in the image to their closest match in the
+ ** new colormap, and write 'em out.
+ */
+
+ float remapping_error = result->palette_error;
+ if (result->dither_level == 0) { // no dithering requested: a single plain remap
+ set_rounded_palette(&result->int_palette, result->palette, result->gamma, quant->min_posterization_output);
+ remapping_error = remap_to_palette(input_image, row_pointers, result->palette);
+ } else {
+ const bool is_image_huge = (input_image->width * input_image->height) > 2000 * 2000;
+ const bool allow_dither_map = result->use_dither_map == 2 || (!is_image_huge && result->use_dither_map); // value 2 allows the map even for huge images
+ const bool generate_dither_map = allow_dither_map && (input_image->edges && !input_image->dither_map);
+ if (generate_dither_map) {
+ // If dithering (with dither map) is required, this image is used to find areas that require dithering
+ remapping_error = remap_to_palette(input_image, row_pointers, result->palette);
+ update_dither_map(input_image, row_pointers, result->palette);
+ }
+
+ if (liq_remap_progress(result, result->progress_stage1 * 0.5f)) {
+ return LIQ_ABORTED;
+ }
+
+ // remapping above was the last chance to do K-Means iteration, hence the final palette is set after remapping
+ set_rounded_palette(&result->int_palette, result->palette, result->gamma, quant->min_posterization_output);
+
+ if (!remap_to_palette_floyd(input_image, row_pointers, result, MAX(remapping_error*2.4, 16.f/256.f), generate_dither_map)) {
+ return LIQ_ABORTED;
+ }
+ }
+
+ // remapping error from dithered image is absurd, so always non-dithered value is used
+ // palette_error includes some perceptual weighting from histogram which is closer correlated with dssim
+ // so that should be used when possible.
+ if (result->palette_error < 0) { // a negative value means no palette error has been recorded
+ result->palette_error = remapping_error;
+ }
+
+ return LIQ_OK;
+}
+
+/**
+ * Returns the library version number (the LIQ_VERSION macro).
+ * Declared with (void) so the definition is a full prototype that matches the header.
+ */
+LIQ_EXPORT int liq_version(void) {
+    return LIQ_VERSION;
+}
diff --git a/src/platform/gba/packer/libimagequant/libimagequant.h b/src/platform/gba/packer/libimagequant/libimagequant.h
new file mode 100644
index 0000000..e227b0a
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/libimagequant.h
@@ -0,0 +1,151 @@
+/*
+ * https://pngquant.org
+ */
+
+#ifndef LIBIMAGEQUANT_H
+#define LIBIMAGEQUANT_H
+
+#ifdef IMAGEQUANT_EXPORTS // NOTE(review): presumably defined by the build when compiling the library as a DLL - confirm
+#define LIQ_EXPORT __declspec(dllexport)
+#endif
+
+#ifndef LIQ_EXPORT // fallback when no export decoration has been chosen yet
+#define LIQ_EXPORT extern
+#endif
+
+#define LIQ_VERSION 21300 // value returned by liq_version(); 2.13.0 written as 21300
+#define LIQ_VERSION_STRING "2.13.0"
+
+#ifndef LIQ_PRIVATE // the three attribute macros below are only defined here when LIQ_PRIVATE is not already defined
+#if defined(__GNUC__) || defined (__llvm__)
+#define LIQ_PRIVATE __attribute__((visibility("hidden")))
+#define LIQ_NONNULL __attribute__((nonnull)) // without an argument list the attribute covers every pointer parameter
+#define LIQ_USERESULT __attribute__((warn_unused_result))
+#else // other compilers: the annotations expand to nothing
+#define LIQ_PRIVATE
+#define LIQ_NONNULL
+#define LIQ_USERESULT
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h> // size_t, used by the allocator callbacks and buffer-size parameters declared below
+
+typedef struct liq_attr liq_attr; // the four handle types are opaque here: this header does not define the structs
+typedef struct liq_image liq_image;
+typedef struct liq_result liq_result;
+typedef struct liq_histogram liq_histogram;
+
+typedef struct liq_color { // one color made of four unsigned char channels
+ unsigned char r, g, b, a; // stored in this order: red, green, blue, alpha
+} liq_color;
+
+typedef struct liq_palette { // fixed-capacity palette, see liq_get_palette()
+ unsigned int count; // NOTE(review): presumably the number of valid entries - confirm
+ liq_color entries[256]; // room for at most 256 colors
+} liq_palette;
+
+typedef enum liq_error { // status code returned by most API functions
+ LIQ_OK = 0,
+ LIQ_QUALITY_TOO_LOW = 99,
+ LIQ_VALUE_OUT_OF_RANGE = 100, // the enumerators below continue counting from here
+ LIQ_OUT_OF_MEMORY, // 101
+ LIQ_ABORTED, // 102
+ LIQ_BITMAP_NOT_AVAILABLE, // 103
+ LIQ_BUFFER_TOO_SMALL, // 104
+ LIQ_INVALID_POINTER, // 105
+ LIQ_UNSUPPORTED, // 106
+} liq_error;
+
+enum liq_ownership { // each value is a distinct bit; liq_image_set_memory_ownership() takes them as int ownership_flags
+ LIQ_OWN_ROWS=4, // NOTE(review): exact ownership semantics of each flag are not visible in this header - confirm
+ LIQ_OWN_PIXELS=8,
+ LIQ_COPY_PIXELS=16,
+};
+
+typedef struct liq_histogram_entry { // element type of the entries[] array passed to liq_histogram_add_colors()
+ liq_color color;
+ unsigned int count; // NOTE(review): presumably how often color occurs - confirm
+} liq_histogram_entry;
+
+LIQ_EXPORT LIQ_USERESULT liq_attr* liq_attr_create(void); // LIQ_USERESULT: GCC/Clang warn when the returned pointer is ignored
+LIQ_EXPORT LIQ_USERESULT liq_attr* liq_attr_create_with_allocator(void* (*malloc)(size_t), void (*free)(void*)); // takes caller-supplied allocate/release callbacks
+LIQ_EXPORT LIQ_USERESULT liq_attr* liq_attr_copy(const liq_attr *orig) LIQ_NONNULL;
+LIQ_EXPORT void liq_attr_destroy(liq_attr *attr) LIQ_NONNULL;
+
+LIQ_EXPORT LIQ_USERESULT liq_histogram* liq_histogram_create(const liq_attr* attr);
+LIQ_EXPORT liq_error liq_histogram_add_image(liq_histogram *hist, const liq_attr *attr, liq_image* image) LIQ_NONNULL;
+LIQ_EXPORT liq_error liq_histogram_add_colors(liq_histogram *hist, const liq_attr *attr, const liq_histogram_entry entries[], int num_entries, double gamma) LIQ_NONNULL; // entries[] holds num_entries color/count pairs
+LIQ_EXPORT liq_error liq_histogram_add_fixed_color(liq_histogram *hist, liq_color color, double gamma) LIQ_NONNULL;
+LIQ_EXPORT void liq_histogram_destroy(liq_histogram *hist) LIQ_NONNULL;
+
+LIQ_EXPORT liq_error liq_set_max_colors(liq_attr* attr, int colors) LIQ_NONNULL; // setters report failure through liq_error; NOTE(review): accepted ranges are not visible in this header
+LIQ_EXPORT LIQ_USERESULT int liq_get_max_colors(const liq_attr* attr) LIQ_NONNULL;
+LIQ_EXPORT liq_error liq_set_speed(liq_attr* attr, int speed) LIQ_NONNULL;
+LIQ_EXPORT LIQ_USERESULT int liq_get_speed(const liq_attr* attr) LIQ_NONNULL;
+LIQ_EXPORT liq_error liq_set_min_opacity(liq_attr* attr, int min) LIQ_NONNULL;
+LIQ_EXPORT LIQ_USERESULT int liq_get_min_opacity(const liq_attr* attr) LIQ_NONNULL;
+LIQ_EXPORT liq_error liq_set_min_posterization(liq_attr* attr, int bits) LIQ_NONNULL;
+LIQ_EXPORT LIQ_USERESULT int liq_get_min_posterization(const liq_attr* attr) LIQ_NONNULL;
+LIQ_EXPORT liq_error liq_set_quality(liq_attr* attr, int minimum, int maximum) LIQ_NONNULL; // one call sets both bounds; they are read back separately below
+LIQ_EXPORT LIQ_USERESULT int liq_get_min_quality(const liq_attr* attr) LIQ_NONNULL;
+LIQ_EXPORT LIQ_USERESULT int liq_get_max_quality(const liq_attr* attr) LIQ_NONNULL;
+LIQ_EXPORT void liq_set_last_index_transparent(liq_attr* attr, int is_last) LIQ_NONNULL;
+
+typedef void liq_log_callback_function(const liq_attr*, const char *message, void* user_info); // user_info is the pointer given at registration
+typedef void liq_log_flush_callback_function(const liq_attr*, void* user_info);
+LIQ_EXPORT void liq_set_log_callback(liq_attr*, liq_log_callback_function*, void* user_info);
+LIQ_EXPORT void liq_set_log_flush_callback(liq_attr*, liq_log_flush_callback_function*, void* user_info);
+
+typedef int liq_progress_callback_function(float progress_percent, void* user_info); // NOTE(review): meaning of the int result is not visible in this header - confirm against the implementation
+LIQ_EXPORT void liq_attr_set_progress_callback(liq_attr*, liq_progress_callback_function*, void* user_info);
+LIQ_EXPORT void liq_result_set_progress_callback(liq_result*, liq_progress_callback_function*, void* user_info);
+
+// The rows and their data are not modified. The type of `rows` is non-const only due to a bug in C's typesystem design.
+LIQ_EXPORT LIQ_USERESULT liq_image *liq_image_create_rgba_rows(const liq_attr *attr, void *const rows[], int width, int height, double gamma) LIQ_NONNULL;
+LIQ_EXPORT LIQ_USERESULT liq_image *liq_image_create_rgba(const liq_attr *attr, const void *bitmap, int width, int height, double gamma) LIQ_NONNULL;
+
+typedef void liq_image_get_rgba_row_callback(liq_color row_out[], int row, int width, void* user_info); // the callback writes into row_out; NOTE(review): presumably width colors for image row `row` - confirm
+LIQ_EXPORT LIQ_USERESULT liq_image *liq_image_create_custom(const liq_attr *attr, liq_image_get_rgba_row_callback *row_callback, void* user_info, int width, int height, double gamma); // unlike its neighbours this declaration carries no LIQ_NONNULL
+
+LIQ_EXPORT liq_error liq_image_set_memory_ownership(liq_image *image, int ownership_flags) LIQ_NONNULL; // ownership_flags: see enum liq_ownership
+LIQ_EXPORT liq_error liq_image_set_background(liq_image *img, liq_image *background_image) LIQ_NONNULL;
+LIQ_EXPORT liq_error liq_image_set_importance_map(liq_image *img, unsigned char buffer[], size_t buffer_size, enum liq_ownership memory_handling) LIQ_NONNULL;
+LIQ_EXPORT liq_error liq_image_add_fixed_color(liq_image *img, liq_color color) LIQ_NONNULL;
+LIQ_EXPORT LIQ_USERESULT int liq_image_get_width(const liq_image *img) LIQ_NONNULL;
+LIQ_EXPORT LIQ_USERESULT int liq_image_get_height(const liq_image *img) LIQ_NONNULL;
+LIQ_EXPORT void liq_image_destroy(liq_image *img) LIQ_NONNULL;
+
+LIQ_EXPORT LIQ_USERESULT liq_error liq_histogram_quantize(liq_histogram *const input_hist, liq_attr *const options, liq_result **result_output) LIQ_NONNULL; // the result is delivered through result_output
+LIQ_EXPORT LIQ_USERESULT liq_error liq_image_quantize(liq_image *const input_image, liq_attr *const options, liq_result **result_output) LIQ_NONNULL;
+
+LIQ_EXPORT liq_error liq_set_dithering_level(liq_result *res, float dither_level) LIQ_NONNULL; // a level of 0 makes liq_write_remapped_image_rows() skip the dithering pass
+LIQ_EXPORT liq_error liq_set_output_gamma(liq_result* res, double gamma) LIQ_NONNULL;
+LIQ_EXPORT LIQ_USERESULT double liq_get_output_gamma(const liq_result *result) LIQ_NONNULL;
+
+LIQ_EXPORT LIQ_USERESULT const liq_palette *liq_get_palette(liq_result *result) LIQ_NONNULL;
+
+LIQ_EXPORT liq_error liq_write_remapped_image(liq_result *result, liq_image *input_image, void *buffer, size_t buffer_size) LIQ_NONNULL; // buffer_size must be at least width*height bytes, otherwise LIQ_BUFFER_TOO_SMALL
+LIQ_EXPORT liq_error liq_write_remapped_image_rows(liq_result *result, liq_image *input_image, unsigned char **row_pointers) LIQ_NONNULL; // one pointer per image row; each one is checked (LIQ_INVALID_POINTER)
+
+LIQ_EXPORT double liq_get_quantization_error(const liq_result *result) LIQ_NONNULL;
+LIQ_EXPORT int liq_get_quantization_quality(const liq_result *result) LIQ_NONNULL;
+LIQ_EXPORT double liq_get_remapping_error(const liq_result *result) LIQ_NONNULL;
+LIQ_EXPORT int liq_get_remapping_quality(const liq_result *result) LIQ_NONNULL;
+
+LIQ_EXPORT void liq_result_destroy(liq_result *) LIQ_NONNULL;
+
+LIQ_EXPORT int liq_version(void); // returns LIQ_VERSION
+
+
+// Deprecated
+LIQ_EXPORT LIQ_USERESULT liq_result *liq_quantize_image(liq_attr *options, liq_image *input_image) LIQ_NONNULL; // NOTE(review): presumably superseded by liq_image_quantize() above - confirm
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/platform/gba/packer/libimagequant/mediancut.c b/src/platform/gba/packer/libimagequant/mediancut.c
new file mode 100644
index 0000000..2c6d1d8
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/mediancut.c
@@ -0,0 +1,464 @@
+/*
+** © 2009-2018 by Kornel Lesiński.
+** © 1989, 1991 by Jef Poskanzer.
+** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider.
+**
+** See COPYRIGHT file for license.
+*/
+
+#include <stdlib.h> // qsort
+#include <stddef.h> // offsetof
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "mediancut.h"
+
+#define index_of_channel(ch) (offsetof(f_pixel,ch)/sizeof(float)) // position of channel ch when an f_pixel is read as an array of floats (see prepare_sort)
+
+static f_pixel averagepixels(unsigned int clrs, const hist_item achv[]); // forward declaration; get_median() calls it with clrs = 2
+
+struct box { // a contiguous range of histogram items, achv[ind] .. achv[ind+colors-1]
+ f_pixel color; // used as the mean/average color of the box by box_variance(), box_max_error() and box_error()
+ f_pixel variance; // per-channel values compared by best_splittable_box() and sorted by prepare_sort(); NOTE(review): presumably filled from box_variance() - confirm
+ double sum, total_error, max_error; // sum scales the split score; total_error < 0 means "not computed yet"; max_error is compared against max_mse
+ unsigned int ind; // index of the first item of the box in the histogram array
+ unsigned int colors; // number of histogram items in the box
+};
+
+ALWAYS_INLINE static double variance_diff(double val, const double good_enough);
+/**
+ * Squared difference used when summing variance.
+ * Squares below good_enough squared only count for a quarter of their value.
+ */
+inline static double variance_diff(double val, const double good_enough)
+{
+    const double squared = val * val;
+    return (squared < good_enough*good_enough) ? squared*0.25 : squared;
+}
+
+/**
+ * Weighted per-channel variance of the box, measured around box->color.
+ * It's used to decide which channel to split by.
+ *
+ * Each item contributes variance_diff() of its channel difference multiplied by its
+ * adjusted weight; alpha uses a "good enough" threshold of 2/256, the color channels 1/256.
+ * The sums are finally scaled per channel (a: 4/16, r: 7/16, g: 9/16, b: 5/16).
+ */
+static f_pixel box_variance(const hist_item achv[], const struct box *box)
+{
+    const f_pixel mean = box->color;
+    const hist_item *const items = &achv[box->ind];
+    double sum_a = 0, sum_r = 0, sum_g = 0, sum_b = 0;
+
+    for(unsigned int n = 0; n < box->colors; ++n) {
+        const f_pixel px = items[n].acolor;
+        const double weight = items[n].adjusted_weight;
+
+        sum_a += variance_diff(mean.a - px.a, 2.0/256.0)*weight;
+        sum_r += variance_diff(mean.r - px.r, 1.0/256.0)*weight;
+        sum_g += variance_diff(mean.g - px.g, 1.0/256.0)*weight;
+        sum_b += variance_diff(mean.b - px.b, 1.0/256.0)*weight;
+    }
+
+    return (f_pixel){
+        .a = sum_a*(4.0/16.0),
+        .r = sum_r*(7.0/16.0),
+        .g = sum_g*(9.0/16.0),
+        .b = sum_b*(5.0/16.0),
+    };
+}
+
+/**
+ * Largest colordifference() between box->color and any item of the box.
+ * Returns 0 for an empty box.
+ */
+static double box_max_error(const hist_item achv[], const struct box *box)
+{
+    const f_pixel mean = box->color;
+    const hist_item *const items = &achv[box->ind];
+    double worst = 0;
+
+    for(unsigned int n = 0; n < box->colors; ++n) {
+        const double diff = colordifference(mean, items[n].acolor);
+        if (worst < diff) worst = diff;
+    }
+    return worst;
+}
+
+ALWAYS_INLINE static double color_weight(f_pixel median, hist_item h);
+
+/** Exchanges two histogram items; does nothing when both pointers are the same. */
+static inline void hist_item_swap(hist_item *l, hist_item *r)
+{
+    if (l == r) return;
+
+    const hist_item saved = *l;
+    *l = *r;
+    *r = saved;
+}
+
+ALWAYS_INLINE static unsigned int qsort_pivot(const hist_item *const base, const unsigned int len);
+/**
+ * Picks the index of the pivot element for qsort_partition().
+ * Short ranges (fewer than 32 items) use the middle element; longer ones use the
+ * median of the sort values at indexes 8, len/2 and len-1.
+ */
+inline static unsigned int qsort_pivot(const hist_item *const base, const unsigned int len)
+{
+    if (len < 32) {
+        return len/2;
+    }
+
+    const unsigned int aidx=8, bidx=len/2, cidx=len-1;
+    const unsigned int a=base[aidx].tmp.sort_value;
+    const unsigned int b=base[bidx].tmp.sort_value;
+    const unsigned int c=base[cidx].tmp.sort_value;
+
+    if (a < b) {
+        if (b < c) return bidx;
+        return (a < c) ? cidx : aidx;
+    }
+    if (b > c) return bidx;
+    return (a < c) ? aidx : cidx;
+}
+
+ALWAYS_INLINE static unsigned int qsort_partition(hist_item *const base, const unsigned int len);
+inline static unsigned int qsort_partition(hist_item *const base, const unsigned int len)
+{ /* Partitions base[0..len) by tmp.sort_value around a pivot and returns the pivot's final index.
+   * Afterwards items before that index have sort_value >= pivot and items after it have
+   * sort_value <= pivot, i.e. larger values end up first.
+   */
+ unsigned int l = 1, r = len;
+ if (len >= 8) {
+ hist_item_swap(&base[0], &base[qsort_pivot(base,len)]); // move the chosen pivot to the front; shorter ranges just use base[0]
+ }
+
+ const unsigned int pivot_value = base[0].tmp.sort_value;
+ while (l < r) {
+ if (base[l].tmp.sort_value >= pivot_value) {
+ l++; // already on the correct (left) side
+ } else {
+ while(l < --r && base[r].tmp.sort_value <= pivot_value) {} // skip items from the right end that are already on the correct side
+ hist_item_swap(&base[l], &base[r]);
+ }
+ }
+ l--; // last index of the left part
+ hist_item_swap(&base[0], &base[l]); // put the pivot between the two parts
+
+ return l;
+}
+
+/**
+ * Quick select: partitions repeatedly, each time continuing only in the part
+ * that contains position sort_start, until neither part needs further work.
+ */
+static void hist_item_sort_range(hist_item base[], unsigned int len, unsigned int sort_start)
+{
+    while (1) {
+        const unsigned int pivot_pos = qsort_partition(base, len);
+        const unsigned int right_start = pivot_pos + 1;
+
+        if (pivot_pos > 0 && sort_start < pivot_pos) {
+            // wanted position is left of the pivot
+            len = pivot_pos;
+            continue;
+        }
+
+        if (right_start >= len || sort_start <= right_start) {
+            return;
+        }
+
+        // wanted position is right of the pivot: continue there with rebased indexes
+        base += right_start;
+        len -= right_start;
+        sort_start -= right_start;
+    }
+}
+
+/** sorts array to make sum of weights lower than halfvar one side, returns edge between halfvar parts of the set
+ *
+ * *lowervar is a running sum of color_weight that is both read and updated here; the caller provides its starting value.
+ * Returns a pointer to the item at which the running sum was found to exceed halfvar,
+ * or NULL when the sum did not exceed halfvar within this range.
+ * Only as much of the range as is needed to locate that item gets partitioned.
+ */
+static hist_item *hist_item_sort_halfvar(hist_item base[], unsigned int len, double *const lowervar, const double halfvar)
+{
+ do {
+ const unsigned int l = qsort_partition(base, len), r = l+1; // l: pivot index, r: first index of the right part
+
+ // check if sum of left side is smaller than half,
+ // if it is, then it doesn't need to be sorted
+ unsigned int t = 0; double tmpsum = *lowervar;
+ while (t <= l && tmpsum < halfvar) tmpsum += base[t++].color_weight; // the sum includes the pivot item itself
+
+ if (tmpsum < halfvar) {
+ *lowervar = tmpsum;
+ } else {
+ if (l > 0) {
+ hist_item *res = hist_item_sort_halfvar(base, l, lowervar, halfvar); // the edge lies in the left part: search it recursively
+ if (res) return res;
+ } else {
+ // End of left recursion. This will be executed in order from the first element.
+ *lowervar += base[0].color_weight;
+ if (*lowervar > halfvar) return &base[0];
+ }
+ }
+
+ if (len > r) {
+ base += r; len -= r; // tail-recursive "call"
+ } else {
+ *lowervar += base[r].color_weight; // NOTE(review): r == len on this path, so this reads the item just past the current range - confirm that this is intended
+ return (*lowervar > halfvar) ? &base[r] : NULL;
+ }
+ } while(1);
+}
+
+static f_pixel get_median(const struct box *b, hist_item achv[]);
+
+/** Pairs a channel position (see index_of_channel) with the variance of that channel. */
+typedef struct {
+    unsigned int chan; float variance;
+} channelvariance;
+
+/** qsort() comparator for channelvariance: orders by variance, largest first. */
+static int comparevariance(const void *ch1, const void *ch2)
+{
+    const channelvariance *const first = ch1;
+    const channelvariance *const second = ch2;
+
+    if (first->variance > second->variance) return -1;
+    if (first->variance < second->variance) return 1;
+    return 0;
+}
+
+/** Finds which channels need to be sorted first and preproceses achv for fast sort
+ *
+ * Steps: orders the four channels by the box's per-channel variance (largest first),
+ * stores a 32-bit sort key in tmp.sort_value of every item of the box, finds the median
+ * color with get_median() (which reorders the items), and stores color_weight(median, item)
+ * in every item.
+ * Returns half of the sum of those color weights.
+ */
+static double prepare_sort(struct box *b, hist_item achv[])
+{
+ /*
+ ** Sort dimensions by their variance, and then sort colors first by dimension with highest variance
+ */
+ channelvariance channels[4] = {
+ {index_of_channel(a), b->variance.a},
+ {index_of_channel(r), b->variance.r},
+ {index_of_channel(g), b->variance.g},
+ {index_of_channel(b), b->variance.b},
+ };
+
+ qsort(channels, 4, sizeof(channels[0]), comparevariance); // channels[0] is now the channel with the highest variance
+
+ const unsigned int ind1 = b->ind;
+ const unsigned int colors = b->colors;
+#if __GNUC__ >= 9 || __clang__
+ #pragma omp parallel for if (colors > 25000) \
+ schedule(static) default(none) shared(achv, channels, colors, ind1)
+#else
+ #pragma omp parallel for if (colors > 25000) \
+ schedule(static) default(none) shared(achv, channels)
+#endif
+ for(unsigned int i=0; i < colors; i++) {
+ const float *chans = (const float *)&achv[ind1 + i].acolor; // the color is read as an array of floats indexed by channel position
+ // Only the first channel really matters. When trying median cut many times
+ // with different histogram weights, I don't want sort randomness to influence outcome.
+ achv[ind1 + i].tmp.sort_value = ((unsigned int)(chans[channels[0].chan]*65535.0)<<16) |
+ (unsigned int)((chans[channels[2].chan] + chans[channels[1].chan]/2.0 + chans[channels[3].chan]/4.0)*65535.0);
+ }
+
+ const f_pixel median = get_median(b, achv);
+
+ // box will be split to make color_weight of each side even
+ const unsigned int ind = b->ind, end = ind+b->colors;
+ double totalvar = 0;
+ #pragma omp parallel for if (end - ind > 15000) \
+ schedule(static) default(shared) reduction(+:totalvar)
+ for(unsigned int j=ind; j < end; j++) totalvar += (achv[j].color_weight = color_weight(median, achv[j]));
+ return totalvar / 2.0;
+}
+
+/**
+ * Finds the median color of a box without fully sorting it: only as much
+ * partitioning as hist_item_sort_range() needs for the middle position is done.
+ * The items of the box are reordered in the process.
+ */
+static f_pixel get_median(const struct box *b, hist_item achv[])
+{
+    hist_item *const items = &(achv[b->ind]);
+    const unsigned int middle = (b->colors-1)/2;
+
+    hist_item_sort_range(items, b->colors, middle);
+
+    const bool odd_count = (b->colors & 1) != 0;
+    if (odd_count) {
+        return items[middle].acolor;
+    }
+
+    // technically the second color is not guaranteed to be sorted correctly
+    // but most of the time it is good enough to be useful
+    return averagepixels(2, &items[middle]);
+}
+
+/*
+ ** Picks the box that is most worth splitting next.
+ ** Returns its index in bv, or -1 if no box is splittable
+ ** (boxes with fewer than 2 colors are skipped, and a box needs a positive score).
+ */
+static int best_splittable_box(struct box bv[], unsigned int boxes, const double max_mse)
+{
+    int best_index = -1;
+    double best_score = 0;
+
+    for(unsigned int n=0; n < boxes; n++) {
+        const struct box *const candidate = &bv[n];
+
+        // a box needs at least two colors to be split
+        if (candidate->colors >= 2) {
+            // only the largest variance counts, because the box is only going to be split by it
+            const double cv = MAX(candidate->variance.r, MAX(candidate->variance.g,candidate->variance.b));
+            double score = candidate->sum * MAX(candidate->variance.a, cv);
+
+            // boxes whose error exceeds the limit are boosted in proportion to the excess
+            if (candidate->max_error > max_mse) {
+                score = score* candidate->max_error/max_mse;
+            }
+
+            if (score > best_score) {
+                best_score = score;
+                best_index = n;
+            }
+        }
+    }
+    return best_index;
+}
+
+/*
+ * Weight of histogram item `h` relative to the `median` color: the square root
+ * of their color difference multiplied by (sqrt(1 + adjusted_weight) - 1).
+ */
+inline static double color_weight(f_pixel median, hist_item h)
+{
+    const float diff = colordifference(median, h.acolor);
+    const double distance = sqrt(diff);
+    const double popularity = sqrt(1.0+h.adjusted_weight)-1.0;
+
+    return distance * popularity;
+}
+
+static void set_colormap_from_boxes(colormap *map, struct box bv[], unsigned int boxes, hist_item *achv);
+static void adjust_histogram(hist_item *achv, const struct box bv[], unsigned int boxes);
+
+/*
+ * Sums, over every histogram item in the box, the color difference between
+ * the item and the box's color multiplied by the item's perceptual_weight.
+ */
+static double box_error(const struct box *box, const hist_item achv[])
+{
+    const f_pixel center = box->color;
+    double error_sum = 0;
+
+    for (unsigned int n = 0; n < box->colors; ++n) {
+        const hist_item *const item = &achv[box->ind + n];
+        error_sum += colordifference(center, item->acolor) * item->perceptual_weight;
+    }
+
+    return error_sum;
+}
+
+
+/*
+ * Returns true when the summed error of all boxes does not exceed
+ * target_mse scaled by the histogram's total_perceptual_weight.
+ *
+ * A negative total_error marks a box whose error has not been computed yet.
+ * Errors that are already known are summed first, so the function can bail
+ * out early without computing the missing ones; errors computed here are
+ * stored back into bv.
+ */
+static bool total_box_error_below_target(double target_mse, struct box bv[], unsigned int boxes, const histogram *hist)
+{
+    const double limit = target_mse * hist->total_perceptual_weight;
+    double accumulated = 0;
+
+    // first pass: errors that are already cached
+    for(unsigned int n=0; n < boxes; n++) {
+        if (bv[n].total_error >= 0) {
+            accumulated += bv[n].total_error;
+        }
+        if (accumulated > limit) return false;
+    }
+
+    // second pass: compute (and cache) the ones that are missing
+    for(unsigned int n=0; n < boxes; n++) {
+        struct box *const current = &bv[n];
+        if (current->total_error < 0) {
+            current->total_error = box_error(current, hist->achv);
+            accumulated += current->total_error;
+        }
+        if (accumulated > limit) return false;
+    }
+
+    return true;
+}
+/*
+ * Initializes `box` to describe `colors` consecutive histogram items starting
+ * at achv[ind]. `sum` is stored as-is; the box's color, variance and max_error
+ * are derived from the items.
+ */
+static void box_init(struct box *box, const hist_item *achv, const unsigned int ind, const unsigned int colors, const double sum) {
+ box->ind = ind;
+ box->colors = colors;
+ box->sum = sum;
+ box->total_error = -1; // negative means "not computed yet"; total_box_error_below_target fills it in lazily
+
+ box->color = averagepixels(colors, &achv[ind]); // weighted average of the box's items
+ box->variance = box_variance(achv, box); // NOTE(review): presumably reads box->color set above - confirm before reordering
+ box->max_error = box_max_error(achv, box);
+}
+
+/*
+ ** Here is the fun part, the median-cut colormap generator. This is based
+ ** on Paul Heckbert's paper, "Color Image Quantization for Frame Buffer
+ ** Display," SIGGRAPH 1982 Proceedings, page 297.
+ **
+ ** Splits the histogram into at most `newcolors` boxes and returns a colormap
+ ** with one entry per box, allocated with `malloc`. Splitting stops early when
+ ** no box is splittable or when the total error drops below `target_mse`.
+ ** The histogram items are reordered in place and each item's
+ ** tmp.likely_colormap_index is set to the index of its box.
+ ** Returns NULL if the colormap cannot be allocated.
+ */
+LIQ_PRIVATE colormap *mediancut(histogram *hist, unsigned int newcolors, const double target_mse, const double max_mse, void* (*malloc)(size_t), void (*free)(void*))
+{
+    hist_item *achv = hist->achv;
+    LIQ_ARRAY(struct box, bv, newcolors);
+    unsigned int boxes = 1;
+
+    /*
+     ** Set up the initial box.
+     */
+    {
+        double sum = 0;
+        for(unsigned int i=0; i < hist->size; i++) {
+            sum += achv[i].adjusted_weight;
+        }
+        box_init(&bv[0], achv, 0, hist->size, sum);
+
+
+        /*
+         ** Main loop: split boxes until we have enough.
+         */
+        while (boxes < newcolors) {
+
+            // first splits boxes that exceed quality limit (to have colors for things like odd green pixel),
+            // later raises the limit to allow large smooth areas/gradients get colors.
+            const double current_max_mse = max_mse + (boxes/(double)newcolors)*16.0*max_mse;
+            const int bi = best_splittable_box(bv, boxes, current_max_mse);
+            if (bi < 0) {
+                break; /* ran out of colors! */
+            }
+
+            unsigned int indx = bv[bi].ind;
+            unsigned int clrs = bv[bi].colors;
+
+            /*
+             Classic implementation tries to get even number of colors or pixels in each subdivision.
+
+             Here, instead of popularity I use (sqrt(popularity)*variance) metric.
+             Each subdivision balances number of pixels (popular colors) and low variance -
+             boxes can be large if they have similar colors. Later boxes with high variance
+             will be more likely to be split.
+
+             Median used as expected value gives much better results than mean.
+             */
+
+            const double halfvar = prepare_sort(&bv[bi], achv);
+            double lowervar=0;
+
+            // hist_item_sort_halfvar sorts and sums lowervar at the same time
+            // returns item to break at …minus one, which does smell like an off-by-one error.
+            hist_item *break_p = hist_item_sort_halfvar(&achv[indx], clrs, &lowervar, halfvar);
+            // clamp so that the upper box always keeps at least one color
+            unsigned int break_at = MIN(clrs-1, break_p - &achv[indx] + 1);
+
+            /*
+             ** Split the box.
+             */
+            double sm = bv[bi].sum;
+            double lowersum = 0;
+            for(unsigned int i=0; i < break_at; i++) lowersum += achv[indx + i].adjusted_weight;
+
+            box_init(&bv[bi], achv, indx, break_at, lowersum);
+            box_init(&bv[boxes], achv, indx + break_at, clrs - break_at, sm - lowersum);
+
+            ++boxes;
+
+            if (total_box_error_below_target(target_mse, bv, boxes, hist)) {
+                break;
+            }
+        }
+    }
+
+    colormap *map = pam_colormap(boxes, malloc, free);
+    if (!map) {
+        // pam_colormap returns NULL when its allocation fails; don't write through it
+        return NULL;
+    }
+    set_colormap_from_boxes(map, bv, boxes, achv);
+
+    adjust_histogram(achv, bv, boxes);
+
+    return map;
+}
+
+/*
+ ** Fills the palette from the finished boxes, one entry per box.
+ **
+ ** The representative color of a box is the color stored in the box (box_init
+ ** sets it to the average of the box's colors, which is the method specified
+ ** in Heckbert's paper). Each entry's popularity is the sum of the
+ ** perceptual_weight of the histogram items in its box.
+ */
+static void set_colormap_from_boxes(colormap *map, struct box* bv, unsigned int boxes, hist_item *achv)
+{
+    for(unsigned int n = 0; n < boxes; ++n) {
+        const struct box *const src = &bv[n];
+        const unsigned int first = src->ind;
+        const unsigned int past_last = first + src->colors;
+
+        map->palette[n].acolor = src->color;
+
+        /* total color popularity (perceptual_weight is an approximation of it) */
+        map->palette[n].popularity = 0;
+        for(unsigned int k = first; k < past_last; k++) {
+            map->palette[n].popularity += achv[k].perceptual_weight;
+        }
+    }
+}
+
+/* Records in every histogram item the index of the box (and thus palette entry) that contains it. */
+static void adjust_histogram(hist_item *achv, const struct box* bv, unsigned int boxes)
+{
+    for(unsigned int n = 0; n < boxes; ++n) {
+        const unsigned int first = bv[n].ind;
+        const unsigned int past_last = first + bv[n].colors;
+
+        for(unsigned int k = first; k < past_last; k++) {
+            achv[k].tmp.likely_colormap_index = n;
+        }
+    }
+}
+/*
+ * Returns the average of the first `clrs` items of achv, with every channel
+ * weighted by the item's adjusted_weight. When the weights sum to zero the
+ * (undivided) channel sums are returned instead.
+ */
+static f_pixel averagepixels(unsigned int clrs, const hist_item achv[])
+{
+ double r = 0, g = 0, b = 0, a = 0, sum = 0;
+
+ #pragma omp parallel for if (clrs > 25000) \
+ schedule(static) default(shared) reduction(+:a) reduction(+:r) reduction(+:g) reduction(+:b) reduction(+:sum)
+ for(unsigned int i = 0; i < clrs; i++) {
+ const f_pixel px = achv[i].acolor;
+ const double weight = achv[i].adjusted_weight;
+
+ sum += weight;
+ a += px.a * weight;
+ r += px.r * weight;
+ g += px.g * weight;
+ b += px.b * weight;
+ }
+
+ if (sum) { // skip the division when every weight is zero
+ a /= sum;
+ r /= sum;
+ g /= sum;
+ b /= sum;
+ }
+
+ assert(!isnan(r) && !isnan(g) && !isnan(b) && !isnan(a));
+
+ return (f_pixel){.r=r, .g=g, .b=b, .a=a};
+}
diff --git a/src/platform/gba/packer/libimagequant/mediancut.h b/src/platform/gba/packer/libimagequant/mediancut.h
new file mode 100644
index 0000000..9a4cb53
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/mediancut.h
@@ -0,0 +1,6 @@
+#ifndef MEDIANCUT_H
+#define MEDIANCUT_H
+/* Builds a colormap of at most `newcolors` entries from `hist` using median cut (implemented in mediancut.c). */
+LIQ_PRIVATE colormap *mediancut(histogram *hist, unsigned int newcolors, const double target_mse, const double max_mse, void* (*malloc)(size_t), void (*free)(void*));
+
+#endif
diff --git a/src/platform/gba/packer/libimagequant/mempool.c b/src/platform/gba/packer/libimagequant/mempool.c
new file mode 100644
index 0000000..cd49f59
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/mempool.c
@@ -0,0 +1,70 @@
+/*
+** © 2009-2017 by Kornel Lesiński.
+** © 1989, 1991 by Jef Poskanzer.
+** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider.
+**
+** See COPYRIGHT file for license.
+*/
+
+#include "libimagequant.h"
+#include "mempool.h"
+#include
+#include
+#include
+/* Allocation sizes and offsets are rounded up to a multiple of ALIGN_MASK+1 bytes. */
+#define ALIGN_MASK 15UL
+#define MEMPOOL_RESERVED ((sizeof(struct mempool)+ALIGN_MASK) & ~ALIGN_MASK) /* header size rounded up to the alignment */
+/*
+ * Header placed at the start of every chunk of a pool. Allocations are carved
+ * out of the bytes that follow it; chunks are chained through `next`.
+ */
+struct mempool {
+ unsigned int used, size; // bytes consumed so far (header included) and total bytes in this chunk
+ void* (*malloc)(size_t); // allocator used when another chunk is needed
+ void (*free)(void*); // used by mempool_destroy to release the chunk
+ struct mempool *next; // previously created chunk, or NULL
+};
+/*
+ * Allocates `size` bytes from the pool *mptr.
+ *
+ * If *mptr is non-NULL and has room, the bytes come from the current chunk.
+ * Otherwise a new chunk is obtained with `malloc`, chained in front of the
+ * existing chunks, and stored in *mptr. `max_size` is the requested capacity
+ * of a new chunk (0 selects 128KB); it is raised if `size` would not fit.
+ *
+ * Returns NULL if the new chunk cannot be allocated. In that case *mptr is
+ * left unchanged, so an existing pool stays valid and can still be destroyed.
+ */
+LIQ_PRIVATE void* mempool_create(mempoolptr *mptr, const unsigned int size, unsigned int max_size, void* (*malloc)(size_t), void (*free)(void*))
+{
+    if (*mptr && ((*mptr)->used+size) <= (*mptr)->size) {
+        unsigned int prevused = (*mptr)->used;
+        (*mptr)->used += (size + ALIGN_MASK) & ~ALIGN_MASK;
+        return ((char*)(*mptr)) + prevused;
+    }
+
+    mempoolptr old = *mptr;
+    if (!max_size) max_size = (1<<17);
+    max_size = size+ALIGN_MASK > max_size ? size+ALIGN_MASK : max_size;
+
+    // allocate into a temporary so that a failure does not lose the existing chain
+    mempoolptr pool = malloc(MEMPOOL_RESERVED + max_size);
+    if (!pool) return NULL;
+    *pool = (struct mempool){
+        .malloc = malloc,
+        .free = free,
+        .size = MEMPOOL_RESERVED + max_size,
+        .used = sizeof(struct mempool),
+        .next = old,
+    };
+    uintptr_t mptr_used_start = (uintptr_t)pool + pool->used;
+    pool->used += (ALIGN_MASK + 1 - (mptr_used_start & ALIGN_MASK)) & ALIGN_MASK; // reserve bytes required to make subsequent allocations aligned
+    assert(!(((uintptr_t)pool + pool->used) & ALIGN_MASK));
+
+    *mptr = pool;
+    return mempool_alloc(mptr, size, size);
+}
+
+/*
+ * Allocates `size` bytes from the existing pool *mptr (which must not be NULL).
+ * When the current chunk has no room left, a new chunk of capacity `max_size`
+ * is created through mempool_create with the allocator stored in the pool.
+ */
+LIQ_PRIVATE void* mempool_alloc(mempoolptr *mptr, const unsigned int size, const unsigned int max_size)
+{
+    struct mempool *const pool = *mptr;
+
+    if ((pool->used+size) > pool->size) {
+        // current chunk is full
+        return mempool_create(mptr, size, max_size, pool->malloc, pool->free);
+    }
+
+    const unsigned int offset = pool->used;
+    pool->used += (size + ALIGN_MASK) & ~ALIGN_MASK;
+    return ((char*)pool) + offset;
+}
+
+/* Frees every chunk of the pool, each with the `free` function stored in it. NULL is accepted. */
+LIQ_PRIVATE void mempool_destroy(mempoolptr m)
+{
+    mempoolptr chunk = m;
+    while (chunk != NULL) {
+        // read the link first: the chunk header is gone once it has been freed
+        mempoolptr following = chunk->next;
+        chunk->free(chunk);
+        chunk = following;
+    }
+}
diff --git a/src/platform/gba/packer/libimagequant/mempool.h b/src/platform/gba/packer/libimagequant/mempool.h
new file mode 100644
index 0000000..9b7333b
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/mempool.h
@@ -0,0 +1,13 @@
+#ifndef MEMPOOL_H
+#define MEMPOOL_H
+
+#include
+/* Opaque pool type; the struct itself is defined in mempool.c. */
+struct mempool;
+typedef struct mempool *mempoolptr;
+/*
+ * mempool_create:  allocates `size` bytes from *mptr, first chaining a new chunk
+ *                  (obtained with `malloc`) onto it when *mptr is NULL or full.
+ * mempool_alloc:   the same for an existing pool, reusing the allocator stored in it.
+ * mempool_destroy: frees every chunk of the pool.
+ * `capacity` is the requested size of a newly created chunk.
+ */
+LIQ_PRIVATE void* mempool_create(mempoolptr *mptr, const unsigned int size, unsigned int capacity, void* (*malloc)(size_t), void (*free)(void*));
+LIQ_PRIVATE void* mempool_alloc(mempoolptr *mptr, const unsigned int size, const unsigned int capacity);
+LIQ_PRIVATE void mempool_destroy(mempoolptr m);
+
+#endif
diff --git a/src/platform/gba/packer/libimagequant/nearest.c b/src/platform/gba/packer/libimagequant/nearest.c
new file mode 100644
index 0000000..cae477f
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/nearest.c
@@ -0,0 +1,230 @@
+/*
+** © 2009-2015 by Kornel Lesiński.
+** © 1989, 1991 by Jef Poskanzer.
+** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider.
+**
+** See COPYRIGHT file for license.
+*/
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "nearest.h"
+#include "mempool.h"
+#include
+/* Scratch entry used while building the tree: a palette index and its distance to the current vantage point. */
+typedef struct vp_sort_tmp {
+ float distance_squared; // filled in by vp_sort_indexes_by_distance
+ unsigned int idx; // index into the palette
+} vp_sort_tmp;
+/* State of a search in progress: the best match found so far. */
+typedef struct vp_search_tmp {
+ float distance;
+ float distance_squared;
+ unsigned int idx; // palette index of the best match
+ int exclude; // palette index that must not be returned; nearest_search passes -1
+} vp_search_tmp;
+/* Palette color stored directly in a node's `rest` array. */
+struct leaf {
+ f_pixel color;
+ unsigned int idx; // palette index of `color`
+};
+/* Node of the vantage-point tree. */
+typedef struct vp_node {
+ struct vp_node *near, *far; // subtrees built from the nearer / farther half of the remaining colors
+ f_pixel vantage_point;
+ float radius, radius_squared; // distance that separates `near` from `far`
+ struct leaf *rest; // remaining colors stored inline instead of subtrees (see vp_create_node)
+ unsigned short idx; // palette index of vantage_point
+ unsigned short restcount; // number of entries in `rest`
+} vp_node;
+/* Handle returned by nearest_init. */
+struct nearest_map {
+ vp_node *root;
+ const colormap_item *palette; // borrowed from the colormap given to nearest_init
+ float nearest_other_color_dist[256]; // per palette entry: guesses closer than this are accepted without a tree search
+ mempoolptr mempool; // owns the handle and all tree nodes
+};
+
+static void vp_search_node(const vp_node *node, const f_pixel *const needle, vp_search_tmp *const best_candidate);
+
+/*
+ * qsort comparator that orders vp_sort_tmp entries by ascending distance_squared.
+ * Returns 0 for equal distances, so the result is consistent regardless of
+ * the order in which qsort passes the two elements.
+ */
+static int vp_compare_distance(const void *ap, const void *bp) {
+    const float a = ((const vp_sort_tmp*)ap)->distance_squared;
+    const float b = ((const vp_sort_tmp*)bp)->distance_squared;
+    return (a > b) - (a < b);
+}
+
+/* Stores in each entry its color difference to vantage_point, then sorts the entries nearest first. */
+static void vp_sort_indexes_by_distance(const f_pixel vantage_point, vp_sort_tmp indexes[], int num_indexes, const colormap_item items[]) {
+    for(int n = 0; n < num_indexes; n++) {
+        vp_sort_tmp *const entry = &indexes[n];
+        entry->distance_squared = colordifference(vantage_point, items[entry->idx].acolor);
+    }
+
+    qsort(indexes, num_indexes, sizeof(indexes[0]), vp_compare_distance);
+}
+
+/*
+ * Returns the position in `indexes` of the entry with the highest popularity
+ * (the first one wins ties).
+ * Usually a vantage point should be the farthest point, but picking the most
+ * popular one seems to make the search quicker anyway.
+ */
+static int vp_find_best_vantage_point_index(vp_sort_tmp indexes[], int num_indexes, const colormap_item items[]) {
+    int winner = 0;
+    float top_popularity = items[indexes[0].idx].popularity;
+
+    for(int candidate = 1; candidate < num_indexes; candidate++) {
+        const colormap_item *const item = &items[indexes[candidate].idx];
+        if (item->popularity > top_popularity) {
+            top_popularity = item->popularity;
+            winner = candidate;
+        }
+    }
+    return winner;
+}
+/*
+ * Recursively builds a tree node for the `num_indexes` palette entries listed
+ * in `indexes`, allocating from pool `m`. Returns NULL when there are no entries.
+ *
+ * One entry becomes the node's vantage point; the others are sorted by their
+ * distance to it and either stored inline in `rest` (fewer than 7 of them) or
+ * split at the median distance into the `near` and `far` subtrees.
+ * The `indexes` array is reordered in the process.
+ */
+static vp_node *vp_create_node(mempoolptr *m, vp_sort_tmp indexes[], int num_indexes, const colormap_item items[]) {
+ if (num_indexes <= 0) {
+ return NULL;
+ }
+
+ vp_node *node = mempool_alloc(m, sizeof(node[0]), 0); // NOTE(review): result is written through below without a NULL check
+
+ if (num_indexes == 1) {
+ *node = (vp_node){
+ .vantage_point = items[indexes[0].idx].acolor,
+ .idx = indexes[0].idx,
+ .radius = MAX_DIFF,
+ .radius_squared = MAX_DIFF,
+ };
+ return node;
+ }
+
+ const int ref = vp_find_best_vantage_point_index(indexes, num_indexes, items);
+ const int ref_idx = indexes[ref].idx;
+
+ // Removes the `ref_idx` item from remaining items, because it's included in the current node
+ num_indexes -= 1;
+ indexes[ref] = indexes[num_indexes]; // the last entry takes the vacated slot
+
+ vp_sort_indexes_by_distance(items[ref_idx].acolor, indexes, num_indexes, items);
+
+ // Remaining items are split by the median distance
+ const int half_idx = num_indexes/2;
+
+ *node = (vp_node){
+ .vantage_point = items[ref_idx].acolor,
+ .idx = ref_idx,
+ .radius = sqrtf(indexes[half_idx].distance_squared),
+ .radius_squared = indexes[half_idx].distance_squared,
+ };
+ if (num_indexes < 7) {
+ node->rest = mempool_alloc(m, sizeof(node->rest[0]) * num_indexes, 0); // NOTE(review): also unchecked
+ node->restcount = num_indexes;
+ for(int i=0; i < num_indexes; i++) {
+ node->rest[i].idx = indexes[i].idx;
+ node->rest[i].color = items[indexes[i].idx].acolor;
+ }
+ } else {
+ node->near = vp_create_node(m, indexes, half_idx, items);
+ node->far = vp_create_node(m, &indexes[half_idx], num_indexes - half_idx, items);
+ }
+
+ return node;
+}
+
+/*
+ * Builds the nearest-color search structure for the palette of `map`.
+ *
+ * The handle and the tree live in a mempool created with the colormap's
+ * allocator; release them with nearest_free. The palette itself is not
+ * copied, so `map` must outlive the handle.
+ * Returns NULL if the handle cannot be allocated.
+ */
+LIQ_PRIVATE struct nearest_map *nearest_init(const colormap *map) {
+    mempoolptr m = NULL;
+    struct nearest_map *handle = mempool_create(&m, sizeof(handle[0]), sizeof(handle[0]) + sizeof(vp_node)*map->colors+16, map->malloc, map->free);
+    if (!handle) {
+        // mempool_create returns NULL when its allocation fails
+        return NULL;
+    }
+
+    LIQ_ARRAY(vp_sort_tmp, indexes, map->colors);
+
+    for(unsigned int i=0; i < map->colors; i++) {
+        indexes[i].idx = i;
+    }
+
+    vp_node *root = vp_create_node(&m, indexes, map->colors, map->palette);
+    *handle = (struct nearest_map){
+        .root = root,
+        .palette = map->palette,
+        .mempool = m,
+    };
+
+    // For every palette entry find the closest *other* entry (the entry itself is excluded)
+    for(unsigned int i=0; i < map->colors; i++) {
+        vp_search_tmp best = {
+            .distance = MAX_DIFF,
+            .distance_squared = MAX_DIFF,
+            .exclude = i,
+        };
+        vp_search_node(root, &map->palette[i].acolor, &best);
+        handle->nearest_other_color_dist[i] = best.distance * best.distance / 4.0; // square of half the distance
+    }
+
+    return handle;
+}
+/*
+ * Searches the subtree rooted at `node` for the color closest to *needle and
+ * updates *best_candidate whenever a closer color is found. The palette index
+ * in best_candidate->exclude is never selected.
+ * `node` must not be NULL.
+ */
+static void vp_search_node(const vp_node *node, const f_pixel *const needle, vp_search_tmp *const best_candidate) {
+ do {
+ const float distance_squared = colordifference(node->vantage_point, *needle);
+ const float distance = sqrtf(distance_squared);
+
+ if (distance_squared < best_candidate->distance_squared && best_candidate->exclude != node->idx) {
+ best_candidate->distance = distance;
+ best_candidate->distance_squared = distance_squared;
+ best_candidate->idx = node->idx;
+ }
+
+ if (node->restcount) { // node keeps its remaining colors inline: check them all and stop
+ for(int i=0; i < node->restcount; i++) {
+ const float distance_squared = colordifference(node->rest[i].color, *needle);
+ if (distance_squared < best_candidate->distance_squared && best_candidate->exclude != node->rest[i].idx) {
+ best_candidate->distance = sqrtf(distance_squared);
+ best_candidate->distance_squared = distance_squared;
+ best_candidate->idx = node->rest[i].idx;
+ }
+ }
+ return;
+ }
+
+ // Recurse towards most likely candidate first to narrow best candidate's distance as soon as possible
+ if (distance_squared < node->radius_squared) {
+ if (node->near) {
+ vp_search_node(node->near, needle, best_candidate);
+ }
+ // The best node (final answer) may be just outside the radius, but not farther than
+ // the best distance we know so far. The vp_search_node above should have narrowed
+ // best_candidate->distance, so this path is rarely taken.
+ if (node->far && distance >= node->radius - best_candidate->distance) {
+ node = node->far; // Fast tail recursion
+ } else {
+ return;
+ }
+ } else {
+ if (node->far) {
+ vp_search_node(node->far, needle, best_candidate);
+ }
+ if (node->near && distance <= node->radius + best_candidate->distance) { // mirror case: the answer may be just inside the radius
+ node = node->near; // Fast tail recursion
+ } else {
+ return;
+ }
+ }
+ } while(true);
+}
+
+/*
+ * Returns the index of the palette entry nearest to *px.
+ *
+ * `likely_colormap_index` is a guess that is tried first: when *px is closer
+ * to it than the precomputed nearest_other_color_dist of that entry, the
+ * guess is returned without searching the tree.
+ * When `diff` is non-NULL it receives the color difference of the result.
+ */
+LIQ_PRIVATE unsigned int nearest_search(const struct nearest_map *handle, const f_pixel *px, const int likely_colormap_index, float *diff) {
+    const float guess_diff = colordifference(handle->palette[likely_colormap_index].acolor, *px);
+    const bool guess_is_certain = guess_diff < handle->nearest_other_color_dist[likely_colormap_index];
+
+    unsigned int found = likely_colormap_index;
+    float found_diff = guess_diff;
+
+    if (!guess_is_certain) {
+        // full search, seeded with the guess as the candidate to beat
+        vp_search_tmp best_candidate = {
+            .distance = sqrtf(guess_diff),
+            .distance_squared = guess_diff,
+            .idx = likely_colormap_index,
+            .exclude = -1,
+        };
+        vp_search_node(handle->root, px, &best_candidate);
+
+        found = best_candidate.idx;
+        found_diff = best_candidate.distance * best_candidate.distance;
+    }
+
+    if (diff) {
+        *diff = found_diff;
+    }
+    return found;
+}
+
+/* Releases a handle returned by nearest_init by destroying the mempool stored in it. */
+LIQ_PRIVATE void nearest_free(struct nearest_map *centroids)
+{
+    mempoolptr pool = centroids->mempool;
+    mempool_destroy(pool);
+}
diff --git a/src/platform/gba/packer/libimagequant/nearest.h b/src/platform/gba/packer/libimagequant/nearest.h
new file mode 100644
index 0000000..10a0a2c
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/nearest.h
@@ -0,0 +1,14 @@
+//
+// nearest.h
+// pngquant
+//
+
+#ifndef NEAREST_H
+#define NEAREST_H
+/*
+ * Nearest palette color lookup (implemented in nearest.c).
+ * nearest_init builds the search structure for a palette, nearest_search returns
+ * the index of the palette entry closest to *px (and its color difference through
+ * `diff` when that is non-NULL), nearest_free releases the structure.
+ */
+struct nearest_map;
+LIQ_PRIVATE struct nearest_map *nearest_init(const colormap *palette);
+LIQ_PRIVATE unsigned int nearest_search(const struct nearest_map *map, const f_pixel *px, const int palette_index_guess, float *diff);
+LIQ_PRIVATE void nearest_free(struct nearest_map *map);
+
+#endif
diff --git a/src/platform/gba/packer/libimagequant/pam.c b/src/platform/gba/packer/libimagequant/pam.c
new file mode 100644
index 0000000..6e36222
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/pam.c
@@ -0,0 +1,289 @@
+/* pam.c - pam (portable alpha map) utility library
+**
+** © 2009-2017 by Kornel Lesiński.
+** © 1989, 1991 by Jef Poskanzer.
+** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider.
+**
+** See COPYRIGHT file for license.
+*/
+
+#include
+#include
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "mempool.h"
+
+/*
+ * Adds every pixel of `pixels` (`rows` rows of `cols` pixels) to the color hash table.
+ *
+ * Fully transparent pixels are all counted as one color with a fixed weight of
+ * 2000. Other pixels are posterized according to acht->ignorebits and weighted
+ * by the next byte of `importance_map`, or by 255 when no map is given.
+ * Returns false as soon as pam_add_to_hash fails; on success acht->cols is set
+ * and acht->rows is increased by `rows`.
+ */
+LIQ_PRIVATE bool pam_computeacolorhash(struct acolorhash_table *acht, const rgba_pixel *const pixels[], unsigned int cols, unsigned int rows, const unsigned char *importance_map)
+{
+    const unsigned int ignorebits = acht->ignorebits;
+    // channel_mask keeps the high (8-ignorebits) bits of a channel,
+    // channel_hmask selects its top `ignorebits` bits
+    const unsigned int channel_mask = 255U>>ignorebits<<ignorebits;
+    const unsigned int channel_hmask = (255U>>ignorebits) ^ 0xFFU;
+    const unsigned int posterize_mask = channel_mask << 24 | channel_mask << 16 | channel_mask << 8 | channel_mask;
+    const unsigned int posterize_high_mask = channel_hmask << 24 | channel_hmask << 16 | channel_hmask << 8 | channel_hmask;
+
+    const unsigned int hash_size = acht->hash_size;
+
+    /* Go through the entire image, building a hash table of colors. */
+    for(unsigned int row = 0; row < rows; ++row) {
+
+        for(unsigned int col = 0; col < cols; ++col) {
+            unsigned int boost;
+
+            // RGBA color is cast to an integer for easier hashing/comparisons
+            union rgba_as_int px = {pixels[row][col]};
+            unsigned int hash;
+            if (!px.rgba.a) {
+                // "dirty alpha" has different RGBA values that end up being the same fully transparent color
+                px.l=0; hash=0;
+
+                boost = 2000;
+                if (importance_map) {
+                    importance_map++; // the map byte is skipped, but must still be consumed
+                }
+            } else {
+                // mask posterizes all 4 channels in one go
+                px.l = (px.l & posterize_mask) | ((px.l & posterize_high_mask) >> (8-ignorebits));
+                // fancier hashing algorithms didn't improve much
+                hash = px.l % hash_size;
+
+                if (importance_map) {
+                    boost = *importance_map++;
+                } else {
+                    boost = 255;
+                }
+            }
+
+            if (!pam_add_to_hash(acht, hash, boost, px, row, rows)) {
+                return false;
+            }
+        }
+
+    }
+    acht->cols = cols;
+    acht->rows += rows;
+    return true;
+}
+
+/*
+ * Adds color `px` with weight `boost` to bucket `hash` of the table, or adds
+ * `boost` to the weight of the matching entry when the color is already there.
+ *
+ * A bucket stores its first two colors inline and any further ones in
+ * other_items, an array taken from the table's mempool (or from the freestack
+ * of recycled arrays) that is replaced by a larger one when it fills up.
+ * `row` and `rows` are only used to estimate the size of a new mempool chunk.
+ *
+ * Returns false when growing a bucket would push the table past maxcolors,
+ * or when an allocation fails.
+ */
+LIQ_PRIVATE bool pam_add_to_hash(struct acolorhash_table *acht, unsigned int hash, unsigned int boost, union rgba_as_int px, unsigned int row, unsigned int rows)
+{
+    /* head of the hash function stores first 2 colors inline (achl->used = 1..2),
+       to reduce number of allocations of achl->other_items.
+     */
+    struct acolorhist_arr_head *achl = &acht->buckets[hash];
+    if (achl->inline1.color.l == px.l && achl->used) {
+        achl->inline1.perceptual_weight += boost;
+        return true;
+    }
+    if (achl->used) {
+        if (achl->used > 1) {
+            if (achl->inline2.color.l == px.l) {
+                achl->inline2.perceptual_weight += boost;
+                return true;
+            }
+            // other items are stored as an array (which gets reallocated if needed)
+            struct acolorhist_arr_item *other_items = achl->other_items;
+            unsigned int i = 0;
+            for (; i < achl->used-2; i++) {
+                if (other_items[i].color.l == px.l) {
+                    other_items[i].perceptual_weight += boost;
+                    return true;
+                }
+            }
+
+            // the array was allocated with spare items
+            if (i < achl->capacity) {
+                other_items[i] = (struct acolorhist_arr_item){
+                    .color = px,
+                    .perceptual_weight = boost,
+                };
+                achl->used++;
+                ++acht->colors;
+                return true;
+            }
+
+            if (++acht->colors > acht->maxcolors) {
+                return false;
+            }
+
+            struct acolorhist_arr_item *new_items;
+            unsigned int capacity;
+            if (!other_items) { // there was no array previously, alloc "small" array
+                capacity = 8;
+                if (acht->freestackp <= 0) {
+                    // estimate how many colors are going to be + headroom
+                    const size_t mempool_size = ((acht->rows + rows-row) * 2 * acht->colors / (acht->rows + row + 1) + 1024) * sizeof(struct acolorhist_arr_item);
+                    new_items = mempool_alloc(&acht->mempool, sizeof(struct acolorhist_arr_item)*capacity, mempool_size);
+                    // same failure handling as the grow path below
+                    if (!new_items) return false;
+                } else {
+                    // freestack stores previously freed (reallocated) arrays that can be reused
+                    // (all pessimistically assumed to be capacity = 8)
+                    new_items = acht->freestack[--acht->freestackp];
+                }
+            } else {
+                const unsigned int stacksize = sizeof(acht->freestack)/sizeof(acht->freestack[0]);
+
+                // simply reallocs and copies array to larger capacity
+                capacity = achl->capacity*2 + 16;
+                if (acht->freestackp < stacksize-1) {
+                    acht->freestack[acht->freestackp++] = other_items;
+                }
+                const size_t mempool_size = ((acht->rows + rows-row) * 2 * acht->colors / (acht->rows + row + 1) + 32*capacity) * sizeof(struct acolorhist_arr_item);
+                new_items = mempool_alloc(&acht->mempool, sizeof(struct acolorhist_arr_item)*capacity, mempool_size);
+                if (!new_items) return false;
+                memcpy(new_items, other_items, sizeof(other_items[0])*achl->capacity);
+            }
+
+            achl->other_items = new_items;
+            achl->capacity = capacity;
+            new_items[i] = (struct acolorhist_arr_item){
+                .color = px,
+                .perceptual_weight = boost,
+            };
+            achl->used++;
+        } else {
+            // these are elses for first checks whether first and second inline-stored colors are used
+            achl->inline2.color.l = px.l;
+            achl->inline2.perceptual_weight = boost;
+            achl->used = 2;
+            ++acht->colors;
+        }
+    } else {
+        achl->inline1.color.l = px.l;
+        achl->inline1.perceptual_weight = boost;
+        achl->used = 1;
+        ++acht->colors;
+    }
+    return true;
+}
+
+/*
+ * Allocates an empty color hash table inside a new mempool.
+ *
+ * The number of buckets and the size of the first mempool chunk are derived
+ * from an estimate of how many colors an image of `surface` pixels will have
+ * (capped at `maxcolors`). Returns NULL if the allocation fails.
+ * Free the table with pam_freeacolorhash.
+ */
+LIQ_PRIVATE struct acolorhash_table *pam_allocacolorhash(unsigned int maxcolors, unsigned int surface, unsigned int ignorebits, void* (*malloc)(size_t), void (*free)(void*))
+{
+    const unsigned int pixels_per_color = ignorebits + (surface > 512*512 ? 6 : 5);
+    const size_t estimated_colors = MIN(maxcolors, surface/pixels_per_color);
+
+    size_t hash_size;
+    if (estimated_colors < 66000) {
+        hash_size = 6673;
+    } else if (estimated_colors < 200000) {
+        hash_size = 12011;
+    } else {
+        hash_size = 24019;
+    }
+
+    const size_t buckets_size = hash_size * sizeof(struct acolorhist_arr_head);
+    const size_t mempool_size = sizeof(struct acolorhash_table) + buckets_size + estimated_colors * sizeof(struct acolorhist_arr_item);
+
+    // the table header and its buckets are the first allocation of the pool
+    mempoolptr pool = NULL;
+    struct acolorhash_table *table = mempool_create(&pool, sizeof(*table) + buckets_size, mempool_size, malloc, free);
+    if (!table) {
+        return NULL;
+    }
+
+    *table = (struct acolorhash_table){
+        .mempool = pool,
+        .hash_size = hash_size,
+        .maxcolors = maxcolors,
+        .ignorebits = ignorebits,
+    };
+    memset(table->buckets, 0, buckets_size);
+    return table;
+}
+
+/*
+ * Appends hash table entry `entry` to the histogram array at position *j and
+ * advances *j. Entries with zero weight are skipped.
+ * The stored weight is perceptual_weight/128, capped at max_perceptual_weight.
+ * Returns the stored weight (0 when the entry was skipped).
+ */
+ALWAYS_INLINE static float pam_add_to_hist(const float *gamma_lut, hist_item *achv, unsigned int *j, const struct acolorhist_arr_item *entry, const float max_perceptual_weight)
+{
+    if (entry->perceptual_weight == 0) {
+        return 0;
+    }
+
+    hist_item *const slot = &achv[*j];
+    const float w = MIN(entry->perceptual_weight/128.f, max_perceptual_weight);
+
+    slot->perceptual_weight = w;
+    slot->adjusted_weight = slot->perceptual_weight;
+    slot->acolor = rgba_to_f(gamma_lut, entry->color.rgba);
+    *j += 1;
+
+    return w;
+}
+
+/*
+ * Converts the color hash table into a histogram: a flat array with one item
+ * per color, holding the color converted with `gamma` and its weight.
+ *
+ * Returns NULL when `acht` is NULL, when an allocation fails, or when no color
+ * has a non-zero weight; nothing is leaked in these cases.
+ * The result is freed with pam_freeacolorhist.
+ */
+LIQ_PRIVATE histogram *pam_acolorhashtoacolorhist(const struct acolorhash_table *acht, const double gamma, void* (*malloc)(size_t), void (*free)(void*))
+{
+    // check the input before allocating, so there is nothing to leak
+    if (!acht) return NULL;
+
+    histogram *hist = malloc(sizeof(hist[0]));
+    if (!hist) return NULL;
+    *hist = (histogram){
+        .achv = malloc(MAX(1,acht->colors) * sizeof(hist->achv[0])),
+        .size = acht->colors,
+        .free = free,
+        .ignorebits = acht->ignorebits,
+    };
+    if (!hist->achv) {
+        free(hist);
+        return NULL;
+    }
+
+    float gamma_lut[256];
+    to_f_set_gamma(gamma_lut, gamma);
+
+    /* Limit perceptual weight to 1/10th of the image surface area to prevent
+       a single color from dominating all others. */
+    float max_perceptual_weight = 0.1f * acht->cols * acht->rows;
+    double total_weight = 0;
+
+    unsigned int j=0;
+    for(unsigned int i=0; i < acht->hash_size; ++i) {
+        const struct acolorhist_arr_head *const achl = &acht->buckets[i];
+        if (achl->used) {
+            total_weight += pam_add_to_hist(gamma_lut, hist->achv, &j, &achl->inline1, max_perceptual_weight);
+
+            if (achl->used > 1) {
+                total_weight += pam_add_to_hist(gamma_lut, hist->achv, &j, &achl->inline2, max_perceptual_weight);
+
+                for(unsigned int k=0; k < achl->used-2; k++) {
+                    total_weight += pam_add_to_hist(gamma_lut, hist->achv, &j, &achl->other_items[k], max_perceptual_weight);
+                }
+            }
+        }
+    }
+    // j counts the items actually stored; zero-weight entries were skipped
+    hist->size = j;
+    hist->total_perceptual_weight = total_weight;
+    for(unsigned int k=0; k < hist->size; k++) {
+        hist->achv[k].tmp.likely_colormap_index = 0;
+    }
+    if (!j) {
+        pam_freeacolorhist(hist);
+        return NULL;
+    }
+    return hist;
+}
+
+
+/* Frees a table created by pam_allocacolorhash by destroying its mempool. NULL is accepted. */
+LIQ_PRIVATE void pam_freeacolorhash(struct acolorhash_table *acht)
+{
+    if (!acht) {
+        return;
+    }
+    mempool_destroy(acht->mempool);
+}
+
+/* Frees the histogram's item array and then the histogram, using the `free` function stored in it. */
+LIQ_PRIVATE void pam_freeacolorhist(histogram *hist)
+{
+    // fetch the function pointer first: it lives inside the struct being freed
+    void (*const release)(void*) = hist->free;
+
+    release(hist->achv);
+    release(hist);
+}
+
+/*
+ * Allocates a colormap with room for `colors` palette entries, all zeroed.
+ * The allocator pair is stored in the colormap. Returns NULL if the
+ * allocation fails.
+ */
+LIQ_PRIVATE colormap *pam_colormap(unsigned int colors, void* (*malloc)(size_t), void (*free)(void*))
+{
+    assert(colors > 0 && colors < 65536);
+
+    // header and palette share a single allocation
+    colormap *const map = malloc(sizeof(colormap) + colors * sizeof(map->palette[0]));
+    if (map) {
+        *map = (colormap){
+            .malloc = malloc,
+            .free = free,
+            .colors = colors,
+        };
+        memset(map->palette, 0, colors * sizeof(map->palette[0]));
+    }
+    return map;
+}
+
+/*
+ * Returns a newly allocated copy of `map` (same allocator, same palette
+ * entries), or NULL if the copy cannot be allocated.
+ */
+LIQ_PRIVATE colormap *pam_duplicate_colormap(colormap *map)
+{
+    colormap *dupe = pam_colormap(map->colors, map->malloc, map->free);
+    if (!dupe) {
+        // pam_colormap returns NULL when its allocation fails
+        return NULL;
+    }
+    for(unsigned int i=0; i < map->colors; i++) {
+        dupe->palette[i] = map->palette[i];
+    }
+    return dupe;
+}
+
+/* Frees a colormap with the `free` function stored in it. */
+LIQ_PRIVATE void pam_freecolormap(colormap *c)
+{
+    void (*const release)(void*) = c->free;
+    release(c);
+}
+
+/*
+ * Fills the 256-entry lookup table that maps an 8-bit channel value to a float
+ * in the library's internal gamma: (value/255) ^ (internal_gamma/gamma).
+ */
+LIQ_PRIVATE void to_f_set_gamma(float gamma_lut[], const double gamma)
+{
+    const double exponent = internal_gamma/gamma;
+
+    for(int level=0; level < 256; level++) {
+        gamma_lut[level] = pow(level/255.0, exponent);
+    }
+}
+
diff --git a/src/platform/gba/packer/libimagequant/pam.h b/src/platform/gba/packer/libimagequant/pam.h
new file mode 100644
index 0000000..5a7d50e
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/pam.h
@@ -0,0 +1,283 @@
+/* pam.h - pam (portable alpha map) utility library
+ **
+ ** Colormap routines.
+ **
+ ** Copyright (C) 1989, 1991 by Jef Poskanzer.
+ ** Copyright (C) 1997 by Greg Roelofs.
+ **
+ ** Permission to use, copy, modify, and distribute this software and its
+ ** documentation for any purpose and without fee is hereby granted, provided
+ ** that the above copyright notice appear in all copies and that both that
+ ** copyright notice and this permission notice appear in supporting
+ ** documentation. This software is provided "as is" without express or
+ ** implied warranty.
+ */
+
+#ifndef PAM_H
+#define PAM_H
+
+// accidental debug assertions make color search much slower,
+// so force assertions off if there's no explicit setting
+#if !defined(NDEBUG) && !defined(DEBUG)
+#define NDEBUG
+#endif
+
+#include
+#include
+#include
+#include
+
+#ifndef MAX
+# define MAX(a,b) ((a) > (b)? (a) : (b))
+# define MIN(a,b) ((a) < (b)? (a) : (b))
+#endif
+
+#define MAX_DIFF 1e20
+
+#ifndef USE_SSE
+# if defined(__SSE__) && (defined(__amd64__) || defined(__X86_64__) || defined(_WIN64) || defined(WIN32) || defined(__WIN32__))
+# define USE_SSE 1
+# else
+# define USE_SSE 0
+# endif
+#endif
+
+#if USE_SSE
+# include
+# ifdef _MSC_VER
+# include
+# define SSE_ALIGN
+# else
+# define SSE_ALIGN __attribute__ ((aligned (16)))
+# if defined(__i386__) && defined(__PIC__)
+# define cpuid(func,ax,bx,cx,dx)\
+ __asm__ __volatile__ ( \
+ "push %%ebx\n" \
+ "cpuid\n" \
+ "mov %%ebx, %1\n" \
+ "pop %%ebx\n" \
+ : "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx) \
+ : "a" (func));
+# else
+# define cpuid(func,ax,bx,cx,dx)\
+ __asm__ __volatile__ ("cpuid":\
+ "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
+# endif
+#endif
+#else
+# define SSE_ALIGN
+#endif
+
+#ifndef _MSC_VER
+#define LIQ_ARRAY(type, var, count) type var[count]
+#else
+#define LIQ_ARRAY(type, var, count) type* var = (type*)_alloca(sizeof(type)*(count))
+#endif
+
+#if defined(__GNUC__) || defined (__llvm__)
+#define ALWAYS_INLINE __attribute__((always_inline)) inline
+#define NEVER_INLINE __attribute__ ((noinline))
+#elif defined(_MSC_VER)
+#define inline __inline
+#define restrict __restrict
+#define ALWAYS_INLINE __forceinline
+#define NEVER_INLINE __declspec(noinline)
+#else
+#define ALWAYS_INLINE inline
+#define NEVER_INLINE
+#endif
+
+/* from pam.h */
+/* 8-bit RGBA pixel. */
+typedef struct {
+ unsigned char r, g, b, a;
+} rgba_pixel;
+/*
+ * Floating-point pixel as produced by rgba_to_f: alpha in 0..1, color channels
+ * gamma-converted and multiplied by alpha. Alpha is the first member.
+ */
+typedef struct {
+ float a, r, g, b;
+} SSE_ALIGN f_pixel;
+/* Gamma of the internal color representation (see to_f_set_gamma and f_to_rgb). */
+static const float internal_gamma = 0.5499f;
+
+LIQ_PRIVATE void to_f_set_gamma(float gamma_lut[], const double gamma);
+
+/**
+ Converts an 8-bit color to the internal representation: channels are mapped
+ through gamma_lut and multiplied by alpha (premultiplied alpha).
+ (premultiplied color space is much better for blending of semitransparent colors)
+ */
+ALWAYS_INLINE static f_pixel rgba_to_f(const float gamma_lut[], const rgba_pixel px);
+inline static f_pixel rgba_to_f(const float gamma_lut[], const rgba_pixel px)
+{
+    const float alpha = px.a/255.f;
+    f_pixel converted;
+
+    converted.a = alpha;
+    converted.r = gamma_lut[px.r]*alpha;
+    converted.g = gamma_lut[px.g]*alpha;
+    converted.b = gamma_lut[px.b]*alpha;
+
+    return converted;
+}
+
+/*
+ * Converts an internal (premultiplied, internal-gamma) pixel back to 8-bit RGBA
+ * for output gamma `gamma`. Pixels with alpha below 1/256 become transparent black.
+ */
+inline static rgba_pixel f_to_rgb(const float gamma, const f_pixel px)
+{
+    if (px.a < 1.f/256.f) {
+        return (rgba_pixel){0,0,0,0};
+    }
+
+    // undo the premultiplication, then the gamma
+    float r = px.r / px.a;
+    float g = px.g / px.a;
+    float b = px.b / px.a;
+    float a = px.a;
+
+    r = powf(r, gamma/internal_gamma);
+    g = powf(g, gamma/internal_gamma);
+    b = powf(b, gamma/internal_gamma);
+
+    // 256, because numbers are in range 1..255.9999… rounded down
+    r *= 256.f;
+    g *= 256.f;
+    b *= 256.f;
+    a *= 256.f;
+
+    rgba_pixel out;
+    out.r = r>=255.f ? 255 : r;
+    out.g = g>=255.f ? 255 : g;
+    out.b = b>=255.f ? 255 : b;
+    out.a = a>=255.f ? 255 : a;
+    return out;
+}
+
+ALWAYS_INLINE static double colordifference_ch(const double x, const double y, const double alphas);
+/*
+ * Squared difference of one channel: the larger of the difference when both
+ * colors are blended on black and when they are blended on white.
+ * Premultiplied alpha and backgrounds of 0/1 shorten the formula.
+ */
+inline static double colordifference_ch(const double x, const double y, const double alphas)
+{
+    const double on_black = x-y;
+    const double on_white = on_black+alphas;
+    const double black_sq = on_black*on_black;
+    const double white_sq = on_white*on_white;
+
+    return black_sq > white_sq ? black_sq : white_sq;
+}
+
+ALWAYS_INLINE static float colordifference_stdc(const f_pixel px, const f_pixel py);
+inline static float colordifference_stdc(const f_pixel px, const f_pixel py)
+{
+    // Portable (non-SSE) colour distance between two premultiplied pixels.
+    //
+    // Blending a pixel on black leaves its rgb unchanged, so that difference
+    // is the ordinary opaque one. Blending on white gives px.rgb - px.a, so:
+    //   (px.rgb - px.a) - (py.rgb - py.a)
+    // = (px.rgb - py.rgb) + (py.a - px.a)
+    // The second term is shared by all three channels and is computed once.
+    const double alphas = py.a-px.a;
+
+    double total = colordifference_ch(px.r, py.r, alphas);
+    total = total + colordifference_ch(px.g, py.g, alphas);
+    total = total + colordifference_ch(px.b, py.b, alphas);
+    return total;
+}
+
+ALWAYS_INLINE static float colordifference(f_pixel px, f_pixel py); // colour distance between two f_pixels; SSE version when USE_SSE is set, otherwise colordifference_stdc
+inline static float colordifference(f_pixel px, f_pixel py)
+{
+#if USE_SSE
+ const __m128 vpx = _mm_load_ps((const float*)&px); // lanes 0..3 = a, r, g, b (field order of f_pixel); aligned load
+ const __m128 vpy = _mm_load_ps((const float*)&py);
+
+ // y.a - x.a
+ __m128 alphas = _mm_sub_ss(vpy, vpx); // only lane 0 (alpha) is subtracted
+ alphas = _mm_shuffle_ps(alphas,alphas,0); // copy first to all four
+
+ __m128 onblack = _mm_sub_ps(vpx, vpy); // x - y
+ __m128 onwhite = _mm_add_ps(onblack, alphas); // x - y + (y.a - x.a)
+
+ onblack = _mm_mul_ps(onblack, onblack); // square both candidates per lane
+ onwhite = _mm_mul_ps(onwhite, onwhite);
+ const __m128 max = _mm_max_ps(onwhite, onblack); // per-lane worst case, same choice colordifference_ch makes
+
+ // add rgb, not a
+ const __m128 maxhl = _mm_movehl_ps(max, max); // lanes 0,1 = g, b
+ const __m128 tmp = _mm_add_ps(max, maxhl); // lane 1 = r + b
+ const __m128 sum = _mm_add_ss(maxhl, _mm_shuffle_ps(tmp, tmp, 1)); // lane 0 = g + (r + b)
+
+ const float res = _mm_cvtss_f32(sum);
+ assert(fabs(res - colordifference_stdc(px,py)) < 0.001); // debug cross-check against the scalar implementation
+ return res;
+#else
+ return colordifference_stdc(px,py);
+#endif
+}
+
+/* from pamcmap.h */
+union rgba_as_int { // overlays an rgba_pixel with an unsigned int so the pixel can be handled as a single integer value
+ rgba_pixel rgba;
+ unsigned int l; // assumes unsigned int is at least as wide as the 4-byte rgba_pixel — TODO confirm for the target
+};
+
+typedef struct { // one histogram entry: a colour plus the weights attached to it
+ f_pixel acolor;
+ float adjusted_weight, // perceptual weight changed to tweak how mediancut selects colors
+ perceptual_weight; // number of pixels weighted by importance of different areas of the picture
+
+ float color_weight; // these two change every time histogram subset is sorted
+ union {
+ unsigned int sort_value;
+ unsigned char likely_colormap_index;
+ } tmp; // scratch storage: the two members overlap, so only one is meaningful at a time
+} hist_item;
+
+typedef struct { // a collection of hist_item entries plus bookkeeping
+ hist_item *achv; // the entries; presumably `size` of them (confirm where the histogram is built)
+ void (*free)(void*); // deallocator stored alongside the data
+ double total_perceptual_weight;
+ unsigned int size;
+ unsigned int ignorebits;
+} histogram;
+
+typedef struct { // one palette entry of a colormap
+ f_pixel acolor;
+ float popularity;
+ bool fixed; // if true it's user-supplied and must not be changed (e.g in K-Means iteration)
+} colormap_item;
+
+typedef struct colormap { // palette header followed in the same allocation by its entries
+ unsigned int colors;
+ void* (*malloc)(size_t); // allocator callbacks stored with the map
+ void (*free)(void*);
+ colormap_item palette[]; // flexible array member; presumably holds `colors` entries (confirm in pam_colormap)
+} colormap;
+
+struct acolorhist_arr_item { // one colour and its integer weight, as stored in the hash-table buckets below
+ union rgba_as_int color;
+ unsigned int perceptual_weight;
+};
+
+struct acolorhist_arr_head { // one bucket of acolorhash_table
+ struct acolorhist_arr_item inline1, inline2; // two items stored directly inside the bucket
+ unsigned int used, capacity; // presumably describe other_items (count in use / allocated) — confirm in the hash code
+ struct acolorhist_arr_item *other_items; // separately stored items beyond the two inline ones
+};
+
+struct acolorhash_table { // colour hash table: header fields followed by the bucket array
+ struct mempool *mempool; // struct mempool is declared elsewhere
+ unsigned int ignorebits, maxcolors, colors, cols, rows;
+ unsigned int hash_size; // presumably the number of entries in buckets[] — confirm in pam_allocacolorhash
+ unsigned int freestackp; // presumably the number of entries currently held in freestack — confirm
+ struct acolorhist_arr_item *freestack[512];
+ struct acolorhist_arr_head buckets[]; // flexible array member
+};
+
+LIQ_PRIVATE void pam_freeacolorhash(struct acolorhash_table *acht); // all functions below are defined elsewhere; LIQ_PRIVATE is defined outside this chunk
+LIQ_PRIVATE struct acolorhash_table *pam_allocacolorhash(unsigned int maxcolors, unsigned int surface, unsigned int ignorebits, void* (*malloc)(size_t), void (*free)(void*)); // malloc/free are caller-supplied allocator callbacks
+LIQ_PRIVATE histogram *pam_acolorhashtoacolorhist(const struct acolorhash_table *acht, const double gamma, void* (*malloc)(size_t), void (*free)(void*)); // builds a histogram from a hash table; takes the same kind of allocator callbacks
+LIQ_PRIVATE bool pam_computeacolorhash(struct acolorhash_table *acht, const rgba_pixel *const pixels[], unsigned int cols, unsigned int rows, const unsigned char *importance_map); // pixels is an array of row pointers; meaning of the bool result is not visible here
+LIQ_PRIVATE bool pam_add_to_hash(struct acolorhash_table *acht, unsigned int hash, unsigned int boost, union rgba_as_int px, unsigned int row, unsigned int rows);
+
+LIQ_PRIVATE void pam_freeacolorhist(histogram *h);
+
+LIQ_PRIVATE colormap *pam_colormap(unsigned int colors, void* (*malloc)(size_t), void (*free)(void*)); // presumably allocates a colormap with room for `colors` palette entries — confirm at the definition
+LIQ_PRIVATE colormap *pam_duplicate_colormap(colormap *map);
+LIQ_PRIVATE void pam_freecolormap(colormap *c);
+
+#endif
diff --git a/src/platform/gba/packer/stb_image_resize.h b/src/platform/gba/packer/stb_image_resize.h
new file mode 100644
index 0000000..bcca92c
--- /dev/null
+++ b/src/platform/gba/packer/stb_image_resize.h
@@ -0,0 +1,2585 @@
+/* stb_image_resize - v0.90 - public domain image resizing
+ by Jorge L Rodriguez (@VinoBS) - 2014
+ http://github.com/nothings/stb
+
+ Written with emphasis on usability, portability, and efficiency. (No
+ SIMD or threads, so it can be easily outperformed by libs that use those.)
+ Only scaling and translation is supported, no rotations or shears.
+ Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation.
+
+ COMPILING & LINKING
+ In one C/C++ file that #includes this file, do this:
+ #define STB_IMAGE_RESIZE_IMPLEMENTATION
+ before the #include. That will create the implementation in that file.
+
+ QUICKSTART
+ stbir_resize_uint8( input_pixels , in_w , in_h , 0,
+ output_pixels, out_w, out_h, 0, num_channels)
+ stbir_resize_float(...)
+ stbir_resize_uint8_srgb( input_pixels , in_w , in_h , 0,
+ output_pixels, out_w, out_h, 0,
+ num_channels , alpha_chan , 0)
+ stbir_resize_uint8_srgb_edgemode(
+ input_pixels , in_w , in_h , 0,
+ output_pixels, out_w, out_h, 0,
+ num_channels , alpha_chan , 0, STBIR_EDGE_CLAMP)
+ // WRAP/REFLECT/ZERO
+
+ FULL API
+ See the "header file" section of the source for API documentation.
+
+ ADDITIONAL DOCUMENTATION
+
+ SRGB & FLOATING POINT REPRESENTATION
+ The sRGB functions presume IEEE floating point. If you do not have
+ IEEE floating point, define STBIR_NON_IEEE_FLOAT. This will use
+ a slower implementation.
+
+ MEMORY ALLOCATION
+ The resize functions here perform a single memory allocation using
+ malloc. To control the memory allocation, before the #include that
+ triggers the implementation, do:
+
+ #define STBIR_MALLOC(size,context) ...
+ #define STBIR_FREE(ptr,context) ...
+
+ Each resize function makes exactly one call to malloc/free, so to use
+ temp memory, store the temp memory in the context and return that.
+
+ ASSERT
+ Define STBIR_ASSERT(boolval) to override assert() and not use assert.h
+
+ OPTIMIZATION
+ Define STBIR_SATURATE_INT to compute clamp values in-range using
+ integer operations instead of float operations. This may be faster
+ on some platforms.
+
+ DEFAULT FILTERS
+ For functions which don't provide explicit control over what filters
+ to use, you can change the compile-time defaults with
+
+ #define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_something
+ #define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_something
+
+ See stbir_filter in the header-file section for the list of filters.
+
+ NEW FILTERS
+ A number of 1D filter kernels are used. For a list of
+ supported filters see the stbir_filter enum. To add a new filter,
+ write a filter function and add it to stbir__filter_info_table.
+
+ PROGRESS
+ For interactive use with slow resize operations, you can install
+ a progress-report callback:
+
+ #define STBIR_PROGRESS_REPORT(val) some_func(val)
+
+ The parameter val is a float which goes from 0 to 1 as progress is made.
+
+ For example:
+
+ static void my_progress_report(float progress);
+ #define STBIR_PROGRESS_REPORT(val) my_progress_report(val)
+
+ #define STB_IMAGE_RESIZE_IMPLEMENTATION
+ #include "stb_image_resize.h"
+
+ static void my_progress_report(float progress)
+ {
+ printf("Progress: %f%%\n", progress*100);
+ }
+
+ MAX CHANNELS
+ If your image has more than 64 channels, define STBIR_MAX_CHANNELS
+ to the max you'll have.
+
+ ALPHA CHANNEL
+ Most of the resizing functions provide the ability to control how
+ the alpha channel of an image is processed. The important things
+ to know about this:
+
+ 1. The best mathematically-behaved version of alpha to use is
+ called "premultiplied alpha", in which the other color channels
+ have had the alpha value multiplied in. If you use premultiplied
+ alpha, linear filtering (such as image resampling done by this
+ library, or performed in texture units on GPUs) does the "right
+ thing". While premultiplied alpha is standard in the movie CGI
+ industry, it is still uncommon in the videogame/real-time world.
+
+ If you linearly filter non-premultiplied alpha, strange effects
+ occur. (For example, the average of 1% opaque bright green
+ and 99% opaque black produces 50% transparent dark green when
+ non-premultiplied, whereas premultiplied it produces 50%
+ transparent near-black. The former introduces green energy
+ that doesn't exist in the source image.)
+
+ 2. Artists should not edit premultiplied-alpha images; artists
+ want non-premultiplied alpha images. Thus, art tools generally output
+ non-premultiplied alpha images.
+
+ 3. You will get best results in most cases by converting images
+ to premultiplied alpha before processing them mathematically.
+
+ 4. If you pass the flag STBIR_FLAG_ALPHA_PREMULTIPLIED, the
+ resizer does not do anything special for the alpha channel;
+ it is resampled identically to other channels. This produces
+ the correct results for premultiplied-alpha images, but produces
+ less-than-ideal results for non-premultiplied-alpha images.
+
+ 5. If you do not pass the flag STBIR_FLAG_ALPHA_PREMULTIPLIED,
+ then the resizer weights the contribution of input pixels
+ based on their alpha values, or, equivalently, it multiplies
+ the alpha value into the color channels, resamples, then divides
+ by the resultant alpha value. Input pixels which have alpha=0 do
+ not contribute at all to output pixels unless _all_ of the input
+ pixels affecting that output pixel have alpha=0, in which case
+ the result for that pixel is the same as it would be without
+ STBIR_FLAG_ALPHA_PREMULTIPLIED. However, this is only true for
+ input images in integer formats. For input images in float format,
+ input pixels with alpha=0 have no effect, and output pixels
+ which have alpha=0 will be 0 in all channels. (For float images,
+ you can manually achieve the same result by adding a tiny epsilon
+ value to the alpha channel of every image, and then subtracting
+ or clamping it at the end.)
+
+ 6. You can suppress the behavior described in #5 and make
+ all-0-alpha pixels have 0 in all channels by #defining
+ STBIR_NO_ALPHA_EPSILON.
+
+ 7. You can separately control whether the alpha channel is
+ interpreted as linear or affected by the colorspace. By default
+ it is linear; you almost never want to apply the colorspace.
+ (For example, graphics hardware does not apply sRGB conversion
+ to the alpha channel.)
+
+ ADDITIONAL CONTRIBUTORS
+ Sean Barrett: API design, optimizations
+
+ REVISIONS
+ 0.90 (2014-09-17) first released version
+
+ LICENSE
+ This software is in the public domain. Where that dedication is not
+ recognized, you are granted a perpetual, irrevocable license to copy
+ and modify this file as you see fit.
+
+ TODO
+ Don't decode all of the image data when only processing a partial tile
+ Don't use full-width decode buffers when only processing a partial tile
+ When processing wide images, break processing into tiles so data fits in L1 cache
+ Installable filters?
+ Resize that respects alpha test coverage
+ (Reference code: FloatImage::alphaTestCoverage and FloatImage::scaleAlphaToCoverage:
+ https://code.google.com/p/nvidia-texture-tools/source/browse/trunk/src/nvimage/FloatImage.cpp )
+*/
+
+#ifndef STBIR_INCLUDE_STB_IMAGE_RESIZE_H
+#define STBIR_INCLUDE_STB_IMAGE_RESIZE_H
+
+// Fixed-width unsigned integer aliases used by the rest of the library.
+// MSVC gets hand-written typedefs; every other compiler takes the exact-width
+// types from <stdint.h>. (The header name was missing from the #include line.)
+#ifdef _MSC_VER
+typedef unsigned char stbir_uint8;
+typedef unsigned short stbir_uint16;
+typedef unsigned int stbir_uint32;
+#else
+#include <stdint.h>
+typedef uint8_t stbir_uint8;
+typedef uint16_t stbir_uint16;
+typedef uint32_t stbir_uint32;
+#endif
+
+#ifdef STB_IMAGE_RESIZE_STATIC // STBIRDEF is the linkage prefix put on every public function declaration below
+#define STBIRDEF static
+#else
+#ifdef __cplusplus
+#define STBIRDEF extern "C"
+#else
+#define STBIRDEF extern
+#endif
+#endif
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Easy-to-use API:
+//
+// * "input pixels" points to an array of image data with 'num_channels' channels (e.g. RGB=3, RGBA=4)
+// * input_w is input image width (x-axis), input_h is input image height (y-axis)
+// * stride is the offset between successive rows of image data in memory, in bytes. you can
+// specify 0 to mean packed contiguously in memory
+// * alpha channel is treated identically to other channels.
+// * colorspace is linear or sRGB as specified by function name
+// * returned result is 1 for success or 0 in case of an error.
+// #define STBIR_ASSERT() to trigger an assert on parameter validation errors.
+// * Memory required grows approximately linearly with input and output size, but with
+// discontinuities at input_w == output_w and input_h == output_h.
+// * These functions use a "default" resampling filter defined at compile time. To change the filter,
+// you can change the compile-time defaults by #defining STBIR_DEFAULT_FILTER_UPSAMPLE
+// and STBIR_DEFAULT_FILTER_DOWNSAMPLE, or you can use the medium-complexity API.
+
+STBIRDEF int stbir_resize_uint8( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // easy API, unsigned char pixels; parameters and the 1/0 return value are described in the comment block above
+ unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+ int num_channels);
+
+STBIRDEF int stbir_resize_float( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // same interface with float pixels
+ float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+ int num_channels);
+
+
+// The following functions interpret image data as gamma-corrected sRGB.
+// Specify STBIR_ALPHA_CHANNEL_NONE if you have no alpha channel,
+// or otherwise provide the index of the alpha channel. Flags value
+// of 0 will probably do the right thing if you're not sure what
+// the flags mean.
+
+#define STBIR_ALPHA_CHANNEL_NONE -1 // pass as alpha_channel when the image has no alpha channel
+
+// Set this flag if your texture has premultiplied alpha. Otherwise, stbir will
+// use alpha-weighted resampling (effectively premultiplying, resampling,
+// then unpremultiplying).
+#define STBIR_FLAG_ALPHA_PREMULTIPLIED (1 << 0) // bit 0 of the flags argument
+// The specified alpha channel should be handled as gamma-corrected value even
+// when doing sRGB operations.
+#define STBIR_FLAG_ALPHA_USES_COLORSPACE (1 << 1) // bit 1 of the flags argument
+
+STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // sRGB variant: alpha_channel is a channel index or STBIR_ALPHA_CHANNEL_NONE, flags is a combination of the STBIR_FLAG_* bits
+ unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+ int num_channels, int alpha_channel, int flags);
+
+
+typedef enum // selects how requests to sample off the edge of the image are handled; numbering starts at 1
+{
+ STBIR_EDGE_CLAMP = 1,
+ STBIR_EDGE_REFLECT = 2,
+ STBIR_EDGE_WRAP = 3,
+ STBIR_EDGE_ZERO = 4,
+} stbir_edge;
+
+// This function adds the ability to specify how requests to sample off the edge of the image are handled.
+STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // same parameters as stbir_resize_uint8_srgb plus an explicit edge mode
+ unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+ int num_channels, int alpha_channel, int flags,
+ stbir_edge edge_wrap_mode);
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Medium-complexity API
+//
+// This extends the easy-to-use API as follows:
+//
+// * Alpha-channel can be processed separately
+// * If alpha_channel is not STBIR_ALPHA_CHANNEL_NONE
+// * Alpha channel will not be gamma corrected (unless flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)
+// * Filters will be weighted by alpha channel (unless flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)
+// * Filter can be selected explicitly
+// * uint16 image type
+// * sRGB colorspace available for all types
+// * context parameter for passing to STBIR_MALLOC
+
+typedef enum // resampling kernel selector; 0 defers to the compile-time defaults
+{
+ STBIR_FILTER_DEFAULT = 0, // use same filter type that easy-to-use API chooses
+ STBIR_FILTER_BOX = 1, // A trapezoid w/1-pixel wide ramps, same result as box for integer scale ratios
+ STBIR_FILTER_TRIANGLE = 2, // On upsampling, produces same results as bilinear texture filtering
+ STBIR_FILTER_CUBICBSPLINE = 3, // The cubic b-spline (aka Mitchell-Netravali with B=1,C=0), gaussian-esque
+ STBIR_FILTER_CATMULLROM = 4, // An interpolating cubic spline
+ STBIR_FILTER_MITCHELL = 5, // Mitchell-Netravali filter with B=1/3, C=1/3
+} stbir_filter;
+
+typedef enum // colorspace the pixel data is interpreted in
+{
+ STBIR_COLORSPACE_LINEAR,
+ STBIR_COLORSPACE_SRGB,
+
+ STBIR_MAX_COLORSPACES, // not a colorspace: equals the number of enumerators above it
+} stbir_colorspace;
+
+// The following functions are all identical except for the type of the image data
+
+STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // medium API, unsigned char pixels; alloc_context is the context parameter passed to STBIR_MALLOC
+ unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+ int num_channels, int alpha_channel, int flags,
+ stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
+ void *alloc_context);
+
+STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // same interface with 16-bit unsigned pixels
+ stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
+ int num_channels, int alpha_channel, int flags,
+ stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
+ void *alloc_context);
+
+STBIRDEF int stbir_resize_float_generic( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // same interface with float pixels
+ float *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
+ int num_channels, int alpha_channel, int flags,
+ stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
+ void *alloc_context);
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Full-complexity API
+//
+// This extends the medium API as follows:
+//
+// * uint32 image type
+// * not typesafe
+// * separate filter types for each axis
+// * separate edge modes for each axis
+// * can specify scale explicitly for subpixel correctness
+// * can specify image source tile using texture coordinates
+
+typedef enum // element type of the void* pixel buffers in the full API; the order must stay in sync with the stbir__type_size table
+{
+ STBIR_TYPE_UINT8 ,
+ STBIR_TYPE_UINT16,
+ STBIR_TYPE_UINT32,
+ STBIR_TYPE_FLOAT ,
+
+ STBIR_MAX_TYPES // not a type: equals the number of enumerators above it
+} stbir_datatype;
+
+STBIRDEF int stbir_resize( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // full API: pixel buffers are untyped, datatype names their element type
+ void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+ stbir_datatype datatype,
+ int num_channels, int alpha_channel, int flags,
+ stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, // edge mode and filter are chosen per axis
+ stbir_filter filter_horizontal, stbir_filter filter_vertical,
+ stbir_colorspace space, void *alloc_context);
+
+STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // as stbir_resize, with explicitly supplied scale and offset
+ void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+ stbir_datatype datatype,
+ int num_channels, int alpha_channel, int flags,
+ stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
+ stbir_filter filter_horizontal, stbir_filter filter_vertical,
+ stbir_colorspace space, void *alloc_context,
+ float x_scale, float y_scale,
+ float x_offset, float y_offset);
+
+STBIRDEF int stbir_resize_region( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, // as stbir_resize, restricted to a source region given in texture coordinates
+ void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+ stbir_datatype datatype,
+ int num_channels, int alpha_channel, int flags,
+ stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
+ stbir_filter filter_horizontal, stbir_filter filter_vertical,
+ stbir_colorspace space, void *alloc_context,
+ float s0, float t0, float s1, float t1);
+// (s0, t0) & (s1, t1) are the top-left and bottom right corner (uv addressing style: [0, 1]x[0, 1]) of a region of the input image to use.
+
+//
+//
+//// end header file /////////////////////////////////////////////////////
+#endif // STBIR_INCLUDE_STB_IMAGE_RESIZE_H
+
+
+
+
+
+#ifdef STB_IMAGE_RESIZE_IMPLEMENTATION
+
+// Default assertion hook: falls back to the standard assert() unless the
+// includer defined STBIR_ASSERT beforehand. (The header name was missing from
+// the #include line; assert() lives in <assert.h>.)
+#ifndef STBIR_ASSERT
+#include <assert.h>
+#define STBIR_ASSERT(x) assert(x)
+#endif
+
+#ifdef STBIR_DEBUG // extra internal checks, active only when STBIR_DEBUG is defined
+#define STBIR__DEBUG_ASSERT STBIR_ASSERT
+#else
+#define STBIR__DEBUG_ASSERT // expands to nothing, so STBIR__DEBUG_ASSERT(expr) leaves just the parenthesised expression behind
+#endif
+
+// If you hit this it means I haven't done it yet.
+#define STBIR__UNIMPLEMENTED(x) STBIR_ASSERT(!(x)) // asserts when condition x (an unsupported case) is true
+
+// For memset
+#include <string.h>
+
+// For pow (and the other floating-point math used by the implementation)
+#include <math.h>
+
+// Default allocator hooks: plain malloc/free, ignoring the context argument,
+// unless the includer defined STBIR_MALLOC (and STBIR_FREE) beforehand.
+// (The header name was missing from the #include line; malloc/free live in <stdlib.h>.)
+#ifndef STBIR_MALLOC
+#include <stdlib.h>
+#define STBIR_MALLOC(size,c) malloc(size)
+#define STBIR_FREE(ptr,c) free(ptr)
+#endif
+
+#ifndef _MSC_VER // stbir__inline: inlining hint placed on the small static helpers below
+#ifdef __cplusplus
+#define stbir__inline inline
+#else
+#define stbir__inline // empty for non-MSVC C compilers: no inline keyword is emitted
+#endif
+#else
+#define stbir__inline __forceinline
+#endif
+
+
+// should produce compiler error if size is wrong (the array length becomes -1 when stbir_uint32 is not 4 bytes)
+typedef unsigned char stbir__validate_uint32[sizeof(stbir_uint32) == 4 ? 1 : -1];
+
+#ifdef _MSC_VER // STBIR__NOTUSED(v): references v without any effect
+#define STBIR__NOTUSED(v) (void)(v)
+#else
+#define STBIR__NOTUSED(v) (void)sizeof(v)
+#endif
+
+#define STBIR__ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0])) // element count; only meaningful for real arrays, not pointers
+
+#ifndef STBIR_DEFAULT_FILTER_UPSAMPLE // compile-time default filter for upsampling; may be predefined by the includer
+#define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_CATMULLROM
+#endif
+
+#ifndef STBIR_DEFAULT_FILTER_DOWNSAMPLE // compile-time default filter for downsampling
+#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_MITCHELL
+#endif
+
+#ifndef STBIR_PROGRESS_REPORT // progress callback hook; expands to nothing unless the includer defines it
+#define STBIR_PROGRESS_REPORT(float_0_to_1)
+#endif
+
+#ifndef STBIR_MAX_CHANNELS // upper bound on channels per pixel; may be raised by the includer
+#define STBIR_MAX_CHANNELS 64
+#endif
+
+#if STBIR_MAX_CHANNELS > 65536
+#error "Too many channels; STBIR_MAX_CHANNELS must be no more than 65536."
+// because we store the indices in 16-bit variables
+#endif
+
+// This value is added to alpha just before premultiplication to avoid
+// zeroing out color values. It is equivalent to 2^-80. If you don't want
+// that behavior (it may interfere if you have floating point images with
+// very small alpha values) then you can define STBIR_NO_ALPHA_EPSILON to
+// disable it.
+#ifndef STBIR_ALPHA_EPSILON // may be predefined by the includer to use a different value
+#define STBIR_ALPHA_EPSILON ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20)) // four divisions by 2^20 give 2^-80 while each shift stays small
+#endif
+
+
+
+#ifdef _MSC_VER // STBIR__UNUSED_PARAM(v): references parameter v without any effect; same expansions as STBIR__NOTUSED
+#define STBIR__UNUSED_PARAM(v) (void)(v)
+#else
+#define STBIR__UNUSED_PARAM(v) (void)sizeof(v)
+#endif
+
+// must match stbir_datatype (one entry per enumerator, in the same order; values are sizes in bytes)
+static unsigned char stbir__type_size[] = {
+ 1, // STBIR_TYPE_UINT8
+ 2, // STBIR_TYPE_UINT16
+ 4, // STBIR_TYPE_UINT32
+ 4, // STBIR_TYPE_FLOAT
+};
+
+// Kernel function centered at 0
+typedef float (stbir__kernel_fn)(float x, float scale); // function type: filter kernel evaluated at offset x for a given scale
+typedef float (stbir__support_fn)(float scale); // function type: presumably the kernel's support width for a given scale — confirm against the filter table
+
+typedef struct // pairs a kernel function with its support function
+{
+ stbir__kernel_fn* kernel;
+ stbir__support_fn* support;
+} stbir__filter_info;
+
+// When upsampling, the contributors are which source pixels contribute.
+// When downsampling, the contributors are which destination pixels are contributed to.
+typedef struct // index range n0..n1 of contributing pixels (first and last both included)
+{
+ int n0; // First contributing pixel
+ int n1; // Last contributing pixel
+} stbir__contributors;
+
+typedef struct // everything one resize operation works with: caller parameters, precomputed filter data and scratch buffers
+{
+ const void* input_data; // source image as passed by the caller
+ int input_w;
+ int input_h;
+ int input_stride_bytes;
+
+ void* output_data; // destination image
+ int output_w;
+ int output_h;
+ int output_stride_bytes;
+
+ float s0, t0, s1, t1; // same names as the source-region corners taken by stbir_resize_region
+
+ float horizontal_shift; // Units: output pixels
+ float vertical_shift; // Units: output pixels
+ float horizontal_scale;
+ float vertical_scale;
+
+ int channels;
+ int alpha_channel;
+ stbir_uint32 flags; // STBIR_FLAG_* bits
+ stbir_datatype type;
+ stbir_filter horizontal_filter;
+ stbir_filter vertical_filter;
+ stbir_edge edge_horizontal;
+ stbir_edge edge_vertical;
+ stbir_colorspace colorspace;
+
+ stbir__contributors* horizontal_contributors; // per-axis contributor ranges and their coefficient arrays
+ float* horizontal_coefficients;
+
+ stbir__contributors* vertical_contributors;
+ float* vertical_coefficients;
+
+ int decode_buffer_pixels;
+ float* decode_buffer;
+
+ float* horizontal_buffer;
+
+ // cache these because ceil/floor are inexplicably showing up in profile
+ int horizontal_coefficient_width;
+ int vertical_coefficient_width;
+ int horizontal_filter_pixel_width;
+ int vertical_filter_pixel_width;
+ int horizontal_filter_pixel_margin;
+ int vertical_filter_pixel_margin;
+ int horizontal_num_contributors;
+ int vertical_num_contributors;
+
+ int ring_buffer_length_bytes; // The length of an individual entry in the ring buffer. The total number of ring buffers is stbir__get_filter_pixel_width(filter)
+ int ring_buffer_first_scanline;
+ int ring_buffer_last_scanline;
+ int ring_buffer_begin_index;
+ float* ring_buffer;
+
+ float* encode_buffer; // A temporary buffer to store floats so we don't lose precision while we do multiply-adds.
+
+ int horizontal_contributors_size; // one *_size field per buffer pointer above; presumably byte sizes — confirm where they are computed
+ int horizontal_coefficients_size;
+ int vertical_contributors_size;
+ int vertical_coefficients_size;
+ int decode_buffer_size;
+ int horizontal_buffer_size;
+ int ring_buffer_size;
+ int encode_buffer_size;
+} stbir__info;
+
+// Returns the smaller of two ints (b when they are equal).
+static stbir__inline int stbir__min(int a, int b)
+{
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+
+// Returns the larger of two ints (b when they are equal).
+static stbir__inline int stbir__max(int a, int b)
+{
+    if (a > b) {
+        return a;
+    }
+    return b;
+}
+
+// Clamps x to the range [0, 1]. Values that compare neither below 0 nor
+// above 1 are returned unchanged.
+static stbir__inline float stbir__saturate(float x)
+{
+    return (x < 0) ? 0.0f
+         : (x > 1) ? 1.0f
+         : x;
+}
+
+#ifdef STBIR_SATURATE_INT
+// Clamps an int to 0..255 and returns it as an 8-bit value.
+static stbir__inline stbir_uint8 stbir__saturate8(int x)
+{
+    if (x < 0) {
+        return 0;
+    }
+    if (x > 255) {
+        return 255;
+    }
+    return (stbir_uint8) x;
+}
+
+// Clamps an int to 0..65535 and returns it as a 16-bit value.
+static stbir__inline stbir_uint16 stbir__saturate16(int x)
+{
+    if (x < 0) {
+        return 0;
+    }
+    if (x > 65535) {
+        return 65535;
+    }
+    return (stbir_uint16) x;
+}
+#endif
+
+static float stbir__srgb_uchar_to_linear_float[256] = { // lookup table indexed by an 8-bit sRGB value, giving the linear value as a float from 0.0 to 1.0
+ 0.000000f, 0.000304f, 0.000607f, 0.000911f, 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f,
+ 0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, 0.006049f, 0.006512f, 0.006995f, 0.007499f,
+ 0.008023f, 0.008568f, 0.009134f, 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f, 0.014444f,
+ 0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f,
+ 0.025187f, 0.026241f, 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, 0.035601f, 0.036889f,
+ 0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f,
+ 0.054480f, 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, 0.068478f, 0.070360f, 0.072272f,
+ 0.074214f, 0.076185f, 0.078187f, 0.080220f, 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f,
+ 0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, 0.114435f, 0.116971f, 0.119538f, 0.122139f,
+ 0.124772f, 0.127438f, 0.130136f, 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, 0.152926f,
+ 0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f,
+ 0.191202f, 0.194618f, 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, 0.223228f, 0.226966f,
+ 0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f,
+ 0.274677f, 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, 0.309469f, 0.313989f, 0.318547f,
+ 0.323143f, 0.327778f, 0.332452f, 0.337164f, 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f,
+ 0.376262f, 0.381326f, 0.386430f, 0.391573f, 0.396755f, 0.401978f, 0.407240f, 0.412543f, 0.417885f, 0.423268f, 0.428691f,
+ 0.434154f, 0.439657f, 0.445201f, 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473532f, 0.479320f, 0.485150f, 0.491021f,
+ 0.496933f, 0.502887f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, 0.533276f, 0.539480f, 0.545725f, 0.552011f, 0.558340f,
+ 0.564712f, 0.571125f, 0.577581f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, 0.623960f, 0.630757f,
+ 0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, 0.672443f, 0.679543f, 0.686685f, 0.693872f, 0.701102f, 0.708376f,
+ 0.715694f, 0.723055f, 0.730461f, 0.737911f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, 0.775822f, 0.783538f, 0.791298f,
+ 0.799103f, 0.806952f, 0.814847f, 0.822786f, 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f,
+ 0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, 0.947307f, 0.955974f, 0.964686f, 0.973445f,
+ 0.982251f, 0.991102f, 1.0f
+};
+
+// Decodes one sRGB-encoded value to linear: a straight division on the
+// low segment of the curve, a 2.4 power law above it.
+static float stbir__srgb_to_linear(float f)
+{
+    if (f <= 0.04045f) {
+        return f / 12.92f;
+    }
+
+    // pow() works in double; the result is narrowed back to float.
+    return (float)pow((f + 0.055f) / 1.055f, 2.4f);
+}
+
+// Encodes one linear value as sRGB: a straight multiplication on the
+// low segment of the curve, a 1/2.4 power law above it.
+static float stbir__linear_to_srgb(float f)
+{
+    if (f <= 0.0031308f) {
+        return f * 12.92f;
+    }
+
+    // pow() works in double; the result is narrowed to float before scaling.
+    return 1.055f * (float)pow(f, 1 / 2.4f) - 0.055f;
+}
+
+#ifndef STBIR_NON_IEEE_FLOAT
+// From https://gist.github.com/rygorous/2203834
+
+typedef union // overlays a float with a 32-bit integer so stbir__linear_to_srgb_uchar can work on the float's bit pattern
+{
+ stbir_uint32 u;
+ float f;
+} stbir__FP32;
+
+static const stbir_uint32 fp32_to_srgb8_tab4[104] = { // each entry packs a bias (high 16 bits) and a scale (low 16 bits), unpacked by stbir__linear_to_srgb_uchar
+ 0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
+ 0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
+ 0x010e0033, 0x01280033, 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
+ 0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
+ 0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
+ 0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
+ 0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
+ 0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
+ 0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
+ 0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
+ 0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
+ 0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
+ 0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
+};
+
+static stbir_uint8 stbir__linear_to_srgb_uchar(float in) // linear float -> 8-bit sRGB via table lookup plus integer interpolation on the float's bit pattern
+{
+ static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps
+ static const stbir__FP32 minval = { (127-13) << 23 }; // bit pattern of 2^-13: biased exponent 127-13, zero mantissa
+ stbir_uint32 tab,bias,scale,t;
+ stbir__FP32 f;
+
+ // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
+ // The tests are carefully written so that NaNs map to 0, same as in the reference
+ // implementation.
+ if (!(in > minval.f)) // written this way to catch NaNs
+ in = minval.f;
+ if (in > almostone.f)
+ in = almostone.f;
+
+ // Do the table lookup and unpack bias, scale
+ f.f = in;
+ tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20]; // index = exponent distance from 2^-13 (times 8) plus the top 3 mantissa bits
+ bias = (tab >> 16) << 9; // high half of the entry, pre-shifted for the final >> 16
+ scale = tab & 0xffff; // low half of the entry
+
+ // Grab next-highest mantissa bits and perform linear interpolation
+ t = (f.u >> 12) & 0xff; // the 8 mantissa bits just below the 3 used for the table index
+ return (unsigned char) ((bias + scale*t) >> 16);
+}
+
+#else
+// sRGB transition values, scaled by 1<<28 (256 ascending thresholds, one per 8-bit output code; searched by the non-IEEE stbir__linear_to_srgb_uchar)
+static int stbir__srgb_offset_to_linear_scaled[256] =
+{
+ 0, 40738, 122216, 203693, 285170, 366648, 448125, 529603,
+ 611080, 692557, 774035, 855852, 942009, 1033024, 1128971, 1229926,
+ 1335959, 1447142, 1563542, 1685229, 1812268, 1944725, 2082664, 2226148,
+ 2375238, 2529996, 2690481, 2856753, 3028870, 3206888, 3390865, 3580856,
+ 3776916, 3979100, 4187460, 4402049, 4622919, 4850123, 5083710, 5323731,
+ 5570236, 5823273, 6082892, 6349140, 6622065, 6901714, 7188133, 7481369,
+ 7781466, 8088471, 8402427, 8723380, 9051372, 9386448, 9728650, 10078021,
+ 10434603, 10798439, 11169569, 11548036, 11933879, 12327139, 12727857, 13136073,
+ 13551826, 13975156, 14406100, 14844697, 15290987, 15745007, 16206795, 16676389,
+ 17153826, 17639142, 18132374, 18633560, 19142734, 19659934, 20185196, 20718552,
+ 21260042, 21809696, 22367554, 22933648, 23508010, 24090680, 24681686, 25281066,
+ 25888850, 26505076, 27129772, 27762974, 28404716, 29055026, 29713942, 30381490,
+ 31057708, 31742624, 32436272, 33138682, 33849884, 34569912, 35298800, 36036568,
+ 36783260, 37538896, 38303512, 39077136, 39859796, 40651528, 41452360, 42262316,
+ 43081432, 43909732, 44747252, 45594016, 46450052, 47315392, 48190064, 49074096,
+ 49967516, 50870356, 51782636, 52704392, 53635648, 54576432, 55526772, 56486700,
+ 57456236, 58435408, 59424248, 60422780, 61431036, 62449032, 63476804, 64514376,
+ 65561776, 66619028, 67686160, 68763192, 69850160, 70947088, 72053992, 73170912,
+ 74297864, 75434880, 76581976, 77739184, 78906536, 80084040, 81271736, 82469648,
+ 83677792, 84896192, 86124888, 87363888, 88613232, 89872928, 91143016, 92423512,
+ 93714432, 95015816, 96327688, 97650056, 98982952, 100326408, 101680440, 103045072,
+ 104420320, 105806224, 107202800, 108610064, 110028048, 111456776, 112896264, 114346544,
+ 115807632, 117279552, 118762328, 120255976, 121760536, 123276016, 124802440, 126339832,
+ 127888216, 129447616, 131018048, 132599544, 134192112, 135795792, 137410592, 139036528,
+ 140673648, 142321952, 143981456, 145652208, 147334208, 149027488, 150732064, 152447968,
+ 154175200, 155913792, 157663776, 159425168, 161197984, 162982240, 164777968, 166585184,
+ 168403904, 170234160, 172075968, 173929344, 175794320, 177670896, 179559120, 181458992,
+ 183370528, 185293776, 187228736, 189175424, 191133888, 193104112, 195086128, 197079968,
+ 199085648, 201103184, 203132592, 205173888, 207227120, 209292272, 211369392, 213458480,
+ 215559568, 217672656, 219797792, 221934976, 224084240, 226245600, 228419056, 230604656,
+ 232802400, 235012320, 237234432, 239468736, 241715280, 243974080, 246245120, 248528464,
+ 250824112, 253132064, 255452368, 257785040, 260130080, 262487520, 264857376, 267239664,
+};
+
+// Converts a linear float to an 8-bit sRGB code by finding the largest index
+// whose threshold in stbir__srgb_offset_to_linear_scaled does not exceed the
+// input scaled by 1<<28.
+static stbir_uint8 stbir__linear_to_srgb_uchar(float f)
+{
+    int scaled = (int) (f * (1 << 28)); // has headroom so you don't need to clamp
+    int code = 0;
+    int step;
+
+    // Eight-step binary search: decide each bit of the result from 128 down to 1.
+    for (step = 128; step > 0; step >>= 1)
+    {
+        int candidate = code + step;
+        if (scaled >= stbir__srgb_offset_to_linear_scaled[candidate])
+            code = candidate;
+    }
+
+    return (stbir_uint8) code;
+}
+#endif
+
+// Trapezoid kernel: 1 for |x| <= 0.5 - scale/2, 0 for |x| >= 0.5 + scale/2,
+// and a linear ramp (outer - |x|) / scale between the two.
+static float stbir__filter_trapezoid(float x, float scale)
+{
+    float halfscale = scale / 2;
+    float outer = 0.5f + halfscale;
+    float inner = 0.5f - halfscale;
+    float dist;
+    STBIR__DEBUG_ASSERT(scale <= 1);
+
+    dist = (float)fabs(x);
+
+    if (dist >= outer)
+        return 0;
+    if (dist <= inner)
+        return 1;
+    return (outer - dist) / scale;
+}
+
+// Support radius of the trapezoid kernel: 0.5 plus half the scale.
+static float stbir__support_trapezoid(float scale)
+{
+    float halfscale = scale / 2;
+    STBIR__DEBUG_ASSERT(scale <= 1);
+    return 0.5f + halfscale;
+}
+
+// Triangle kernel: 1 - |x| for |x| <= 1 and 0 elsewhere. The s argument is ignored.
+static float stbir__filter_triangle(float x, float s)
+{
+    float dist = (float)fabs(x);
+    STBIR__UNUSED_PARAM(s);
+
+    return (dist <= 1.0f) ? (1 - dist) : 0;
+}
+
+// Piecewise cubic kernel with support radius 2:
+//   |x| < 1 : (4 + |x|^2 (3|x| - 6)) / 6
+//   |x| < 2 : (8 + |x| (-12 + |x| (6 - |x|))) / 6
+//   else    : 0
+// The s argument is ignored.
+static float stbir__filter_cubic(float x, float s)
+{
+    float d = (float)fabs(x);
+    STBIR__UNUSED_PARAM(s);
+
+    if (d < 1.0f)
+        return (4 + d*d*(3*d - 6))/6;
+    if (d < 2.0f)
+        return (8 + d*(-12 + d*(6 - d)))/6;
+
+    return (0.0f);
+}
+
+// Catmull-Rom kernel with support radius 2:
+//   |x| < 1 : 1 - |x|^2 (2.5 - 1.5|x|)
+//   |x| < 2 : 2 - |x| (4 + |x| (0.5|x| - 2.5))
+//   else    : 0
+// The s argument is ignored.
+static float stbir__filter_catmullrom(float x, float s)
+{
+    float d = (float)fabs(x);
+    STBIR__UNUSED_PARAM(s);
+
+    if (d < 1.0f)
+        return 1 - d*d*(2.5f - 1.5f*d);
+    if (d < 2.0f)
+        return 2 - d*(4 + d*(0.5f*d - 2.5f));
+
+    return (0.0f);
+}
+
+// Mitchell kernel with support radius 2:
+//   |x| < 1 : (16 + |x|^2 (21|x| - 36)) / 18
+//   |x| < 2 : (32 + |x| (-60 + |x| (36 - 7|x|))) / 18
+//   else    : 0
+// The s argument is ignored.
+static float stbir__filter_mitchell(float x, float s)
+{
+    float d = (float)fabs(x);
+    STBIR__UNUSED_PARAM(s);
+
+    if (d < 1.0f)
+        return (16 + d*d*(21 * d - 36))/18;
+    if (d < 2.0f)
+        return (32 + d*(-60 + d*(36 - 7*d)))/18;
+
+    return (0.0f);
+}
+
+// Support function that reports a radius of 0 for every scale.
+static float stbir__support_zero(float s)
+{
+    STBIR__UNUSED_PARAM(s);
+    return 0.0f;
+}
+
+// Support function that reports a radius of 1 for every scale.
+static float stbir__support_one(float s)
+{
+    STBIR__UNUSED_PARAM(s);
+    return 1.0f;
+}
+
+// Support function that reports a radius of 2 for every scale.
+static float stbir__support_two(float s)
+{
+    STBIR__UNUSED_PARAM(s);
+    return 2.0f;
+}
+
+static stbir__filter_info stbir__filter_info_table[] = { // { kernel, support } function pairs, looked up by stbir_filter value
+ { NULL, stbir__support_zero }, // index 0 has no kernel; stbir__get_filter_pixel_width asserts filter != 0
+ { stbir__filter_trapezoid, stbir__support_trapezoid }, // the only entry whose support varies with scale
+ { stbir__filter_triangle, stbir__support_one },
+ { stbir__filter_cubic, stbir__support_two },
+ { stbir__filter_catmullrom, stbir__support_two },
+ { stbir__filter_mitchell, stbir__support_two },
+};
+
+// Returns 1 when ratio is greater than 1 (callers treat this as upsampling), otherwise 0.
+stbir__inline static int stbir__use_upsampling(float ratio)
+{
+    if (ratio > 1)
+        return 1;
+    return 0;
+}
+
+// Applies stbir__use_upsampling to the horizontal scale stored in stbir_info.
+stbir__inline static int stbir__use_width_upsampling(stbir__info* stbir_info)
+{
+    int upsampling = stbir__use_upsampling(stbir_info->horizontal_scale);
+    return upsampling;
+}
+
+// Applies stbir__use_upsampling to the vertical scale stored in stbir_info.
+stbir__inline static int stbir__use_height_upsampling(stbir__info* stbir_info)
+{
+    int upsampling = stbir__use_upsampling(stbir_info->vertical_scale);
+    return upsampling;
+}
+
+// Maximum number of input samples that can affect one output sample with the
+// given filter: twice the support radius, divided by scale when not upsampling,
+// rounded up. filter must be a non-zero index into stbir__filter_info_table.
+static int stbir__get_filter_pixel_width(stbir_filter filter, float scale)
+{
+    STBIR_ASSERT(filter != 0);
+    STBIR_ASSERT(filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
+
+    if (!stbir__use_upsampling(scale))
+        return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2 / scale);
+
+    return (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2);
+}
+
+// How far buffers must be extended on each side so that filters can read
+// outside the image boundaries: half the filter pixel width (integer division).
+static int stbir__get_filter_pixel_margin(stbir_filter filter, float scale)
+{
+    int pixel_width = stbir__get_filter_pixel_width(filter, scale);
+    return pixel_width / 2;
+}
+
+// Number of coefficient slots reserved per contributor: twice the filter's
+// support radius, rounded up. The support is queried with 1/scale when
+// upsampling and with scale otherwise.
+static int stbir__get_coefficient_width(stbir_filter filter, float scale)
+{
+    if (!stbir__use_upsampling(scale))
+        return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2);
+
+    return (int)ceil(stbir__filter_info_table[filter].support(1 / scale) * 2);
+}
+
+// Number of contributor records along one axis: one per output pixel when
+// upsampling, otherwise one per input pixel plus the filter margin on both sides.
+static int stbir__get_contributors(float scale, stbir_filter filter, int input_size, int output_size)
+{
+    int margin;
+
+    if (stbir__use_upsampling(scale))
+        return output_size;
+
+    margin = stbir__get_filter_pixel_margin(filter, scale);
+    return (input_size + margin * 2);
+}
+
+// Total number of horizontal coefficient slots: contributors times slots per contributor.
+static int stbir__get_total_horizontal_coefficients(stbir__info* info)
+{
+    int per_contributor = stbir__get_coefficient_width(info->horizontal_filter, info->horizontal_scale);
+    return info->horizontal_num_contributors * per_contributor;
+}
+
+// Total number of vertical coefficient slots: contributors times slots per contributor.
+static int stbir__get_total_vertical_coefficients(stbir__info* info)
+{
+    int per_contributor = stbir__get_coefficient_width(info->vertical_filter, info->vertical_scale);
+    return info->vertical_num_contributors * per_contributor;
+}
+
+// Returns the address of the n-th contributor record.
+static stbir__contributors* stbir__get_contributor(stbir__contributors* contributors, int n)
+{
+    return contributors + n;
+}
+
+// Returns the address of coefficient c of contributor n. Each contributor owns
+// stbir__get_coefficient_width(filter, scale) consecutive floats.
+// For perf reasons this code is duplicated in stbir__resample_horizontal_upsample/downsample,
+// if you change it here change it there too.
+static float* stbir__get_coefficient(float* coefficients, stbir_filter filter, float scale, int n, int c)
+{
+    int row_stride = stbir__get_coefficient_width(filter, scale);
+    return coefficients + (row_stride*n + c);
+}
+
+// Maps coordinate n onto an index in [0, max) according to the edge mode.
+// stbir__edge_wrap calls this only when n is outside [0, max).
+//   STBIR_EDGE_ZERO    - always 0 (the decoded pixel is overwritten with zeros later).
+//   STBIR_EDGE_CLAMP   - 0 for negative n, max - 1 for n >= max.
+//   STBIR_EDGE_REFLECT - mirrors n across the edge it crossed; coordinates further
+//                        out than one image length are pinned to max - 1 (left side)
+//                        or 0 (right side).
+//   STBIR_EDGE_WRAP    - n modulo max, always non-negative.
+// Any other mode reports STBIR__UNIMPLEMENTED and returns 0.
+static int stbir__edge_wrap_slow(stbir_edge edge, int n, int max)
+{
+    switch (edge)
+    {
+    case STBIR_EDGE_ZERO:
+        return 0; // we'll decode the wrong pixel here, and then overwrite with 0s later
+
+    case STBIR_EDGE_CLAMP:
+        if (n < 0)
+            return 0;
+
+        if (n >= max)
+            return max - 1;
+
+        return n; // NOTREACHED
+
+    case STBIR_EDGE_REFLECT:
+    {
+        if (n < 0)
+        {
+            // Test the mirrored index, not n itself: "n < max" holds for every
+            // negative n and would let -n land at or beyond max.
+            if (n > -max)
+                return -n;
+            else
+                return max - 1;
+        }
+
+        if (n >= max)
+        {
+            int max2 = max * 2;
+            if (n >= max2)
+                return 0;
+            else
+                return max2 - n - 1;
+        }
+
+        return n; // NOTREACHED
+    }
+
+    case STBIR_EDGE_WRAP:
+        if (n >= 0)
+            return (n % max);
+        else
+        {
+            int m = (-n) % max;
+
+            // A non-zero remainder counts back from the right edge.
+            if (m != 0)
+                m = max - m;
+
+            return (m);
+        }
+
+    default:
+        STBIR__UNIMPLEMENTED("Unimplemented edge type");
+        return 0;
+    }
+}
+
+// Returns n unchanged when it already lies in [0, max); otherwise defers to
+// stbir__edge_wrap_slow. The fast path avoids a per-pixel switch.
+stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max)
+{
+    if (n < 0 || n >= max)
+        return stbir__edge_wrap_slow(edge, n, max);
+
+    return n;
+}
+
+// What input pixels contribute to this output pixel?
+// Projects the filter footprint of output pixel n (center n + 0.5, radius
+// out_filter_radius in output units) into input space by adding out_shift and
+// dividing by scale_ratio. Writes the first and last input pixel index of that
+// footprint and the output pixel's center expressed in input coordinates.
+static void stbir__calculate_sample_range_upsample(int n, float out_filter_radius, float scale_ratio, float out_shift, int* in_first_pixel, int* in_last_pixel, float* in_center_of_out)
+{
+    float center_out = (float)n + 0.5f;
+    float lower_out = center_out - out_filter_radius;
+    float upper_out = center_out + out_filter_radius;
+
+    float lower_in = (lower_out + out_shift) / scale_ratio;
+    float upper_in = (upper_out + out_shift) / scale_ratio;
+
+    *in_center_of_out = (center_out + out_shift) / scale_ratio;
+    *in_first_pixel = (int)(floor(lower_in + 0.5));
+    *in_last_pixel = (int)(floor(upper_in - 0.5));
+}
+
+// What output pixels does this input pixel contribute to?
+// Projects the footprint of input pixel n (center n + 0.5, radius
+// in_pixels_radius in input units) into output space by multiplying with
+// scale_ratio and subtracting out_shift. Writes the first and last output pixel
+// index of that footprint and the input pixel's center in output coordinates.
+static void stbir__calculate_sample_range_downsample(int n, float in_pixels_radius, float scale_ratio, float out_shift, int* out_first_pixel, int* out_last_pixel, float* out_center_of_in)
+{
+    float center_in = (float)n + 0.5f;
+    float lower_in = center_in - in_pixels_radius;
+    float upper_in = center_in + in_pixels_radius;
+
+    float lower_out = lower_in * scale_ratio - out_shift;
+    float upper_out = upper_in * scale_ratio - out_shift;
+
+    *out_center_of_in = center_in * scale_ratio - out_shift;
+    *out_first_pixel = (int)(floor(lower_out + 0.5));
+    *out_last_pixel = (int)(floor(upper_out - 0.5));
+}
+
+static void stbir__calculate_coefficients_upsample(stbir__info* stbir_info, stbir_filter filter, float scale, int in_first_pixel, int in_last_pixel, float in_center_of_out, stbir__contributors* contributor, float* coefficient_group)
+{ /* Computes the filter weights for one output pixel when upsampling.
+   * coefficient_group[i] receives the kernel value for input pixel
+   * in_first_pixel + i, evaluated at its distance from in_center_of_out (the
+   * output pixel's center in input space). The weights are then rescaled so
+   * they sum to 1, and the covered input range is stored in contributor->n0
+   * and contributor->n1 after leading and trailing zero weights are dropped.
+   * stbir_info is not referenced in this function. */
+ int i;
+ float total_filter = 0;
+ float filter_scale;
+
+ STBIR__DEBUG_ASSERT(in_last_pixel - in_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
+
+ contributor->n0 = in_first_pixel;
+ contributor->n1 = in_last_pixel;
+
+ STBIR__DEBUG_ASSERT(contributor->n1 >= contributor->n0);
+
+ for (i = 0; i <= in_last_pixel - in_first_pixel; i++) // the bound shrinks whenever in_first_pixel is advanced below
+ {
+ float in_pixel_center = (float)(i + in_first_pixel) + 0.5f;
+ coefficient_group[i] = stbir__filter_info_table[filter].kernel(in_center_of_out - in_pixel_center, 1 / scale);
+
+ // If the coefficient is zero, skip it. (Don't do the <0 check here, we want the influence of those outside pixels.)
+ if (i == 0 && !coefficient_group[i])
+ {
+ contributor->n0 = ++in_first_pixel; // drop the leading zero-weight pixel from the range
+ i--; // cancels the loop increment so slot 0 is recomputed for the new first pixel
+ continue;
+ }
+
+ total_filter += coefficient_group[i];
+ }
+
+ STBIR__DEBUG_ASSERT(stbir__filter_info_table[filter].kernel((float)(in_last_pixel + 1) + 0.5f - in_center_of_out, 1/scale) == 0); // the pixel just past the range must carry no weight
+
+ STBIR__DEBUG_ASSERT(total_filter > 0.9);
+ STBIR__DEBUG_ASSERT(total_filter < 1.1f); // Make sure it's not way off.
+
+ // Make sure the sum of all coefficients is 1.
+ filter_scale = 1 / total_filter;
+
+ for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
+ coefficient_group[i] *= filter_scale;
+
+ for (i = in_last_pixel - in_first_pixel; i >= 0; i--) // walk backwards over trailing zero weights
+ {
+ if (coefficient_group[i])
+ break;
+
+ // This line has no weight. We can skip it.
+ contributor->n1 = contributor->n0 + i - 1;
+ }
+}
+
+// Computes the weights one input pixel contributes to output pixels
+// out_first_pixel..out_last_pixel when downsampling. coefficient_group[i] is
+// the kernel evaluated at the distance from out_center_of_in (the input pixel's
+// center in output space) to output pixel out_first_pixel + i, multiplied by
+// scale_ratio. The range is stored in contributor->n0/n1, with n1 pulled in
+// past any trailing zero weights. stbir_info is not referenced here.
+static void stbir__calculate_coefficients_downsample(stbir__info* stbir_info, stbir_filter filter, float scale_ratio, int out_first_pixel, int out_last_pixel, float out_center_of_in, stbir__contributors* contributor, float* coefficient_group)
+{
+    int span = out_last_pixel - out_first_pixel;
+    int idx;
+
+    STBIR__DEBUG_ASSERT(out_last_pixel - out_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(scale_ratio) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
+
+    contributor->n0 = out_first_pixel;
+    contributor->n1 = out_last_pixel;
+
+    STBIR__DEBUG_ASSERT(contributor->n1 >= contributor->n0);
+
+    for (idx = 0; idx <= span; idx++)
+    {
+        float center_out = (float)(idx + out_first_pixel) + 0.5f;
+        float distance = center_out - out_center_of_in;
+        coefficient_group[idx] = stbir__filter_info_table[filter].kernel(distance, scale_ratio) * scale_ratio;
+    }
+
+    STBIR__DEBUG_ASSERT(stbir__filter_info_table[filter].kernel((float)(out_last_pixel + 1) + 0.5f - out_center_of_in, scale_ratio) == 0);
+
+    // Walk backwards over trailing zero weights; each one shortens the range by one.
+    idx = span;
+    while (idx >= 0 && !coefficient_group[idx])
+    {
+        contributor->n1 = contributor->n0 + idx - 1;
+        idx--;
+    }
+}
+
+// Post-processes the downsampling coefficients in three passes:
+//   1. For every output pixel, sums the weights it receives from all
+//      contributors and rescales them so they total 1.
+//   2. For every contributor, drops leading zero weights and weights that
+//      target negative output indexes, shifting the remaining weights to the
+//      front of the contributor's row and advancing n0 to match.
+//   3. Clamps every n1 to output_size - 1.
+// The early "break" in pass 1 relies on contributors[j].n0 not decreasing
+// with j (NOTE(review): true for the rows built by stbir__calculate_filters as
+// far as visible here - confirm before reusing elsewhere).
+// stbir_info and shift are not referenced in this function.
+static void stbir__normalize_downsample_coefficients(stbir__info* stbir_info, stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, float shift, int input_size, int output_size)
+{
+    int num_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
+    int num_coefficients = stbir__get_coefficient_width(filter, scale_ratio);
+    int i, j;
+    int skip;
+
+    for (i = 0; i < output_size; i++)
+    {
+        float scale;
+        float total = 0;
+
+        for (j = 0; j < num_contributors; j++)
+        {
+            if (i >= contributors[j].n0 && i <= contributors[j].n1)
+            {
+                float coefficient = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0);
+                total += coefficient;
+            }
+            else if (i < contributors[j].n0)
+                break;
+        }
+
+        STBIR__DEBUG_ASSERT(total > 0.9f);
+        STBIR__DEBUG_ASSERT(total < 1.1f);
+
+        scale = 1 / total;
+
+        for (j = 0; j < num_contributors; j++)
+        {
+            if (i >= contributors[j].n0 && i <= contributors[j].n1)
+                *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0) *= scale;
+            else if (i < contributors[j].n0)
+                break;
+        }
+    }
+
+    // Optimize: Skip zero coefficients and contributions outside of image bounds.
+    // Do this after normalizing because normalization depends on the n0/n1 values.
+    for (j = 0; j < num_contributors; j++)
+    {
+        int range, max;
+
+        // Stop at the end of this contributor's row: a row holding only zeros
+        // must not make the scan run into the next row or past the buffer.
+        skip = 0;
+        while (skip < num_coefficients && *stbir__get_coefficient(coefficients, filter, scale_ratio, j, skip) == 0)
+            skip++;
+
+        contributors[j].n0 += skip;
+
+        // Weights aimed at negative output indexes are discarded as well.
+        while (contributors[j].n0 < 0)
+        {
+            contributors[j].n0++;
+            skip++;
+        }
+
+        range = contributors[j].n1 - contributors[j].n0 + 1;
+        max = stbir__min(num_coefficients, range);
+
+        // Slide the surviving weights down to the start of the row.
+        for (i = 0; i < max; i++)
+        {
+            if (i + skip >= num_coefficients)
+                break;
+
+            *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i) = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i + skip);
+        }
+    }
+
+    // Using min to avoid writing into invalid pixels.
+    for (i = 0; i < num_contributors; i++)
+        contributors[i].n1 = stbir__min(contributors[i].n1, output_size - 1);
+}
+
+// Each scan line uses the same kernel values so we should calculate the kernel
+// values once and then we can use them for every scan line.
+// Fills contributors and coefficients for one axis. When upsampling there is
+// one record per output pixel; otherwise there is one per input pixel
+// (including the margins), followed by a normalization pass.
+static void stbir__calculate_filters(stbir__info* stbir_info, stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, float shift, int input_size, int output_size)
+{
+    int total_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
+    int n;
+
+    if (!stbir__use_upsampling(scale_ratio))
+    {
+        float in_pixels_radius = stbir__filter_info_table[filter].support(scale_ratio) / scale_ratio;
+        int margin = stbir__get_filter_pixel_margin(filter, scale_ratio);
+
+        // Looping through in pixels; record n describes input pixel n - margin.
+        for (n = 0; n < total_contributors; n++)
+        {
+            float out_center_of_in; // Center of the current in pixel in the out pixel space
+            int out_first_pixel, out_last_pixel;
+
+            stbir__calculate_sample_range_downsample(n - margin, in_pixels_radius, scale_ratio, shift, &out_first_pixel, &out_last_pixel, &out_center_of_in);
+
+            stbir__calculate_coefficients_downsample(stbir_info, filter, scale_ratio, out_first_pixel, out_last_pixel, out_center_of_in, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
+        }
+
+        stbir__normalize_downsample_coefficients(stbir_info, contributors, coefficients, filter, scale_ratio, shift, input_size, output_size);
+    }
+    else
+    {
+        float out_pixels_radius = stbir__filter_info_table[filter].support(1 / scale_ratio) * scale_ratio;
+
+        // Looping through out pixels
+        for (n = 0; n < total_contributors; n++)
+        {
+            float in_center_of_out; // Center of the current out pixel in the in pixel space
+            int in_first_pixel, in_last_pixel;
+
+            stbir__calculate_sample_range_upsample(n, out_pixels_radius, scale_ratio, shift, &in_first_pixel, &in_last_pixel, &in_center_of_out);
+
+            stbir__calculate_coefficients_upsample(stbir_info, filter, scale_ratio, in_first_pixel, in_last_pixel, in_center_of_out, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
+        }
+    }
+}
+
+// Returns the decode buffer advanced past the left filter margin, so that
+// index 0 is the first real pixel and negative indexes reach into the margin.
+static float* stbir__get_decode_buffer(stbir__info* stbir_info)
+{
+    int margin_floats = stbir_info->horizontal_filter_pixel_margin * stbir_info->channels;
+    return stbir_info->decode_buffer + margin_floats;
+}
+
+#define STBIR__DECODE(type, colorspace) ((type) * (STBIR_MAX_COLORSPACES) + (colorspace)) /* folds a (type, colorspace) pair into one integer, used as the switch selector and case labels in stbir__decode_scanline */
+
+static void stbir__decode_scanline(stbir__info* stbir_info, int n)
+{ /* Decodes input scanline n into the float decode buffer, covering x from
+   * -horizontal_filter_pixel_margin up to input_w + margin - 1. Rows and
+   * columns outside the image are remapped through stbir__edge_wrap.
+   * Integer inputs are divided by their maximum value (255, 65535,
+   * 4294967295); the sRGB cases additionally convert to linear. Unless
+   * STBIR_FLAG_ALPHA_PREMULTIPLIED is set, the non-alpha channels are then
+   * multiplied by alpha. With STBIR_EDGE_ZERO, pixels outside the image end
+   * up as zeros. */
+ int c;
+ int channels = stbir_info->channels;
+ int alpha_channel = stbir_info->alpha_channel;
+ int type = stbir_info->type;
+ int colorspace = stbir_info->colorspace;
+ int input_w = stbir_info->input_w;
+ int input_stride_bytes = stbir_info->input_stride_bytes;
+ float* decode_buffer = stbir__get_decode_buffer(stbir_info);
+ stbir_edge edge_horizontal = stbir_info->edge_horizontal;
+ stbir_edge edge_vertical = stbir_info->edge_vertical;
+ int in_buffer_row_offset = stbir__edge_wrap(edge_vertical, n, stbir_info->input_h) * input_stride_bytes; // byte offset of the edge-remapped source row
+ const void* input_data = (char *) stbir_info->input_data + in_buffer_row_offset;
+ int max_x = input_w + stbir_info->horizontal_filter_pixel_margin; // one past the last decoded x, right margin included
+ int decode = STBIR__DECODE(type, colorspace);
+
+ int x = -stbir_info->horizontal_filter_pixel_margin; // start inside the left margin (negative decode_buffer indexes)
+
+ // special handling for STBIR_EDGE_ZERO because it needs to return an item that doesn't appear in the input,
+ // and we want to avoid paying overhead on every pixel if not STBIR_EDGE_ZERO
+ if (edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->input_h))
+ {
+ for (; x < max_x; x++)
+ for (c = 0; c < channels; c++)
+ decode_buffer[x*channels + c] = 0;
+ return;
+ }
+
+ switch (decode)
+ {
+ case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
+ for (; x < max_x; x++)
+ {
+ int decode_pixel_index = x * channels;
+ int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
+ for (c = 0; c < channels; c++)
+ decode_buffer[decode_pixel_index + c] = ((float)((const unsigned char*)input_data)[input_pixel_index + c]) / 255;
+ }
+ break;
+
+ case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
+ for (; x < max_x; x++)
+ {
+ int decode_pixel_index = x * channels;
+ int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
+ for (c = 0; c < channels; c++)
+ decode_buffer[decode_pixel_index + c] = stbir__srgb_uchar_to_linear_float[((const unsigned char*)input_data)[input_pixel_index + c]];
+
+ if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) // alpha was run through the sRGB table above; replace it with the plain normalized value
+ decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned char*)input_data)[input_pixel_index + alpha_channel]) / 255;
+ }
+ break;
+
+ case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
+ for (; x < max_x; x++)
+ {
+ int decode_pixel_index = x * channels;
+ int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
+ for (c = 0; c < channels; c++)
+ decode_buffer[decode_pixel_index + c] = ((float)((const unsigned short*)input_data)[input_pixel_index + c]) / 65535;
+ }
+ break;
+
+ case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
+ for (; x < max_x; x++)
+ {
+ int decode_pixel_index = x * channels;
+ int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
+ for (c = 0; c < channels; c++)
+ decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((float)((const unsigned short*)input_data)[input_pixel_index + c]) / 65535);
+
+ if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
+ decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned short*)input_data)[input_pixel_index + alpha_channel]) / 65535;
+ }
+ break;
+
+ case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
+ for (; x < max_x; x++)
+ {
+ int decode_pixel_index = x * channels;
+ int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
+ for (c = 0; c < channels; c++)
+ decode_buffer[decode_pixel_index + c] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / 4294967295); // divide in double, then narrow to float
+ }
+ break;
+
+ case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
+ for (; x < max_x; x++)
+ {
+ int decode_pixel_index = x * channels;
+ int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
+ for (c = 0; c < channels; c++)
+ decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear((float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / 4294967295));
+
+ if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
+ decode_buffer[decode_pixel_index + alpha_channel] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + alpha_channel]) / 4294967295);
+ }
+ break;
+
+ case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
+ for (; x < max_x; x++)
+ {
+ int decode_pixel_index = x * channels;
+ int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
+ for (c = 0; c < channels; c++)
+ decode_buffer[decode_pixel_index + c] = ((const float*)input_data)[input_pixel_index + c]; // float input is copied without scaling
+ }
+ break;
+
+ case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
+ for (; x < max_x; x++)
+ {
+ int decode_pixel_index = x * channels;
+ int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
+ for (c = 0; c < channels; c++)
+ decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((const float*)input_data)[input_pixel_index + c]);
+
+ if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
+ decode_buffer[decode_pixel_index + alpha_channel] = ((const float*)input_data)[input_pixel_index + alpha_channel];
+ }
+
+ break;
+
+ default:
+ STBIR__UNIMPLEMENTED("Unknown type/colorspace/channels combination.");
+ break;
+ }
+
+ if (!(stbir_info->flags & STBIR_FLAG_ALPHA_PREMULTIPLIED)) // input is not premultiplied: multiply color by alpha here
+ {
+ for (x = -stbir_info->horizontal_filter_pixel_margin; x < max_x; x++)
+ {
+ int decode_pixel_index = x * channels;
+
+ // If the alpha value is 0 it will clobber the color values. Make sure it's not.
+ float alpha = decode_buffer[decode_pixel_index + alpha_channel];
+#ifndef STBIR_NO_ALPHA_EPSILON
+ if (stbir_info->type != STBIR_TYPE_FLOAT) { // the epsilon is applied to non-float input types only
+ alpha += STBIR_ALPHA_EPSILON;
+ decode_buffer[decode_pixel_index + alpha_channel] = alpha;
+ }
+#endif
+ for (c = 0; c < channels; c++)
+ {
+ if (c == alpha_channel)
+ continue;
+
+ decode_buffer[decode_pixel_index + c] *= alpha;
+ }
+ }
+ }
+
+ if (edge_horizontal == STBIR_EDGE_ZERO) // the margins were decoded from a remapped pixel above; overwrite them with zeros
+ {
+ for (x = -stbir_info->horizontal_filter_pixel_margin; x < 0; x++)
+ {
+ for (c = 0; c < channels; c++)
+ decode_buffer[x*channels + c] = 0;
+ }
+ for (x = input_w; x < max_x; x++)
+ {
+ for (c = 0; c < channels; c++)
+ decode_buffer[x*channels + c] = 0;
+ }
+ }
+}
+
+// Returns the start of entry number index in a ring buffer whose entries are
+// ring_buffer_length floats each.
+static float* stbir__get_ring_buffer_entry(float* ring_buffer, int index, int ring_buffer_length)
+{
+    return ring_buffer + index * ring_buffer_length;
+}
+
+// Claims the next ring buffer entry for scanline n, zero-fills it and returns it.
+// A negative ring_buffer_begin_index marks an empty ring: the entry then goes
+// into slot 0 and n becomes the first scanline. Otherwise the slot following
+// the current last scanline is used, wrapping at vertical_filter_pixel_width.
+// In both cases n is recorded as the last scanline held.
+static float* stbir__add_empty_ring_buffer_entry(stbir__info* stbir_info, int n)
+{
+    int slot;
+    float* entry;
+
+    if (stbir_info->ring_buffer_begin_index >= 0)
+    {
+        slot = (stbir_info->ring_buffer_begin_index + (stbir_info->ring_buffer_last_scanline - stbir_info->ring_buffer_first_scanline) + 1) % stbir_info->vertical_filter_pixel_width;
+        STBIR__DEBUG_ASSERT(slot != stbir_info->ring_buffer_begin_index);
+    }
+    else
+    {
+        stbir_info->ring_buffer_begin_index = 0;
+        stbir_info->ring_buffer_first_scanline = n;
+        slot = 0;
+    }
+
+    entry = stbir__get_ring_buffer_entry(stbir_info->ring_buffer, slot, stbir_info->ring_buffer_length_bytes / sizeof(float));
+    memset(entry, 0, stbir_info->ring_buffer_length_bytes);
+
+    stbir_info->ring_buffer_last_scanline = n;
+
+    return entry;
+}
+
+
+static void stbir__resample_horizontal_upsample(stbir__info* stbir_info, int n, float* output_buffer)
+{ /* Gathers one horizontally resampled scanline: for each of the output_w
+   * output pixels, adds to output_buffer the decode-buffer pixels n0..n1 of
+   * that pixel's contributor record, each multiplied by the matching entry
+   * of that pixel's coefficient row. Results are accumulated with +=; both
+   * callers visible in this file pass a buffer that was just zero-filled.
+   * The switch provides unrolled bodies for 1 to 4 channels and a generic
+   * loop for other counts. The parameter n and the local kernel_pixel_width
+   * are not used. */
+ int x, k;
+ int output_w = stbir_info->output_w;
+ int kernel_pixel_width = stbir_info->horizontal_filter_pixel_width;
+ int channels = stbir_info->channels;
+ float* decode_buffer = stbir__get_decode_buffer(stbir_info);
+ stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
+ float* horizontal_coefficients = stbir_info->horizontal_coefficients;
+ int coefficient_width = stbir_info->horizontal_coefficient_width;
+
+ for (x = 0; x < output_w; x++)
+ {
+ int n0 = horizontal_contributors[x].n0;
+ int n1 = horizontal_contributors[x].n1;
+
+ int out_pixel_index = x * channels;
+ int coefficient_group = coefficient_width * x; // start of this output pixel's coefficient row
+ int coefficient_counter = 0;
+
+ STBIR__DEBUG_ASSERT(n1 >= n0);
+ STBIR__DEBUG_ASSERT(n0 >= -stbir_info->horizontal_filter_pixel_margin);
+ STBIR__DEBUG_ASSERT(n1 >= -stbir_info->horizontal_filter_pixel_margin);
+ STBIR__DEBUG_ASSERT(n0 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin);
+ STBIR__DEBUG_ASSERT(n1 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin);
+
+ switch (channels) {
+ case 1:
+ for (k = n0; k <= n1; k++)
+ {
+ int in_pixel_index = k * 1;
+ float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
+ STBIR__DEBUG_ASSERT(coefficient != 0);
+ output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
+ }
+ break;
+ case 2:
+ for (k = n0; k <= n1; k++)
+ {
+ int in_pixel_index = k * 2;
+ float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
+ STBIR__DEBUG_ASSERT(coefficient != 0);
+ output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
+ output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
+ }
+ break;
+ case 3:
+ for (k = n0; k <= n1; k++)
+ {
+ int in_pixel_index = k * 3;
+ float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
+ STBIR__DEBUG_ASSERT(coefficient != 0);
+ output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
+ output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
+ output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
+ }
+ break;
+ case 4:
+ for (k = n0; k <= n1; k++)
+ {
+ int in_pixel_index = k * 4;
+ float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
+ STBIR__DEBUG_ASSERT(coefficient != 0);
+ output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
+ output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
+ output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
+ output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
+ }
+ break;
+ default:
+ for (k = n0; k <= n1; k++)
+ {
+ int in_pixel_index = k * channels;
+ float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
+ int c;
+ STBIR__DEBUG_ASSERT(coefficient != 0);
+ for (c = 0; c < channels; c++)
+ output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
+ }
+ break;
+ }
+ }
+}
+
+static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, int n, float* output_buffer)
+{ /* Scatters one decoded scanline into output_buffer: contributor record x
+   * describes decoded pixel x - filter_pixel_margin, and that pixel's value
+   * times the matching entry of record x's coefficient row is added to each
+   * output pixel n0..n1. Results are accumulated with +=; both callers
+   * visible in this file pass a buffer that was just zero-filled. The switch
+   * provides unrolled bodies for 1 to 4 channels and a generic loop for
+   * other counts. The parameter n and the locals output_w and
+   * kernel_pixel_width are not used. */
+ int x, k;
+ int input_w = stbir_info->input_w;
+ int output_w = stbir_info->output_w;
+ int kernel_pixel_width = stbir_info->horizontal_filter_pixel_width;
+ int channels = stbir_info->channels;
+ float* decode_buffer = stbir__get_decode_buffer(stbir_info);
+ stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
+ float* horizontal_coefficients = stbir_info->horizontal_coefficients;
+ int coefficient_width = stbir_info->horizontal_coefficient_width;
+ int filter_pixel_margin = stbir_info->horizontal_filter_pixel_margin;
+ int max_x = input_w + filter_pixel_margin * 2; // number of records walked: every input pixel plus both margins
+
+ STBIR__DEBUG_ASSERT(!stbir__use_width_upsampling(stbir_info));
+
+ switch (channels) {
+ case 1:
+ for (x = 0; x < max_x; x++)
+ {
+ int n0 = horizontal_contributors[x].n0;
+ int n1 = horizontal_contributors[x].n1;
+
+ int in_x = x - filter_pixel_margin; // record index back to decode-buffer position (negative inside the left margin)
+ int in_pixel_index = in_x * 1;
+ int max_n = n1;
+ int coefficient_group = coefficient_width * x;
+
+ for (k = n0; k <= max_n; k++)
+ {
+ int out_pixel_index = k * 1;
+ float coefficient = horizontal_coefficients[coefficient_group + k - n0];
+ STBIR__DEBUG_ASSERT(coefficient != 0);
+ output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
+ }
+ }
+ break;
+
+ case 2:
+ for (x = 0; x < max_x; x++)
+ {
+ int n0 = horizontal_contributors[x].n0;
+ int n1 = horizontal_contributors[x].n1;
+
+ int in_x = x - filter_pixel_margin;
+ int in_pixel_index = in_x * 2;
+ int max_n = n1;
+ int coefficient_group = coefficient_width * x;
+
+ for (k = n0; k <= max_n; k++)
+ {
+ int out_pixel_index = k * 2;
+ float coefficient = horizontal_coefficients[coefficient_group + k - n0];
+ STBIR__DEBUG_ASSERT(coefficient != 0);
+ output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
+ output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
+ }
+ }
+ break;
+
+ case 3:
+ for (x = 0; x < max_x; x++)
+ {
+ int n0 = horizontal_contributors[x].n0;
+ int n1 = horizontal_contributors[x].n1;
+
+ int in_x = x - filter_pixel_margin;
+ int in_pixel_index = in_x * 3;
+ int max_n = n1;
+ int coefficient_group = coefficient_width * x;
+
+ for (k = n0; k <= max_n; k++)
+ {
+ int out_pixel_index = k * 3;
+ float coefficient = horizontal_coefficients[coefficient_group + k - n0];
+ STBIR__DEBUG_ASSERT(coefficient != 0);
+ output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
+ output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
+ output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
+ }
+ }
+ break;
+
+ case 4:
+ for (x = 0; x < max_x; x++)
+ {
+ int n0 = horizontal_contributors[x].n0;
+ int n1 = horizontal_contributors[x].n1;
+
+ int in_x = x - filter_pixel_margin;
+ int in_pixel_index = in_x * 4;
+ int max_n = n1;
+ int coefficient_group = coefficient_width * x;
+
+ for (k = n0; k <= max_n; k++)
+ {
+ int out_pixel_index = k * 4;
+ float coefficient = horizontal_coefficients[coefficient_group + k - n0];
+ STBIR__DEBUG_ASSERT(coefficient != 0);
+ output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
+ output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
+ output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
+ output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
+ }
+ }
+ break;
+
+ default:
+ for (x = 0; x < max_x; x++)
+ {
+ int n0 = horizontal_contributors[x].n0;
+ int n1 = horizontal_contributors[x].n1;
+
+ int in_x = x - filter_pixel_margin;
+ int in_pixel_index = in_x * channels;
+ int max_n = n1;
+ int coefficient_group = coefficient_width * x;
+
+ for (k = n0; k <= max_n; k++)
+ {
+ int c;
+ int out_pixel_index = k * channels;
+ float coefficient = horizontal_coefficients[coefficient_group + k - n0];
+ STBIR__DEBUG_ASSERT(coefficient != 0);
+ for (c = 0; c < channels; c++)
+ output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
+ }
+ }
+ break;
+ }
+}
+
+static void stbir__decode_and_resample_upsample(stbir__info* stbir_info, int n)
+{
+    // Stage 1: bring source scanline n into the decode buffer.
+    stbir__decode_scanline(stbir_info, n);
+
+    // Stage 2: resample the decoded row horizontally, writing the result into
+    // a ring buffer entry added for scanline n. The vertical pass later reads
+    // its input from the ring buffer.
+    if (!stbir__use_width_upsampling(stbir_info))
+    {
+        stbir__resample_horizontal_downsample(stbir_info, n, stbir__add_empty_ring_buffer_entry(stbir_info, n));
+        return;
+    }
+
+    stbir__resample_horizontal_upsample(stbir_info, n, stbir__add_empty_ring_buffer_entry(stbir_info, n));
+}
+
+static void stbir__decode_and_resample_downsample(stbir__info* stbir_info, int n)
+{
+    size_t horizontal_row_bytes;
+
+    // Stage 1: bring source scanline n into the decode buffer.
+    stbir__decode_scanline(stbir_info, n);
+
+    // Stage 2: clear the horizontal buffer (one output-width row of floats)
+    // before the horizontal pass writes into it.
+    horizontal_row_bytes = stbir_info->output_w * stbir_info->channels * sizeof(float);
+    memset(stbir_info->horizontal_buffer, 0, horizontal_row_bytes);
+
+    // Stage 3: resample horizontally into the horizontal buffer; the caller
+    // then distributes that row into the ring buffer scanlines.
+    if (!stbir__use_width_upsampling(stbir_info))
+    {
+        stbir__resample_horizontal_downsample(stbir_info, n, stbir_info->horizontal_buffer);
+        return;
+    }
+
+    stbir__resample_horizontal_upsample(stbir_info, n, stbir_info->horizontal_buffer);
+}
+
+// Look up the ring buffer entry that holds scanline `get_scanline`.
+// The entry index is the scanline's distance from `first_scanline`, offset by
+// `begin_index` and wrapped modulo `ring_buffer_size`.
+static float* stbir__get_ring_buffer_scanline(int get_scanline, float* ring_buffer, int begin_index, int first_scanline, int ring_buffer_size, int ring_buffer_length)
+{
+    int scanlines_past_first = get_scanline - first_scanline;
+    int wrapped_slot = (begin_index + scanlines_past_first) % ring_buffer_size;
+
+    return stbir__get_ring_buffer_entry(ring_buffer, wrapped_slot, ring_buffer_length);
+}
+
+
+// Converts one scanline of float samples into the caller's pixel format.
+//
+//   stbir_info     - supplies the flags consulted here (premultiplied alpha,
+//                    alpha-uses-colorspace)
+//   num_pixels     - number of pixels in the scanline
+//   output_buffer  - destination row; written as uint8/uint16/uint32/float
+//                    according to `decode`
+//   encode_buffer  - source row of interleaved floats; modified in place when
+//                    the colour channels have to be divided by alpha
+//   channels       - samples per pixel
+//   alpha_channel  - index of the alpha sample inside a pixel
+//   decode         - STBIR__DECODE(type, colorspace) selector for the switch
+//
+// An unrecognised `decode` value reaches STBIR__UNIMPLEMENTED.
+static void stbir__encode_scanline(stbir__info* stbir_info, int num_pixels, void *output_buffer, float *encode_buffer, int channels, int alpha_channel, int decode)
+{
+    int x;
+    int n;
+    int num_nonalpha;
+    stbir_uint16 nonalpha[STBIR_MAX_CHANNELS];
+
+    // If the caller's data is not premultiplied, divide every non-alpha
+    // channel by alpha (a zero alpha yields a zero reciprocal, not a division).
+    if (!(stbir_info->flags&STBIR_FLAG_ALPHA_PREMULTIPLIED))
+    {
+        for (x=0; x < num_pixels; ++x)
+        {
+            int pixel_index = x*channels;
+
+            float alpha = encode_buffer[pixel_index + alpha_channel];
+            float reciprocal_alpha = alpha ? 1.0f / alpha : 0;
+
+            // unrolling this produced a 1% slowdown upscaling a large RGBA linear-space image on my machine - stb
+            for (n = 0; n < channels; n++)
+                if (n != alpha_channel)
+                    encode_buffer[pixel_index + n] *= reciprocal_alpha;
+
+            // We added in a small epsilon to prevent the color channel from being deleted with zero alpha.
+            // Because we only add it for integer types, it will automatically be discarded on integer
+            // conversion, so we don't need to subtract it back out (which would be problematic for
+            // numeric precision reasons).
+        }
+    }
+
+    // build a table of all channels that need colorspace correction, so
+    // we don't perform colorspace correction on channels that don't need it.
+    for (x=0, num_nonalpha=0; x < channels; ++x)
+        if (x != alpha_channel || (stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE))
+            nonalpha[num_nonalpha++] = x;
+
+    #define STBIR__ROUND_INT(f) ((int) ((f)+0.5))
+    #define STBIR__ROUND_UINT(f) ((stbir_uint32) ((f)+0.5))
+
+    #ifdef STBIR__SATURATE_INT
+    #define STBIR__ENCODE_LINEAR8(f) stbir__saturate8 (STBIR__ROUND_INT((f) * 255 ))
+    #define STBIR__ENCODE_LINEAR16(f) stbir__saturate16(STBIR__ROUND_INT((f) * 65535))
+    #else
+    #define STBIR__ENCODE_LINEAR8(f) (unsigned char ) STBIR__ROUND_INT(stbir__saturate(f) * 255 )
+    #define STBIR__ENCODE_LINEAR16(f) (unsigned short) STBIR__ROUND_INT(stbir__saturate(f) * 65535)
+    #endif
+
+    switch (decode)
+    {
+        case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
+            for (x=0; x < num_pixels; ++x)
+            {
+                int pixel_index = x*channels;
+
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((unsigned char*)output_buffer)[index] = STBIR__ENCODE_LINEAR8(encode_buffer[index]);
+                }
+            }
+            break;
+
+        case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
+            for (x=0; x < num_pixels; ++x)
+            {
+                int pixel_index = x*channels;
+
+                for (n = 0; n < num_nonalpha; n++)
+                {
+                    int index = pixel_index + nonalpha[n];
+                    ((unsigned char*)output_buffer)[index] = stbir__linear_to_srgb_uchar(encode_buffer[index]);
+                }
+
+                // Alpha was left out of the nonalpha table, so encode it linearly.
+                if (!(stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE))
+                    ((unsigned char *)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR8(encode_buffer[pixel_index+alpha_channel]);
+            }
+            break;
+
+        case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
+            for (x=0; x < num_pixels; ++x)
+            {
+                int pixel_index = x*channels;
+
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((unsigned short*)output_buffer)[index] = STBIR__ENCODE_LINEAR16(encode_buffer[index]);
+                }
+            }
+            break;
+
+        case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
+            for (x=0; x < num_pixels; ++x)
+            {
+                int pixel_index = x*channels;
+
+                for (n = 0; n < num_nonalpha; n++)
+                {
+                    int index = pixel_index + nonalpha[n];
+                    ((unsigned short*)output_buffer)[index] = (unsigned short)STBIR__ROUND_INT(stbir__linear_to_srgb(stbir__saturate(encode_buffer[index])) * 65535);
+                }
+
+                if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
+                    ((unsigned short*)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR16(encode_buffer[pixel_index + alpha_channel]);
+            }
+
+            break;
+
+        case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
+            for (x=0; x < num_pixels; ++x)
+            {
+                int pixel_index = x*channels;
+
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__saturate(encode_buffer[index])) * 4294967295);
+                }
+            }
+            break;
+
+        case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
+            for (x=0; x < num_pixels; ++x)
+            {
+                int pixel_index = x*channels;
+
+                for (n = 0; n < num_nonalpha; n++)
+                {
+                    int index = pixel_index + nonalpha[n];
+                    ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__linear_to_srgb(stbir__saturate(encode_buffer[index]))) * 4294967295);
+                }
+
+                // Round through the unsigned macro: the scaled value can reach
+                // 4294967295.5, which does not fit in the int that
+                // STBIR__ROUND_INT converts to (out-of-range float-to-int
+                // conversion is undefined behaviour).
+                if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
+                    ((unsigned int*)output_buffer)[pixel_index + alpha_channel] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__saturate(encode_buffer[pixel_index + alpha_channel])) * 4294967295);
+            }
+            break;
+
+        case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
+            for (x=0; x < num_pixels; ++x)
+            {
+                int pixel_index = x*channels;
+
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((float*)output_buffer)[index] = encode_buffer[index];
+                }
+            }
+            break;
+
+        case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
+            for (x=0; x < num_pixels; ++x)
+            {
+                int pixel_index = x*channels;
+
+                for (n = 0; n < num_nonalpha; n++)
+                {
+                    int index = pixel_index + nonalpha[n];
+                    ((float*)output_buffer)[index] = stbir__linear_to_srgb(encode_buffer[index]);
+                }
+
+                if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
+                    ((float*)output_buffer)[pixel_index + alpha_channel] = encode_buffer[pixel_index + alpha_channel];
+            }
+            break;
+
+        default:
+            STBIR__UNIMPLEMENTED("Unknown type/colorspace/channels combination.");
+            break;
+    }
+}
+
+// Produces output row n for the height-upsampling path.
+//
+// The row is accumulated in encode_buffer as a weighted sum of the ring buffer
+// scanlines n0..n1 listed for vertical contributor n, then handed to
+// stbir__encode_scanline, which writes it at byte offset
+// n * output_stride_bytes inside output_data.
+//
+// in_first_scanline, in_last_scanline and in_center_of_out are not referenced
+// in this function body.
+static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, int in_first_scanline, int in_last_scanline, float in_center_of_out)
+{
+ int x, k;
+ int output_w = stbir_info->output_w;
+ stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
+ float* vertical_coefficients = stbir_info->vertical_coefficients;
+ int channels = stbir_info->channels;
+ int alpha_channel = stbir_info->alpha_channel;
+ int type = stbir_info->type;
+ int colorspace = stbir_info->colorspace;
+ int kernel_pixel_width = stbir_info->vertical_filter_pixel_width;
+ void* output_data = stbir_info->output_data;
+ float* encode_buffer = stbir_info->encode_buffer;
+ int decode = STBIR__DECODE(type, colorspace);
+ int coefficient_width = stbir_info->vertical_coefficient_width;
+ int coefficient_counter;
+ // When upsampling, the contributor entry is indexed directly by the output row.
+ int contributor = n;
+
+ float* ring_buffer = stbir_info->ring_buffer;
+ int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
+ int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
+ // Read here but not used further down in this function.
+ int ring_buffer_last_scanline = stbir_info->ring_buffer_last_scanline;
+ // Length of one ring buffer entry, in floats.
+ int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
+
+ int n0,n1, output_row_start;
+ // Offset of this contributor's coefficients inside vertical_coefficients.
+ int coefficient_group = coefficient_width * contributor;
+
+ n0 = vertical_contributors[contributor].n0;
+ n1 = vertical_contributors[contributor].n1;
+
+ output_row_start = n * stbir_info->output_stride_bytes;
+
+ STBIR__DEBUG_ASSERT(stbir__use_height_upsampling(stbir_info));
+
+ // The loops below accumulate with +=, so start from a zeroed row.
+ memset(encode_buffer, 0, output_w * sizeof(float) * channels);
+
+ // I tried reblocking this for better cache usage of encode_buffer
+ // (using x_outer, k, x_inner), but it lost speed. -- stb
+
+ // Each case is the same accumulation, unrolled for a fixed channel count;
+ // the default case handles any other count with an inner channel loop.
+ coefficient_counter = 0;
+ switch (channels) {
+ case 1:
+ for (k = n0; k <= n1; k++)
+ {
+ int coefficient_index = coefficient_counter++;
+ float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
+ float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
+ for (x = 0; x < output_w; ++x)
+ {
+ int in_pixel_index = x * 1;
+ encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
+ }
+ }
+ break;
+ case 2:
+ for (k = n0; k <= n1; k++)
+ {
+ int coefficient_index = coefficient_counter++;
+ float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
+ float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
+ for (x = 0; x < output_w; ++x)
+ {
+ int in_pixel_index = x * 2;
+ encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
+ encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
+ }
+ }
+ break;
+ case 3:
+ for (k = n0; k <= n1; k++)
+ {
+ int coefficient_index = coefficient_counter++;
+ float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
+ float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
+ for (x = 0; x < output_w; ++x)
+ {
+ int in_pixel_index = x * 3;
+ encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
+ encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
+ encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
+ }
+ }
+ break;
+ case 4:
+ for (k = n0; k <= n1; k++)
+ {
+ int coefficient_index = coefficient_counter++;
+ float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
+ float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
+ for (x = 0; x < output_w; ++x)
+ {
+ int in_pixel_index = x * 4;
+ encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
+ encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
+ encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
+ encode_buffer[in_pixel_index + 3] += ring_buffer_entry[in_pixel_index + 3] * coefficient;
+ }
+ }
+ break;
+ default:
+ for (k = n0; k <= n1; k++)
+ {
+ int coefficient_index = coefficient_counter++;
+ float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
+ float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
+ for (x = 0; x < output_w; ++x)
+ {
+ int in_pixel_index = x * channels;
+ int c;
+ for (c = 0; c < channels; c++)
+ encode_buffer[in_pixel_index + c] += ring_buffer_entry[in_pixel_index + c] * coefficient;
+ }
+ }
+ break;
+ }
+ // Convert the accumulated float row to the output format and store it.
+ stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, encode_buffer, channels, alpha_channel, decode);
+}
+
+// Scatters the current horizontal_buffer row into the ring buffer for the
+// height-downsampling path.
+//
+// For the vertical contributor at index n + vertical_filter_pixel_margin, each
+// ring buffer scanline k in n0..n1 receives horizontal_buffer scaled by that
+// contributor's coefficient for k, added onto whatever the scanline already
+// holds. Nothing is encoded or written to the output image here.
+//
+// in_first_scanline, in_last_scanline and in_center_of_out are not referenced
+// in this function body.
+static void stbir__resample_vertical_downsample(stbir__info* stbir_info, int n, int in_first_scanline, int in_last_scanline, float in_center_of_out)
+{
+ int x, k;
+ int output_w = stbir_info->output_w;
+ // output_h, output_data and ring_buffer_last_scanline are read here but
+ // not used further down in this function.
+ int output_h = stbir_info->output_h;
+ stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
+ float* vertical_coefficients = stbir_info->vertical_coefficients;
+ int channels = stbir_info->channels;
+ int kernel_pixel_width = stbir_info->vertical_filter_pixel_width;
+ void* output_data = stbir_info->output_data;
+ float* horizontal_buffer = stbir_info->horizontal_buffer;
+ int coefficient_width = stbir_info->vertical_coefficient_width;
+ // n may be negative (the caller starts at -margin); adding the margin
+ // turns it into a contributor array index.
+ int contributor = n + stbir_info->vertical_filter_pixel_margin;
+
+ float* ring_buffer = stbir_info->ring_buffer;
+ int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
+ int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
+ int ring_buffer_last_scanline = stbir_info->ring_buffer_last_scanline;
+ // Length of one ring buffer entry, in floats.
+ int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
+ int n0,n1;
+
+ n0 = vertical_contributors[contributor].n0;
+ n1 = vertical_contributors[contributor].n1;
+
+ STBIR__DEBUG_ASSERT(!stbir__use_height_upsampling(stbir_info));
+
+ for (k = n0; k <= n1; k++)
+ {
+ int coefficient_index = k - n0;
+ int coefficient_group = coefficient_width * contributor;
+ float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
+
+ float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
+
+ // Each case is the same accumulation, unrolled for a fixed channel count;
+ // the default case handles any other count with an inner channel loop.
+ switch (channels) {
+ case 1:
+ for (x = 0; x < output_w; x++)
+ {
+ int in_pixel_index = x * 1;
+ ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
+ }
+ break;
+ case 2:
+ for (x = 0; x < output_w; x++)
+ {
+ int in_pixel_index = x * 2;
+ ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
+ ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
+ }
+ break;
+ case 3:
+ for (x = 0; x < output_w; x++)
+ {
+ int in_pixel_index = x * 3;
+ ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
+ ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
+ ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
+ }
+ break;
+ case 4:
+ for (x = 0; x < output_w; x++)
+ {
+ int in_pixel_index = x * 4;
+ ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
+ ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
+ ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
+ ring_buffer_entry[in_pixel_index + 3] += horizontal_buffer[in_pixel_index + 3] * coefficient;
+ }
+ break;
+ default:
+ for (x = 0; x < output_w; x++)
+ {
+ int in_pixel_index = x * channels;
+
+ int c;
+ for (c = 0; c < channels; c++)
+ ring_buffer_entry[in_pixel_index + c] += horizontal_buffer[in_pixel_index + c] * coefficient;
+ }
+ break;
+ }
+ }
+}
+
+// Main loop for the height-upsampling path: walks the output rows top to
+// bottom, keeps the ring buffer populated with exactly the input scanlines the
+// current row samples from, and emits the row.
+static void stbir__buffer_loop_upsample(stbir__info* stbir_info)
+{
+    int out_row;
+    float v_scale = stbir_info->vertical_scale;
+    float out_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(1/v_scale) * v_scale;
+
+    STBIR__DEBUG_ASSERT(stbir__use_height_upsampling(stbir_info));
+
+    for (out_row = 0; out_row < stbir_info->output_h; out_row++)
+    {
+        float center_in_input = 0; // centre of this output row, in input scanline space
+        int first_needed = 0;
+        int last_needed = 0;
+
+        stbir__calculate_sample_range_upsample(out_row, out_radius, v_scale, stbir_info->vertical_shift, &first_needed, &last_needed, &center_in_input);
+
+        STBIR__DEBUG_ASSERT(last_needed - first_needed <= stbir_info->vertical_filter_pixel_width);
+
+        // Evict scanlines that lie before the first one this row needs.
+        if (stbir_info->ring_buffer_begin_index >= 0)
+        {
+            while (stbir_info->ring_buffer_first_scanline < first_needed)
+            {
+                if (stbir_info->ring_buffer_first_scanline != stbir_info->ring_buffer_last_scanline)
+                {
+                    // More than one scanline held: drop the oldest and advance.
+                    stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->vertical_filter_pixel_width;
+                    stbir_info->ring_buffer_first_scanline++;
+                    continue;
+                }
+
+                // The only remaining scanline is stale too: mark the ring
+                // buffer as empty and stop evicting.
+                stbir_info->ring_buffer_begin_index = -1;
+                stbir_info->ring_buffer_first_scanline = 0;
+                stbir_info->ring_buffer_last_scanline = 0;
+                break;
+            }
+        }
+
+        // Refill: seed an empty ring buffer with the first needed scanline...
+        if (stbir_info->ring_buffer_begin_index < 0)
+        {
+            stbir__decode_and_resample_upsample(stbir_info, first_needed);
+        }
+
+        // ...then append scanlines until the last needed one is present.
+        while (stbir_info->ring_buffer_last_scanline < last_needed)
+        {
+            stbir__decode_and_resample_upsample(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
+        }
+
+        // Everything this row samples from is buffered; write the row.
+        stbir__resample_vertical_upsample(stbir_info, out_row, first_needed, last_needed, center_in_input);
+
+        STBIR_PROGRESS_REPORT((float)out_row / stbir_info->output_h);
+    }
+}
+
+// Flushes ring buffer scanlines that come before first_necessary_scanline.
+// Each flushed scanline whose index is a valid output row (0 <= row < output_h)
+// is encoded into output_data before it is dropped. Does nothing when the ring
+// buffer is already empty.
+static void stbir__empty_ring_buffer(stbir__info* stbir_info, int first_necessary_scanline)
+{
+    int row_stride_bytes = stbir_info->output_stride_bytes;
+    int num_channels = stbir_info->channels;
+    int alpha_index = stbir_info->alpha_channel;
+    int type = stbir_info->type;
+    int colorspace = stbir_info->colorspace;
+    int row_pixels = stbir_info->output_w;
+    void* dest_image = stbir_info->output_data;
+    int format = STBIR__DECODE(type, colorspace);
+
+    float* ring = stbir_info->ring_buffer;
+    int entry_floats = stbir_info->ring_buffer_length_bytes/sizeof(float);
+
+    if (stbir_info->ring_buffer_begin_index < 0)
+        return;
+
+    while (stbir_info->ring_buffer_first_scanline < first_necessary_scanline)
+    {
+        // Write the oldest scanline out if it maps to a real output row.
+        if (stbir_info->ring_buffer_first_scanline >= 0 && stbir_info->ring_buffer_first_scanline < stbir_info->output_h)
+        {
+            int dest_offset = stbir_info->ring_buffer_first_scanline * row_stride_bytes;
+            float* oldest_entry = stbir__get_ring_buffer_entry(ring, stbir_info->ring_buffer_begin_index, entry_floats);
+            stbir__encode_scanline(stbir_info, row_pixels, (char *) dest_image + dest_offset, oldest_entry, num_channels, alpha_index, format);
+            STBIR_PROGRESS_REPORT((float)stbir_info->ring_buffer_first_scanline / stbir_info->output_h);
+        }
+
+        if (stbir_info->ring_buffer_first_scanline != stbir_info->ring_buffer_last_scanline)
+        {
+            // More scanlines remain: drop the oldest and advance.
+            stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->vertical_filter_pixel_width;
+            stbir_info->ring_buffer_first_scanline++;
+            continue;
+        }
+
+        // That was the last scanline held: reset to the empty state and stop.
+        stbir_info->ring_buffer_begin_index = -1;
+        stbir_info->ring_buffer_first_scanline = 0;
+        stbir_info->ring_buffer_last_scanline = 0;
+        break;
+    }
+}
+
+// Main loop for the height-downsampling path: walks the input scanlines
+// (including the filter margin above and below the image), resamples each one
+// horizontally and accumulates it into the output rows it contributes to.
+// Finished output rows are flushed from the ring buffer as the loop advances,
+// and whatever remains is flushed at the end.
+static void stbir__buffer_loop_downsample(stbir__info* stbir_info)
+{
+    int in_row;
+    float v_scale = stbir_info->vertical_scale;
+    int out_rows = stbir_info->output_h;
+    float in_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(v_scale) / v_scale;
+    int margin = stbir_info->vertical_filter_pixel_margin;
+    int end_row = stbir_info->input_h + margin;
+
+    STBIR__DEBUG_ASSERT(!stbir__use_height_upsampling(stbir_info));
+
+    for (in_row = -margin; in_row < end_row; in_row++)
+    {
+        float center_in_output; // centre of this input scanline, in output scanline space
+        int first_out, last_out;
+
+        stbir__calculate_sample_range_downsample(in_row, in_radius, v_scale, stbir_info->vertical_shift, &first_out, &last_out, &center_in_output);
+
+        STBIR__DEBUG_ASSERT(last_out - first_out <= stbir_info->vertical_filter_pixel_width);
+
+        // Only process input rows that touch at least one real output row.
+        if (last_out >= 0 && first_out < out_rows)
+        {
+            // Flush output rows that no later input row will contribute to.
+            stbir__empty_ring_buffer(stbir_info, first_out);
+
+            stbir__decode_and_resample_downsample(stbir_info, in_row);
+
+            // Make sure every output row in [first_out, last_out] has an entry.
+            if (stbir_info->ring_buffer_begin_index < 0)
+            {
+                stbir__add_empty_ring_buffer_entry(stbir_info, first_out);
+            }
+
+            while (stbir_info->ring_buffer_last_scanline < last_out)
+            {
+                stbir__add_empty_ring_buffer_entry(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
+            }
+
+            // Accumulate the horizontal buffer into those ring buffer rows.
+            stbir__resample_vertical_downsample(stbir_info, in_row, first_out, last_out, center_in_output);
+        }
+    }
+
+    // Flush everything still buffered.
+    stbir__empty_ring_buffer(stbir_info, stbir_info->output_h);
+}
+
+// Records the image geometry and channel count in `info`.
+static void stbir__setup(stbir__info *info, int input_w, int input_h, int output_w, int output_h, int channels)
+{
+    // Samples per pixel.
+    info->channels = channels;
+
+    // Source dimensions.
+    info->input_h = input_h;
+    info->input_w = input_w;
+
+    // Destination dimensions.
+    info->output_h = output_h;
+    info->output_w = output_w;
+}
+
+// Stores the source region (s0,t0)-(s1,t1) in `info` and fills in the
+// horizontal/vertical scale and shift.
+// When `transform` is non-NULL its four floats are used verbatim as
+// { horizontal_scale, vertical_scale, horizontal_shift, vertical_shift };
+// otherwise the values are derived from the image sizes and the region.
+static void stbir__calculate_transform(stbir__info *info, float s0, float t0, float s1, float t1, float *transform)
+{
+    info->s0 = s0;
+    info->s1 = s1;
+    info->t0 = t0;
+    info->t1 = t1;
+
+    if (!transform)
+    {
+        // Derived: size ratio divided by the fraction of the source covered.
+        info->horizontal_scale = ((float)info->output_w / info->input_w) / (s1 - s0);
+        info->horizontal_shift = s0 * info->input_w / (s1 - s0);
+
+        info->vertical_scale = ((float)info->output_h / info->input_h) / (t1 - t0);
+        info->vertical_shift = t0 * info->input_h / (t1 - t0);
+        return;
+    }
+
+    // Caller-supplied transform.
+    info->horizontal_scale = transform[0];
+    info->vertical_scale = transform[1];
+    info->horizontal_shift = transform[2];
+    info->vertical_shift = transform[3];
+}
+
+// Stores the filter for each axis in `info`. A filter value of 0 is replaced
+// by the default upsample or downsample filter, chosen from that axis's scale.
+static void stbir__choose_filter(stbir__info *info, stbir_filter h_filter, stbir_filter v_filter)
+{
+    if (h_filter == 0)
+    {
+        if (stbir__use_upsampling(info->horizontal_scale))
+            h_filter = STBIR_DEFAULT_FILTER_UPSAMPLE;
+        else
+            h_filter = STBIR_DEFAULT_FILTER_DOWNSAMPLE;
+    }
+
+    if (v_filter == 0)
+    {
+        if (stbir__use_upsampling(info->vertical_scale))
+            v_filter = STBIR_DEFAULT_FILTER_UPSAMPLE;
+        else
+            v_filter = STBIR_DEFAULT_FILTER_DOWNSAMPLE;
+    }
+
+    info->horizontal_filter = h_filter;
+    info->vertical_filter = v_filter;
+}
+
+// Computes the size of every scratch buffer the resize needs, stores each size
+// in `info`, and returns their sum in bytes. Also fills in the horizontal and
+// vertical contributor counts. Only one of horizontal_buffer / encode_buffer
+// is needed for a given resize, so the other's size is set to 0.
+static stbir_uint32 stbir__calculate_memory(stbir__info *info)
+{
+    int h_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
+    int v_filter_rows = stbir__get_filter_pixel_width(info->vertical_filter, info->vertical_scale);
+
+    // Contributor counts per axis.
+    info->horizontal_num_contributors = stbir__get_contributors(info->horizontal_scale, info->horizontal_filter, info->input_w, info->output_w);
+    info->vertical_num_contributors = stbir__get_contributors(info->vertical_scale , info->vertical_filter , info->input_h, info->output_h);
+
+    // Filter tables.
+    info->horizontal_contributors_size = info->horizontal_num_contributors * sizeof(stbir__contributors);
+    info->horizontal_coefficients_size = stbir__get_total_horizontal_coefficients(info) * sizeof(float);
+    info->vertical_contributors_size = info->vertical_num_contributors * sizeof(stbir__contributors);
+    info->vertical_coefficients_size = stbir__get_total_vertical_coefficients(info) * sizeof(float);
+
+    // Working rows: the decode buffer is an input row padded by the margin on
+    // both sides; the ring buffer holds v_filter_rows output-width rows.
+    info->decode_buffer_size = (info->input_w + h_margin * 2) * info->channels * sizeof(float);
+    info->horizontal_buffer_size = info->output_w * info->channels * sizeof(float);
+    info->ring_buffer_size = info->output_w * info->channels * v_filter_rows * sizeof(float);
+    info->encode_buffer_size = info->output_w * info->channels * sizeof(float);
+
+    STBIR_ASSERT(info->horizontal_filter != 0);
+    STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
+    STBIR_ASSERT(info->vertical_filter != 0);
+    STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
+
+    if (!stbir__use_height_upsampling(info))
+    {
+        // The encode buffer is to retain precision in the height upsampling
+        // method and isn't used when height downsampling.
+        info->encode_buffer_size = 0;
+    }
+    else
+    {
+        // The horizontal buffer is for when we're downsampling the height and
+        // we can't output the result of sampling the decode buffer directly
+        // into the ring buffers.
+        info->horizontal_buffer_size = 0;
+    }
+
+    return info->horizontal_contributors_size + info->horizontal_coefficients_size
+        + info->vertical_contributors_size + info->vertical_coefficients_size
+        + info->decode_buffer_size + info->horizontal_buffer_size
+        + info->ring_buffer_size + info->encode_buffer_size;
+}
+
+// Runs a resize using caller-provided scratch memory.
+//
+// `info` must already carry the sizes, transform and filters. This function
+// validates the remaining arguments, zeroes `tempmem`, carves it into the
+// individual work buffers, builds the horizontal and vertical filter tables
+// and then runs the upsample or downsample buffer loop.
+//
+// A stride of 0 means "tightly packed": channels * width * size of `type`.
+//
+// Returns 1 on success. Returns 0 (after the matching STBIR_ASSERT) when
+// channels is outside 0..STBIR_MAX_CHANNELS, a filter id is past the end of
+// the filter table, alpha_channel >= channels, tempmem is NULL, or
+// tempmem_size_in_bytes is smaller than stbir__calculate_memory(info).
+static int stbir__resize_allocated(stbir__info *info,
+ const void* input_data, int input_stride_in_bytes,
+ void* output_data, int output_stride_in_bytes,
+ int alpha_channel, stbir_uint32 flags, stbir_datatype type,
+ stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace,
+ void* tempmem, size_t tempmem_size_in_bytes)
+{
+ size_t memory_required = stbir__calculate_memory(info);
+
+ int width_stride_input = input_stride_in_bytes ? input_stride_in_bytes : info->channels * info->input_w * stbir__type_size[type];
+ int width_stride_output = output_stride_in_bytes ? output_stride_in_bytes : info->channels * info->output_w * stbir__type_size[type];
+
+#ifdef STBIR_DEBUG_OVERWRITE_TEST
+ // Debug aid: snapshot the bytes just outside the output image and the
+ // scratch memory so they can be compared after the resize (see the end of
+ // this function).
+#define OVERWRITE_ARRAY_SIZE 8
+ unsigned char overwrite_output_before_pre[OVERWRITE_ARRAY_SIZE];
+ unsigned char overwrite_tempmem_before_pre[OVERWRITE_ARRAY_SIZE];
+ unsigned char overwrite_output_after_pre[OVERWRITE_ARRAY_SIZE];
+ unsigned char overwrite_tempmem_after_pre[OVERWRITE_ARRAY_SIZE];
+
+ size_t begin_forbidden = width_stride_output * (info->output_h - 1) + info->output_w * info->channels * stbir__type_size[type];
+ memcpy(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE);
+ memcpy(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE);
+ memcpy(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE);
+ memcpy(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE);
+#endif
+
+ // Each check below asserts first and then returns 0, so the failure is
+ // still reported when STBIR_ASSERT compiles to nothing.
+ STBIR_ASSERT(info->channels >= 0);
+ STBIR_ASSERT(info->channels <= STBIR_MAX_CHANNELS);
+
+ if (info->channels < 0 || info->channels > STBIR_MAX_CHANNELS)
+ return 0;
+
+ STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
+ STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
+
+ if (info->horizontal_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
+ return 0;
+ if (info->vertical_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
+ return 0;
+
+ // A negative alpha_channel means "no alpha": set both alpha flags so no
+ // channel gets special alpha treatment.
+ if (alpha_channel < 0)
+ flags |= STBIR_FLAG_ALPHA_USES_COLORSPACE | STBIR_FLAG_ALPHA_PREMULTIPLIED;
+
+ // If either alpha flag is clear, alpha is handled specially and must name
+ // a real channel.
+ if (!(flags&STBIR_FLAG_ALPHA_USES_COLORSPACE) || !(flags&STBIR_FLAG_ALPHA_PREMULTIPLIED))
+ STBIR_ASSERT(alpha_channel >= 0 && alpha_channel < info->channels);
+
+ if (alpha_channel >= info->channels)
+ return 0;
+
+ STBIR_ASSERT(tempmem);
+
+ if (!tempmem)
+ return 0;
+
+ STBIR_ASSERT(tempmem_size_in_bytes >= memory_required);
+
+ if (tempmem_size_in_bytes < memory_required)
+ return 0;
+
+ memset(tempmem, 0, tempmem_size_in_bytes);
+
+ info->input_data = input_data;
+ info->input_stride_bytes = width_stride_input;
+
+ info->output_data = output_data;
+ info->output_stride_bytes = width_stride_output;
+
+ info->alpha_channel = alpha_channel;
+ info->flags = flags;
+ info->type = type;
+ info->edge_horizontal = edge_horizontal;
+ info->edge_vertical = edge_vertical;
+ info->colorspace = colorspace;
+
+ info->horizontal_coefficient_width = stbir__get_coefficient_width (info->horizontal_filter, info->horizontal_scale);
+ info->vertical_coefficient_width = stbir__get_coefficient_width (info->vertical_filter , info->vertical_scale );
+ info->horizontal_filter_pixel_width = stbir__get_filter_pixel_width (info->horizontal_filter, info->horizontal_scale);
+ info->vertical_filter_pixel_width = stbir__get_filter_pixel_width (info->vertical_filter , info->vertical_scale );
+ info->horizontal_filter_pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
+ info->vertical_filter_pixel_margin = stbir__get_filter_pixel_margin(info->vertical_filter , info->vertical_scale );
+
+ info->ring_buffer_length_bytes = info->output_w * info->channels * sizeof(float);
+ info->decode_buffer_pixels = info->input_w + info->horizontal_filter_pixel_margin * 2;
+
+ // Carve tempmem into consecutive buffers. STBIR__NEXT_MEMPTR(p, T) yields
+ // the address just past buffer p, using the matching p##_size field that
+ // stbir__calculate_memory filled in.
+#define STBIR__NEXT_MEMPTR(current, newtype) (newtype*)(((unsigned char*)current) + current##_size)
+
+ info->horizontal_contributors = (stbir__contributors *) tempmem;
+ info->horizontal_coefficients = STBIR__NEXT_MEMPTR(info->horizontal_contributors, float);
+ info->vertical_contributors = STBIR__NEXT_MEMPTR(info->horizontal_coefficients, stbir__contributors);
+ info->vertical_coefficients = STBIR__NEXT_MEMPTR(info->vertical_contributors, float);
+ info->decode_buffer = STBIR__NEXT_MEMPTR(info->vertical_coefficients, float);
+
+ // The tail of the layout depends on the vertical direction: upsampling
+ // uses ring buffer + encode buffer, downsampling uses horizontal buffer +
+ // ring buffer. The debug assert checks that the last buffer ends exactly
+ // at the end of tempmem.
+ if (stbir__use_height_upsampling(info))
+ {
+ info->horizontal_buffer = NULL;
+ info->ring_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
+ info->encode_buffer = STBIR__NEXT_MEMPTR(info->ring_buffer, float);
+
+ STBIR__DEBUG_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->encode_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes);
+ }
+ else
+ {
+ info->horizontal_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
+ info->ring_buffer = STBIR__NEXT_MEMPTR(info->horizontal_buffer, float);
+ info->encode_buffer = NULL;
+
+ STBIR__DEBUG_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->ring_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes);
+ }
+
+#undef STBIR__NEXT_MEMPTR
+
+ // This signals that the ring buffer is empty
+ info->ring_buffer_begin_index = -1;
+
+ stbir__calculate_filters(info, info->horizontal_contributors, info->horizontal_coefficients, info->horizontal_filter, info->horizontal_scale, info->horizontal_shift, info->input_w, info->output_w);
+ stbir__calculate_filters(info, info->vertical_contributors, info->vertical_coefficients, info->vertical_filter, info->vertical_scale, info->vertical_shift, info->input_h, info->output_h);
+
+ STBIR_PROGRESS_REPORT(0);
+
+ if (stbir__use_height_upsampling(info))
+ stbir__buffer_loop_upsample(info);
+ else
+ stbir__buffer_loop_downsample(info);
+
+ STBIR_PROGRESS_REPORT(1);
+
+#ifdef STBIR_DEBUG_OVERWRITE_TEST
+ // The guard bytes captured at the top must be unchanged, i.e. nothing
+ // wrote outside the output image or the scratch memory.
+ STBIR__DEBUG_ASSERT(memcmp(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0);
+ STBIR__DEBUG_ASSERT(memcmp(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE) == 0);
+ STBIR__DEBUG_ASSERT(memcmp(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0);
+ STBIR__DEBUG_ASSERT(memcmp(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE) == 0);
+#endif
+
+ return 1;
+}
+
+
+// Common entry point behind the public resize functions.
+//
+// Builds a stbir__info from the arguments, allocates the scratch memory the
+// resize needs with STBIR_MALLOC (passing alloc_context through), runs
+// stbir__resize_allocated and frees the scratch memory again.
+//
+// Returns the result of stbir__resize_allocated, or 0 when a filter id is out
+// of range or the allocation fails.
+static int stbir__resize_arbitrary(
+    void *alloc_context,
+    const void* input_data, int input_w, int input_h, int input_stride_in_bytes,
+    void* output_data, int output_w, int output_h, int output_stride_in_bytes,
+    float s0, float t0, float s1, float t1, float *transform,
+    int channels, int alpha_channel, stbir_uint32 flags, stbir_datatype type,
+    stbir_filter h_filter, stbir_filter v_filter,
+    stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace)
+{
+    stbir__info info;
+    int result;
+    size_t memory_required;
+    void* extra_memory;
+
+    stbir__setup(&info, input_w, input_h, output_w, output_h, channels);
+    stbir__calculate_transform(&info, s0,t0,s1,t1,transform);
+    stbir__choose_filter(&info, h_filter, v_filter);
+
+    // Reject out-of-range filter ids before any sizing is done:
+    // stbir__calculate_memory passes the ids to the filter size helpers
+    // before its own range asserts run. Same assert-then-return-0 pattern as
+    // stbir__resize_allocated.
+    STBIR_ASSERT(info.horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
+    STBIR_ASSERT(info.vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
+
+    if (info.horizontal_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
+        return 0;
+    if (info.vertical_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
+        return 0;
+
+    memory_required = stbir__calculate_memory(&info);
+    extra_memory = STBIR_MALLOC(memory_required, alloc_context);
+
+    if (!extra_memory)
+        return 0;
+
+    result = stbir__resize_allocated(&info, input_data, input_stride_in_bytes,
+                                     output_data, output_stride_in_bytes,
+                                     alpha_channel, flags, type,
+                                     edge_horizontal, edge_vertical,
+                                     colorspace, extra_memory, memory_required);
+
+    STBIR_FREE(extra_memory, alloc_context);
+
+    return result;
+}
+
+// Wrapper over stbir__resize_arbitrary for unsigned char pixels; every option the caller cannot set is pinned to the value labelled below.
+STBIRDEF int stbir_resize_uint8(const unsigned char *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
+                                unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, int num_channels)
+{
+    return stbir__resize_arbitrary(/* alloc_context */ NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes, /* s0,t0,s1,t1 */ 0, 0, 1, 1, /* transform */ NULL,
+        num_channels, /* alpha_channel */ -1, /* flags */ 0, STBIR_TYPE_UINT8, /* h_filter, v_filter */ STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
+        /* edge_horizontal, edge_vertical */ STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
+}
+
+// Wrapper over stbir__resize_arbitrary for float pixels; every option the caller cannot set is pinned to the value labelled below.
+STBIRDEF int stbir_resize_float(const float *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
+                                float *output_pixels, int output_w, int output_h, int output_stride_in_bytes, int num_channels)
+{
+    return stbir__resize_arbitrary(/* alloc_context */ NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes, /* s0,t0,s1,t1 */ 0, 0, 1, 1, /* transform */ NULL,
+        num_channels, /* alpha_channel */ -1, /* flags */ 0, STBIR_TYPE_FLOAT, /* h_filter, v_filter */ STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
+        /* edge_horizontal, edge_vertical */ STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
+}
+
+// unsigned char resize in STBIR_COLORSPACE_SRGB with caller-supplied alpha_channel and flags; remaining options are pinned as labelled below.
+STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
+                                     unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                     int num_channels, int alpha_channel, int flags)
+{
+    return stbir__resize_arbitrary(/* alloc_context */ NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes, /* s0,t0,s1,t1 */ 0, 0, 1, 1, /* transform */ NULL, num_channels, alpha_channel, flags,
+        STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT, STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_SRGB);
+}
+
+// Same fixed choices as the sRGB unsigned char wrapper, except that edge_wrap_mode is forwarded as both the horizontal and the vertical edge mode.
+STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
+                                              unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                              int num_channels, int alpha_channel, int flags, stbir_edge edge_wrap_mode)
+{
+    return stbir__resize_arbitrary(/* alloc_context */ NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes, /* s0,t0,s1,t1 */ 0, 0, 1, 1, /* transform */ NULL,
+        num_channels, alpha_channel, flags, STBIR_TYPE_UINT8, /* h_filter, v_filter */ STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
+        /* edge_horizontal, edge_vertical */ edge_wrap_mode, edge_wrap_mode, STBIR_COLORSPACE_SRGB);
+}
+
+// unsigned char resize; the single edge_wrap_mode and filter arguments are each forwarded for both the horizontal and the vertical axis.
+STBIRDEF int stbir_resize_uint8_generic(const unsigned char *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
+                                        unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                        int num_channels, int alpha_channel, int flags,
+                                        stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, void *alloc_context)
+{
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes, /* s0,t0,s1,t1 */ 0, 0, 1, 1, /* transform */ NULL,
+        num_channels, alpha_channel, flags, STBIR_TYPE_UINT8, /* h_filter, v_filter */ filter, filter,
+        /* edge_horizontal, edge_vertical */ edge_wrap_mode, edge_wrap_mode, space);
+}
+
+// stbir_uint16 resize; the single edge_wrap_mode and filter arguments are each forwarded for both the horizontal and the vertical axis.
+STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
+                                         stbir_uint16 *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                         int num_channels, int alpha_channel, int flags,
+                                         stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, void *alloc_context)
+{
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes, /* s0,t0,s1,t1 */ 0, 0, 1, 1, /* transform */ NULL,
+        num_channels, alpha_channel, flags, STBIR_TYPE_UINT16, /* h_filter, v_filter */ filter, filter,
+        /* edge_horizontal, edge_vertical */ edge_wrap_mode, edge_wrap_mode, space);
+}
+
+
+// float resize; the single edge_wrap_mode and filter arguments are each forwarded for both the horizontal and the vertical axis.
+STBIRDEF int stbir_resize_float_generic(const float *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
+                                        float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                        int num_channels, int alpha_channel, int flags,
+                                        stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, void *alloc_context)
+{
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes, /* s0,t0,s1,t1 */ 0, 0, 1, 1, /* transform */ NULL,
+        num_channels, alpha_channel, flags, STBIR_TYPE_FLOAT, /* h_filter, v_filter */ filter, filter,
+        /* edge_horizontal, edge_vertical */ edge_wrap_mode, edge_wrap_mode, space);
+}
+
+
+// Resize with the pixel datatype, per-axis edge modes and per-axis filters all chosen by the caller.
+// Only s0,t0,s1,t1 (0,0,1,1) and the transform pointer (NULL) are fixed here.
+STBIRDEF int stbir_resize(const void *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
+                          void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                          stbir_datatype datatype, int num_channels, int alpha_channel, int flags,
+                          stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, stbir_filter filter_horizontal, stbir_filter filter_vertical,
+                          stbir_colorspace space, void *alloc_context)
+{
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes, /* s0,t0,s1,t1 */ 0, 0, 1, 1, /* transform */ NULL,
+        num_channels, alpha_channel, flags, datatype, filter_horizontal, filter_vertical,
+        edge_mode_horizontal, edge_mode_vertical, space);
+}
+
+
+// Resize that additionally supplies a transform: x_scale, y_scale, x_offset and y_offset are
+// packed, in that order, into a local 4-float array whose address is passed as the transform
+// argument of stbir__resize_arbitrary. s0,t0,s1,t1 are fixed at 0,0,1,1.
+STBIRDEF int stbir_resize_subpixel(const void *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
+                                   void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                   stbir_datatype datatype, int num_channels, int alpha_channel, int flags,
+                                   stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, stbir_filter filter_horizontal, stbir_filter filter_vertical,
+                                   stbir_colorspace space, void *alloc_context,
+                                   float x_scale, float y_scale, float x_offset, float y_offset)
+{
+    float scale_offset[4];
+    scale_offset[0] = x_scale;
+    scale_offset[1] = y_scale;
+    scale_offset[2] = x_offset;
+    scale_offset[3] = y_offset;
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes, /* s0,t0,s1,t1 */ 0, 0, 1, 1, /* transform */ scale_offset,
+        num_channels, alpha_channel, flags, datatype, filter_horizontal, filter_vertical,
+        edge_mode_horizontal, edge_mode_vertical, space);
+}
+
+// Resize that forwards caller-supplied s0,t0,s1,t1 instead of the 0,0,1,1 used by the other
+// public entry points; the transform pointer is passed as NULL.
+STBIRDEF int stbir_resize_region(const void *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
+                                 void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                 stbir_datatype datatype, int num_channels, int alpha_channel, int flags,
+                                 stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, stbir_filter filter_horizontal, stbir_filter filter_vertical,
+                                 stbir_colorspace space, void *alloc_context,
+                                 float s0, float t0, float s1, float t1)
+{
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes, s0, t0, s1, t1, /* transform */ NULL,
+        num_channels, alpha_channel, flags, datatype, filter_horizontal, filter_vertical,
+        edge_mode_horizontal, edge_mode_vertical, space);
+}
+
+#endif // STB_IMAGE_RESIZE_IMPLEMENTATION
diff --git a/src/platform/gba/packer/tracks/cygwin1.dll b/src/platform/gba/packer/tracks/cygwin1.dll
new file mode 100644
index 0000000..fc4d322
Binary files /dev/null and b/src/platform/gba/packer/tracks/cygwin1.dll differ