diff --git a/src/platform/gba/packer/libimagequant/blur.c b/src/platform/gba/packer/libimagequant/blur.c new file mode 100644 index 0000000..7f0a716 --- /dev/null +++ b/src/platform/gba/packer/libimagequant/blur.c @@ -0,0 +1,132 @@ +/* +© 2011-2015 by Kornel Lesiński. + +This file is part of libimagequant. + +libimagequant is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +libimagequant is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with libimagequant. If not, see . +*/ + +#include "libimagequant.h" +#include "pam.h" +#include "blur.h" + +/* + Blurs image horizontally (width 2*size+1) and writes it transposed to dst (called twice gives 2d blur) + */ +static void transposing_1d_blur(unsigned char *restrict src, unsigned char *restrict dst, unsigned int width, unsigned int height, const unsigned int size) +{ + assert(size > 0); + + for(unsigned int j=0; j < height; j++) { + unsigned char *restrict row = src + j*width; + + // accumulate sum for pixels outside line + unsigned int sum; + sum = row[0]*size; + for(unsigned int i=0; i < size; i++) { + sum += row[i]; + } + + // blur with left side outside line + for(unsigned int i=0; i < size; i++) { + sum -= row[0]; + sum += row[i+size]; + + dst[i*height + j] = sum / (size*2); + } + + for(unsigned int i=size; i < width-size; i++) { + sum -= row[i-size]; + sum += row[i+size]; + + dst[i*height + j] = sum / (size*2); + } + + // blur with right side outside line + for(unsigned int i=width-size; i < width; i++) { + sum -= row[i-size]; + sum += row[width-1]; + + dst[i*height + j] = sum / (size*2); + } + 
} +} + +/** + * Picks maximum of neighboring pixels (blur + lighten) + */ +LIQ_PRIVATE void liq_max3(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height) +{ + for(unsigned int j=0; j < height; j++) { + const unsigned char *row = src + j*width, + *prevrow = src + (j > 1 ? j-1 : 0)*width, + *nextrow = src + MIN(height-1,j+1)*width; + + unsigned char prev,curr=row[0],next=row[0]; + + for(unsigned int i=0; i < width-1; i++) { + prev=curr; + curr=next; + next=row[i+1]; + + unsigned char t1 = MAX(prev,next); + unsigned char t2 = MAX(nextrow[i],prevrow[i]); + *dst++ = MAX(curr,MAX(t1,t2)); + } + unsigned char t1 = MAX(curr,next); + unsigned char t2 = MAX(nextrow[width-1],prevrow[width-1]); + *dst++ = MAX(t1,t2); + } +} + +/** + * Picks minimum of neighboring pixels (blur + darken) + */ +LIQ_PRIVATE void liq_min3(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height) +{ + for(unsigned int j=0; j < height; j++) { + const unsigned char *row = src + j*width, + *prevrow = src + (j > 1 ? j-1 : 0)*width, + *nextrow = src + MIN(height-1,j+1)*width; + + unsigned char prev,curr=row[0],next=row[0]; + + for(unsigned int i=0; i < width-1; i++) { + prev=curr; + curr=next; + next=row[i+1]; + + unsigned char t1 = MIN(prev,next); + unsigned char t2 = MIN(nextrow[i],prevrow[i]); + *dst++ = MIN(curr,MIN(t1,t2)); + } + unsigned char t1 = MIN(curr,next); + unsigned char t2 = MIN(nextrow[width-1],prevrow[width-1]); + *dst++ = MIN(t1,t2); + } +} + +/* + Filters src image and saves it to dst, overwriting tmp in the process. + Image must be width*height pixels high. Size controls radius of box blur. 
+ */ +LIQ_PRIVATE void liq_blur(unsigned char *src, unsigned char *tmp, unsigned char *dst, unsigned int width, unsigned int height, unsigned int size) +{ + assert(size > 0); + if (width < 2*size+1 || height < 2*size+1) { + return; + } + transposing_1d_blur(src, tmp, width, height, size); + transposing_1d_blur(tmp, dst, height, width, size); +} diff --git a/src/platform/gba/packer/libimagequant/blur.h b/src/platform/gba/packer/libimagequant/blur.h new file mode 100644 index 0000000..1e77819 --- /dev/null +++ b/src/platform/gba/packer/libimagequant/blur.h @@ -0,0 +1,8 @@ +#ifndef BLUR_H +#define BLUR_H + +LIQ_PRIVATE void liq_blur(unsigned char *src, unsigned char *tmp, unsigned char *dst, unsigned int width, unsigned int height, unsigned int size); +LIQ_PRIVATE void liq_max3(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height); +LIQ_PRIVATE void liq_min3(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height); + +#endif diff --git a/src/platform/gba/packer/libimagequant/kmeans.c b/src/platform/gba/packer/libimagequant/kmeans.c new file mode 100644 index 0000000..005be65 --- /dev/null +++ b/src/platform/gba/packer/libimagequant/kmeans.c @@ -0,0 +1,106 @@ +/* +** © 2011-2016 by Kornel Lesiński. +** See COPYRIGHT file for license. +*/ + +#include "libimagequant.h" +#include "pam.h" +#include "kmeans.h" +#include "nearest.h" +#include +#include + +#ifdef _OPENMP +#include +#else +#define omp_get_max_threads() 1 +#define omp_get_thread_num() 0 +#endif + +/* + * K-Means iteration: new palette color is computed from weighted average of colors that map to that palette entry. 
+ */ +LIQ_PRIVATE void kmeans_init(const colormap *map, const unsigned int max_threads, kmeans_state average_color[]) +{ + memset(average_color, 0, sizeof(average_color[0])*(KMEANS_CACHE_LINE_GAP+map->colors)*max_threads); +} + +LIQ_PRIVATE void kmeans_update_color(const f_pixel acolor, const float value, const colormap *map, unsigned int match, const unsigned int thread, kmeans_state average_color[]) +{ + match += thread * (KMEANS_CACHE_LINE_GAP+map->colors); + average_color[match].a += acolor.a * value; + average_color[match].r += acolor.r * value; + average_color[match].g += acolor.g * value; + average_color[match].b += acolor.b * value; + average_color[match].total += value; +} + +LIQ_PRIVATE void kmeans_finalize(colormap *map, const unsigned int max_threads, const kmeans_state average_color[]) +{ + for (unsigned int i=0; i < map->colors; i++) { + double a=0, r=0, g=0, b=0, total=0; + + // Aggregate results from all threads + for(unsigned int t=0; t < max_threads; t++) { + const unsigned int offset = (KMEANS_CACHE_LINE_GAP+map->colors) * t + i; + + a += average_color[offset].a; + r += average_color[offset].r; + g += average_color[offset].g; + b += average_color[offset].b; + total += average_color[offset].total; + } + + if (!map->palette[i].fixed) { + map->palette[i].popularity = total; + if (total) { + map->palette[i].acolor = (f_pixel){ + .a = a / total, + .r = r / total, + .g = g / total, + .b = b / total, + }; + } else { + unsigned int r = (i + rand()%7); + map->palette[i].acolor.a = map->palette[r%map->colors].acolor.a; + map->palette[i].acolor.r = map->palette[r%map->colors].acolor.r; + map->palette[i].acolor.g = map->palette[(r+1)%map->colors].acolor.g; + map->palette[i].acolor.b = map->palette[(r+2)%map->colors].acolor.b; + } + } + } +} + +LIQ_PRIVATE double kmeans_do_iteration(histogram *hist, colormap *const map, kmeans_callback callback) +{ + const unsigned int max_threads = omp_get_max_threads(); + LIQ_ARRAY(kmeans_state, average_color, 
(KMEANS_CACHE_LINE_GAP+map->colors) * max_threads); + kmeans_init(map, max_threads, average_color); + struct nearest_map *const n = nearest_init(map); + hist_item *const achv = hist->achv; + const int hist_size = hist->size; + + double total_diff=0; +#if __GNUC__ >= 9 || __clang__ + #pragma omp parallel for if (hist_size > 2000) \ + schedule(static) default(none) shared(achv,average_color,callback,hist_size,map,n) reduction(+:total_diff) +#else + #pragma omp parallel for if (hist_size > 2000) \ + schedule(static) default(none) shared(average_color,callback) reduction(+:total_diff) +#endif + for(int j=0; j < hist_size; j++) { + float diff; + unsigned int match = nearest_search(n, &achv[j].acolor, achv[j].tmp.likely_colormap_index, &diff); + achv[j].tmp.likely_colormap_index = match; + total_diff += diff * achv[j].perceptual_weight; + + if (callback) callback(&achv[j], diff); + + kmeans_update_color(achv[j].acolor, achv[j].perceptual_weight, map, match, omp_get_thread_num(), average_color); + } + + nearest_free(n); + kmeans_finalize(map, max_threads, average_color); + + return total_diff / hist->total_perceptual_weight; +} diff --git a/src/platform/gba/packer/libimagequant/kmeans.h b/src/platform/gba/packer/libimagequant/kmeans.h new file mode 100644 index 0000000..c51d7bb --- /dev/null +++ b/src/platform/gba/packer/libimagequant/kmeans.h @@ -0,0 +1,19 @@ + +#ifndef KMEANS_H +#define KMEANS_H + +// Spread memory touched by different threads at least 64B apart which I assume is the cache line size. This should avoid memory write contention. 
+#define KMEANS_CACHE_LINE_GAP ((64+sizeof(kmeans_state)-1)/sizeof(kmeans_state)) + +typedef struct { + double a, r, g, b, total; +} kmeans_state; + +typedef void (*kmeans_callback)(hist_item *item, float diff); + +LIQ_PRIVATE void kmeans_init(const colormap *map, const unsigned int max_threads, kmeans_state state[]); +LIQ_PRIVATE void kmeans_update_color(const f_pixel acolor, const float value, const colormap *map, unsigned int match, const unsigned int thread, kmeans_state average_color[]); +LIQ_PRIVATE void kmeans_finalize(colormap *map, const unsigned int max_threads, const kmeans_state state[]); +LIQ_PRIVATE double kmeans_do_iteration(histogram *hist, colormap *const map, kmeans_callback callback); + +#endif diff --git a/src/platform/gba/packer/libimagequant/libimagequant.c b/src/platform/gba/packer/libimagequant/libimagequant.c new file mode 100644 index 0000000..a1bfafb --- /dev/null +++ b/src/platform/gba/packer/libimagequant/libimagequant.c @@ -0,0 +1,2123 @@ +/* +** © 2009-2018 by Kornel Lesiński. +** © 1989, 1991 by Jef Poskanzer. +** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider. +** +** See COPYRIGHT file for license. +*/ + +#include +#include +#include +#include +#include +#include +#include + +#if !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) && !(defined(_MSC_VER) && _MSC_VER >= 1800) +#error "This program requires C99, e.g. -std=c99 switch in GCC or it requires MSVC 18.0 or higher." +#error "Ignore torrent of syntax errors that may follow. It's only because compiler is set to use too old C version." 
+#endif + +#ifdef _OPENMP +#include +#define LIQ_TEMP_ROW_WIDTH(img_width) (((img_width) | 15) + 1) /* keep alignment & leave space between rows to avoid cache line contention */ +#else +#define LIQ_TEMP_ROW_WIDTH(img_width) (img_width) +#define omp_get_max_threads() 1 +#define omp_get_thread_num() 0 +#endif + +#include "libimagequant.h" + +#include "pam.h" +#include "mediancut.h" +#include "nearest.h" +#include "blur.h" +#include "kmeans.h" + +#define LIQ_HIGH_MEMORY_LIMIT (1<<26) /* avoid allocating buffers larger than 64MB */ + +// each structure has a pointer as a unique identifier that allows type checking at run time +static const char liq_attr_magic[] = "liq_attr"; +static const char liq_image_magic[] = "liq_image"; +static const char liq_result_magic[] = "liq_result"; +static const char liq_histogram_magic[] = "liq_histogram"; +static const char liq_remapping_result_magic[] = "liq_remapping_result"; +static const char liq_freed_magic[] = "free"; +#define CHECK_STRUCT_TYPE(attr, kind) liq_crash_if_invalid_handle_pointer_given((const liq_attr*)attr, kind ## _magic) +#define CHECK_USER_POINTER(ptr) liq_crash_if_invalid_pointer_given(ptr) + +struct liq_attr { + const char *magic_header; + void* (*malloc)(size_t); + void (*free)(void*); + + double target_mse, max_mse, kmeans_iteration_limit; + unsigned int max_colors, max_histogram_entries; + unsigned int min_posterization_output /* user setting */, min_posterization_input /* speed setting */; + unsigned int kmeans_iterations, feedback_loop_trials; + bool last_index_transparent, use_contrast_maps; + unsigned char use_dither_map; + unsigned char speed; + + unsigned char progress_stage1, progress_stage2, progress_stage3; + liq_progress_callback_function *progress_callback; + void *progress_callback_user_info; + + liq_log_callback_function *log_callback; + void *log_callback_user_info; + liq_log_flush_callback_function *log_flush_callback; + void *log_flush_callback_user_info; +}; + +struct liq_image { + const char 
*magic_header; + void* (*malloc)(size_t); + void (*free)(void*); + + f_pixel *f_pixels; + rgba_pixel **rows; + double gamma; + unsigned int width, height; + unsigned char *importance_map, *edges, *dither_map; + rgba_pixel *pixels, *temp_row; + f_pixel *temp_f_row; + liq_image_get_rgba_row_callback *row_callback; + void *row_callback_user_info; + liq_image *background; + f_pixel fixed_colors[256]; + unsigned short fixed_colors_count; + bool free_pixels, free_rows, free_rows_internal; +}; + +typedef struct liq_remapping_result { + const char *magic_header; + void* (*malloc)(size_t); + void (*free)(void*); + + unsigned char *pixels; + colormap *palette; + liq_progress_callback_function *progress_callback; + void *progress_callback_user_info; + + liq_palette int_palette; + double gamma, palette_error; + float dither_level; + unsigned char use_dither_map; + unsigned char progress_stage1; +} liq_remapping_result; + +struct liq_result { + const char *magic_header; + void* (*malloc)(size_t); + void (*free)(void*); + + liq_remapping_result *remapping; + colormap *palette; + liq_progress_callback_function *progress_callback; + void *progress_callback_user_info; + + liq_palette int_palette; + float dither_level; + double gamma, palette_error; + int min_posterization_output; + unsigned char use_dither_map; +}; + +struct liq_histogram { + const char *magic_header; + void* (*malloc)(size_t); + void (*free)(void*); + + struct acolorhash_table *acht; + double gamma; + f_pixel fixed_colors[256]; + unsigned short fixed_colors_count; + unsigned short ignorebits; + bool had_image_added; +}; + +static void contrast_maps(liq_image *image) LIQ_NONNULL; +static liq_error finalize_histogram(liq_histogram *input_hist, liq_attr *options, histogram **hist_output) LIQ_NONNULL; +static const rgba_pixel *liq_image_get_row_rgba(liq_image *input_image, unsigned int row) LIQ_NONNULL; +static bool liq_image_get_row_f_init(liq_image *img) LIQ_NONNULL; +static const f_pixel 
*liq_image_get_row_f(liq_image *input_image, unsigned int row) LIQ_NONNULL; +static void liq_remapping_result_destroy(liq_remapping_result *result) LIQ_NONNULL; +static liq_error pngquant_quantize(histogram *hist, const liq_attr *options, const int fixed_colors_count, const f_pixel fixed_colors[], const double gamma, bool fixed_result_colors, liq_result **) LIQ_NONNULL; +static liq_error liq_histogram_quantize_internal(liq_histogram *input_hist, liq_attr *attr, bool fixed_result_colors, liq_result **result_output) LIQ_NONNULL; + +LIQ_NONNULL static void liq_verbose_printf(const liq_attr *context, const char *fmt, ...) +{ + if (context->log_callback) { + va_list va; + va_start(va, fmt); + int required_space = vsnprintf(NULL, 0, fmt, va)+1; // +\0 + va_end(va); + + LIQ_ARRAY(char, buf, required_space); + va_start(va, fmt); + vsnprintf(buf, required_space, fmt, va); + va_end(va); + + context->log_callback(context, buf, context->log_callback_user_info); + } +} + +LIQ_NONNULL inline static void verbose_print(const liq_attr *attr, const char *msg) +{ + if (attr->log_callback) { + attr->log_callback(attr, msg, attr->log_callback_user_info); + } +} + +LIQ_NONNULL static void liq_verbose_printf_flush(liq_attr *attr) +{ + if (attr->log_flush_callback) { + attr->log_flush_callback(attr, attr->log_flush_callback_user_info); + } +} + +LIQ_NONNULL static bool liq_progress(const liq_attr *attr, const float percent) +{ + return attr->progress_callback && !attr->progress_callback(percent, attr->progress_callback_user_info); +} + +LIQ_NONNULL static bool liq_remap_progress(const liq_remapping_result *quant, const float percent) +{ + return quant->progress_callback && !quant->progress_callback(percent, quant->progress_callback_user_info); +} + +#if USE_SSE +inline static bool is_sse_available() +{ +#if (defined(__x86_64__) || defined(__amd64) || defined(_WIN64)) + return true; +#elif _MSC_VER + int info[4]; + __cpuid(info, 1); + /* bool is implemented as a built-in type of size 1 in 
MSVC */ + return info[3] & (1<<26) ? true : false; +#else + int a,b,c,d; + cpuid(1, a, b, c, d); + return d & (1<<25); // edx bit 25 is set when SSE is present +#endif +} +#endif + +/* make it clear in backtrace when user-supplied handle points to invalid memory */ +NEVER_INLINE LIQ_EXPORT bool liq_crash_if_invalid_handle_pointer_given(const liq_attr *user_supplied_pointer, const char *const expected_magic_header); +LIQ_EXPORT bool liq_crash_if_invalid_handle_pointer_given(const liq_attr *user_supplied_pointer, const char *const expected_magic_header) +{ + if (!user_supplied_pointer) { + return false; + } + + if (user_supplied_pointer->magic_header == liq_freed_magic) { + fprintf(stderr, "%s used after being freed", expected_magic_header); + // this is not normal error handling, this is programmer error that should crash the program. + // program cannot safely continue if memory has been used after it's been freed. + // abort() is nasty, but security vulnerability may be worse. + abort(); + } + + return user_supplied_pointer->magic_header == expected_magic_header; +} + +NEVER_INLINE LIQ_EXPORT bool liq_crash_if_invalid_pointer_given(const void *pointer); +LIQ_EXPORT bool liq_crash_if_invalid_pointer_given(const void *pointer) +{ + if (!pointer) { + return false; + } + // Force a read from the given (potentially invalid) memory location in order to check early whether this crashes the program or not. + // It doesn't matter what value is read, the code here is just to shut the compiler up about unused read. 
+ char test_access = *((volatile char *)pointer); + return test_access || true; +} + +LIQ_NONNULL static void liq_log_error(const liq_attr *attr, const char *msg) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return; + liq_verbose_printf(attr, " error: %s", msg); +} + +static double quality_to_mse(long quality) +{ + if (quality == 0) { + return MAX_DIFF; + } + if (quality == 100) { + return 0; + } + + // curve fudged to be roughly similar to quality of libjpeg + // except lowest 10 for really low number of colors + const double extra_low_quality_fudge = MAX(0,0.016/(0.001+quality) - 0.001); + return extra_low_quality_fudge + 2.5/pow(210.0 + quality, 1.2) * (100.1-quality)/100.0; +} + +static unsigned int mse_to_quality(double mse) +{ + for(int i=100; i > 0; i--) { + if (mse <= quality_to_mse(i) + 0.000001) { // + epsilon for floating point errors + return i; + } + } + return 0; +} + +/** internally MSE is a sum of all channels with pixels 0..1 range, + but other software gives per-RGB-channel MSE for 0..255 range */ +static double mse_to_standard_mse(double mse) { + return mse * 65536.0/6.0; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_set_quality(liq_attr* attr, int minimum, int target) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER; + if (target < 0 || target > 100 || target < minimum || minimum < 0) return LIQ_VALUE_OUT_OF_RANGE; + + attr->target_mse = quality_to_mse(target); + attr->max_mse = quality_to_mse(minimum); + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL int liq_get_min_quality(const liq_attr *attr) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return -1; + return mse_to_quality(attr->max_mse); +} + +LIQ_EXPORT LIQ_NONNULL int liq_get_max_quality(const liq_attr *attr) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return -1; + return mse_to_quality(attr->target_mse); +} + + +LIQ_EXPORT LIQ_NONNULL liq_error liq_set_max_colors(liq_attr* attr, int colors) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER; + if 
(colors < 2 || colors > 256) return LIQ_VALUE_OUT_OF_RANGE; + + attr->max_colors = colors; + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL int liq_get_max_colors(const liq_attr *attr) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return -1; + + return attr->max_colors; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_set_min_posterization(liq_attr *attr, int bits) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER; + if (bits < 0 || bits > 4) return LIQ_VALUE_OUT_OF_RANGE; + + attr->min_posterization_output = bits; + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL int liq_get_min_posterization(const liq_attr *attr) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return -1; + + return attr->min_posterization_output; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_set_speed(liq_attr* attr, int speed) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER; + if (speed < 1 || speed > 10) return LIQ_VALUE_OUT_OF_RANGE; + + unsigned int iterations = MAX(8-speed, 0); + iterations += iterations * iterations/2; + attr->kmeans_iterations = iterations; + attr->kmeans_iteration_limit = 1.0/(double)(1<<(23-speed)); + attr->feedback_loop_trials = MAX(56-9*speed, 0); + + attr->max_histogram_entries = (1<<17) + (1<<18)*(10-speed); + attr->min_posterization_input = (speed >= 8) ? 1 : 0; + attr->use_dither_map = (speed <= (omp_get_max_threads() > 1 ? 7 : 5)); // parallelized dither map might speed up floyd remapping + if (attr->use_dither_map && speed < 3) { + attr->use_dither_map = 2; // always + } + attr->use_contrast_maps = (speed <= 7) || attr->use_dither_map; + attr->speed = speed; + + attr->progress_stage1 = attr->use_contrast_maps ? 
20 : 8; + if (attr->feedback_loop_trials < 2) { + attr->progress_stage1 += 30; + } + attr->progress_stage3 = 50 / (1+speed); + attr->progress_stage2 = 100 - attr->progress_stage1 - attr->progress_stage3; + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL int liq_get_speed(const liq_attr *attr) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return -1; + + return attr->speed; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_set_output_gamma(liq_result* res, double gamma) +{ + if (!CHECK_STRUCT_TYPE(res, liq_result)) return LIQ_INVALID_POINTER; + if (gamma <= 0 || gamma >= 1.0) return LIQ_VALUE_OUT_OF_RANGE; + + if (res->remapping) { + liq_remapping_result_destroy(res->remapping); + res->remapping = NULL; + } + + res->gamma = gamma; + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_set_min_opacity(liq_attr* attr, int min) +{ + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL int liq_get_min_opacity(const liq_attr *attr) +{ + return 0; +} + +LIQ_EXPORT LIQ_NONNULL void liq_set_last_index_transparent(liq_attr* attr, int is_last) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return; + + attr->last_index_transparent = !!is_last; +} + +LIQ_EXPORT void liq_attr_set_progress_callback(liq_attr *attr, liq_progress_callback_function *callback, void *user_info) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return; + + attr->progress_callback = callback; + attr->progress_callback_user_info = user_info; +} + +LIQ_EXPORT void liq_result_set_progress_callback(liq_result *result, liq_progress_callback_function *callback, void *user_info) +{ + if (!CHECK_STRUCT_TYPE(result, liq_result)) return; + + result->progress_callback = callback; + result->progress_callback_user_info = user_info; +} + +LIQ_EXPORT void liq_set_log_callback(liq_attr *attr, liq_log_callback_function *callback, void* user_info) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return; + + liq_verbose_printf_flush(attr); + attr->log_callback = callback; + attr->log_callback_user_info = user_info; +} + +LIQ_EXPORT void 
liq_set_log_flush_callback(liq_attr *attr, liq_log_flush_callback_function *callback, void* user_info) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return; + + attr->log_flush_callback = callback; + attr->log_flush_callback_user_info = user_info; +} + +LIQ_EXPORT liq_attr* liq_attr_create() +{ + return liq_attr_create_with_allocator(NULL, NULL); +} + +LIQ_EXPORT LIQ_NONNULL void liq_attr_destroy(liq_attr *attr) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) { + return; + } + + liq_verbose_printf_flush(attr); + + attr->magic_header = liq_freed_magic; + attr->free(attr); +} + +LIQ_EXPORT LIQ_NONNULL liq_attr* liq_attr_copy(const liq_attr *orig) +{ + if (!CHECK_STRUCT_TYPE(orig, liq_attr)) { + return NULL; + } + + liq_attr *attr = orig->malloc(sizeof(liq_attr)); + if (!attr) return NULL; + *attr = *orig; + return attr; +} + +static void *liq_aligned_malloc(size_t size) +{ + unsigned char *ptr = malloc(size + 16); + if (!ptr) { + return NULL; + } + + uintptr_t offset = 16 - ((uintptr_t)ptr & 15); // also reserves 1 byte for ptr[-1] + ptr += offset; + assert(0 == (((uintptr_t)ptr) & 15)); + ptr[-1] = offset ^ 0x59; // store how much pointer was shifted to get the original for free() + return ptr; +} + +LIQ_NONNULL static void liq_aligned_free(void *inptr) +{ + unsigned char *ptr = inptr; + size_t offset = ptr[-1] ^ 0x59; + assert(offset > 0 && offset <= 16); + free(ptr - offset); +} + +LIQ_EXPORT liq_attr* liq_attr_create_with_allocator(void* (*custom_malloc)(size_t), void (*custom_free)(void*)) +{ +#if USE_SSE + if (!is_sse_available()) { + return NULL; + } +#endif + if (!custom_malloc && !custom_free) { + custom_malloc = liq_aligned_malloc; + custom_free = liq_aligned_free; + } else if (!custom_malloc != !custom_free) { + return NULL; // either specify both or none + } + + liq_attr *attr = custom_malloc(sizeof(liq_attr)); + if (!attr) return NULL; + *attr = (liq_attr) { + .magic_header = liq_attr_magic, + .malloc = custom_malloc, + .free = custom_free, + .max_colors = 
256, + .last_index_transparent = false, // puts transparent color at last index. This is workaround for blu-ray subtitles. + .target_mse = 0, + .max_mse = MAX_DIFF, + }; + liq_set_speed(attr, 4); + return attr; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_image_add_fixed_color(liq_image *img, liq_color color) +{ + if (!CHECK_STRUCT_TYPE(img, liq_image)) return LIQ_INVALID_POINTER; + if (img->fixed_colors_count > 255) return LIQ_UNSUPPORTED; + + float gamma_lut[256]; + to_f_set_gamma(gamma_lut, img->gamma); + img->fixed_colors[img->fixed_colors_count++] = rgba_to_f(gamma_lut, (rgba_pixel){ + .r = color.r, + .g = color.g, + .b = color.b, + .a = color.a, + }); + return LIQ_OK; +} + +LIQ_NONNULL static liq_error liq_histogram_add_fixed_color_f(liq_histogram *hist, f_pixel color) +{ + if (hist->fixed_colors_count > 255) return LIQ_UNSUPPORTED; + + hist->fixed_colors[hist->fixed_colors_count++] = color; + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_add_fixed_color(liq_histogram *hist, liq_color color, double gamma) +{ + if (!CHECK_STRUCT_TYPE(hist, liq_histogram)) return LIQ_INVALID_POINTER; + + float gamma_lut[256]; + to_f_set_gamma(gamma_lut, gamma ? gamma : 0.45455); + const f_pixel px = rgba_to_f(gamma_lut, (rgba_pixel){ + .r = color.r, + .g = color.g, + .b = color.b, + .a = color.a, + }); + return liq_histogram_add_fixed_color_f(hist, px); +} + +LIQ_NONNULL static bool liq_image_use_low_memory(liq_image *img) +{ + img->temp_f_row = img->malloc(sizeof(img->f_pixels[0]) * LIQ_TEMP_ROW_WIDTH(img->width) * omp_get_max_threads()); + return img->temp_f_row != NULL; +} + +LIQ_NONNULL static bool liq_image_should_use_low_memory(liq_image *img, const bool low_memory_hint) +{ + return (size_t)img->width * (size_t)img->height > (low_memory_hint ? 
LIQ_HIGH_MEMORY_LIMIT/8 : LIQ_HIGH_MEMORY_LIMIT) / sizeof(f_pixel); // Watch out for integer overflow +} + +static liq_image *liq_image_create_internal(const liq_attr *attr, rgba_pixel* rows[], liq_image_get_rgba_row_callback *row_callback, void *row_callback_user_info, int width, int height, double gamma) +{ + if (gamma < 0 || gamma > 1.0) { + liq_log_error(attr, "gamma must be >= 0 and <= 1 (try 1/gamma instead)"); + return NULL; + } + + if (!rows && !row_callback) { + liq_log_error(attr, "missing row data"); + return NULL; + } + + liq_image *img = attr->malloc(sizeof(liq_image)); + if (!img) return NULL; + *img = (liq_image){ + .magic_header = liq_image_magic, + .malloc = attr->malloc, + .free = attr->free, + .width = width, .height = height, + .gamma = gamma ? gamma : 0.45455, + .rows = rows, + .row_callback = row_callback, + .row_callback_user_info = row_callback_user_info, + }; + + if (!rows) { + img->temp_row = attr->malloc(sizeof(img->temp_row[0]) * LIQ_TEMP_ROW_WIDTH(width) * omp_get_max_threads()); + if (!img->temp_row) return NULL; + } + + // if image is huge or converted pixels are not likely to be reused then don't cache converted pixels + if (liq_image_should_use_low_memory(img, !img->temp_row && !attr->use_contrast_maps && !attr->use_dither_map)) { + verbose_print(attr, " conserving memory"); + if (!liq_image_use_low_memory(img)) return NULL; + } + + return img; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_image_set_memory_ownership(liq_image *img, int ownership_flags) +{ + if (!CHECK_STRUCT_TYPE(img, liq_image)) return LIQ_INVALID_POINTER; + if (!img->rows || !ownership_flags || (ownership_flags & ~(LIQ_OWN_ROWS|LIQ_OWN_PIXELS))) { + return LIQ_VALUE_OUT_OF_RANGE; + } + + if (ownership_flags & LIQ_OWN_ROWS) { + if (img->free_rows_internal) return LIQ_VALUE_OUT_OF_RANGE; + img->free_rows = true; + } + + if (ownership_flags & LIQ_OWN_PIXELS) { + img->free_pixels = true; + if (!img->pixels) { + // for simplicity of this API there's no explicit bitmap 
argument, + // so the row with the lowest address is assumed to be at the start of the bitmap + img->pixels = img->rows[0]; + for(unsigned int i=1; i < img->height; i++) { + img->pixels = MIN(img->pixels, img->rows[i]); + } + } + } + + return LIQ_OK; +} + +LIQ_NONNULL static void liq_image_free_maps(liq_image *input_image); +LIQ_NONNULL static void liq_image_free_importance_map(liq_image *input_image); + +LIQ_EXPORT LIQ_NONNULL liq_error liq_image_set_importance_map(liq_image *img, unsigned char importance_map[], size_t buffer_size, enum liq_ownership ownership) { + if (!CHECK_STRUCT_TYPE(img, liq_image)) return LIQ_INVALID_POINTER; + if (!CHECK_USER_POINTER(importance_map)) return LIQ_INVALID_POINTER; + + const size_t required_size = (size_t)img->width * (size_t)img->height; + if (buffer_size < required_size) { + return LIQ_BUFFER_TOO_SMALL; + } + + if (ownership == LIQ_COPY_PIXELS) { + unsigned char *tmp = img->malloc(required_size); + if (!tmp) { + return LIQ_OUT_OF_MEMORY; + } + memcpy(tmp, importance_map, required_size); + importance_map = tmp; + } else if (ownership != LIQ_OWN_PIXELS) { + return LIQ_UNSUPPORTED; + } + + liq_image_free_importance_map(img); + img->importance_map = importance_map; + + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_image_set_background(liq_image *img, liq_image *background) +{ + if (!CHECK_STRUCT_TYPE(img, liq_image)) return LIQ_INVALID_POINTER; + if (!CHECK_STRUCT_TYPE(background, liq_image)) return LIQ_INVALID_POINTER; + + if (background->background) { + return LIQ_UNSUPPORTED; + } + if (img->width != background->width || img->height != background->height) { + return LIQ_BUFFER_TOO_SMALL; + } + + if (img->background) { + liq_image_destroy(img->background); + } + + img->background = background; + liq_image_free_maps(img); // Force them to be re-analyzed with the background + + return LIQ_OK; +} + +LIQ_NONNULL static bool check_image_size(const liq_attr *attr, const int width, const int height) +{ + if 
(!CHECK_STRUCT_TYPE(attr, liq_attr)) { + return false; + } + + if (width <= 0 || height <= 0) { + liq_log_error(attr, "width and height must be > 0"); + return false; + } + + if (width > INT_MAX/sizeof(rgba_pixel)/height || width > INT_MAX/16/sizeof(f_pixel) || height > INT_MAX/sizeof(size_t)) { + liq_log_error(attr, "image too large"); + return false; + } + return true; +} + +LIQ_EXPORT liq_image *liq_image_create_custom(const liq_attr *attr, liq_image_get_rgba_row_callback *row_callback, void* user_info, int width, int height, double gamma) +{ + if (!check_image_size(attr, width, height)) { + return NULL; + } + return liq_image_create_internal(attr, NULL, row_callback, user_info, width, height, gamma); +} + +LIQ_EXPORT liq_image *liq_image_create_rgba_rows(const liq_attr *attr, void *const rows[], int width, int height, double gamma) +{ + if (!check_image_size(attr, width, height)) { + return NULL; + } + + for(int i=0; i < height; i++) { + if (!CHECK_USER_POINTER(rows+i) || !CHECK_USER_POINTER(rows[i])) { + liq_log_error(attr, "invalid row pointers"); + return NULL; + } + } + return liq_image_create_internal(attr, (rgba_pixel**)rows, NULL, NULL, width, height, gamma); +} + +LIQ_EXPORT LIQ_NONNULL liq_image *liq_image_create_rgba(const liq_attr *attr, const void* bitmap, int width, int height, double gamma) +{ + if (!check_image_size(attr, width, height)) { + return NULL; + } + if (!CHECK_USER_POINTER(bitmap)) { + liq_log_error(attr, "invalid bitmap pointer"); + return NULL; + } + + rgba_pixel *const pixels = (rgba_pixel *const)bitmap; + rgba_pixel **rows = attr->malloc(sizeof(rows[0])*height); + if (!rows) return NULL; + + for(int i=0; i < height; i++) { + rows[i] = pixels + width * i; + } + + liq_image *image = liq_image_create_internal(attr, rows, NULL, NULL, width, height, gamma); + if (!image) { + attr->free(rows); + return NULL; + } + image->free_rows = true; + image->free_rows_internal = true; + return image; +} + +NEVER_INLINE LIQ_EXPORT void 
liq_executing_user_callback(liq_image_get_rgba_row_callback *callback, liq_color *temp_row, int row, int width, void *user_info); +LIQ_EXPORT void liq_executing_user_callback(liq_image_get_rgba_row_callback *callback, liq_color *temp_row, int row, int width, void *user_info) +{ + assert(callback); + assert(temp_row); + callback(temp_row, row, width, user_info); +} + +LIQ_NONNULL inline static bool liq_image_has_rgba_pixels(const liq_image *img) +{ + if (!CHECK_STRUCT_TYPE(img, liq_image)) { + return false; + } + return img->rows || (img->temp_row && img->row_callback); +} + +LIQ_NONNULL inline static bool liq_image_can_use_rgba_rows(const liq_image *img) +{ + assert(liq_image_has_rgba_pixels(img)); + return img->rows; +} + +LIQ_NONNULL static const rgba_pixel *liq_image_get_row_rgba(liq_image *img, unsigned int row) +{ + if (liq_image_can_use_rgba_rows(img)) { + return img->rows[row]; + } + + assert(img->temp_row); + rgba_pixel *temp_row = img->temp_row + LIQ_TEMP_ROW_WIDTH(img->width) * omp_get_thread_num(); + if (img->rows) { + memcpy(temp_row, img->rows[row], img->width * sizeof(temp_row[0])); + } else { + liq_executing_user_callback(img->row_callback, (liq_color*)temp_row, row, img->width, img->row_callback_user_info); + } + + return temp_row; +} + +LIQ_NONNULL static void convert_row_to_f(liq_image *img, f_pixel *row_f_pixels, const unsigned int row, const float gamma_lut[]) +{ + assert(row_f_pixels); + assert(!USE_SSE || 0 == ((uintptr_t)row_f_pixels & 15)); + + const rgba_pixel *const row_pixels = liq_image_get_row_rgba(img, row); + + for(unsigned int col=0; col < img->width; col++) { + row_f_pixels[col] = rgba_to_f(gamma_lut, row_pixels[col]); + } +} + +LIQ_NONNULL static bool liq_image_get_row_f_init(liq_image *img) +{ + assert(omp_get_thread_num() == 0); + if (img->f_pixels) { + return true; + } + if (!liq_image_should_use_low_memory(img, false)) { + img->f_pixels = img->malloc(sizeof(img->f_pixels[0]) * img->width * img->height); + } + if 
(!img->f_pixels) { + return liq_image_use_low_memory(img); + } + + if (!liq_image_has_rgba_pixels(img)) { + return false; + } + + float gamma_lut[256]; + to_f_set_gamma(gamma_lut, img->gamma); + for(unsigned int i=0; i < img->height; i++) { + convert_row_to_f(img, &img->f_pixels[i*img->width], i, gamma_lut); + } + return true; +} + +LIQ_NONNULL static const f_pixel *liq_image_get_row_f(liq_image *img, unsigned int row) +{ + if (!img->f_pixels) { + assert(img->temp_f_row); // init should have done that + float gamma_lut[256]; + to_f_set_gamma(gamma_lut, img->gamma); + f_pixel *row_for_thread = img->temp_f_row + LIQ_TEMP_ROW_WIDTH(img->width) * omp_get_thread_num(); + convert_row_to_f(img, row_for_thread, row, gamma_lut); + return row_for_thread; + } + return img->f_pixels + img->width * row; +} + +LIQ_EXPORT LIQ_NONNULL int liq_image_get_width(const liq_image *input_image) +{ + if (!CHECK_STRUCT_TYPE(input_image, liq_image)) return -1; + return input_image->width; +} + +LIQ_EXPORT LIQ_NONNULL int liq_image_get_height(const liq_image *input_image) +{ + if (!CHECK_STRUCT_TYPE(input_image, liq_image)) return -1; + return input_image->height; +} + +typedef void free_func(void*); + +LIQ_NONNULL static free_func *get_default_free_func(liq_image *img) +{ + // When default allocator is used then user-supplied pointers must be freed with free() + if (img->free_rows_internal || img->free != liq_aligned_free) { + return img->free; + } + return free; +} + +LIQ_NONNULL static void liq_image_free_rgba_source(liq_image *input_image) +{ + if (input_image->free_pixels && input_image->pixels) { + get_default_free_func(input_image)(input_image->pixels); + input_image->pixels = NULL; + } + + if (input_image->free_rows && input_image->rows) { + get_default_free_func(input_image)(input_image->rows); + input_image->rows = NULL; + } +} + +LIQ_NONNULL static void liq_image_free_importance_map(liq_image *input_image) { + if (input_image->importance_map) { + 
input_image->free(input_image->importance_map); + input_image->importance_map = NULL; + } +} + +LIQ_NONNULL static void liq_image_free_maps(liq_image *input_image) { + liq_image_free_importance_map(input_image); + + if (input_image->edges) { + input_image->free(input_image->edges); + input_image->edges = NULL; + } + + if (input_image->dither_map) { + input_image->free(input_image->dither_map); + input_image->dither_map = NULL; + } +} + +LIQ_EXPORT LIQ_NONNULL void liq_image_destroy(liq_image *input_image) +{ + if (!CHECK_STRUCT_TYPE(input_image, liq_image)) return; + + liq_image_free_rgba_source(input_image); + + liq_image_free_maps(input_image); + + if (input_image->f_pixels) { + input_image->free(input_image->f_pixels); + } + + if (input_image->temp_row) { + input_image->free(input_image->temp_row); + } + + if (input_image->temp_f_row) { + input_image->free(input_image->temp_f_row); + } + + if (input_image->background) { + liq_image_destroy(input_image->background); + } + + input_image->magic_header = liq_freed_magic; + input_image->free(input_image); +} + +LIQ_EXPORT liq_histogram* liq_histogram_create(const liq_attr* attr) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) { + return NULL; + } + + liq_histogram *hist = attr->malloc(sizeof(liq_histogram)); + if (!hist) return NULL; + *hist = (liq_histogram) { + .magic_header = liq_histogram_magic, + .malloc = attr->malloc, + .free = attr->free, + + .ignorebits = MAX(attr->min_posterization_output, attr->min_posterization_input), + }; + return hist; +} + +LIQ_EXPORT LIQ_NONNULL void liq_histogram_destroy(liq_histogram *hist) +{ + if (!CHECK_STRUCT_TYPE(hist, liq_histogram)) return; + hist->magic_header = liq_freed_magic; + + pam_freeacolorhash(hist->acht); + hist->free(hist); +} + +LIQ_EXPORT LIQ_NONNULL liq_result *liq_quantize_image(liq_attr *attr, liq_image *img) +{ + liq_result *res; + if (LIQ_OK != liq_image_quantize(img, attr, &res)) { + return NULL; + } + return res; +} + +LIQ_EXPORT LIQ_NONNULL liq_error 
liq_image_quantize(liq_image *const img, liq_attr *const attr, liq_result **result_output) +{ + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER; + if (!liq_image_has_rgba_pixels(img)) { + return LIQ_UNSUPPORTED; + } + + liq_histogram *hist = liq_histogram_create(attr); + if (!hist) { + return LIQ_OUT_OF_MEMORY; + } + liq_error err = liq_histogram_add_image(hist, attr, img); + if (LIQ_OK != err) { + liq_histogram_destroy(hist); + return err; + } + + err = liq_histogram_quantize_internal(hist, attr, false, result_output); + liq_histogram_destroy(hist); + + return err; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_quantize(liq_histogram *input_hist, liq_attr *attr, liq_result **result_output) { + return liq_histogram_quantize_internal(input_hist, attr, true, result_output); +} + +LIQ_NONNULL static liq_error liq_histogram_quantize_internal(liq_histogram *input_hist, liq_attr *attr, bool fixed_result_colors, liq_result **result_output) +{ + if (!CHECK_USER_POINTER(result_output)) return LIQ_INVALID_POINTER; + *result_output = NULL; + + if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER; + if (!CHECK_STRUCT_TYPE(input_hist, liq_histogram)) return LIQ_INVALID_POINTER; + + if (liq_progress(attr, 0)) return LIQ_ABORTED; + + histogram *hist; + liq_error err = finalize_histogram(input_hist, attr, &hist); + if (err != LIQ_OK) { + return err; + } + + err = pngquant_quantize(hist, attr, input_hist->fixed_colors_count, input_hist->fixed_colors, input_hist->gamma, fixed_result_colors, result_output); + pam_freeacolorhist(hist); + + return err; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_set_dithering_level(liq_result *res, float dither_level) +{ + if (!CHECK_STRUCT_TYPE(res, liq_result)) return LIQ_INVALID_POINTER; + + if (res->remapping) { + liq_remapping_result_destroy(res->remapping); + res->remapping = NULL; + } + + if (dither_level < 0 || dither_level > 1.0f) return LIQ_VALUE_OUT_OF_RANGE; + res->dither_level = dither_level; + return 
LIQ_OK; +} + +LIQ_NONNULL static liq_remapping_result *liq_remapping_result_create(liq_result *result) +{ + if (!CHECK_STRUCT_TYPE(result, liq_result)) { + return NULL; + } + + liq_remapping_result *res = result->malloc(sizeof(liq_remapping_result)); + if (!res) return NULL; + *res = (liq_remapping_result) { + .magic_header = liq_remapping_result_magic, + .malloc = result->malloc, + .free = result->free, + .dither_level = result->dither_level, + .use_dither_map = result->use_dither_map, + .palette_error = result->palette_error, + .gamma = result->gamma, + .palette = pam_duplicate_colormap(result->palette), + .progress_callback = result->progress_callback, + .progress_callback_user_info = result->progress_callback_user_info, + .progress_stage1 = result->use_dither_map ? 20 : 0, + }; + return res; +} + +LIQ_EXPORT LIQ_NONNULL double liq_get_output_gamma(const liq_result *result) +{ + if (!CHECK_STRUCT_TYPE(result, liq_result)) return -1; + + return result->gamma; +} + +LIQ_NONNULL static void liq_remapping_result_destroy(liq_remapping_result *result) +{ + if (!CHECK_STRUCT_TYPE(result, liq_remapping_result)) return; + + if (result->palette) pam_freecolormap(result->palette); + if (result->pixels) result->free(result->pixels); + + result->magic_header = liq_freed_magic; + result->free(result); +} + +LIQ_EXPORT LIQ_NONNULL void liq_result_destroy(liq_result *res) +{ + if (!CHECK_STRUCT_TYPE(res, liq_result)) return; + + memset(&res->int_palette, 0, sizeof(liq_palette)); + + if (res->remapping) { + memset(&res->remapping->int_palette, 0, sizeof(liq_palette)); + liq_remapping_result_destroy(res->remapping); + } + + pam_freecolormap(res->palette); + + res->magic_header = liq_freed_magic; + res->free(res); +} + + +LIQ_EXPORT LIQ_NONNULL double liq_get_quantization_error(const liq_result *result) { + if (!CHECK_STRUCT_TYPE(result, liq_result)) return -1; + + if (result->palette_error >= 0) { + return mse_to_standard_mse(result->palette_error); + } + + return -1; +} + 
+LIQ_EXPORT LIQ_NONNULL double liq_get_remapping_error(const liq_result *result) { + if (!CHECK_STRUCT_TYPE(result, liq_result)) return -1; + + if (result->remapping && result->remapping->palette_error >= 0) { + return mse_to_standard_mse(result->remapping->palette_error); + } + + return -1; +} + +LIQ_EXPORT LIQ_NONNULL int liq_get_quantization_quality(const liq_result *result) { + if (!CHECK_STRUCT_TYPE(result, liq_result)) return -1; + + if (result->palette_error >= 0) { + return mse_to_quality(result->palette_error); + } + + return -1; +} + +LIQ_EXPORT LIQ_NONNULL int liq_get_remapping_quality(const liq_result *result) { + if (!CHECK_STRUCT_TYPE(result, liq_result)) return -1; + + if (result->remapping && result->remapping->palette_error >= 0) { + return mse_to_quality(result->remapping->palette_error); + } + + return -1; +} + +LIQ_NONNULL static int compare_popularity(const void *ch1, const void *ch2) +{ + const float v1 = ((const colormap_item*)ch1)->popularity; + const float v2 = ((const colormap_item*)ch2)->popularity; + return v1 > v2 ? -1 : 1; +} + +LIQ_NONNULL static void sort_palette_qsort(colormap *map, int start, int nelem) +{ + if (!nelem) return; + qsort(map->palette + start, nelem, sizeof(map->palette[0]), compare_popularity); +} + +#define SWAP_PALETTE(map, a,b) { \ + const colormap_item tmp = (map)->palette[(a)]; \ + (map)->palette[(a)] = (map)->palette[(b)]; \ + (map)->palette[(b)] = tmp; } + +LIQ_NONNULL static void sort_palette(colormap *map, const liq_attr *options) +{ + /* + ** Step 3.5 [GRR]: remap the palette colors so that all entries with + ** the maximal alpha value (i.e., fully opaque) are at the end and can + ** therefore be omitted from the tRNS chunk. 
+ */ + if (options->last_index_transparent) { + for(unsigned int i=0; i < map->colors; i++) { + if (map->palette[i].acolor.a < 1.f/256.f) { + const unsigned int old = i, transparent_dest = map->colors-1; + + SWAP_PALETTE(map, transparent_dest, old); + + /* colors sorted by popularity make pngs slightly more compressible */ + sort_palette_qsort(map, 0, map->colors-1); + return; + } + } + } + + unsigned int non_fixed_colors = 0; + for(unsigned int i = 0; i < map->colors; i++) { + if (map->palette[i].fixed) { + break; + } + non_fixed_colors++; + } + + /* move transparent colors to the beginning to shrink trns chunk */ + unsigned int num_transparent = 0; + for(unsigned int i = 0; i < non_fixed_colors; i++) { + if (map->palette[i].acolor.a < 255.f/256.f) { + // current transparent color is swapped with earlier opaque one + if (i != num_transparent) { + SWAP_PALETTE(map, num_transparent, i); + i--; + } + num_transparent++; + } + } + + liq_verbose_printf(options, " eliminated opaque tRNS-chunk entries...%d entr%s transparent", num_transparent, (num_transparent == 1)? 
"y" : "ies"); + + /* colors sorted by popularity make pngs slightly more compressible + * opaque and transparent are sorted separately + */ + sort_palette_qsort(map, 0, num_transparent); + sort_palette_qsort(map, num_transparent, non_fixed_colors - num_transparent); + + if (non_fixed_colors > 9 && map->colors > 16) { + SWAP_PALETTE(map, 7, 1); // slightly improves compression + SWAP_PALETTE(map, 8, 2); + SWAP_PALETTE(map, 9, 3); + } +} + +inline static unsigned int posterize_channel(unsigned int color, unsigned int bits) +{ + return (color & ~((1<> (8-bits)); +} + +LIQ_NONNULL static void set_rounded_palette(liq_palette *const dest, colormap *const map, const double gamma, unsigned int posterize) +{ + float gamma_lut[256]; + to_f_set_gamma(gamma_lut, gamma); + + dest->count = map->colors; + for(unsigned int x = 0; x < map->colors; ++x) { + rgba_pixel px = f_to_rgb(gamma, map->palette[x].acolor); + + px.r = posterize_channel(px.r, posterize); + px.g = posterize_channel(px.g, posterize); + px.b = posterize_channel(px.b, posterize); + px.a = posterize_channel(px.a, posterize); + + map->palette[x].acolor = rgba_to_f(gamma_lut, px); /* saves rounding error introduced by to_rgb, which makes remapping & dithering more accurate */ + + if (!px.a && !map->palette[x].fixed) { + px.r = 71; px.g = 112; px.b = 76; + } + + dest->entries[x] = (liq_color){.r=px.r,.g=px.g,.b=px.b,.a=px.a}; + } +} + +LIQ_EXPORT LIQ_NONNULL const liq_palette *liq_get_palette(liq_result *result) +{ + if (!CHECK_STRUCT_TYPE(result, liq_result)) return NULL; + + if (result->remapping && result->remapping->int_palette.count) { + return &result->remapping->int_palette; + } + + if (!result->int_palette.count) { + set_rounded_palette(&result->int_palette, result->palette, result->gamma, result->min_posterization_output); + } + return &result->int_palette; +} + +LIQ_NONNULL static float remap_to_palette(liq_image *const input_image, unsigned char *const *const output_pixels, colormap *const map) +{ + const 
int rows = input_image->height; + const unsigned int cols = input_image->width; + double remapping_error=0; + + if (!liq_image_get_row_f_init(input_image)) { + return -1; + } + if (input_image->background && !liq_image_get_row_f_init(input_image->background)) { + return -1; + } + + const colormap_item *acolormap = map->palette; + + struct nearest_map *const n = nearest_init(map); + const int transparent_index = input_image->background ? nearest_search(n, &(f_pixel){0,0,0,0}, 0, NULL) : 0; + + + const unsigned int max_threads = omp_get_max_threads(); + LIQ_ARRAY(kmeans_state, average_color, (KMEANS_CACHE_LINE_GAP+map->colors) * max_threads); + kmeans_init(map, max_threads, average_color); + +#if __GNUC__ >= 9 || __clang__ + #pragma omp parallel for if (rows*cols > 3000) \ + schedule(static) default(none) shared(acolormap,average_color,cols,input_image,map,n,output_pixels,rows,transparent_index) reduction(+:remapping_error) +#else + #pragma omp parallel for if (rows*cols > 3000) \ + schedule(static) default(none) shared(acolormap) shared(average_color) reduction(+:remapping_error) +#endif + for(int row = 0; row < rows; ++row) { + const f_pixel *const row_pixels = liq_image_get_row_f(input_image, row); + const f_pixel *const bg_pixels = input_image->background && acolormap[transparent_index].acolor.a < 1.f/256.f ? 
liq_image_get_row_f(input_image->background, row) : NULL; + + unsigned int last_match=0; + for(unsigned int col = 0; col < cols; ++col) { + float diff; + last_match = nearest_search(n, &row_pixels[col], last_match, &diff); + if (bg_pixels && colordifference(bg_pixels[col], acolormap[last_match].acolor) <= diff) { + last_match = transparent_index; + } + output_pixels[row][col] = last_match; + + remapping_error += diff; + kmeans_update_color(row_pixels[col], 1.0, map, last_match, omp_get_thread_num(), average_color); + } + } + + kmeans_finalize(map, max_threads, average_color); + + nearest_free(n); + + return remapping_error / (input_image->width * input_image->height); +} + +inline static f_pixel get_dithered_pixel(const float dither_level, const float max_dither_error, const f_pixel thiserr, const f_pixel px) +{ + /* Use Floyd-Steinberg errors to adjust actual color. */ + const float sr = thiserr.r * dither_level, + sg = thiserr.g * dither_level, + sb = thiserr.b * dither_level, + sa = thiserr.a * dither_level; + + float ratio = 1.0; + const float max_overflow = 1.1f; + const float max_underflow = -0.1f; + + // allowing some overflow prevents undithered bands caused by clamping of all channels + if (px.r + sr > max_overflow) ratio = MIN(ratio, (max_overflow -px.r)/sr); + else { if (px.r + sr < max_underflow) ratio = MIN(ratio, (max_underflow-px.r)/sr); } + if (px.g + sg > max_overflow) ratio = MIN(ratio, (max_overflow -px.g)/sg); + else { if (px.g + sg < max_underflow) ratio = MIN(ratio, (max_underflow-px.g)/sg); } + if (px.b + sb > max_overflow) ratio = MIN(ratio, (max_overflow -px.b)/sb); + else { if (px.b + sb < max_underflow) ratio = MIN(ratio, (max_underflow-px.b)/sb); } + + float a = px.a + sa; + if (a > 1.f) { a = 1.f; } + else if (a < 0) { a = 0; } + + // If dithering error is crazy high, don't propagate it that much + // This prevents crazy geen pixels popping out of the blue (or red or black! 
;) + const float dither_error = sr*sr + sg*sg + sb*sb + sa*sa; + if (dither_error > max_dither_error) { + ratio *= 0.8f; + } else if (dither_error < 2.f/256.f/256.f) { + // don't dither areas that don't have noticeable error — makes file smaller + return px; + } + + return (f_pixel) { + .r=px.r + sr * ratio, + .g=px.g + sg * ratio, + .b=px.b + sb * ratio, + .a=a, + }; +} + +/** + Uses edge/noise map to apply dithering only to flat areas. Dithering on edges creates jagged lines, and noisy areas are "naturally" dithered. + + If output_image_is_remapped is true, only pixels noticeably changed by error diffusion will be written to output image. + */ +LIQ_NONNULL static bool remap_to_palette_floyd(liq_image *input_image, unsigned char *const output_pixels[], liq_remapping_result *quant, const float max_dither_error, const bool output_image_is_remapped) +{ + const int rows = input_image->height, cols = input_image->width; + const unsigned char *dither_map = quant->use_dither_map ? (input_image->dither_map ? input_image->dither_map : input_image->edges) : NULL; + + const colormap *map = quant->palette; + const colormap_item *acolormap = map->palette; + + if (!liq_image_get_row_f_init(input_image)) { + return false; + } + if (input_image->background && !liq_image_get_row_f_init(input_image->background)) { + return false; + } + + /* Initialize Floyd-Steinberg error vectors. */ + const size_t errwidth = cols+2; + f_pixel *restrict thiserr = input_image->malloc(errwidth * sizeof(thiserr[0]) * 2); // +2 saves from checking out of bounds access + if (!thiserr) return false; + f_pixel *restrict nexterr = thiserr + errwidth; + memset(thiserr, 0, errwidth * sizeof(thiserr[0])); + + bool ok = true; + struct nearest_map *const n = nearest_init(map); + const int transparent_index = input_image->background ? 
nearest_search(n, &(f_pixel){0,0,0,0}, 0, NULL) : 0; + + // response to this value is non-linear and without it any value < 0.8 would give almost no dithering + float base_dithering_level = quant->dither_level; + base_dithering_level = 1.f - (1.f-base_dithering_level)*(1.f-base_dithering_level); + + if (dither_map) { + base_dithering_level *= 1.f/255.f; // convert byte to float + } + base_dithering_level *= 15.f/16.f; // prevent small errors from accumulating + + int fs_direction = 1; + unsigned int last_match=0; + for (int row = 0; row < rows; ++row) { + if (liq_remap_progress(quant, quant->progress_stage1 + row * (100.f - quant->progress_stage1) / rows)) { + ok = false; + break; + } + + memset(nexterr, 0, errwidth * sizeof(nexterr[0])); + + int col = (fs_direction > 0) ? 0 : (cols - 1); + const f_pixel *const row_pixels = liq_image_get_row_f(input_image, row); + const f_pixel *const bg_pixels = input_image->background && acolormap[transparent_index].acolor.a < 1.f/256.f ? liq_image_get_row_f(input_image->background, row) : NULL; + + do { + float dither_level = base_dithering_level; + if (dither_map) { + dither_level *= dither_map[row*cols + col]; + } + + const f_pixel spx = get_dithered_pixel(dither_level, max_dither_error, thiserr[col + 1], row_pixels[col]); + + const unsigned int guessed_match = output_image_is_remapped ? 
output_pixels[row][col] : last_match; + float diff; + last_match = nearest_search(n, &spx, guessed_match, &diff); + f_pixel output_px = acolormap[last_match].acolor; + if (bg_pixels && colordifference(bg_pixels[col], output_px) <= diff) { + output_px = bg_pixels[col]; + output_pixels[row][col] = transparent_index; + } else { + output_pixels[row][col] = last_match; + } + + f_pixel err = { + .r = (spx.r - output_px.r), + .g = (spx.g - output_px.g), + .b = (spx.b - output_px.b), + .a = (spx.a - output_px.a), + }; + + // If dithering error is crazy high, don't propagate it that much + // This prevents crazy geen pixels popping out of the blue (or red or black! ;) + if (err.r*err.r + err.g*err.g + err.b*err.b + err.a*err.a > max_dither_error) { + err.r *= 0.75f; + err.g *= 0.75f; + err.b *= 0.75f; + err.a *= 0.75f; + } + + /* Propagate Floyd-Steinberg error terms. */ + if (fs_direction > 0) { + thiserr[col + 2].a += err.a * (7.f/16.f); + thiserr[col + 2].r += err.r * (7.f/16.f); + thiserr[col + 2].g += err.g * (7.f/16.f); + thiserr[col + 2].b += err.b * (7.f/16.f); + + nexterr[col + 2].a = err.a * (1.f/16.f); + nexterr[col + 2].r = err.r * (1.f/16.f); + nexterr[col + 2].g = err.g * (1.f/16.f); + nexterr[col + 2].b = err.b * (1.f/16.f); + + nexterr[col + 1].a += err.a * (5.f/16.f); + nexterr[col + 1].r += err.r * (5.f/16.f); + nexterr[col + 1].g += err.g * (5.f/16.f); + nexterr[col + 1].b += err.b * (5.f/16.f); + + nexterr[col ].a += err.a * (3.f/16.f); + nexterr[col ].r += err.r * (3.f/16.f); + nexterr[col ].g += err.g * (3.f/16.f); + nexterr[col ].b += err.b * (3.f/16.f); + + } else { + thiserr[col ].a += err.a * (7.f/16.f); + thiserr[col ].r += err.r * (7.f/16.f); + thiserr[col ].g += err.g * (7.f/16.f); + thiserr[col ].b += err.b * (7.f/16.f); + + nexterr[col ].a = err.a * (1.f/16.f); + nexterr[col ].r = err.r * (1.f/16.f); + nexterr[col ].g = err.g * (1.f/16.f); + nexterr[col ].b = err.b * (1.f/16.f); + + nexterr[col + 1].a += err.a * (5.f/16.f); + nexterr[col + 
1].r += err.r * (5.f/16.f); + nexterr[col + 1].g += err.g * (5.f/16.f); + nexterr[col + 1].b += err.b * (5.f/16.f); + + nexterr[col + 2].a += err.a * (3.f/16.f); + nexterr[col + 2].r += err.r * (3.f/16.f); + nexterr[col + 2].g += err.g * (3.f/16.f); + nexterr[col + 2].b += err.b * (3.f/16.f); + } + + // remapping is done in zig-zag + col += fs_direction; + if (fs_direction > 0) { + if (col >= cols) break; + } else { + if (col < 0) break; + } + } while(1); + + f_pixel *const temperr = thiserr; + thiserr = nexterr; + nexterr = temperr; + fs_direction = -fs_direction; + } + + input_image->free(MIN(thiserr, nexterr)); // MIN because pointers were swapped + nearest_free(n); + + return ok; +} + +/* fixed colors are always included in the palette, so it would be wasteful to duplicate them in palette from histogram */ +LIQ_NONNULL static void remove_fixed_colors_from_histogram(histogram *hist, const int fixed_colors_count, const f_pixel fixed_colors[], const float target_mse) +{ + const float max_difference = MAX(target_mse/2.f, 2.f/256.f/256.f); + if (fixed_colors_count) { + for(int j=0; j < hist->size; j++) { + for(unsigned int i=0; i < fixed_colors_count; i++) { + if (colordifference(hist->achv[j].acolor, fixed_colors[i]) < max_difference) { + hist->achv[j] = hist->achv[--hist->size]; // remove color from histogram by overwriting with the last entry + j--; break; // continue searching histogram + } + } + } + } +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_add_colors(liq_histogram *input_hist, const liq_attr *options, const liq_histogram_entry entries[], int num_entries, double gamma) +{ + if (!CHECK_STRUCT_TYPE(options, liq_attr)) return LIQ_INVALID_POINTER; + if (!CHECK_STRUCT_TYPE(input_hist, liq_histogram)) return LIQ_INVALID_POINTER; + if (!CHECK_USER_POINTER(entries)) return LIQ_INVALID_POINTER; + if (gamma < 0 || gamma >= 1.0) return LIQ_VALUE_OUT_OF_RANGE; + if (num_entries <= 0 || num_entries > 1<<30) return LIQ_VALUE_OUT_OF_RANGE; + + if 
(input_hist->ignorebits > 0 && input_hist->had_image_added) { + return LIQ_UNSUPPORTED; + } + input_hist->ignorebits = 0; + + input_hist->had_image_added = true; + input_hist->gamma = gamma ? gamma : 0.45455; + + if (!input_hist->acht) { + input_hist->acht = pam_allocacolorhash(~0, num_entries*num_entries, 0, options->malloc, options->free); + if (!input_hist->acht) { + return LIQ_OUT_OF_MEMORY; + } + } + // Fake image size. It's only for hash size estimates. + if (!input_hist->acht->cols) { + input_hist->acht->cols = num_entries; + } + input_hist->acht->rows += num_entries; + + const unsigned int hash_size = input_hist->acht->hash_size; + for(int i=0; i < num_entries; i++) { + const rgba_pixel rgba = { + .r = entries[i].color.r, + .g = entries[i].color.g, + .b = entries[i].color.b, + .a = entries[i].color.a, + }; + union rgba_as_int px = {rgba}; + unsigned int hash; + if (px.rgba.a) { + hash = px.l % hash_size; + } else { + hash=0; px.l=0; + } + if (!pam_add_to_hash(input_hist->acht, hash, entries[i].count, px, i, num_entries)) { + return LIQ_OUT_OF_MEMORY; + } + } + + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_add_image(liq_histogram *input_hist, const liq_attr *options, liq_image *input_image) +{ + if (!CHECK_STRUCT_TYPE(options, liq_attr)) return LIQ_INVALID_POINTER; + if (!CHECK_STRUCT_TYPE(input_hist, liq_histogram)) return LIQ_INVALID_POINTER; + if (!CHECK_STRUCT_TYPE(input_image, liq_image)) return LIQ_INVALID_POINTER; + + const unsigned int cols = input_image->width, rows = input_image->height; + + if (!input_image->importance_map && options->use_contrast_maps) { + contrast_maps(input_image); + } + + input_hist->gamma = input_image->gamma; + + for(int i = 0; i < input_image->fixed_colors_count; i++) { + liq_error res = liq_histogram_add_fixed_color_f(input_hist, input_image->fixed_colors[i]); + if (res != LIQ_OK) { + return res; + } + } + + /* + ** Step 2: attempt to make a histogram of the colors, unclustered. 
+ ** If at first we don't succeed, increase ignorebits to increase color + ** coherence and try again. + */ + + if (liq_progress(options, options->progress_stage1 * 0.4f)) { + return LIQ_ABORTED; + } + + const bool all_rows_at_once = liq_image_can_use_rgba_rows(input_image); + + // Usual solution is to start from scratch when limit is exceeded, but that's not possible if it's not + // the first image added + const unsigned int max_histogram_entries = input_hist->had_image_added ? ~0 : options->max_histogram_entries; + do { + if (!input_hist->acht) { + input_hist->acht = pam_allocacolorhash(max_histogram_entries, rows*cols, input_hist->ignorebits, options->malloc, options->free); + } + if (!input_hist->acht) return LIQ_OUT_OF_MEMORY; + + // histogram uses noise contrast map for importance. Color accuracy in noisy areas is not very important. + // noise map does not include edges to avoid ruining anti-aliasing + for(unsigned int row=0; row < rows; row++) { + bool added_ok; + if (all_rows_at_once) { + added_ok = pam_computeacolorhash(input_hist->acht, (const rgba_pixel *const *)input_image->rows, cols, rows, input_image->importance_map); + if (added_ok) break; + } else { + const rgba_pixel* rows_p[1] = { liq_image_get_row_rgba(input_image, row) }; + added_ok = pam_computeacolorhash(input_hist->acht, rows_p, cols, 1, input_image->importance_map ? &input_image->importance_map[row * cols] : NULL); + } + if (!added_ok) { + input_hist->ignorebits++; + liq_verbose_printf(options, " too many colors! Scaling colors to improve clustering... 
%d", input_hist->ignorebits); + pam_freeacolorhash(input_hist->acht); + input_hist->acht = NULL; + if (liq_progress(options, options->progress_stage1 * 0.6f)) return LIQ_ABORTED; + break; + } + } + } while(!input_hist->acht); + + input_hist->had_image_added = true; + + liq_image_free_importance_map(input_image); + + if (input_image->free_pixels && input_image->f_pixels) { + liq_image_free_rgba_source(input_image); // bow can free the RGBA source if copy has been made in f_pixels + } + + return LIQ_OK; +} + +LIQ_NONNULL static liq_error finalize_histogram(liq_histogram *input_hist, liq_attr *options, histogram **hist_output) +{ + if (liq_progress(options, options->progress_stage1 * 0.9f)) { + return LIQ_ABORTED; + } + + if (!input_hist->acht) { + return LIQ_BITMAP_NOT_AVAILABLE; + } + + histogram *hist = pam_acolorhashtoacolorhist(input_hist->acht, input_hist->gamma, options->malloc, options->free); + pam_freeacolorhash(input_hist->acht); + input_hist->acht = NULL; + + if (!hist) { + return LIQ_OUT_OF_MEMORY; + } + liq_verbose_printf(options, " made histogram...%d colors found", hist->size); + remove_fixed_colors_from_histogram(hist, input_hist->fixed_colors_count, input_hist->fixed_colors, options->target_mse); + + *hist_output = hist; + return LIQ_OK; +} + +/** + Builds two maps: + importance_map - approximation of areas with high-frequency noise, except straight edges. 1=flat, 0=noisy. + edges - noise map including all edges + */ +LIQ_NONNULL static void contrast_maps(liq_image *image) +{ + const unsigned int cols = image->width, rows = image->height; + if (cols < 4 || rows < 4 || (3*cols*rows) > LIQ_HIGH_MEMORY_LIMIT) { + return; + } + + unsigned char *restrict noise = image->importance_map ? image->importance_map : image->malloc(cols*rows); + image->importance_map = NULL; + unsigned char *restrict edges = image->edges ? 
image->edges : image->malloc(cols*rows); + image->edges = NULL; + + unsigned char *restrict tmp = image->malloc(cols*rows); + + if (!noise || !edges || !tmp || !liq_image_get_row_f_init(image)) { + image->free(noise); + image->free(edges); + image->free(tmp); + return; + } + + const f_pixel *curr_row, *prev_row, *next_row; + curr_row = prev_row = next_row = liq_image_get_row_f(image, 0); + + for (unsigned int j=0; j < rows; j++) { + prev_row = curr_row; + curr_row = next_row; + next_row = liq_image_get_row_f(image, MIN(rows-1,j+1)); + + f_pixel prev, curr = curr_row[0], next=curr; + for (unsigned int i=0; i < cols; i++) { + prev=curr; + curr=next; + next = curr_row[MIN(cols-1,i+1)]; + + // contrast is difference between pixels neighbouring horizontally and vertically + const float a = fabsf(prev.a+next.a - curr.a*2.f), + r = fabsf(prev.r+next.r - curr.r*2.f), + g = fabsf(prev.g+next.g - curr.g*2.f), + b = fabsf(prev.b+next.b - curr.b*2.f); + + const f_pixel prevl = prev_row[i]; + const f_pixel nextl = next_row[i]; + + const float a1 = fabsf(prevl.a+nextl.a - curr.a*2.f), + r1 = fabsf(prevl.r+nextl.r - curr.r*2.f), + g1 = fabsf(prevl.g+nextl.g - curr.g*2.f), + b1 = fabsf(prevl.b+nextl.b - curr.b*2.f); + + const float horiz = MAX(MAX(a,r),MAX(g,b)); + const float vert = MAX(MAX(a1,r1),MAX(g1,b1)); + const float edge = MAX(horiz,vert); + float z = edge - fabsf(horiz-vert)*.5f; + z = 1.f - MAX(z,MIN(horiz,vert)); + z *= z; // noise is amplified + z *= z; + // 85 is about 1/3rd of weight (not 0, because noisy pixels still need to be included, just not as precisely). + const unsigned int z_int = 85 + (unsigned int)(z * 171.f); + noise[j*cols+i] = MIN(z_int, 255); + const int e_int = 255 - (int)(edge * 256.f); + edges[j*cols+i] = e_int > 0 ? 
MIN(e_int, 255) : 0; + } + } + + // noise areas are shrunk and then expanded to remove thin edges from the map + liq_max3(noise, tmp, cols, rows); + liq_max3(tmp, noise, cols, rows); + + liq_blur(noise, tmp, noise, cols, rows, 3); + + liq_max3(noise, tmp, cols, rows); + + liq_min3(tmp, noise, cols, rows); + liq_min3(noise, tmp, cols, rows); + liq_min3(tmp, noise, cols, rows); + + liq_min3(edges, tmp, cols, rows); + liq_max3(tmp, edges, cols, rows); + for(unsigned int i=0; i < cols*rows; i++) edges[i] = MIN(noise[i], edges[i]); + + image->free(tmp); + + image->importance_map = noise; + image->edges = edges; +} + +/** + * Builds map of neighbor pixels mapped to the same palette entry + * + * For efficiency/simplicity it mainly looks for same consecutive pixels horizontally + * and peeks 1 pixel above/below. Full 2d algorithm doesn't improve it significantly. + * Correct flood fill doesn't have visually good properties. + */ +LIQ_NONNULL static void update_dither_map(liq_image *input_image, unsigned char *const *const row_pointers, colormap *map) +{ + const unsigned int width = input_image->width; + const unsigned int height = input_image->height; + unsigned char *const edges = input_image->edges; + + for(unsigned int row=0; row < height; row++) { + unsigned char lastpixel = row_pointers[row][0]; + unsigned int lastcol=0; + + for(unsigned int col=1; col < width; col++) { + const unsigned char px = row_pointers[row][col]; + if (input_image->background && map->palette[px].acolor.a < 1.f/256.f) { + // Transparency may or may not create an edge. When there's an explicit background set, assume no edge. 
+ continue; + } + + if (px != lastpixel || col == width-1) { + int neighbor_count = 10 * (col-lastcol); + + unsigned int i=lastcol; + while(i < col) { + if (row > 0) { + unsigned char pixelabove = row_pointers[row-1][i]; + if (pixelabove == lastpixel) neighbor_count += 15; + } + if (row < height-1) { + unsigned char pixelbelow = row_pointers[row+1][i]; + if (pixelbelow == lastpixel) neighbor_count += 15; + } + i++; + } + + while(lastcol <= col) { + int e = edges[row*width + lastcol]; + edges[row*width + lastcol++] = (e+128) * (255.f/(255+128)) * (1.f - 20.f / (20 + neighbor_count)); + } + lastpixel = px; + } + } + } + input_image->dither_map = input_image->edges; + input_image->edges = NULL; +} + +/** + * Palette can be NULL, in which case it creates a new palette from scratch. + */ +static colormap *add_fixed_colors_to_palette(colormap *palette, const int max_colors, const f_pixel fixed_colors[], const int fixed_colors_count, void* (*malloc)(size_t), void (*free)(void*)) +{ + if (!fixed_colors_count) return palette; + + colormap *newpal = pam_colormap(MIN(max_colors, (palette ? palette->colors : 0) + fixed_colors_count), malloc, free); + unsigned int i=0; + if (palette && fixed_colors_count < max_colors) { + unsigned int palette_max = MIN(palette->colors, max_colors - fixed_colors_count); + for(; i < palette_max; i++) { + newpal->palette[i] = palette->palette[i]; + } + } + for(int j=0; j < MIN(max_colors, fixed_colors_count); j++) { + newpal->palette[i++] = (colormap_item){ + .acolor = fixed_colors[j], + .fixed = true, + }; + } + if (palette) pam_freecolormap(palette); + return newpal; +} + +LIQ_NONNULL static void adjust_histogram_callback(hist_item *item, float diff) +{ + item->adjusted_weight = (item->perceptual_weight+item->adjusted_weight) * (sqrtf(1.f+diff)); +} + +/** + Repeats mediancut with different histogram weights to find palette with minimum error. + + feedback_loop_trials controls how long the search will take. < 0 skips the iteration. 
+ */ +static colormap *find_best_palette(histogram *hist, const liq_attr *options, const double max_mse, const f_pixel fixed_colors[], const unsigned int fixed_colors_count, double *palette_error_p) +{ + unsigned int max_colors = options->max_colors; + + // if output is posterized it doesn't make sense to aim for perfrect colors, so increase target_mse + // at this point actual gamma is not set, so very conservative posterization estimate is used + const double target_mse = MIN(max_mse, MAX(options->target_mse, pow((1<min_posterization_output)/1024.0, 2))); + int feedback_loop_trials = options->feedback_loop_trials; + if (hist->size > 5000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;} + if (hist->size > 25000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;} + if (hist->size > 50000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;} + if (hist->size > 100000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;} + colormap *acolormap = NULL; + double least_error = MAX_DIFF; + double target_mse_overshoot = feedback_loop_trials>0 ? 
1.05 : 1.0; + const float total_trials = (float)(feedback_loop_trials>0?feedback_loop_trials:1); + int fails_in_a_row=0; + + do { + colormap *newmap; + if (hist->size && fixed_colors_count < max_colors) { + newmap = mediancut(hist, max_colors-fixed_colors_count, target_mse * target_mse_overshoot, MAX(MAX(45.0/65536.0, target_mse), least_error)*1.2, + options->malloc, options->free); + } else { + feedback_loop_trials = 0; + newmap = NULL; + } + newmap = add_fixed_colors_to_palette(newmap, max_colors, fixed_colors, fixed_colors_count, options->malloc, options->free); + if (!newmap) { + return NULL; + } + + if (feedback_loop_trials <= 0) { + return newmap; + } + + // after palette has been created, total error (MSE) is calculated to keep the best palette + // at the same time K-Means iteration is done to improve the palette + // and histogram weights are adjusted based on remapping error to give more weight to poorly matched colors + + const bool first_run_of_target_mse = !acolormap && target_mse > 0; + double total_error = kmeans_do_iteration(hist, newmap, first_run_of_target_mse ? 
NULL : adjust_histogram_callback); + + // goal is to increase quality or to reduce number of colors used if quality is good enough + if (!acolormap || total_error < least_error || (total_error <= target_mse && newmap->colors < max_colors)) { + if (acolormap) pam_freecolormap(acolormap); + acolormap = newmap; + + if (total_error < target_mse && total_error > 0) { + // K-Means iteration improves quality above what mediancut aims for + // this compensates for it, making mediancut aim for worse + target_mse_overshoot = MIN(target_mse_overshoot*1.25, target_mse/total_error); + } + + least_error = total_error; + + // if number of colors could be reduced, try to keep it that way + // but allow extra color as a bit of wiggle room in case quality can be improved too + max_colors = MIN(newmap->colors+1, max_colors); + + feedback_loop_trials -= 1; // asymptotic improvement could make it go on forever + fails_in_a_row = 0; + } else { + fails_in_a_row++; + target_mse_overshoot = 1.0; + + // if error is really bad, it's unlikely to improve, so end sooner + feedback_loop_trials -= 5 + fails_in_a_row; + pam_freecolormap(newmap); + } + + float fraction_done = 1.f-MAX(0.f, feedback_loop_trials/total_trials); + if (liq_progress(options, options->progress_stage1 + fraction_done * options->progress_stage2)) break; + liq_verbose_printf(options, " selecting colors...%d%%", (int)(100.f * fraction_done)); + } + while(feedback_loop_trials > 0); + + *palette_error_p = least_error; + return acolormap; +} + +static colormap *histogram_to_palette(const histogram *hist, const liq_attr *options) { + if (!hist->size) { + return NULL; + } + colormap *acolormap = pam_colormap(hist->size, options->malloc, options->free); + for(unsigned int i=0; i < hist->size; i++) { + acolormap->palette[i].acolor = hist->achv[i].acolor; + acolormap->palette[i].popularity = hist->achv[i].perceptual_weight; + } + return acolormap; +} + +LIQ_NONNULL static liq_error pngquant_quantize(histogram *hist, const liq_attr 
*options, const int fixed_colors_count, const f_pixel fixed_colors[], const double gamma, bool fixed_result_colors, liq_result **result_output) +{ + colormap *acolormap; + double palette_error = -1; + + assert((verbose_print(options, "SLOW debug checks enabled. Recompile with NDEBUG for normal operation."),1)); + + const bool few_input_colors = hist->size+fixed_colors_count <= options->max_colors; + + if (liq_progress(options, options->progress_stage1)) return LIQ_ABORTED; + + // If image has few colors to begin with (and no quality degradation is required) + // then it's possible to skip quantization entirely + if (few_input_colors && options->target_mse == 0) { + acolormap = add_fixed_colors_to_palette(histogram_to_palette(hist, options), options->max_colors, fixed_colors, fixed_colors_count, options->malloc, options->free); + palette_error = 0; + } else { + const double max_mse = options->max_mse * (few_input_colors ? 0.33 : 1.0); // when degrading image that's already paletted, require much higher improvement, since pal2pal often looks bad and there's little gain + acolormap = find_best_palette(hist, options, max_mse, fixed_colors, fixed_colors_count, &palette_error); + if (!acolormap) { + return LIQ_VALUE_OUT_OF_RANGE; + } + + // K-Means iteration approaches local minimum for the palette + double iteration_limit = options->kmeans_iteration_limit; + unsigned int iterations = options->kmeans_iterations; + + if (!iterations && palette_error < 0 && max_mse < MAX_DIFF) iterations = 1; // otherwise total error is never calculated and MSE limit won't work + + if (iterations) { + // likely_colormap_index (used and set in kmeans_do_iteration) can't point to index outside colormap + if (acolormap->colors < 256) for(unsigned int j=0; j < hist->size; j++) { + if (hist->achv[j].tmp.likely_colormap_index >= acolormap->colors) { + hist->achv[j].tmp.likely_colormap_index = 0; // actual value doesn't matter, as the guess is out of date anyway + } + } + + if (hist->size > 5000) 
{iterations = (iterations*3 + 3)/4;} + if (hist->size > 25000) {iterations = (iterations*3 + 3)/4;} + if (hist->size > 50000) {iterations = (iterations*3 + 3)/4;} + if (hist->size > 100000) {iterations = (iterations*3 + 3)/4; iteration_limit *= 2;} + + verbose_print(options, " moving colormap towards local minimum"); + + double previous_palette_error = MAX_DIFF; + + for(unsigned int i=0; i < iterations; i++) { + palette_error = kmeans_do_iteration(hist, acolormap, NULL); + + if (liq_progress(options, options->progress_stage1 + options->progress_stage2 + (i * options->progress_stage3 * 0.9f) / iterations)) { + break; + } + + if (fabs(previous_palette_error-palette_error) < iteration_limit) { + break; + } + + if (palette_error > max_mse*1.5) { // probably hopeless + if (palette_error > max_mse*3.0) break; // definitely hopeless + i++; + } + + previous_palette_error = palette_error; + } + } + + if (palette_error > max_mse) { + liq_verbose_printf(options, " image degradation MSE=%.3f (Q=%d) exceeded limit of %.3f (%d)", + mse_to_standard_mse(palette_error), mse_to_quality(palette_error), + mse_to_standard_mse(max_mse), mse_to_quality(max_mse)); + pam_freecolormap(acolormap); + return LIQ_QUALITY_TOO_LOW; + } + } + + if (liq_progress(options, options->progress_stage1 + options->progress_stage2 + options->progress_stage3 * 0.95f)) { + pam_freecolormap(acolormap); + return LIQ_ABORTED; + } + + sort_palette(acolormap, options); + + // If palette was created from a multi-image histogram, + // then it shouldn't be optimized for one image during remapping + if (fixed_result_colors) { + for(unsigned int i=0; i < acolormap->colors; i++) { + acolormap->palette[i].fixed = true; + } + } + + liq_result *result = options->malloc(sizeof(liq_result)); + if (!result) return LIQ_OUT_OF_MEMORY; + *result = (liq_result){ + .magic_header = liq_result_magic, + .malloc = options->malloc, + .free = options->free, + .palette = acolormap, + .palette_error = palette_error, + .use_dither_map = 
options->use_dither_map, + .gamma = gamma, + .min_posterization_output = options->min_posterization_output, + }; + *result_output = result; + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_write_remapped_image(liq_result *result, liq_image *input_image, void *buffer, size_t buffer_size) +{ + if (!CHECK_STRUCT_TYPE(result, liq_result)) { + return LIQ_INVALID_POINTER; + } + if (!CHECK_STRUCT_TYPE(input_image, liq_image)) { + return LIQ_INVALID_POINTER; + } + if (!CHECK_USER_POINTER(buffer)) { + return LIQ_INVALID_POINTER; + } + + const size_t required_size = (size_t)input_image->width * (size_t)input_image->height; + if (buffer_size < required_size) { + return LIQ_BUFFER_TOO_SMALL; + } + + LIQ_ARRAY(unsigned char *, rows, input_image->height); + unsigned char *buffer_bytes = buffer; + for(unsigned int i=0; i < input_image->height; i++) { + rows[i] = &buffer_bytes[input_image->width * i]; + } + return liq_write_remapped_image_rows(result, input_image, rows); +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_write_remapped_image_rows(liq_result *quant, liq_image *input_image, unsigned char **row_pointers) +{ + if (!CHECK_STRUCT_TYPE(quant, liq_result)) return LIQ_INVALID_POINTER; + if (!CHECK_STRUCT_TYPE(input_image, liq_image)) return LIQ_INVALID_POINTER; + for(unsigned int i=0; i < input_image->height; i++) { + if (!CHECK_USER_POINTER(row_pointers+i) || !CHECK_USER_POINTER(row_pointers[i])) return LIQ_INVALID_POINTER; + } + + if (quant->remapping) { + liq_remapping_result_destroy(quant->remapping); + } + liq_remapping_result *const result = quant->remapping = liq_remapping_result_create(quant); + if (!result) return LIQ_OUT_OF_MEMORY; + + if (!input_image->edges && !input_image->dither_map && quant->use_dither_map) { + contrast_maps(input_image); + } + + if (liq_remap_progress(result, result->progress_stage1 * 0.25f)) { + return LIQ_ABORTED; + } + + /* + ** Step 4: map the colors in the image to their closest match in the + ** new colormap, and write 'em out. 
+ */ + + float remapping_error = result->palette_error; + if (result->dither_level == 0) { + set_rounded_palette(&result->int_palette, result->palette, result->gamma, quant->min_posterization_output); + remapping_error = remap_to_palette(input_image, row_pointers, result->palette); + } else { + const bool is_image_huge = (input_image->width * input_image->height) > 2000 * 2000; + const bool allow_dither_map = result->use_dither_map == 2 || (!is_image_huge && result->use_dither_map); + const bool generate_dither_map = allow_dither_map && (input_image->edges && !input_image->dither_map); + if (generate_dither_map) { + // If dithering (with dither map) is required, this image is used to find areas that require dithering + remapping_error = remap_to_palette(input_image, row_pointers, result->palette); + update_dither_map(input_image, row_pointers, result->palette); + } + + if (liq_remap_progress(result, result->progress_stage1 * 0.5f)) { + return LIQ_ABORTED; + } + + // remapping above was the last chance to do K-Means iteration, hence the final palette is set after remapping + set_rounded_palette(&result->int_palette, result->palette, result->gamma, quant->min_posterization_output); + + if (!remap_to_palette_floyd(input_image, row_pointers, result, MAX(remapping_error*2.4, 16.f/256.f), generate_dither_map)) { + return LIQ_ABORTED; + } + } + + // remapping error from dithered image is absurd, so always non-dithered value is used + // palette_error includes some perceptual weighting from histogram which is closer correlated with dssim + // so that should be used when possible. 
+ if (result->palette_error < 0) { + result->palette_error = remapping_error; + } + + return LIQ_OK; +} + +LIQ_EXPORT int liq_version() { + return LIQ_VERSION; +} diff --git a/src/platform/gba/packer/libimagequant/libimagequant.h b/src/platform/gba/packer/libimagequant/libimagequant.h new file mode 100644 index 0000000..e227b0a --- /dev/null +++ b/src/platform/gba/packer/libimagequant/libimagequant.h @@ -0,0 +1,151 @@ +/* + * https://pngquant.org + */ + +#ifndef LIBIMAGEQUANT_H +#define LIBIMAGEQUANT_H + +#ifdef IMAGEQUANT_EXPORTS +#define LIQ_EXPORT __declspec(dllexport) +#endif + +#ifndef LIQ_EXPORT +#define LIQ_EXPORT extern +#endif + +#define LIQ_VERSION 21300 +#define LIQ_VERSION_STRING "2.13.0" + +#ifndef LIQ_PRIVATE +#if defined(__GNUC__) || defined (__llvm__) +#define LIQ_PRIVATE __attribute__((visibility("hidden"))) +#define LIQ_NONNULL __attribute__((nonnull)) +#define LIQ_USERESULT __attribute__((warn_unused_result)) +#else +#define LIQ_PRIVATE +#define LIQ_NONNULL +#define LIQ_USERESULT +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct liq_attr liq_attr; +typedef struct liq_image liq_image; +typedef struct liq_result liq_result; +typedef struct liq_histogram liq_histogram; + +typedef struct liq_color { + unsigned char r, g, b, a; +} liq_color; + +typedef struct liq_palette { + unsigned int count; + liq_color entries[256]; +} liq_palette; + +typedef enum liq_error { + LIQ_OK = 0, + LIQ_QUALITY_TOO_LOW = 99, + LIQ_VALUE_OUT_OF_RANGE = 100, + LIQ_OUT_OF_MEMORY, + LIQ_ABORTED, + LIQ_BITMAP_NOT_AVAILABLE, + LIQ_BUFFER_TOO_SMALL, + LIQ_INVALID_POINTER, + LIQ_UNSUPPORTED, +} liq_error; + +enum liq_ownership { + LIQ_OWN_ROWS=4, + LIQ_OWN_PIXELS=8, + LIQ_COPY_PIXELS=16, +}; + +typedef struct liq_histogram_entry { + liq_color color; + unsigned int count; +} liq_histogram_entry; + +LIQ_EXPORT LIQ_USERESULT liq_attr* liq_attr_create(void); +LIQ_EXPORT LIQ_USERESULT liq_attr* liq_attr_create_with_allocator(void* 
(*malloc)(size_t), void (*free)(void*)); +LIQ_EXPORT LIQ_USERESULT liq_attr* liq_attr_copy(const liq_attr *orig) LIQ_NONNULL; +LIQ_EXPORT void liq_attr_destroy(liq_attr *attr) LIQ_NONNULL; + +LIQ_EXPORT LIQ_USERESULT liq_histogram* liq_histogram_create(const liq_attr* attr); +LIQ_EXPORT liq_error liq_histogram_add_image(liq_histogram *hist, const liq_attr *attr, liq_image* image) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_histogram_add_colors(liq_histogram *hist, const liq_attr *attr, const liq_histogram_entry entries[], int num_entries, double gamma) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_histogram_add_fixed_color(liq_histogram *hist, liq_color color, double gamma) LIQ_NONNULL; +LIQ_EXPORT void liq_histogram_destroy(liq_histogram *hist) LIQ_NONNULL; + +LIQ_EXPORT liq_error liq_set_max_colors(liq_attr* attr, int colors) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT int liq_get_max_colors(const liq_attr* attr) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_set_speed(liq_attr* attr, int speed) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT int liq_get_speed(const liq_attr* attr) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_set_min_opacity(liq_attr* attr, int min) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT int liq_get_min_opacity(const liq_attr* attr) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_set_min_posterization(liq_attr* attr, int bits) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT int liq_get_min_posterization(const liq_attr* attr) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_set_quality(liq_attr* attr, int minimum, int maximum) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT int liq_get_min_quality(const liq_attr* attr) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT int liq_get_max_quality(const liq_attr* attr) LIQ_NONNULL; +LIQ_EXPORT void liq_set_last_index_transparent(liq_attr* attr, int is_last) LIQ_NONNULL; + +typedef void liq_log_callback_function(const liq_attr*, const char *message, void* user_info); +typedef void liq_log_flush_callback_function(const liq_attr*, void* user_info); +LIQ_EXPORT void 
liq_set_log_callback(liq_attr*, liq_log_callback_function*, void* user_info); +LIQ_EXPORT void liq_set_log_flush_callback(liq_attr*, liq_log_flush_callback_function*, void* user_info); + +typedef int liq_progress_callback_function(float progress_percent, void* user_info); +LIQ_EXPORT void liq_attr_set_progress_callback(liq_attr*, liq_progress_callback_function*, void* user_info); +LIQ_EXPORT void liq_result_set_progress_callback(liq_result*, liq_progress_callback_function*, void* user_info); + +// The rows and their data are not modified. The type of `rows` is non-const only due to a bug in C's typesystem design. +LIQ_EXPORT LIQ_USERESULT liq_image *liq_image_create_rgba_rows(const liq_attr *attr, void *const rows[], int width, int height, double gamma) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT liq_image *liq_image_create_rgba(const liq_attr *attr, const void *bitmap, int width, int height, double gamma) LIQ_NONNULL; + +typedef void liq_image_get_rgba_row_callback(liq_color row_out[], int row, int width, void* user_info); +LIQ_EXPORT LIQ_USERESULT liq_image *liq_image_create_custom(const liq_attr *attr, liq_image_get_rgba_row_callback *row_callback, void* user_info, int width, int height, double gamma); + +LIQ_EXPORT liq_error liq_image_set_memory_ownership(liq_image *image, int ownership_flags) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_image_set_background(liq_image *img, liq_image *background_image) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_image_set_importance_map(liq_image *img, unsigned char buffer[], size_t buffer_size, enum liq_ownership memory_handling) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_image_add_fixed_color(liq_image *img, liq_color color) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT int liq_image_get_width(const liq_image *img) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT int liq_image_get_height(const liq_image *img) LIQ_NONNULL; +LIQ_EXPORT void liq_image_destroy(liq_image *img) LIQ_NONNULL; + +LIQ_EXPORT LIQ_USERESULT liq_error liq_histogram_quantize(liq_histogram 
*const input_hist, liq_attr *const options, liq_result **result_output) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT liq_error liq_image_quantize(liq_image *const input_image, liq_attr *const options, liq_result **result_output) LIQ_NONNULL; + +LIQ_EXPORT liq_error liq_set_dithering_level(liq_result *res, float dither_level) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_set_output_gamma(liq_result* res, double gamma) LIQ_NONNULL; +LIQ_EXPORT LIQ_USERESULT double liq_get_output_gamma(const liq_result *result) LIQ_NONNULL; + +LIQ_EXPORT LIQ_USERESULT const liq_palette *liq_get_palette(liq_result *result) LIQ_NONNULL; + +LIQ_EXPORT liq_error liq_write_remapped_image(liq_result *result, liq_image *input_image, void *buffer, size_t buffer_size) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_write_remapped_image_rows(liq_result *result, liq_image *input_image, unsigned char **row_pointers) LIQ_NONNULL; + +LIQ_EXPORT double liq_get_quantization_error(const liq_result *result) LIQ_NONNULL; +LIQ_EXPORT int liq_get_quantization_quality(const liq_result *result) LIQ_NONNULL; +LIQ_EXPORT double liq_get_remapping_error(const liq_result *result) LIQ_NONNULL; +LIQ_EXPORT int liq_get_remapping_quality(const liq_result *result) LIQ_NONNULL; + +LIQ_EXPORT void liq_result_destroy(liq_result *) LIQ_NONNULL; + +LIQ_EXPORT int liq_version(void); + + +// Deprecated +LIQ_EXPORT LIQ_USERESULT liq_result *liq_quantize_image(liq_attr *options, liq_image *input_image) LIQ_NONNULL; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/platform/gba/packer/libimagequant/mediancut.c b/src/platform/gba/packer/libimagequant/mediancut.c new file mode 100644 index 0000000..2c6d1d8 --- /dev/null +++ b/src/platform/gba/packer/libimagequant/mediancut.c @@ -0,0 +1,464 @@ +/* +** © 2009-2018 by Kornel Lesiński. +** © 1989, 1991 by Jef Poskanzer. +** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider. +** +** See COPYRIGHT file for license. 
+*/ + +#include +#include + +#include "libimagequant.h" +#include "pam.h" +#include "mediancut.h" + +#define index_of_channel(ch) (offsetof(f_pixel,ch)/sizeof(float)) + +static f_pixel averagepixels(unsigned int clrs, const hist_item achv[]); + +struct box { + f_pixel color; + f_pixel variance; + double sum, total_error, max_error; + unsigned int ind; + unsigned int colors; +}; + +ALWAYS_INLINE static double variance_diff(double val, const double good_enough); +inline static double variance_diff(double val, const double good_enough) +{ + val *= val; + if (val < good_enough*good_enough) return val*0.25; + return val; +} + +/** Weighted per-channel variance of the box. It's used to decide which channel to split by */ +static f_pixel box_variance(const hist_item achv[], const struct box *box) +{ + f_pixel mean = box->color; + double variancea=0, variancer=0, varianceg=0, varianceb=0; + + for(unsigned int i = 0; i < box->colors; ++i) { + const f_pixel px = achv[box->ind + i].acolor; + double weight = achv[box->ind + i].adjusted_weight; + variancea += variance_diff(mean.a - px.a, 2.0/256.0)*weight; + variancer += variance_diff(mean.r - px.r, 1.0/256.0)*weight; + varianceg += variance_diff(mean.g - px.g, 1.0/256.0)*weight; + varianceb += variance_diff(mean.b - px.b, 1.0/256.0)*weight; + } + + return (f_pixel){ + .a = variancea*(4.0/16.0), + .r = variancer*(7.0/16.0), + .g = varianceg*(9.0/16.0), + .b = varianceb*(5.0/16.0), + }; +} + +static double box_max_error(const hist_item achv[], const struct box *box) +{ + f_pixel mean = box->color; + double max_error = 0; + + for(unsigned int i = 0; i < box->colors; ++i) { + const double diff = colordifference(mean, achv[box->ind + i].acolor); + if (diff > max_error) { + max_error = diff; + } + } + return max_error; +} + +ALWAYS_INLINE static double color_weight(f_pixel median, hist_item h); + +static inline void hist_item_swap(hist_item *l, hist_item *r) +{ + if (l != r) { + hist_item t = *l; + *l = *r; + *r = t; + } +} + 
+ALWAYS_INLINE static unsigned int qsort_pivot(const hist_item *const base, const unsigned int len); +inline static unsigned int qsort_pivot(const hist_item *const base, const unsigned int len) +{ + if (len < 32) { + return len/2; + } + + const unsigned int aidx=8, bidx=len/2, cidx=len-1; + const unsigned int a=base[aidx].tmp.sort_value, b=base[bidx].tmp.sort_value, c=base[cidx].tmp.sort_value; + return (a < b) ? ((b < c) ? bidx : ((a < c) ? cidx : aidx )) + : ((b > c) ? bidx : ((a < c) ? aidx : cidx )); +} + +ALWAYS_INLINE static unsigned int qsort_partition(hist_item *const base, const unsigned int len); +inline static unsigned int qsort_partition(hist_item *const base, const unsigned int len) +{ + unsigned int l = 1, r = len; + if (len >= 8) { + hist_item_swap(&base[0], &base[qsort_pivot(base,len)]); + } + + const unsigned int pivot_value = base[0].tmp.sort_value; + while (l < r) { + if (base[l].tmp.sort_value >= pivot_value) { + l++; + } else { + while(l < --r && base[r].tmp.sort_value <= pivot_value) {} + hist_item_swap(&base[l], &base[r]); + } + } + l--; + hist_item_swap(&base[0], &base[l]); + + return l; +} + +/** quick select algorithm */ +static void hist_item_sort_range(hist_item base[], unsigned int len, unsigned int sort_start) +{ + for(;;) { + const unsigned int l = qsort_partition(base, len), r = l+1; + + if (l > 0 && sort_start < l) { + len = l; + } + else if (r < len && sort_start > r) { + base += r; len -= r; sort_start -= r; + } + else break; + } +} + +/** sorts array to make sum of weights lower than halfvar one side, returns edge between halfvar parts of the set */ +static hist_item *hist_item_sort_halfvar(hist_item base[], unsigned int len, double *const lowervar, const double halfvar) +{ + do { + const unsigned int l = qsort_partition(base, len), r = l+1; + + // check if sum of left side is smaller than half, + // if it is, then it doesn't need to be sorted + unsigned int t = 0; double tmpsum = *lowervar; + while (t <= l && tmpsum < halfvar) 
/** A channel index paired with the variance measured for that channel. */
typedef struct {
    unsigned int chan; float variance;
} channelvariance;

/**
 * qsort() comparator ordering channelvariance items by descending variance:
 * returns -1 when the first has the larger variance, 1 when it has the
 * smaller one, and 0 otherwise.
 */
static int comparevariance(const void *ch1, const void *ch2)
{
    const channelvariance *const first = ch1;
    const channelvariance *const second = ch2;

    if (first->variance > second->variance) {
        return -1;
    }
    if (first->variance < second->variance) {
        return 1;
    }
    return 0;
}
matters. When trying median cut many times
+        // with different histogram weights, I don't want sort randomness to influence outcome.
+        achv[ind1 + i].tmp.sort_value = ((unsigned int)(chans[channels[0].chan]*65535.0)<<16) |
+                                       (unsigned int)((chans[channels[2].chan] + chans[channels[1].chan]/2.0 + chans[channels[3].chan]/4.0)*65535.0);
+    }
+
+    const f_pixel median = get_median(b, achv);
+
+    // box will be split to make color_weight of each side even
+    const unsigned int ind = b->ind, end = ind+b->colors;
+    double totalvar = 0;
+    #pragma omp parallel for if (end - ind > 15000) \
+        schedule(static) default(shared) reduction(+:totalvar)
+    for(unsigned int j=ind; j < end; j++) totalvar += (achv[j].color_weight = color_weight(median, achv[j]));
+    return totalvar / 2.0;
+}
+
+/** finds the median color of a box by sorting only the minimum required; an even-sized box yields the weighted average of its two middle items */
+static f_pixel get_median(const struct box *b, hist_item achv[])
+{
+    const unsigned int median_start = (b->colors-1)/2;
+
+    hist_item_sort_range(&(achv[b->ind]), b->colors,
+                         median_start);
+
+    if (b->colors&1) return achv[b->ind + median_start].acolor;
+
+    // technically the second color is not guaranteed to be sorted correctly
+    // but most of the time it is good enough to be useful
+    return averagepixels(2, &achv[b->ind + median_start]);
+}
+
+/*
+ ** Find the best splittable box. -1 if no boxes are splittable.
+ */
+static int best_splittable_box(struct box bv[], unsigned int boxes, const double max_mse)
+{
+    int bi=-1; double maxsum=0;
+    for(unsigned int i=0; i < boxes; i++) {
+        if (bv[i].colors < 2) {
+            continue;
+        }
+
+        // looks only at max variance, because it's only going to split by it
+        const double cv = MAX(bv[i].variance.r, MAX(bv[i].variance.g,bv[i].variance.b));
+        double thissum = bv[i].sum * MAX(bv[i].variance.a, cv);
+
+        if (bv[i].max_error > max_mse) { // boxes whose worst error exceeds the limit get proportionally higher priority
+            thissum = thissum* bv[i].max_error/max_mse;
+        }
+
+        if (thissum > maxsum) {
+            maxsum = thissum;
+            bi = i;
+        }
+    }
+    return bi;
+}
+
+inline static double color_weight(f_pixel median, hist_item h)
+{
+    float diff = colordifference(median, h.acolor);
+    return sqrt(diff) * (sqrt(1.0+h.adjusted_weight)-1.0); // distance from the median scaled by a sub-linear function of the item's weight
+}
+
+static void set_colormap_from_boxes(colormap *map, struct box bv[], unsigned int boxes, hist_item *achv);
+static void adjust_histogram(hist_item *achv, const struct box bv[], unsigned int boxes);
+
+static double box_error(const struct box *box, const hist_item achv[])
+{
+    f_pixel avg = box->color;
+
+    double total_error=0;
+    for (unsigned int i = 0; i < box->colors; ++i) {
+        total_error += colordifference(avg, achv[box->ind + i].acolor) * achv[box->ind + i].perceptual_weight;
+    }
+
+    return total_error;
+}
+
+
+static bool total_box_error_below_target(double target_mse, struct box bv[], unsigned int boxes, const histogram *hist)
+{
+    target_mse *= hist->total_perceptual_weight; // box errors are weighted sums, so scale the per-weight target to match
+    double total_error=0;
+
+    for(unsigned int i=0; i < boxes; i++) {
+        // error is (re)calculated lazily
+        if (bv[i].total_error >= 0) {
+            total_error += bv[i].total_error;
+        }
+        if (total_error > target_mse) return false;
+    }
+
+    for(unsigned int i=0; i < boxes; i++) {
+        if (bv[i].total_error < 0) {
+            bv[i].total_error = box_error(&bv[i], hist->achv);
+            total_error += bv[i].total_error;
+        }
+        if (total_error > target_mse) return false;
+    }
+
+    return true;
+}
+
+static void box_init(struct box *box, const hist_item *achv, const unsigned int ind, const unsigned int colors, const double sum) {
+    box->ind = ind;
+    box->colors = colors;
+    box->sum = sum;
+    box->total_error = -1; // negative means "not computed yet" (see total_box_error_below_target)
+
+    box->color = averagepixels(colors, &achv[ind]);
+    box->variance = box_variance(achv, box);
+    box->max_error = box_max_error(achv, box);
+}
+
+/*
+ ** Here is the fun part, the median-cut colormap generator.  This is based
+ ** on Paul Heckbert's paper, "Color Image Quantization for Frame Buffer
+ ** Display," SIGGRAPH 1982 Proceedings, page 297.
+ */
+LIQ_PRIVATE colormap *mediancut(histogram *hist, unsigned int newcolors, const double target_mse, const double max_mse, void* (*malloc)(size_t), void (*free)(void*))
+{
+    hist_item *achv = hist->achv;
+    LIQ_ARRAY(struct box, bv, newcolors); // one box per requested color, allocated on the stack
+    unsigned int boxes = 1;
+
+    /*
+     ** Set up the initial box.
+     */
+    {
+        double sum = 0;
+        for(unsigned int i=0; i < hist->size; i++) {
+            sum += achv[i].adjusted_weight;
+        }
+        box_init(&bv[0], achv, 0, hist->size, sum);
+
+
+        /*
+         ** Main loop: split boxes until we have enough.
+         */
+        while (boxes < newcolors) {
+
+            // first splits boxes that exceed quality limit (to have colors for things like odd green pixel),
+            // later raises the limit to allow large smooth areas/gradients to get colors.
+            const double current_max_mse = max_mse + (boxes/(double)newcolors)*16.0*max_mse;
+            const int bi = best_splittable_box(bv, boxes, current_max_mse);
+            if (bi < 0) {
+                break;        /* ran out of colors! */
+            }
+
+            unsigned int indx = bv[bi].ind;
+            unsigned int clrs = bv[bi].colors;
+
+            /*
+             Classic implementation tries to get even number of colors or pixels in each subdivision.
+
+             Here, instead of popularity I use (sqrt(popularity)*variance) metric.
+             Each subdivision balances number of pixels (popular colors) and low variance -
+             boxes can be large if they have similar colors. Later boxes with high variance
+             will be more likely to be split.
+
+             Median used as expected value gives much better results than mean.
+             */
+
+            const double halfvar = prepare_sort(&bv[bi], achv);
+            double lowervar=0;
+
+            // hist_item_sort_halfvar sorts and sums lowervar at the same time
+            // returns item to break at …minus one, which does smell like an off-by-one error.
+            hist_item *break_p = hist_item_sort_halfvar(&achv[indx], clrs, &lowervar, halfvar);
+            unsigned int break_at = MIN(clrs-1, break_p - &achv[indx] + 1);
+
+            /*
+             ** Split the box.
+             */
+            double sm = bv[bi].sum;
+            double lowersum = 0;
+            for(unsigned int i=0; i < break_at; i++) lowersum += achv[indx + i].adjusted_weight;
+
+            box_init(&bv[bi], achv, indx, break_at, lowersum);
+            box_init(&bv[boxes], achv, indx + break_at, clrs - break_at, sm - lowersum);
+
+            ++boxes;
+
+            if (total_box_error_below_target(target_mse, bv, boxes, hist)) {
+                break;
+            }
+        }
+    }
+
+    colormap *map = pam_colormap(boxes, malloc, free); // NOTE(review): pam_colormap returns NULL when malloc fails
+    set_colormap_from_boxes(map, bv, boxes, achv); // NOTE(review): dereferences map without a NULL check
+
+    adjust_histogram(achv, bv, boxes);
+
+    return map;
+}
+
+static void set_colormap_from_boxes(colormap *map, struct box* bv, unsigned int boxes, hist_item *achv)
+{
+    /*
+     ** Ok, we've got enough boxes.  Now choose a representative color for
+     ** each box.  There are a number of possible ways to make this choice.
+     ** One would be to choose the center of the box; this ignores any structure
+     ** within the boxes.  Another method would be to average all the colors in
+     ** the box - this is the method specified in Heckbert's paper.
+     */
+
+    for(unsigned int bi = 0; bi < boxes; ++bi) {
+        map->palette[bi].acolor = bv[bi].color;
+
+        /* store total color popularity (perceptual_weight is approximation of it) */
+        map->palette[bi].popularity = 0;
+        for(unsigned int i=bv[bi].ind; i < bv[bi].ind+bv[bi].colors; i++) {
+            map->palette[bi].popularity += achv[i].perceptual_weight;
+        }
+    }
+}
+
+/* records, for every histogram item, the index of the box it ended up in (tmp.likely_colormap_index is an unsigned char, so only indexes up to 255 fit) */
+static void adjust_histogram(hist_item *achv, const struct box* bv, unsigned int boxes)
+{
+    for(unsigned int bi = 0; bi < boxes; ++bi) {
+        for(unsigned int i=bv[bi].ind; i < bv[bi].ind+bv[bi].colors; i++) {
+            achv[i].tmp.likely_colormap_index = bi;
+        }
+    }
+}
+
+static f_pixel averagepixels(unsigned int clrs, const hist_item achv[])
+{
+    double r = 0, g = 0, b = 0, a = 0, sum = 0;
+
+    #pragma omp parallel for if (clrs > 25000) \
+        schedule(static) default(shared) reduction(+:a) reduction(+:r) reduction(+:g) reduction(+:b) reduction(+:sum)
+    for(unsigned int i = 0; i < clrs; i++) {
+        const f_pixel px = achv[i].acolor;
+        const double weight = achv[i].adjusted_weight;
+
+        sum += weight;
+        a += px.a * weight;
+        r += px.r * weight;
+        g += px.g * weight;
+        b += px.b * weight;
+    }
+
+    if (sum) { // with zero total weight the channels stay 0 instead of dividing by zero
+        a /= sum;
+        r /= sum;
+        g /= sum;
+        b /= sum;
+    }
+
+    assert(!isnan(r) && !isnan(g) && !isnan(b) && !isnan(a));
+
+    return (f_pixel){.r=r, .g=g, .b=b, .a=a};
+}
diff --git a/src/platform/gba/packer/libimagequant/mediancut.h b/src/platform/gba/packer/libimagequant/mediancut.h
new file mode 100644
index 0000000..9a4cb53
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/mediancut.h
@@ -0,0 +1,6 @@
+#ifndef MEDIANCUT_H
+#define MEDIANCUT_H
+
+LIQ_PRIVATE colormap *mediancut(histogram *hist, unsigned int newcolors, const double target_mse, const double max_mse, void* (*malloc)(size_t), void (*free)(void*));
+
+#endif
diff --git a/src/platform/gba/packer/libimagequant/mempool.c
b/src/platform/gba/packer/libimagequant/mempool.c
new file mode 100644
index 0000000..cd49f59
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/mempool.c
@@ -0,0 +1,86 @@
+/*
+** © 2009-2017 by Kornel Lesiński.
+** © 1989, 1991 by Jef Poskanzer.
+** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider.
+**
+** See COPYRIGHT file for license.
+*/
+
+#include "libimagequant.h"
+#include "mempool.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+
+#define ALIGN_MASK 15UL
+#define MEMPOOL_RESERVED ((sizeof(struct mempool)+ALIGN_MASK) & ~ALIGN_MASK)
+
+struct mempool {
+    unsigned int used, size;
+    void* (*malloc)(size_t);
+    void (*free)(void*);
+    struct mempool *next;
+};
+
+/*
+** Returns `size` bytes from the pool at *mptr. When *mptr is NULL or its current block is too
+** small, a new block with room for at least `max_size` bytes (128KB when 0) is allocated with
+** `malloc` and linked in front of the old one. Returns NULL and leaves *mptr untouched if
+** `malloc` fails.
+*/
+LIQ_PRIVATE void* mempool_create(mempoolptr *mptr, const unsigned int size, unsigned int max_size, void* (*malloc)(size_t), void (*free)(void*))
+{
+    if (*mptr && ((*mptr)->used+size) <= (*mptr)->size) {
+        unsigned int prevused = (*mptr)->used;
+        (*mptr)->used += (size + ALIGN_MASK) & ~ALIGN_MASK; // keeps the next allocation 16-byte aligned
+        return ((char*)(*mptr)) + prevused;
+    }
+
+    mempoolptr old = *mptr;
+    if (!max_size) max_size = (1<<17);
+    max_size = size+ALIGN_MASK > max_size ? size+ALIGN_MASK : max_size;
+
+    // allocate into a temporary: if malloc fails, *mptr must keep pointing at the existing
+    // chain, otherwise the blocks allocated so far could never be destroyed
+    mempoolptr fresh = malloc(MEMPOOL_RESERVED + max_size);
+    if (!fresh) return NULL;
+    *mptr = fresh;
+    **mptr = (struct mempool){
+        .malloc = malloc,
+        .free = free,
+        .size = MEMPOOL_RESERVED + max_size,
+        .used = sizeof(struct mempool),
+        .next = old,
+    };
+    uintptr_t mptr_used_start = (uintptr_t)(*mptr) + (*mptr)->used;
+    (*mptr)->used += (ALIGN_MASK + 1 - (mptr_used_start & ALIGN_MASK)) & ALIGN_MASK; // reserve bytes required to make subsequent allocations aligned
+    assert(!(((uintptr_t)(*mptr) + (*mptr)->used) & ALIGN_MASK));
+
+    return mempool_alloc(mptr, size, size);
+}
+
+/*
+** Returns `size` bytes from the current block (the reservation is rounded up to a multiple
+** of 16). When the block is full, mempool_create() adds a new block sized by `max_size`.
+** *mptr must already point at a pool.
+*/
+LIQ_PRIVATE void* mempool_alloc(mempoolptr *mptr, const unsigned int size, const unsigned int max_size)
+{
+    if (((*mptr)->used+size) <= (*mptr)->size) {
+        unsigned int prevused = (*mptr)->used;
+        (*mptr)->used += (size + ALIGN_MASK) & ~ALIGN_MASK;
+        return ((char*)(*mptr)) + prevused;
+    }
+
+    return mempool_create(mptr, size, max_size, (*mptr)->malloc, (*mptr)->free);
+}
+
+/* Releases every block of the chain with the `free` function recorded in that block. */
+LIQ_PRIVATE void mempool_destroy(mempoolptr m)
+{
+    while (m) {
+        mempoolptr next = m->next;
+        m->free(m);
+        m = next;
+    }
+}
diff --git a/src/platform/gba/packer/libimagequant/mempool.h b/src/platform/gba/packer/libimagequant/mempool.h
new file mode 100644
index 0000000..9b7333b
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/mempool.h
@@ -0,0 +1,13 @@
+#ifndef MEMPOOL_H
+#define MEMPOOL_H
+
+#include <stddef.h>
+
+struct mempool;
+typedef struct mempool *mempoolptr;
+
+LIQ_PRIVATE void* mempool_create(mempoolptr *mptr, const unsigned int size, unsigned int capacity, void* (*malloc)(size_t), void (*free)(void*));
+LIQ_PRIVATE void* mempool_alloc(mempoolptr *mptr, const unsigned int size, const unsigned int capacity);
+LIQ_PRIVATE void mempool_destroy(mempoolptr m);
+
+#endif
diff --git a/src/platform/gba/packer/libimagequant/nearest.c b/src/platform/gba/packer/libimagequant/nearest.c
new file mode 100644
index 0000000..cae477f
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/nearest.c
@@ -0,0 +1,230 @@
+/*
+** © 2009-2015 by Kornel Lesiński.
+** © 1989, 1991 by Jef Poskanzer.
+** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider.
+**
+** See COPYRIGHT file for license.
+*/
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "nearest.h"
+#include "mempool.h"
+#include <stdlib.h>
+
+typedef struct vp_sort_tmp {
+    float distance_squared;
+    unsigned int idx;
+} vp_sort_tmp;
+
+typedef struct vp_search_tmp {
+    float distance;
+    float distance_squared;
+    unsigned int idx;
+    int exclude; // palette index that must not be returned, or -1 to allow all
+} vp_search_tmp;
+
+struct leaf {
+    f_pixel color;
+    unsigned int idx;
+};
+
+typedef struct vp_node {
+    struct vp_node *near, *far;
+    f_pixel vantage_point;
+    float radius, radius_squared;
+    struct leaf *rest; // when restcount > 0 the remaining colors are listed here instead of in near/far subtrees
+    unsigned short idx;
+    unsigned short restcount;
+} vp_node;
+
+struct nearest_map {
+    vp_node *root;
+    const colormap_item *palette;
+    float nearest_other_color_dist[256]; // per palette entry: square of half the distance to its nearest other entry
+    mempoolptr mempool;
+};
+
+static void vp_search_node(const vp_node *node, const f_pixel *const needle, vp_search_tmp *const best_candidate);
+
+static int vp_compare_distance(const void *ap, const void *bp) {
+    float a = ((const vp_sort_tmp*)ap)->distance_squared;
+    float b = ((const vp_sort_tmp*)bp)->distance_squared;
+    return (a > b) - (a < b); // -1, 0 or 1: ties must compare equal so that qsort sees a consistent ordering
+}
+
+static void vp_sort_indexes_by_distance(const f_pixel vantage_point, vp_sort_tmp indexes[], int num_indexes, const colormap_item items[]) {
+    for(int i=0; i < num_indexes; i++) {
+        indexes[i].distance_squared = colordifference(vantage_point, items[indexes[i].idx].acolor);
+    }
+    qsort(indexes, num_indexes, sizeof(indexes[0]), vp_compare_distance);
+}
+
+/*
+ * Usually it should pick farthest point, but picking most popular point seems to make search quicker anyway
+ */
+static int vp_find_best_vantage_point_index(vp_sort_tmp indexes[], int num_indexes, const colormap_item items[]) {
+    int best = 0;
+    float best_popularity = items[indexes[0].idx].popularity;
+    for(int i = 1; i < num_indexes; i++) {
+        if (items[indexes[i].idx].popularity > best_popularity) {
+            best_popularity = items[indexes[i].idx].popularity;
+            best = i;
+        }
+    }
+    return best;
+}
+
+static vp_node *vp_create_node(mempoolptr *m, vp_sort_tmp indexes[], int num_indexes, const colormap_item items[]) {
+    if (num_indexes <= 0) {
+        return NULL;
+    }
+
+    vp_node *node = mempool_alloc(m, sizeof(node[0]), 0); // NOTE(review): result is not checked for NULL
+
+    if (num_indexes == 1) {
+        *node = (vp_node){
+            .vantage_point = items[indexes[0].idx].acolor,
+            .idx = indexes[0].idx,
+            .radius = MAX_DIFF,
+            .radius_squared = MAX_DIFF,
+        };
+        return node;
+    }
+
+    const int ref = vp_find_best_vantage_point_index(indexes, num_indexes, items);
+    const int ref_idx = indexes[ref].idx;
+
+    // Removes the `ref_idx` item from remaining items, because it's included in the current node
+    num_indexes -= 1;
+    indexes[ref] = indexes[num_indexes];
+
+    vp_sort_indexes_by_distance(items[ref_idx].acolor, indexes, num_indexes, items);
+
+    // Remaining items are split by the median distance
+    const int half_idx = num_indexes/2;
+
+    *node = (vp_node){
+        .vantage_point = items[ref_idx].acolor,
+        .idx = ref_idx,
+        .radius = sqrtf(indexes[half_idx].distance_squared),
+        .radius_squared = indexes[half_idx].distance_squared,
+    };
+    if (num_indexes < 7) { // few items left: store them as a flat list that is scanned linearly
+        node->rest = mempool_alloc(m, sizeof(node->rest[0]) * num_indexes, 0);
+        node->restcount = num_indexes;
+        for(int i=0; i < num_indexes; i++) {
+            node->rest[i].idx = indexes[i].idx;
+            node->rest[i].color = items[indexes[i].idx].acolor;
+        }
+    } else {
+        node->near = vp_create_node(m, indexes, half_idx, items);
+        node->far = vp_create_node(m, &indexes[half_idx], num_indexes - half_idx, items);
+    }
+
+    return node;
+}
+
+LIQ_PRIVATE struct nearest_map *nearest_init(const colormap *map) {
+    mempoolptr m = NULL;
+    struct nearest_map *handle = mempool_create(&m, sizeof(handle[0]), sizeof(handle[0]) + sizeof(vp_node)*map->colors+16, map->malloc, map->free); // NOTE(review): not checked for NULL
+
+    LIQ_ARRAY(vp_sort_tmp, indexes, map->colors);
+
+    for(unsigned int i=0; i < map->colors; i++) {
+        indexes[i].idx = i;
+    }
+
+    vp_node *root = vp_create_node(&m, indexes, map->colors, map->palette);
+    *handle = (struct nearest_map){
+        .root = root,
+        .palette = map->palette,
+        .mempool = m,
+    };
+
+    for(unsigned int i=0; i < map->colors; i++) {
+        vp_search_tmp best = {
+            .distance = MAX_DIFF,
+            .distance_squared = MAX_DIFF,
+            .exclude = i,
+        };
+        vp_search_node(root, &map->palette[i].acolor, &best);
+        handle->nearest_other_color_dist[i] = best.distance * best.distance / 4.0; // square of half the distance
+    }
+
+    return handle;
+}
+
+static void vp_search_node(const vp_node *node, const f_pixel *const needle, vp_search_tmp *const best_candidate) {
+    do {
+        const float distance_squared = colordifference(node->vantage_point, *needle);
+        const float distance = sqrtf(distance_squared);
+
+        if (distance_squared < best_candidate->distance_squared && best_candidate->exclude != node->idx) {
+            best_candidate->distance = distance;
+            best_candidate->distance_squared = distance_squared;
+            best_candidate->idx = node->idx;
+        }
+
+        if (node->restcount) {
+            for(int i=0; i < node->restcount; i++) {
+                const float distance_squared = colordifference(node->rest[i].color, *needle);
+                if (distance_squared < best_candidate->distance_squared && best_candidate->exclude != node->rest[i].idx) {
+                    best_candidate->distance = sqrtf(distance_squared);
+                    best_candidate->distance_squared = distance_squared;
+                    best_candidate->idx = node->rest[i].idx;
+                }
+            }
+            return;
+        }
+
+        // Recurse towards most likely candidate first to narrow best candidate's distance as soon as possible
+        if (distance_squared < node->radius_squared) {
+            if (node->near) {
+                vp_search_node(node->near, needle, best_candidate);
+            }
+            // The best node (final answer) may be just outside the radius, but not farther than
+            // the best distance we know so far. The vp_search_node above should have narrowed
+            // best_candidate->distance, so this path is rarely taken.
+            if (node->far && distance >= node->radius - best_candidate->distance) {
+                node = node->far; // Fast tail recursion
+            } else {
+                return;
+            }
+        } else {
+            if (node->far) {
+                vp_search_node(node->far, needle, best_candidate);
+            }
+            if (node->near && distance <= node->radius + best_candidate->distance) {
+                node = node->near; // Fast tail recursion
+            } else {
+                return;
+            }
+        }
+    } while(true);
+}
+
+LIQ_PRIVATE unsigned int nearest_search(const struct nearest_map *handle, const f_pixel *px, const int likely_colormap_index, float *diff) {
+    const float guess_diff = colordifference(handle->palette[likely_colormap_index].acolor, *px);
+    if (guess_diff < handle->nearest_other_color_dist[likely_colormap_index]) { // closer than half-way to any other entry: accept the guess without searching
+        if (diff) *diff = guess_diff;
+        return likely_colormap_index;
+    }
+
+    vp_search_tmp best_candidate = {
+        .distance = sqrtf(guess_diff),
+        .distance_squared = guess_diff,
+        .idx = likely_colormap_index,
+        .exclude = -1,
+    };
+    vp_search_node(handle->root, px, &best_candidate);
+    if (diff) {
+        *diff = best_candidate.distance * best_candidate.distance;
+    }
+    return best_candidate.idx;
+}
+
+LIQ_PRIVATE void nearest_free(struct nearest_map *centroids)
+{
+    mempool_destroy(centroids->mempool);
+}
diff --git a/src/platform/gba/packer/libimagequant/nearest.h
b/src/platform/gba/packer/libimagequant/nearest.h
new file mode 100644
index 0000000..10a0a2c
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/nearest.h
@@ -0,0 +1,14 @@
+//
+//  nearest.h
+//  pngquant
+//
+
+#ifndef NEAREST_H
+#define NEAREST_H
+
+struct nearest_map;
+LIQ_PRIVATE struct nearest_map *nearest_init(const colormap *palette);
+LIQ_PRIVATE unsigned int nearest_search(const struct nearest_map *map, const f_pixel *px, const int palette_index_guess, float *diff);
+LIQ_PRIVATE void nearest_free(struct nearest_map *map);
+
+#endif
diff --git a/src/platform/gba/packer/libimagequant/pam.c b/src/platform/gba/packer/libimagequant/pam.c
new file mode 100644
index 0000000..6e36222
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/pam.c
@@ -0,0 +1,298 @@
+/* pam.c - pam (portable alpha map) utility library
+**
+** © 2009-2017 by Kornel Lesiński.
+** © 1989, 1991 by Jef Poskanzer.
+** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider.
+**
+** See COPYRIGHT file for license.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "mempool.h"
+
+/* Adds every pixel of `pixels` (rows x cols) to the hash table, posterized by acht->ignorebits; returns false as soon as pam_add_to_hash() refuses a color. */
+LIQ_PRIVATE bool pam_computeacolorhash(struct acolorhash_table *acht, const rgba_pixel *const pixels[], unsigned int cols, unsigned int rows, const unsigned char *importance_map)
+{
+    const unsigned int ignorebits = acht->ignorebits;
+    const unsigned int channel_mask = 255U>>ignorebits<<ignorebits;
+    const unsigned int channel_hmask = (255U>>ignorebits) ^ 0xFFU;
+    const unsigned int posterize_mask = channel_mask << 24 | channel_mask << 16 | channel_mask << 8 | channel_mask;
+    const unsigned int posterize_high_mask = channel_hmask << 24 | channel_hmask << 16 | channel_hmask << 8 | channel_hmask;
+
+    const unsigned int hash_size = acht->hash_size;
+
+    /* Go through the entire image, building a hash table of colors. */
+    for(unsigned int row = 0; row < rows; ++row) {
+
+        for(unsigned int col = 0; col < cols; ++col) {
+            unsigned int boost;
+
+            // RGBA color is reinterpreted as an unsigned int (via union) for easier hashing/comparisons
+            union rgba_as_int px = {pixels[row][col]};
+            unsigned int hash;
+            if (!px.rgba.a) {
+                // "dirty alpha" has different RGBA values that end up being the same fully transparent color
+                px.l=0; hash=0;
+
+                boost = 2000;
+                if (importance_map) {
+                    importance_map++;
+                }
+            } else {
+                // mask posterizes all 4 channels in one go
+                px.l = (px.l & posterize_mask) | ((px.l & posterize_high_mask) >> (8-ignorebits));
+                // fancier hashing algorithms didn't improve much
+                hash = px.l % hash_size;
+
+                if (importance_map) {
+                    boost = *importance_map++;
+                } else {
+                    boost = 255;
+                }
+            }
+
+            if (!pam_add_to_hash(acht, hash, boost, px, row, rows)) {
+                return false;
+            }
+        }
+
+    }
+    acht->cols = cols;
+    acht->rows += rows;
+    return true;
+}
+
+LIQ_PRIVATE bool pam_add_to_hash(struct acolorhash_table *acht, unsigned int hash, unsigned int boost, union rgba_as_int px, unsigned int row, unsigned int rows)
+{
+    /* head of the hash function stores first 2 colors inline (achl->used = 1..2),
+       to reduce number of allocations of achl->other_items.
+     */
+    struct acolorhist_arr_head *achl = &acht->buckets[hash];
+    if (achl->inline1.color.l == px.l && achl->used) {
+        achl->inline1.perceptual_weight += boost;
+        return true;
+    }
+    if (achl->used) {
+        if (achl->used > 1) {
+            if (achl->inline2.color.l == px.l) {
+                achl->inline2.perceptual_weight += boost;
+                return true;
+            }
+            // other items are stored as an array (which gets reallocated if needed)
+            struct acolorhist_arr_item *other_items = achl->other_items;
+            unsigned int i = 0;
+            for (; i < achl->used-2; i++) {
+                if (other_items[i].color.l == px.l) {
+                    other_items[i].perceptual_weight += boost;
+                    return true;
+                }
+            }
+
+            // the array was allocated with spare items
+            if (i < achl->capacity) {
+                other_items[i] = (struct acolorhist_arr_item){
+                    .color = px,
+                    .perceptual_weight = boost,
+                };
+                achl->used++;
+                ++acht->colors;
+                return true;
+            }
+
+            if (++acht->colors > acht->maxcolors) {
+                return false;
+            }
+
+            struct acolorhist_arr_item *new_items;
+            unsigned int capacity;
+            if (!other_items) { // there was no array previously, alloc "small" array
+                capacity = 8;
+                if (acht->freestackp <= 0) {
+                    // estimate how many colors are going to be + headroom
+                    const size_t mempool_size = ((acht->rows + rows-row) * 2 * acht->colors / (acht->rows + row + 1) + 1024) * sizeof(struct acolorhist_arr_item);
+                    new_items = mempool_alloc(&acht->mempool, sizeof(struct acolorhist_arr_item)*capacity, mempool_size);
+                    if (!new_items) return false; // out of memory: report failure instead of writing through NULL below
+                } else {
+                    // freestack stores previously freed (reallocated) arrays that can be reused
+                    // (all pessimistically assumed to be capacity = 8)
+                    new_items = acht->freestack[--acht->freestackp];
+                }
+            } else {
+                const unsigned int stacksize = sizeof(acht->freestack)/sizeof(acht->freestack[0]);
+
+                // simply reallocs and copies array to larger capacity
+                capacity = achl->capacity*2 + 16;
+                if (acht->freestackp < stacksize-1) {
+                    acht->freestack[acht->freestackp++] = other_items;
+                }
+                const size_t mempool_size = ((acht->rows + rows-row) * 2 * acht->colors / (acht->rows + row + 1) + 32*capacity) * sizeof(struct acolorhist_arr_item);
+                new_items = mempool_alloc(&acht->mempool, sizeof(struct acolorhist_arr_item)*capacity, mempool_size);
+                if (!new_items) return false;
+                memcpy(new_items, other_items, sizeof(other_items[0])*achl->capacity);
+            }
+
+            achl->other_items = new_items;
+            achl->capacity = capacity;
+            new_items[i] = (struct acolorhist_arr_item){
+                .color = px,
+                .perceptual_weight = boost,
+            };
+            achl->used++;
+        } else {
+            // these are elses for first checks whether first and second inline-stored colors are used
+            achl->inline2.color.l = px.l;
+            achl->inline2.perceptual_weight = boost;
+            achl->used = 2;
+            ++acht->colors;
+        }
+    } else {
+        achl->inline1.color.l = px.l;
+        achl->inline1.perceptual_weight = boost;
+        achl->used = 1;
+        ++acht->colors;
+    }
+    return true;
+}
+
+LIQ_PRIVATE struct acolorhash_table *pam_allocacolorhash(unsigned int maxcolors, unsigned int surface, unsigned int ignorebits, void* (*malloc)(size_t), void (*free)(void*))
+{
+    const size_t estimated_colors = MIN(maxcolors, surface/(ignorebits + (surface > 512*512 ? 6 : 5)));
+    const size_t hash_size = estimated_colors < 66000 ? 6673 : (estimated_colors < 200000 ? 12011 : 24019);
+
+    mempoolptr m = NULL;
+    const size_t buckets_size = hash_size * sizeof(struct acolorhist_arr_head);
+    const size_t mempool_size = sizeof(struct acolorhash_table) + buckets_size + estimated_colors * sizeof(struct acolorhist_arr_item);
+    struct acolorhash_table *t = mempool_create(&m, sizeof(*t) + buckets_size, mempool_size, malloc, free);
+    if (!t) return NULL;
+    *t = (struct acolorhash_table){
+        .mempool = m,
+        .hash_size = hash_size,
+        .maxcolors = maxcolors,
+        .ignorebits = ignorebits,
+    };
+    memset(t->buckets, 0, buckets_size);
+    return t;
+}
+
+ALWAYS_INLINE static float pam_add_to_hist(const float *gamma_lut, hist_item *achv, unsigned int *j, const struct acolorhist_arr_item *entry, const float max_perceptual_weight)
+{
+    if (entry->perceptual_weight == 0) {
+        return 0;
+    }
+    const float w = MIN(entry->perceptual_weight/128.f, max_perceptual_weight);
+    achv[*j].adjusted_weight = achv[*j].perceptual_weight = w;
+    achv[*j].acolor = rgba_to_f(gamma_lut, entry->color.rgba);
+    *j += 1;
+    return w;
+}
+
+/* Flattens the hash table into a newly allocated histogram of gamma-converted colors; returns NULL when acht is NULL, an allocation fails, or no color has any weight. */
+LIQ_PRIVATE histogram *pam_acolorhashtoacolorhist(const struct acolorhash_table *acht, const double gamma, void* (*malloc)(size_t), void (*free)(void*))
+{
+    if (!acht) return NULL; // checked before allocating, so nothing leaks when there is no table
+    histogram *hist = malloc(sizeof(hist[0]));
+    if (!hist) return NULL;
+    *hist = (histogram){
+        .achv = malloc(MAX(1,acht->colors) * sizeof(hist->achv[0])),
+        .size = acht->colors,
+        .free = free,
+        .ignorebits = acht->ignorebits,
+    };
+    if (!hist->achv) {
+        free(hist); // release the header that was just allocated with the matching allocator
+        return NULL;
+    }
+
+    float gamma_lut[256];
+    to_f_set_gamma(gamma_lut, gamma);
+
+    /* Limit perceptual weight to 1/10th of the image surface area to prevent
+       a single color from dominating all others. */
+    float max_perceptual_weight = 0.1f * acht->cols * acht->rows;
+    double total_weight = 0;
+
+    unsigned int j=0;
+    for(unsigned int i=0; i < acht->hash_size; ++i) {
+        const struct acolorhist_arr_head *const achl = &acht->buckets[i];
+        if (achl->used) {
+            total_weight += pam_add_to_hist(gamma_lut, hist->achv, &j, &achl->inline1, max_perceptual_weight);
+
+            if (achl->used > 1) {
+                total_weight += pam_add_to_hist(gamma_lut, hist->achv, &j, &achl->inline2, max_perceptual_weight);
+
+                for(unsigned int k=0; k < achl->used-2; k++) {
+                    total_weight += pam_add_to_hist(gamma_lut, hist->achv, &j, &achl->other_items[k], max_perceptual_weight);
+                }
+            }
+        }
+    }
+    hist->size = j;
+    hist->total_perceptual_weight = total_weight;
+    for(unsigned int k=0; k < hist->size; k++) {
+        hist->achv[k].tmp.likely_colormap_index = 0;
+    }
+    if (!j) {
+        pam_freeacolorhist(hist);
+        return NULL;
+    }
+    return hist;
+}
+
+
+LIQ_PRIVATE void pam_freeacolorhash(struct acolorhash_table *acht)
+{
+    if (acht) {
+        mempool_destroy(acht->mempool);
+    }
+}
+
+LIQ_PRIVATE void pam_freeacolorhist(histogram *hist)
+{
+    hist->free(hist->achv);
+    hist->free(hist);
+}
+
+LIQ_PRIVATE colormap *pam_colormap(unsigned int colors, void* (*malloc)(size_t), void (*free)(void*))
+{
+    assert(colors > 0 && colors < 65536);
+
+    colormap *map;
+    const size_t colors_size = colors * sizeof(map->palette[0]);
+    map = malloc(sizeof(colormap) + colors_size);
+    if (!map) return NULL;
+    *map = (colormap){
+        .malloc = malloc,
+        .free = free,
+        .colors = colors,
+    };
+    memset(map->palette, 0, colors_size);
+    return map;
+}
+
+/* Returns a copy of `map` made with the map's own allocator, or NULL if the allocation fails. */
+LIQ_PRIVATE colormap *pam_duplicate_colormap(colormap *map)
+{
+    colormap *dupe = pam_colormap(map->colors, map->malloc, map->free);
+    if (!dupe) return NULL;
+    for(unsigned int i=0; i < map->colors; i++) {
+        dupe->palette[i] = map->palette[i];
+    }
+    return dupe;
+}
+
+LIQ_PRIVATE void pam_freecolormap(colormap *c)
+{
+    c->free(c);
+}
+
+LIQ_PRIVATE void to_f_set_gamma(float gamma_lut[], const double gamma)
+{
+    for(int i=0; i < 256; i++) {
+        gamma_lut[i] = pow((double)i/255.0, internal_gamma/gamma);
+    }
+}
+
diff --git a/src/platform/gba/packer/libimagequant/pam.h b/src/platform/gba/packer/libimagequant/pam.h
new file mode 100644
index 0000000..5a7d50e
--- /dev/null
+++ b/src/platform/gba/packer/libimagequant/pam.h
@@ -0,0 +1,283 @@
+/* pam.h - pam (portable alpha map) utility library
+ **
+ ** Colormap routines.
+ **
+ ** Copyright (C) 1989, 1991 by Jef Poskanzer.
+ ** Copyright (C) 1997 by Greg Roelofs.
+ **
+ ** Permission to use, copy, modify, and distribute this software and its
+ ** documentation for any purpose and without fee is hereby granted, provided
+ ** that the above copyright notice appear in all copies and that both that
+ ** copyright notice and this permission notice appear in supporting
+ ** documentation.  This software is provided "as is" without express or
+ ** implied warranty.
+ */
+
+#ifndef PAM_H
+#define PAM_H
+
+// accidental debug assertions make color search much slower,
+// so force assertions off if there's no explicit setting
+#if !defined(NDEBUG) && !defined(DEBUG)
+#define NDEBUG
+#endif
+
+#include <math.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#ifndef MAX
+#  define MAX(a,b)  ((a) > (b)? (a) : (b))
+#  define MIN(a,b)  ((a) < (b)? (a) : (b))
+#endif
+
+#define MAX_DIFF 1e20
+
+#ifndef USE_SSE
+#  if defined(__SSE__) && (defined(__amd64__) || defined(__X86_64__) || defined(_WIN64) || defined(WIN32) || defined(__WIN32__))
+#    define USE_SSE 1
+#  else
+#    define USE_SSE 0
+#  endif
+#endif
+
+#if USE_SSE
+#  include <xmmintrin.h>
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#    define SSE_ALIGN
+#  else
+#    define SSE_ALIGN __attribute__ ((aligned (16)))
+#    if defined(__i386__) && defined(__PIC__)
+#       define cpuid(func,ax,bx,cx,dx)\
+        __asm__ __volatile__ ( \
+        "push %%ebx\n" \
+        "cpuid\n" \
+        "mov %%ebx, %1\n" \
+        "pop %%ebx\n" \
+        : "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx) \
+        : "a" (func));
+#    else
+#       define cpuid(func,ax,bx,cx,dx)\
+        __asm__ __volatile__ ("cpuid":\
+        "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
+#    endif
+#endif
+#else
+#  define SSE_ALIGN
+#endif
+
+#ifndef _MSC_VER
+#define LIQ_ARRAY(type, var, count) type var[count]
+#else
+#define LIQ_ARRAY(type, var, count) type* var = (type*)_alloca(sizeof(type)*(count))
+#endif
+
+#if defined(__GNUC__) || defined (__llvm__)
+#define ALWAYS_INLINE __attribute__((always_inline)) inline
+#define NEVER_INLINE __attribute__ ((noinline))
+#elif defined(_MSC_VER)
+#define inline __inline
+#define restrict __restrict
+#define ALWAYS_INLINE __forceinline
+#define NEVER_INLINE __declspec(noinline)
+#else
+#define ALWAYS_INLINE inline
+#define NEVER_INLINE
+#endif
+
+/* from pam.h */
+
+typedef struct {
+    unsigned char r, g, b, a;
+} rgba_pixel;
+
+typedef struct {
+    float a, r, g, b;
+} SSE_ALIGN f_pixel;
+
+static const float internal_gamma = 0.5499f;
+
+LIQ_PRIVATE void to_f_set_gamma(float gamma_lut[], const double gamma);
+
+/**
+ Converts 8-bit color to internal gamma and premultiplied alpha.
+ (premultiplied color space is much better for blending of semitransparent colors)
+ */
+ALWAYS_INLINE static f_pixel rgba_to_f(const float gamma_lut[], const rgba_pixel px);
+inline static f_pixel rgba_to_f(const float gamma_lut[], const rgba_pixel px)
+{
+    float a = px.a/255.f;
+
+    return (f_pixel) {
+        .a = a,
+        .r = gamma_lut[px.r]*a,
+        .g = gamma_lut[px.g]*a,
+        .b = gamma_lut[px.b]*a,
+    };
+}
+
+inline static rgba_pixel f_to_rgb(const float gamma, const f_pixel px) // reverse of rgba_to_f: un-premultiplies, applies output gamma, quantizes to 8 bits
+{
+    if (px.a < 1.f/256.f) { // alpha would round down to 0: return fully transparent black and avoid dividing by ~0
+        return (rgba_pixel){0,0,0,0};
+    }
+
+    float r = px.r / px.a,
+          g = px.g / px.a,
+          b = px.b / px.a,
+          a = px.a;
+
+    r = powf(r, gamma/internal_gamma);
+    g = powf(g, gamma/internal_gamma);
+    b = powf(b, gamma/internal_gamma);
+
+    // 256, because numbers are in range 1..255.9999… rounded down
+    r *= 256.f;
+    g *= 256.f;
+    b *= 256.f;
+    a *= 256.f;
+
+    return (rgba_pixel){
+        .r = r>=255.f ? 255 : r,
+        .g = g>=255.f ? 255 : g,
+        .b = b>=255.f ? 255 : b,
+        .a = a>=255.f ? 255 : a,
+    };
+}
+
+ALWAYS_INLINE static double colordifference_ch(const double x, const double y, const double alphas);
+inline static double colordifference_ch(const double x, const double y, const double alphas)
+{
+    // maximum of channel blended on white, and blended on black
+    // premultiplied alpha and backgrounds 0/1 shorten the formula
+    const double black = x-y, white = black+alphas;
+    return MAX(black*black, white*white);
+}
+
+ALWAYS_INLINE static float colordifference_stdc(const f_pixel px, const f_pixel py);
+inline static float colordifference_stdc(const f_pixel px, const f_pixel py)
+{
+    // px_b.rgb = px.rgb + 0*(1-px.a) // blend px on black
+    // px_b.a   = px.a   + 1*(1-px.a)
+    // px_w.rgb = px.rgb + 1*(1-px.a) // blend px on white
+    // px_w.a   = px.a   + 1*(1-px.a)
+
+    // px_b.rgb = px.rgb              // difference same as in opaque RGB
+    // px_b.a   = 1
+    // px_w.rgb = px.rgb - px.a       // difference simplifies to formula below
+    // px_w.a   = 1
+
+    // (px.rgb - px.a) - (py.rgb - py.a)
+    // (px.rgb - py.rgb) + (py.a - px.a)
+
+    const double alphas = py.a-px.a;
+    return colordifference_ch(px.r, py.r, alphas) +
+           colordifference_ch(px.g, py.g, alphas) +
+           colordifference_ch(px.b, py.b, alphas);
+}
+
+ALWAYS_INLINE static float colordifference(f_pixel px, f_pixel py);
+inline static float colordifference(f_pixel px, f_pixel py) // the SSE path must agree with colordifference_stdc (asserted below when assertions are enabled)
+{
+#if USE_SSE
+    const __m128 vpx = _mm_load_ps((const float*)&px);
+    const __m128 vpy = _mm_load_ps((const float*)&py);
+
+    // y.a - x.a
+    __m128 alphas = _mm_sub_ss(vpy, vpx);
+    alphas = _mm_shuffle_ps(alphas,alphas,0); // copy first to all four
+
+    __m128 onblack = _mm_sub_ps(vpx, vpy); // x - y
+    __m128 onwhite = _mm_add_ps(onblack, alphas); // x - y + (y.a - x.a)
+
+    onblack = _mm_mul_ps(onblack, onblack);
+    onwhite = _mm_mul_ps(onwhite, onwhite);
+    const __m128 max = _mm_max_ps(onwhite, onblack);
+
+    // add rgb, not a
+    const __m128 maxhl = _mm_movehl_ps(max, max);
+    const __m128 tmp = _mm_add_ps(max, maxhl);
+    const __m128 sum = _mm_add_ss(maxhl, _mm_shuffle_ps(tmp, tmp, 1));
+
+    const float res = _mm_cvtss_f32(sum);
+    assert(fabs(res - colordifference_stdc(px,py)) < 0.001);
+    return res;
+#else
+    return colordifference_stdc(px,py);
+#endif
+}
+
+/* from pamcmap.h */
+union rgba_as_int {
+    rgba_pixel rgba;
+    unsigned int l;
+};
+
+typedef struct {
+    f_pixel acolor;
+    float adjusted_weight,   // perceptual weight changed to tweak how mediancut selects colors
+          perceptual_weight; // number of pixels weighted by importance of different areas of the picture
+
+    float color_weight;      // these two change every time histogram subset is sorted
+    union {
+        unsigned int sort_value;
+        unsigned char likely_colormap_index;
+    } tmp;
+} hist_item;
+
+typedef struct {
+    hist_item *achv;
+    void (*free)(void*);
+    double total_perceptual_weight;
+    unsigned int size;
+    unsigned int ignorebits;
+} histogram;
+
+typedef struct {
+    f_pixel acolor;
+    float popularity;
+    bool fixed; // if true it's user-supplied and must not be changed (e.g. in K-Means iteration)
+} colormap_item;
+
+typedef struct colormap {
+    unsigned int colors;
+    void* (*malloc)(size_t);
+    void (*free)(void*);
+    colormap_item palette[];
+} colormap;
+
+struct acolorhist_arr_item {
+    union rgba_as_int color;
+    unsigned int perceptual_weight;
+};
+
+struct acolorhist_arr_head {
+    struct acolorhist_arr_item inline1, inline2;
+    unsigned int used, capacity; // used: colors stored in this bucket (inline ones included); capacity: allocated length of other_items
+    struct acolorhist_arr_item *other_items;
+};
+
+struct acolorhash_table {
+    struct mempool *mempool;
+    unsigned int ignorebits, maxcolors, colors, cols, rows;
+    unsigned int hash_size;
+    unsigned int freestackp;
+    struct acolorhist_arr_item *freestack[512]; // arrays released when a bucket outgrew them, kept for reuse by pam_add_to_hash
+    struct acolorhist_arr_head buckets[];
+};
+
+LIQ_PRIVATE void pam_freeacolorhash(struct acolorhash_table *acht);
+LIQ_PRIVATE struct acolorhash_table *pam_allocacolorhash(unsigned int maxcolors, unsigned int surface, unsigned int ignorebits, void* (*malloc)(size_t), void (*free)(void*));
+LIQ_PRIVATE histogram *pam_acolorhashtoacolorhist(const struct acolorhash_table *acht, const double gamma, void* (*malloc)(size_t), void (*free)(void*));
+LIQ_PRIVATE bool pam_computeacolorhash(struct acolorhash_table *acht, const rgba_pixel *const pixels[], unsigned int cols, unsigned int rows, const unsigned char *importance_map);
+LIQ_PRIVATE bool pam_add_to_hash(struct acolorhash_table *acht, unsigned int hash, unsigned int boost, union rgba_as_int px, unsigned int row, unsigned int rows);
+
+LIQ_PRIVATE void pam_freeacolorhist(histogram *h);
+
+LIQ_PRIVATE colormap *pam_colormap(unsigned int colors, void* (*malloc)(size_t), void (*free)(void*));
+LIQ_PRIVATE colormap *pam_duplicate_colormap(colormap *map);
+LIQ_PRIVATE void pam_freecolormap(colormap *c);
+
+#endif
diff --git a/src/platform/gba/packer/stb_image_resize.h b/src/platform/gba/packer/stb_image_resize.h
new file mode 100644
index 0000000..bcca92c
--- /dev/null
+++ b/src/platform/gba/packer/stb_image_resize.h
@@ -0,0 +1,2585 @@
+/* stb_image_resize - v0.90 - public domain image resizing
+   by Jorge L
Rodriguez (@VinoBS) - 2014 + http://github.com/nothings/stb + + Written with emphasis on usability, portability, and efficiency. (No + SIMD or threads, so it be easily outperformed by libs that use those.) + Only scaling and translation is supported, no rotations or shears. + Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation. + + COMPILING & LINKING + In one C/C++ file that #includes this file, do this: + #define STB_IMAGE_RESIZE_IMPLEMENTATION + before the #include. That will create the implementation in that file. + + QUICKSTART + stbir_resize_uint8( input_pixels , in_w , in_h , 0, + output_pixels, out_w, out_h, 0, num_channels) + stbir_resize_float(...) + stbir_resize_uint8_srgb( input_pixels , in_w , in_h , 0, + output_pixels, out_w, out_h, 0, + num_channels , alpha_chan , 0) + stbir_resize_uint8_srgb_edgemode( + input_pixels , in_w , in_h , 0, + output_pixels, out_w, out_h, 0, + num_channels , alpha_chan , 0, STBIR_EDGE_CLAMP) + // WRAP/REFLECT/ZERO + + FULL API + See the "header file" section of the source for API documentation. + + ADDITIONAL DOCUMENTATION + + SRGB & FLOATING POINT REPRESENTATION + The sRGB functions presume IEEE floating point. If you do not have + IEEE floating point, define STBIR_NON_IEEE_FLOAT. This will use + a slower implementation. + + MEMORY ALLOCATION + The resize functions here perform a single memory allocation using + malloc. To control the memory allocation, before the #include that + triggers the implementation, do: + + #define STBIR_MALLOC(size,context) ... + #define STBIR_FREE(ptr,context) ... + + Each resize function makes exactly one call to malloc/free, so to use + temp memory, store the temp memory in the context and return that. + + ASSERT + Define STBIR_ASSERT(boolval) to override assert() and not use assert.h + + OPTIMIZATION + Define STBIR_SATURATE_INT to compute clamp values in-range using + integer operations instead of float operations. This may be faster + on some platforms. 
+ + DEFAULT FILTERS + For functions which don't provide explicit control over what filters + to use, you can change the compile-time defaults with + + #define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_something + #define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_something + + See stbir_filter in the header-file section for the list of filters. + + NEW FILTERS + A number of 1D filter kernels are used. For a list of + supported filters see the stbir_filter enum. To add a new filter, + write a filter function and add it to stbir__filter_info_table. + + PROGRESS + For interactive use with slow resize operations, you can install + a progress-report callback: + + #define STBIR_PROGRESS_REPORT(val) some_func(val) + + The parameter val is a float which goes from 0 to 1 as progress is made. + + For example: + + static void my_progress_report(float progress); + #define STBIR_PROGRESS_REPORT(val) my_progress_report(val) + + #define STB_IMAGE_RESIZE_IMPLEMENTATION + #include "stb_image_resize.h" + + static void my_progress_report(float progress) + { + printf("Progress: %f%%\n", progress*100); + } + + MAX CHANNELS + If your image has more than 64 channels, define STBIR_MAX_CHANNELS + to the max you'll have. + + ALPHA CHANNEL + Most of the resizing functions provide the ability to control how + the alpha channel of an image is processed. The important things + to know about this: + + 1. The best mathematically-behaved version of alpha to use is + called "premultiplied alpha", in which the other color channels + have had the alpha value multiplied in. If you use premultiplied + alpha, linear filtering (such as image resampling done by this + library, or performed in texture units on GPUs) does the "right + thing". While premultiplied alpha is standard in the movie CGI + industry, it is still uncommon in the videogame/real-time world. + + If you linearly filter non-premultiplied alpha, strange effects + occur. 
(For example, the average of 1% opaque bright green + and 99% opaque black produces 50% transparent dark green when + non-premultiplied, whereas premultiplied it produces 50% + transparent near-black. The former introduces green energy + that doesn't exist in the source image.) + + 2. Artists should not edit premultiplied-alpha images; artists + want non-premultiplied alpha images. Thus, art tools generally output + non-premultiplied alpha images. + + 3. You will get best results in most cases by converting images + to premultiplied alpha before processing them mathematically. + + 4. If you pass the flag STBIR_FLAG_ALPHA_PREMULTIPLIED, the + resizer does not do anything special for the alpha channel; + it is resampled identically to other channels. This produces + the correct results for premultiplied-alpha images, but produces + less-than-ideal results for non-premultiplied-alpha images. + + 5. If you do not pass the flag STBIR_FLAG_ALPHA_PREMULTIPLIED, + then the resizer weights the contribution of input pixels + based on their alpha values, or, equivalently, it multiplies + the alpha value into the color channels, resamples, then divides + by the resultant alpha value. Input pixels which have alpha=0 do + not contribute at all to output pixels unless _all_ of the input + pixels affecting that output pixel have alpha=0, in which case + the result for that pixel is the same as it would be without + STBIR_FLAG_ALPHA_PREMULTIPLIED. However, this is only true for + input images in integer formats. For input images in float format, + input pixels with alpha=0 have no effect, and output pixels + which have alpha=0 will be 0 in all channels. (For float images, + you can manually achieve the same result by adding a tiny epsilon + value to the alpha channel of every image, and then subtracting + or clamping it at the end.) + + 6. You can suppress the behavior described in #5 and make + all-0-alpha pixels have 0 in all channels by #defining + STBIR_NO_ALPHA_EPSILON. + + 7. 
You can separately control whether the alpha channel is + interpreted as linear or affected by the colorspace. By default + it is linear; you almost never want to apply the colorspace. + (For example, graphics hardware does not apply sRGB conversion + to the alpha channel.) + + ADDITIONAL CONTRIBUTORS + Sean Barrett: API design, optimizations + + REVISIONS + 0.90 (2014-09-17) first released version + + LICENSE + This software is in the public domain. Where that dedication is not + recognized, you are granted a perpetual, irrevocable license to copy + and modify this file as you see fit. + + TODO + Don't decode all of the image data when only processing a partial tile + Don't use full-width decode buffers when only processing a partial tile + When processing wide images, break processing into tiles so data fits in L1 cache + Installable filters? + Resize that respects alpha test coverage + (Reference code: FloatImage::alphaTestCoverage and FloatImage::scaleAlphaToCoverage: + https://code.google.com/p/nvidia-texture-tools/source/browse/trunk/src/nvimage/FloatImage.cpp ) +*/ + +#ifndef STBIR_INCLUDE_STB_IMAGE_RESIZE_H +#define STBIR_INCLUDE_STB_IMAGE_RESIZE_H + +#ifdef _MSC_VER +typedef unsigned char stbir_uint8; +typedef unsigned short stbir_uint16; +typedef unsigned int stbir_uint32; +#else +#include +typedef uint8_t stbir_uint8; +typedef uint16_t stbir_uint16; +typedef uint32_t stbir_uint32; +#endif + +#ifdef STB_IMAGE_RESIZE_STATIC +#define STBIRDEF static +#else +#ifdef __cplusplus +#define STBIRDEF extern "C" +#else +#define STBIRDEF extern +#endif +#endif + + +////////////////////////////////////////////////////////////////////////////// +// +// Easy-to-use API: +// +// * "input pixels" points to an array of image data with 'num_channels' channels (e.g. RGB=3, RGBA=4) +// * input_w is input image width (x-axis), input_h is input image height (y-axis) +// * stride is the offset between successive rows of image data in memory, in bytes. 
you can +// specify 0 to mean packed continuously in memory +// * alpha channel is treated identically to other channels. +// * colorspace is linear or sRGB as specified by function name +// * returned result is 1 for success or 0 in case of an error. +// #define STBIR_ASSERT() to trigger an assert on parameter validation errors. +// * Memory required grows approximately linearly with input and output size, but with +// discontinuities at input_w == output_w and input_h == output_h. +// * These functions use a "default" resampling filter defined at compile time. To change the filter, +// you can change the compile-time defaults by #defining STBIR_DEFAULT_FILTER_UPSAMPLE +// and STBIR_DEFAULT_FILTER_DOWNSAMPLE, or you can use the medium-complexity API. + +STBIRDEF int stbir_resize_uint8( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels); + +STBIRDEF int stbir_resize_float( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + float *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels); + + +// The following functions interpret image data as gamma-corrected sRGB. +// Specify STBIR_ALPHA_CHANNEL_NONE if you have no alpha channel, +// or otherwise provide the index of the alpha channel. Flags value +// of 0 will probably do the right thing if you're not sure what +// the flags mean. + +#define STBIR_ALPHA_CHANNEL_NONE -1 + +// Set this flag if your texture has premultiplied alpha. Otherwise, stbir will +// use alpha-weighted resampling (effectively premultiplying, resampling, +// then unpremultiplying). +#define STBIR_FLAG_ALPHA_PREMULTIPLIED (1 << 0) +// The specified alpha channel should be handled as gamma-corrected value even +// when doing sRGB operations. 
+#define STBIR_FLAG_ALPHA_USES_COLORSPACE (1 << 1) + +STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags); + + +typedef enum +{ + STBIR_EDGE_CLAMP = 1, + STBIR_EDGE_REFLECT = 2, + STBIR_EDGE_WRAP = 3, + STBIR_EDGE_ZERO = 4, +} stbir_edge; + +// This function adds the ability to specify how requests to sample off the edge of the image are handled. +STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode); + +////////////////////////////////////////////////////////////////////////////// +// +// Medium-complexity API +// +// This extends the easy-to-use API as follows: +// +// * Alpha-channel can be processed separately +// * If alpha_channel is not STBIR_ALPHA_CHANNEL_NONE +// * Alpha channel will not be gamma corrected (unless flags&STBIR_FLAG_GAMMA_CORRECT) +// * Filters will be weighted by alpha channel (unless flags&STBIR_FLAG_ALPHA_PREMULTIPLIED) +// * Filter can be selected explicitly +// * uint16 image type +// * sRGB colorspace available for all types +// * context parameter for passing to STBIR_MALLOC + +typedef enum +{ + STBIR_FILTER_DEFAULT = 0, // use same filter type that easy-to-use API chooses + STBIR_FILTER_BOX = 1, // A trapezoid w/1-pixel wide ramps, same result as box for integer scale ratios + STBIR_FILTER_TRIANGLE = 2, // On upsampling, produces same results as bilinear texture filtering + STBIR_FILTER_CUBICBSPLINE = 3, // The cubic b-spline (aka Mitchell-Netrevalli with B=1,C=0), gaussian-esque + STBIR_FILTER_CATMULLROM = 4, // An interpolating cubic spline + STBIR_FILTER_MITCHELL = 5, // 
Mitchell-Netrevalli filter with B=1/3, C=1/3 +} stbir_filter; + +typedef enum +{ + STBIR_COLORSPACE_LINEAR, + STBIR_COLORSPACE_SRGB, + + STBIR_MAX_COLORSPACES, +} stbir_colorspace; + +// The following functions are all identical except for the type of the image data + +STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, + void *alloc_context); + +STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, + void *alloc_context); + +STBIRDEF int stbir_resize_float_generic( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + float *output_pixels , int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, + void *alloc_context); + + + +////////////////////////////////////////////////////////////////////////////// +// +// Full-complexity API +// +// This extends the medium API as follows: +// +// * uint32 image type +// * not typesafe +// * separate filter types for each axis +// * separate edge modes for each axis +// * can specify scale explicitly for subpixel correctness +// * can specify image source tile using texture coordinates + +typedef enum +{ + STBIR_TYPE_UINT8 , + STBIR_TYPE_UINT16, + STBIR_TYPE_UINT32, + STBIR_TYPE_FLOAT , + + STBIR_MAX_TYPES +} stbir_datatype; + +STBIRDEF int stbir_resize( const void *input_pixels , int input_w , int 
input_h , int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_datatype datatype, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, + stbir_filter filter_horizontal, stbir_filter filter_vertical, + stbir_colorspace space, void *alloc_context); + +STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_datatype datatype, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, + stbir_filter filter_horizontal, stbir_filter filter_vertical, + stbir_colorspace space, void *alloc_context, + float x_scale, float y_scale, + float x_offset, float y_offset); + +STBIRDEF int stbir_resize_region( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_datatype datatype, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, + stbir_filter filter_horizontal, stbir_filter filter_vertical, + stbir_colorspace space, void *alloc_context, + float s0, float t0, float s1, float t1); +// (s0, t0) & (s1, t1) are the top-left and bottom right corner (uv addressing style: [0, 1]x[0, 1]) of a region of the input image to use. + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBIR_INCLUDE_STB_IMAGE_RESIZE_H + + + + + +#ifdef STB_IMAGE_RESIZE_IMPLEMENTATION + +#ifndef STBIR_ASSERT +#include +#define STBIR_ASSERT(x) assert(x) +#endif + +#ifdef STBIR_DEBUG +#define STBIR__DEBUG_ASSERT STBIR_ASSERT +#else +#define STBIR__DEBUG_ASSERT +#endif + +// If you hit this it means I haven't done it yet. 
+#define STBIR__UNIMPLEMENTED(x) STBIR_ASSERT(!(x)) + +// For memset +#include + +#include + +#ifndef STBIR_MALLOC +#include +#define STBIR_MALLOC(size,c) malloc(size) +#define STBIR_FREE(ptr,c) free(ptr) +#endif + +#ifndef _MSC_VER +#ifdef __cplusplus +#define stbir__inline inline +#else +#define stbir__inline +#endif +#else +#define stbir__inline __forceinline +#endif + + +// should produce compiler error if size is wrong +typedef unsigned char stbir__validate_uint32[sizeof(stbir_uint32) == 4 ? 1 : -1]; + +#ifdef _MSC_VER +#define STBIR__NOTUSED(v) (void)(v) +#else +#define STBIR__NOTUSED(v) (void)sizeof(v) +#endif + +#define STBIR__ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0])) + +#ifndef STBIR_DEFAULT_FILTER_UPSAMPLE +#define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_CATMULLROM +#endif + +#ifndef STBIR_DEFAULT_FILTER_DOWNSAMPLE +#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_MITCHELL +#endif + +#ifndef STBIR_PROGRESS_REPORT +#define STBIR_PROGRESS_REPORT(float_0_to_1) +#endif + +#ifndef STBIR_MAX_CHANNELS +#define STBIR_MAX_CHANNELS 64 +#endif + +#if STBIR_MAX_CHANNELS > 65536 +#error "Too many channels; STBIR_MAX_CHANNELS must be no more than 65536." +// because we store the indices in 16-bit variables +#endif + +// This value is added to alpha just before premultiplication to avoid +// zeroing out color values. It is equivalent to 2^-80. If you don't want +// that behavior (it may interfere if you have floating point images with +// very small alpha values) then you can define STBIR_NO_ALPHA_EPSILON to +// disable it. 
// Four successive divisions by 2^20 yield 2^-80 as a float.
#ifndef STBIR_ALPHA_EPSILON
#define STBIR_ALPHA_EPSILON ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20))
#endif



// Silences unused-parameter warnings; the non-MSVC form does not evaluate v.
#ifdef _MSC_VER
#define STBIR__UNUSED_PARAM(v)  (void)(v)
#else
#define STBIR__UNUSED_PARAM(v)  (void)sizeof(v)
#endif

// must match stbir_datatype
// Size in bytes of one channel sample for each stbir_datatype value.
static unsigned char stbir__type_size[] = {
    1, // STBIR_TYPE_UINT8
    2, // STBIR_TYPE_UINT16
    4, // STBIR_TYPE_UINT32
    4, // STBIR_TYPE_FLOAT
};

// Kernel function centered at 0
typedef float (stbir__kernel_fn)(float x, float scale);
// Returns the kernel's support for a given scale (see the stbir__support_* functions).
typedef float (stbir__support_fn)(float scale);

// One entry of stbir__filter_info_table: a kernel and its support function.
typedef struct
{
    stbir__kernel_fn* kernel;
    stbir__support_fn* support;
} stbir__filter_info;

// When upsampling, the contributors are which source pixels contribute.
// When downsampling, the contributors are which destination pixels are contributed to.
typedef struct
{
    int n0; // First contributing pixel
    int n1; // Last contributing pixel
} stbir__contributors;

// All state for one resize operation: the caller's parameters, derived filter
// geometry, and pointers to (plus sizes of) the working buffers.
typedef struct
{
    const void* input_data;
    int input_w;
    int input_h;
    int input_stride_bytes;

    void* output_data;
    int output_w;
    int output_h;
    int output_stride_bytes;

    float s0, t0, s1, t1;

    float horizontal_shift; // Units: output pixels
    float vertical_shift;   // Units: output pixels
    float horizontal_scale;
    float vertical_scale;

    int channels;
    int alpha_channel;
    stbir_uint32 flags;
    stbir_datatype type;
    stbir_filter horizontal_filter;
    stbir_filter vertical_filter;
    stbir_edge edge_horizontal;
    stbir_edge edge_vertical;
    stbir_colorspace colorspace;

    stbir__contributors* horizontal_contributors;
    float* horizontal_coefficients;

    stbir__contributors* vertical_contributors;
    float* vertical_coefficients;

    int decode_buffer_pixels;
    float* decode_buffer;

    float* horizontal_buffer;

    // cache these because ceil/floor are inexplicably showing up in profile
    int horizontal_coefficient_width;
    int vertical_coefficient_width;
    int horizontal_filter_pixel_width;
    int vertical_filter_pixel_width;
    int horizontal_filter_pixel_margin;
    int vertical_filter_pixel_margin;
    int horizontal_num_contributors;
    int vertical_num_contributors;

    int ring_buffer_length_bytes; // The length of an individual entry in the ring buffer. The total number of ring buffers is stbir__get_filter_pixel_width(filter)
    int ring_buffer_first_scanline;
    int ring_buffer_last_scanline;
    int ring_buffer_begin_index;
    float* ring_buffer;

    float* encode_buffer; // A temporary buffer to store floats so we don't lose precision while we do multiply-adds.

    // NOTE(review): presumably byte sizes of the buffers above — confirm against the allocation code.
    int horizontal_contributors_size;
    int horizontal_coefficients_size;
    int vertical_contributors_size;
    int vertical_coefficients_size;
    int decode_buffer_size;
    int horizontal_buffer_size;
    int ring_buffer_size;
    int encode_buffer_size;
} stbir__info;

// Returns the smaller of a and b.
static stbir__inline int stbir__min(int a, int b)
{
    return a < b ? a : b;
}

// Returns the larger of a and b.
static stbir__inline int stbir__max(int a, int b)
{
    return a > b ? a : b;
}

// Clamps x to the range [0, 1].
static stbir__inline float stbir__saturate(float x)
{
    if (x < 0)
        return 0;

    if (x > 1)
        return 1;

    return x;
}

#ifdef STBIR_SATURATE_INT
// Clamps x to [0, 255]. The unsigned comparison accepts every in-range value
// with a single test, because negative x converts to a large unsigned value.
static stbir__inline stbir_uint8 stbir__saturate8(int x)
{
    if ((unsigned int) x <= 255)
        return x;

    if (x < 0)
        return 0;

    return 255;
}

// Clamps x to [0, 65535], using the same single-test trick as stbir__saturate8.
static stbir__inline stbir_uint16 stbir__saturate16(int x)
{
    if ((unsigned int) x <= 65535)
        return x;

    if (x < 0)
        return 0;

    return 65535;
}
#endif

// Lookup table indexed by an 8-bit sRGB value, giving a float in [0, 1]
// (per the name, the corresponding linear-light value).
static float stbir__srgb_uchar_to_linear_float[256] = {
    0.000000f, 0.000304f, 0.000607f, 0.000911f, 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f,
    0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, 0.006049f, 0.006512f, 0.006995f, 0.007499f,
    0.008023f, 0.008568f, 0.009134f, 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f, 0.014444f,
    0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f,
    0.025187f, 0.026241f, 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, 0.035601f, 0.036889f,
    0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f,
    0.054480f, 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, 0.068478f, 0.070360f, 0.072272f,
    0.074214f, 0.076185f, 0.078187f, 0.080220f, 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f,
    0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, 0.114435f, 0.116971f, 0.119538f, 0.122139f,
    0.124772f, 0.127438f, 0.130136f, 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, 0.152926f,
    0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f,
    0.191202f, 0.194618f, 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, 0.223228f, 0.226966f,
    0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f,
    0.274677f, 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, 0.309469f, 0.313989f, 0.318547f,
    0.323143f, 0.327778f, 0.332452f, 0.337164f, 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f,
    0.376262f, 0.381326f, 0.386430f, 0.391573f, 0.396755f, 0.401978f, 0.407240f, 0.412543f, 0.417885f, 0.423268f, 0.428691f,
    0.434154f, 0.439657f, 0.445201f, 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473532f, 0.479320f, 0.485150f, 0.491021f,
    0.496933f, 0.502887f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, 0.533276f, 0.539480f, 0.545725f, 0.552011f, 0.558340f,
    0.564712f, 0.571125f, 0.577581f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, 0.623960f, 0.630757f,
    0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, 0.672443f, 0.679543f, 0.686685f, 0.693872f, 0.701102f, 0.708376f,
    0.715694f, 0.723055f, 0.730461f, 0.737911f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, 0.775822f, 0.783538f, 0.791298f,
    0.799103f, 0.806952f, 0.814847f, 0.822786f, 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f,
    0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, 0.947307f, 0.955974f, 0.964686f, 0.973445f,
    0.982251f, 0.991102f, 1.0f
};

// Piecewise sRGB decode: a linear segment (f / 12.92) at or below 0.04045,
// a 2.4-power curve above it.
static float stbir__srgb_to_linear(float f)
{
    if (f <= 0.04045f)
        return f / 12.92f;
    else
        return (float)pow((f + 0.055f) / 1.055f, 2.4f);
}

// Piecewise sRGB encode: a linear segment (f * 12.92) at or below 0.0031308,
// a 1/2.4-power curve above it.
static float stbir__linear_to_srgb(float f)
{
    if (f <= 0.0031308f)
        return f * 12.92f;
    else
        return 1.055f * (float)pow(f, 1 / 2.4f) - 0.055f;
}

#ifndef STBIR_NON_IEEE_FLOAT
// From https://gist.github.com/rygorous/2203834

// Gives access to the raw 32-bit pattern of a float.
typedef union
{
    stbir_uint32 u;
    float f;
} stbir__FP32;

// Each entry packs a bias in its high 16 bits and a scale in its low 16 bits;
// both are unpacked in stbir__linear_to_srgb_uchar below.
static const stbir_uint32 fp32_to_srgb8_tab4[104] = {
    0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
    0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
    0x010e0033, 0x01280033, 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
    0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
    0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
    0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
    0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
    0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
    0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
    0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
    0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
    0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
    0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
};

// Converts a float to an 8-bit value through fp32_to_srgb8_tab4 (per the
// name, linear-light to sRGB), working directly on the float's bit pattern.
static stbir_uint8 stbir__linear_to_srgb_uchar(float in)
{
    static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps
    static const stbir__FP32 minval = { (127-13) << 23 };
    stbir_uint32 tab,bias,scale,t;
    stbir__FP32 f;

    // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
    // The tests are carefully written so that NaNs map to 0, same as in the reference
    // implementation.
    if (!(in > minval.f)) // written this way to catch NaNs
        in = minval.f;
    if (in > almostone.f)
        in = almostone.f;

    // Do the table lookup and unpack bias, scale
    f.f = in;
    // Shifting by 20 keeps the exponent and the top 3 mantissa bits as the index.
    tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20];
    bias = (tab >> 16) << 9;
    scale = tab & 0xffff;

    // Grab next-highest mantissa bits and perform linear interpolation
    t = (f.u >> 12) & 0xff;
    return (unsigned char) ((bias + scale*t) >> 16);
}

#else
// sRGB transition values, scaled by 1<<28
static int stbir__srgb_offset_to_linear_scaled[256] =
{
            0,     40738,    122216,    203693,    285170,    366648,    448125,    529603,
       611080,    692557,    774035,    855852,    942009,   1033024,   1128971,   1229926,
      1335959,   1447142,   1563542,   1685229,   1812268,   1944725,   2082664,   2226148,
      2375238,   2529996,   2690481,   2856753,   3028870,   3206888,   3390865,   3580856,
      3776916,   3979100,   4187460,   4402049,   4622919,   4850123,   5083710,   5323731,
      5570236,   5823273,   6082892,   6349140,   6622065,   6901714,   7188133,   7481369,
      7781466,   8088471,   8402427,   8723380,   9051372,   9386448,   9728650,  10078021,
     10434603,  10798439,  11169569,  11548036,  11933879,  12327139,  12727857,  13136073,
     13551826,  13975156,  14406100,  14844697,  15290987,  15745007,  16206795,  16676389,
     17153826,  17639142,  18132374,  18633560,  19142734,  19659934,  20185196,  20718552,
     21260042,  21809696,  22367554,  22933648,  23508010,  24090680,  24681686,  25281066,
     25888850,  26505076,  27129772,  27762974,  28404716,  29055026,  29713942,  30381490,
     31057708,  31742624,  32436272,  33138682,  33849884,  34569912,  35298800,  36036568,
     36783260,  37538896,  38303512,  39077136,  39859796,  40651528,  41452360,  42262316,
     43081432,  43909732,  44747252,  45594016,  46450052,  47315392,  48190064,  49074096,
     49967516,  50870356,  51782636,  52704392,  53635648,  54576432,  55526772,  56486700,
     57456236,  58435408,  59424248,  60422780,  61431036,  62449032,  63476804,  64514376,
     65561776,  66619028,  67686160,  68763192,  69850160,  70947088,  72053992,  73170912,
     74297864,  75434880,  76581976,  77739184,  78906536,  80084040,  81271736,  82469648,
     83677792,  84896192,  86124888,  87363888,  88613232,  89872928,  91143016,  92423512,
     93714432,  95015816,  96327688,  97650056,  98982952, 100326408, 101680440, 103045072,
    104420320, 105806224, 107202800, 108610064, 110028048, 111456776, 112896264, 114346544,
    115807632, 117279552, 118762328, 120255976, 121760536, 123276016, 124802440, 126339832,
    127888216, 129447616, 131018048, 132599544, 134192112, 135795792, 137410592, 139036528,
    140673648, 142321952, 143981456, 145652208, 147334208, 149027488, 150732064, 152447968,
    154175200, 155913792, 157663776, 159425168, 161197984, 162982240, 164777968, 166585184,
    168403904, 170234160, 172075968, 173929344, 175794320, 177670896, 179559120, 181458992,
    183370528, 185293776, 187228736, 189175424, 191133888, 193104112, 195086128, 197079968,
    199085648, 201103184, 203132592, 205173888, 207227120, 209292272, 211369392, 213458480,
    215559568, 217672656, 219797792, 221934976, 224084240, 226245600, 228419056, 230604656,
    232802400, 235012320, 237234432, 239468736, 241715280, 243974080, 246245120, 248528464,
    250824112, 253132064, 255452368, 257785040, 260130080, 262487520, 264857376, 267239664,
};

// Table-search variant used when STBIR_NON_IEEE_FLOAT is defined: returns the
// largest index v whose table entry is <= f scaled by 1<<28.
static stbir_uint8 stbir__linear_to_srgb_uchar(float f)
{
    int x = (int) (f * (1 << 28)); // has headroom so you don't need to clamp
    int v = 0;
    int i;

    // Refine the guess with a short binary search.
    // Eight halving steps (128 down to 1) settle each bit of v in turn.
    i = v + 128; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
    i = v +  64; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
    i = v +  32; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
    i = v +  16; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
    i = v +   8; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
    i = v +   4; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
    i = v +   2; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
    i = v +   1; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;

    return (stbir_uint8) v;
}
#endif

// Trapezoid kernel: 1 for |x| <= 0.5 - scale/2, 0 for |x| >= 0.5 + scale/2,
// and a linear ramp of width `scale` in between.
static float stbir__filter_trapezoid(float x, float scale)
{
    float halfscale = scale / 2;
    float t = 0.5f + halfscale;
    STBIR__DEBUG_ASSERT(scale <= 1);

    x = (float)fabs(x);

    if (x >= t)
        return 0;
    else
    {
        float r = 0.5f - halfscale;
        if (x <= r)
            return 1;
        else
            return (t - x) / scale;
    }
}

// Support of the trapezoid kernel: the |x| at which it reaches 0.
static float stbir__support_trapezoid(float scale)
{
    STBIR__DEBUG_ASSERT(scale <= 1);
    return 0.5f + scale / 2;
}

// Triangle (tent) kernel: 1 - |x| inside [-1, 1], 0 outside. Scale is unused.
static float stbir__filter_triangle(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    if (x <= 1.0f)
        return 1 - x;
    else
        return 0;
}

// Piecewise cubic kernel, nonzero for |x| < 2 (table entry for
// STBIR_FILTER_CUBICBSPLINE). Scale is unused.
static float stbir__filter_cubic(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    if (x < 1.0f)
        return (4 + x*x*(3*x - 6))/6;
    else if (x < 2.0f)
        return (8 + x*(-12 + x*(6 - x)))/6;

    return (0.0f);
}

// Piecewise cubic kernel, nonzero for |x| < 2 (table entry for
// STBIR_FILTER_CATMULLROM). Scale is unused.
static float stbir__filter_catmullrom(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    if (x < 1.0f)
        return 1 - x*x*(2.5f - 1.5f*x);
    else if (x < 2.0f)
        return 2 - x*(4 + x*(0.5f*x - 2.5f));

    return (0.0f);
}

// Piecewise cubic kernel, nonzero for |x| < 2 (table entry for
// STBIR_FILTER_MITCHELL). Scale is unused.
static float stbir__filter_mitchell(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    if (x < 1.0f)
        return (16 + x*x*(21 * x - 36))/18;
    else if (x < 2.0f)
        return (32 + x*(-60 + x*(36 - 7*x)))/18;

    return (0.0f);
}

// Constant support functions: always 0, 1 and 2 respectively, whatever the scale.
static float stbir__support_zero(float s)
{
    STBIR__UNUSED_PARAM(s);
    return 0;
}

static float stbir__support_one(float s)
{
    STBIR__UNUSED_PARAM(s);
    return 1;
}

static float stbir__support_two(float s)
{
    STBIR__UNUSED_PARAM(s);
    return 2;
}

// Indexed by stbir_filter value. Entry 0 (STBIR_FILTER_DEFAULT) has no kernel.
static stbir__filter_info stbir__filter_info_table[] = {
        { NULL,                     stbir__support_zero },
        { stbir__filter_trapezoid,  stbir__support_trapezoid },
        { stbir__filter_triangle,   stbir__support_one },
        { stbir__filter_cubic,      stbir__support_two },
        { stbir__filter_catmullrom, stbir__support_two },
        { stbir__filter_mitchell,   stbir__support_two },
};

// A scale ratio above 1 is treated as upsampling; anything else as downsampling.
stbir__inline static int stbir__use_upsampling(float ratio)
{
    return ratio > 1;
}

// Nonzero when the horizontal pass upsamples.
stbir__inline static int stbir__use_width_upsampling(stbir__info* stbir_info)
{
    return stbir__use_upsampling(stbir_info->horizontal_scale);
}

// Nonzero when the vertical pass upsamples.
stbir__inline static int stbir__use_height_upsampling(stbir__info* stbir_info)
{
    return stbir__use_upsampling(stbir_info->vertical_scale);
}

// This is the maximum number of input samples that can affect an output sample
// with the given filter
static int stbir__get_filter_pixel_width(stbir_filter filter, float scale)
{
    // Index 0 is STBIR_FILTER_DEFAULT, which has no kernel in the table.
    STBIR_ASSERT(filter != 0);
    STBIR_ASSERT(filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));

    if (stbir__use_upsampling(scale))
        return (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2);
    else
        return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2 / scale);
}

// This is how much to expand buffers to account for filters seeking outside
// the image boundaries.
+static int stbir__get_filter_pixel_margin(stbir_filter filter, float scale)
+{
+    // The margin is half of the filter's pixel footprint (integer division).
+    int footprint = stbir__get_filter_pixel_width(filter, scale);
+    return footprint / 2;
+}
+
+// Number of coefficient slots per contributor: twice the filter support,
+// rounded up. The support is evaluated at 1/scale when upsampling and at
+// scale otherwise.
+static int stbir__get_coefficient_width(stbir_filter filter, float scale)
+{
+    float support_arg = stbir__use_upsampling(scale) ? 1 / scale : scale;
+    return (int)ceil(stbir__filter_info_table[filter].support(support_arg) * 2);
+}
+
+// Number of contributor records: one per output sample when upsampling,
+// otherwise one per input sample plus a filter margin on each side.
+static int stbir__get_contributors(float scale, stbir_filter filter, int input_size, int output_size)
+{
+    int margin;
+
+    if (stbir__use_upsampling(scale))
+        return output_size;
+
+    margin = stbir__get_filter_pixel_margin(filter, scale);
+    return input_size + margin * 2;
+}
+
+// Total horizontal coefficient count: contributors times slots per contributor.
+static int stbir__get_total_horizontal_coefficients(stbir__info* info)
+{
+    int slots = stbir__get_coefficient_width(info->horizontal_filter, info->horizontal_scale);
+    return info->horizontal_num_contributors * slots;
+}
+
+// Total vertical coefficient count: contributors times slots per contributor.
+static int stbir__get_total_vertical_coefficients(stbir__info* info)
+{
+    int slots = stbir__get_coefficient_width(info->vertical_filter, info->vertical_scale);
+    return info->vertical_num_contributors * slots;
+}
+
+// Address of the n-th contributor record.
+static stbir__contributors* stbir__get_contributor(stbir__contributors* contributors, int n)
+{
+    return contributors + n;
+}
+
+// For perf reasons this code is duplicated in stbir__resample_horizontal_upsample/downsample,
+// if you change it here change it there too.
+static float* stbir__get_coefficient(float* coefficients, stbir_filter filter, float scale, int n, int c)
+{
+    // Contributor n owns `width` consecutive floats; c indexes within that group.
+    int width = stbir__get_coefficient_width(filter, scale);
+    return &coefficients[width*n + c];
+}
+
+// Maps a sample index n that lies outside [0, max) onto an index for an axis
+// of length max, according to the edge mode. stbir__edge_wrap() returns
+// in-range indices itself and only calls this for n < 0 or n >= max.
+//
+//   edge  edge mode selecting the mapping
+//   n     requested sample index (may be negative or >= max)
+//   max   axis length
+//
+// Returns the mapped index; for an unrecognised edge mode it reports
+// STBIR__UNIMPLEMENTED and returns 0.
+static int stbir__edge_wrap_slow(stbir_edge edge, int n, int max)
+{
+    switch (edge)
+    {
+    case STBIR_EDGE_ZERO:
+        return 0; // we'll decode the wrong pixel here, and then overwrite with 0s later
+
+    case STBIR_EDGE_CLAMP:
+        if (n < 0)
+            return 0;
+
+        if (n >= max)
+            return max - 1;
+
+        return n; // NOTREACHED
+
+    case STBIR_EDGE_REFLECT:
+    {
+        if (n < 0)
+        {
+            // Mirror about index 0 only while the mirrored index stays inside
+            // the axis. The earlier test (n < max) was always true for
+            // negative n, so -n could be returned even when it was >= max.
+            if (n > -max)
+                return -n;
+            else
+                return max - 1;
+        }
+
+        if (n >= max)
+        {
+            int max2 = max * 2;
+            if (n >= max2)
+                return 0;
+            else
+                return max2 - n - 1;
+        }
+
+        return n; // NOTREACHED
+    }
+
+    case STBIR_EDGE_WRAP:
+        if (n >= 0)
+            return (n % max);
+        else
+        {
+            // (-n) % max is in [0, max); convert it to the equivalent
+            // non-negative residue of n.
+            int m = (-n) % max;
+
+            if (m != 0)
+                m = max - m;
+
+            return (m);
+        }
+
+    default:
+        STBIR__UNIMPLEMENTED("Unimplemented edge type");
+        return 0;
+    }
+}
+
+// Fast path in front of stbir__edge_wrap_slow(): indices already inside
+// [0, max) are returned unchanged.
+stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max)
+{
+    // avoid per-pixel switch
+    if (n >= 0 && n < max)
+        return n;
+    return stbir__edge_wrap_slow(edge, n, max);
+}
+
+// What input pixels contribute to this output pixel?
+static void stbir__calculate_sample_range_upsample(int n, float out_filter_radius, float scale_ratio, float out_shift, int* in_first_pixel, int* in_last_pixel, float* in_center_of_out) +{ + float out_pixel_center = (float)n + 0.5f; + float out_pixel_influence_lowerbound = out_pixel_center - out_filter_radius; + float out_pixel_influence_upperbound = out_pixel_center + out_filter_radius; + + float in_pixel_influence_lowerbound = (out_pixel_influence_lowerbound + out_shift) / scale_ratio; + float in_pixel_influence_upperbound = (out_pixel_influence_upperbound + out_shift) / scale_ratio; + + *in_center_of_out = (out_pixel_center + out_shift) / scale_ratio; + *in_first_pixel = (int)(floor(in_pixel_influence_lowerbound + 0.5)); + *in_last_pixel = (int)(floor(in_pixel_influence_upperbound - 0.5)); +} + +// What output pixels does this input pixel contribute to? +static void stbir__calculate_sample_range_downsample(int n, float in_pixels_radius, float scale_ratio, float out_shift, int* out_first_pixel, int* out_last_pixel, float* out_center_of_in) +{ + float in_pixel_center = (float)n + 0.5f; + float in_pixel_influence_lowerbound = in_pixel_center - in_pixels_radius; + float in_pixel_influence_upperbound = in_pixel_center + in_pixels_radius; + + float out_pixel_influence_lowerbound = in_pixel_influence_lowerbound * scale_ratio - out_shift; + float out_pixel_influence_upperbound = in_pixel_influence_upperbound * scale_ratio - out_shift; + + *out_center_of_in = in_pixel_center * scale_ratio - out_shift; + *out_first_pixel = (int)(floor(out_pixel_influence_lowerbound + 0.5)); + *out_last_pixel = (int)(floor(out_pixel_influence_upperbound - 0.5)); +} + +static void stbir__calculate_coefficients_upsample(stbir__info* stbir_info, stbir_filter filter, float scale, int in_first_pixel, int in_last_pixel, float in_center_of_out, stbir__contributors* contributor, float* coefficient_group) +{ + int i; + float total_filter = 0; + float filter_scale; + + 
STBIR__DEBUG_ASSERT(in_last_pixel - in_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical. + + contributor->n0 = in_first_pixel; + contributor->n1 = in_last_pixel; + + STBIR__DEBUG_ASSERT(contributor->n1 >= contributor->n0); + + for (i = 0; i <= in_last_pixel - in_first_pixel; i++) + { + float in_pixel_center = (float)(i + in_first_pixel) + 0.5f; + coefficient_group[i] = stbir__filter_info_table[filter].kernel(in_center_of_out - in_pixel_center, 1 / scale); + + // If the coefficient is zero, skip it. (Don't do the <0 check here, we want the influence of those outside pixels.) + if (i == 0 && !coefficient_group[i]) + { + contributor->n0 = ++in_first_pixel; + i--; + continue; + } + + total_filter += coefficient_group[i]; + } + + STBIR__DEBUG_ASSERT(stbir__filter_info_table[filter].kernel((float)(in_last_pixel + 1) + 0.5f - in_center_of_out, 1/scale) == 0); + + STBIR__DEBUG_ASSERT(total_filter > 0.9); + STBIR__DEBUG_ASSERT(total_filter < 1.1f); // Make sure it's not way off. + + // Make sure the sum of all coefficients is 1. + filter_scale = 1 / total_filter; + + for (i = 0; i <= in_last_pixel - in_first_pixel; i++) + coefficient_group[i] *= filter_scale; + + for (i = in_last_pixel - in_first_pixel; i >= 0; i--) + { + if (coefficient_group[i]) + break; + + // This line has no weight. We can skip it. 
+ contributor->n1 = contributor->n0 + i - 1; + } +} + +static void stbir__calculate_coefficients_downsample(stbir__info* stbir_info, stbir_filter filter, float scale_ratio, int out_first_pixel, int out_last_pixel, float out_center_of_in, stbir__contributors* contributor, float* coefficient_group) +{ + int i; + + STBIR__DEBUG_ASSERT(out_last_pixel - out_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(scale_ratio) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical. + + contributor->n0 = out_first_pixel; + contributor->n1 = out_last_pixel; + + STBIR__DEBUG_ASSERT(contributor->n1 >= contributor->n0); + + for (i = 0; i <= out_last_pixel - out_first_pixel; i++) + { + float out_pixel_center = (float)(i + out_first_pixel) + 0.5f; + float x = out_pixel_center - out_center_of_in; + coefficient_group[i] = stbir__filter_info_table[filter].kernel(x, scale_ratio) * scale_ratio; + } + + STBIR__DEBUG_ASSERT(stbir__filter_info_table[filter].kernel((float)(out_last_pixel + 1) + 0.5f - out_center_of_in, scale_ratio) == 0); + + for (i = out_last_pixel - out_first_pixel; i >= 0; i--) + { + if (coefficient_group[i]) + break; + + // This line has no weight. We can skip it. 
+ contributor->n1 = contributor->n0 + i - 1; + } +} + +static void stbir__normalize_downsample_coefficients(stbir__info* stbir_info, stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, float shift, int input_size, int output_size) +{ + int num_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size); + int num_coefficients = stbir__get_coefficient_width(filter, scale_ratio); + int i, j; + int skip; + + for (i = 0; i < output_size; i++) + { + float scale; + float total = 0; + + for (j = 0; j < num_contributors; j++) + { + if (i >= contributors[j].n0 && i <= contributors[j].n1) + { + float coefficient = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0); + total += coefficient; + } + else if (i < contributors[j].n0) + break; + } + + STBIR__DEBUG_ASSERT(total > 0.9f); + STBIR__DEBUG_ASSERT(total < 1.1f); + + scale = 1 / total; + + for (j = 0; j < num_contributors; j++) + { + if (i >= contributors[j].n0 && i <= contributors[j].n1) + *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0) *= scale; + else if (i < contributors[j].n0) + break; + } + } + + // Optimize: Skip zero coefficients and contributions outside of image bounds. + // Do this after normalizing because normalization depends on the n0/n1 values. 
+ for (j = 0; j < num_contributors; j++) + { + int range, max, width; + + skip = 0; + while (*stbir__get_coefficient(coefficients, filter, scale_ratio, j, skip) == 0) + skip++; + + contributors[j].n0 += skip; + + while (contributors[j].n0 < 0) + { + contributors[j].n0++; + skip++; + } + + range = contributors[j].n1 - contributors[j].n0 + 1; + max = stbir__min(num_coefficients, range); + + width = stbir__get_coefficient_width(filter, scale_ratio); + for (i = 0; i < max; i++) + { + if (i + skip >= width) + break; + + *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i) = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i + skip); + } + + continue; + } + + // Using min to avoid writing into invalid pixels. + for (i = 0; i < num_contributors; i++) + contributors[i].n1 = stbir__min(contributors[i].n1, output_size - 1); +} + +// Each scan line uses the same kernel values so we should calculate the kernel +// values once and then we can use them for every scan line. +static void stbir__calculate_filters(stbir__info* stbir_info, stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, float shift, int input_size, int output_size) +{ + int n; + int total_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size); + + if (stbir__use_upsampling(scale_ratio)) + { + float out_pixels_radius = stbir__filter_info_table[filter].support(1 / scale_ratio) * scale_ratio; + + // Looping through out pixels + for (n = 0; n < total_contributors; n++) + { + float in_center_of_out; // Center of the current out pixel in the in pixel space + int in_first_pixel, in_last_pixel; + + stbir__calculate_sample_range_upsample(n, out_pixels_radius, scale_ratio, shift, &in_first_pixel, &in_last_pixel, &in_center_of_out); + + stbir__calculate_coefficients_upsample(stbir_info, filter, scale_ratio, in_first_pixel, in_last_pixel, in_center_of_out, stbir__get_contributor(contributors, n), 
stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0)); + } + } + else + { + float in_pixels_radius = stbir__filter_info_table[filter].support(scale_ratio) / scale_ratio; + + // Looping through in pixels + for (n = 0; n < total_contributors; n++) + { + float out_center_of_in; // Center of the current out pixel in the in pixel space + int out_first_pixel, out_last_pixel; + int n_adjusted = n - stbir__get_filter_pixel_margin(filter, scale_ratio); + + stbir__calculate_sample_range_downsample(n_adjusted, in_pixels_radius, scale_ratio, shift, &out_first_pixel, &out_last_pixel, &out_center_of_in); + + stbir__calculate_coefficients_downsample(stbir_info, filter, scale_ratio, out_first_pixel, out_last_pixel, out_center_of_in, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0)); + } + + stbir__normalize_downsample_coefficients(stbir_info, contributors, coefficients, filter, scale_ratio, shift, input_size, output_size); + } +} + +static float* stbir__get_decode_buffer(stbir__info* stbir_info) +{ + // The 0 index of the decode buffer starts after the margin. This makes + // it okay to use negative indexes on the decode buffer. 
+ return &stbir_info->decode_buffer[stbir_info->horizontal_filter_pixel_margin * stbir_info->channels]; +} + +#define STBIR__DECODE(type, colorspace) ((type) * (STBIR_MAX_COLORSPACES) + (colorspace)) + +static void stbir__decode_scanline(stbir__info* stbir_info, int n) +{ + int c; + int channels = stbir_info->channels; + int alpha_channel = stbir_info->alpha_channel; + int type = stbir_info->type; + int colorspace = stbir_info->colorspace; + int input_w = stbir_info->input_w; + int input_stride_bytes = stbir_info->input_stride_bytes; + float* decode_buffer = stbir__get_decode_buffer(stbir_info); + stbir_edge edge_horizontal = stbir_info->edge_horizontal; + stbir_edge edge_vertical = stbir_info->edge_vertical; + int in_buffer_row_offset = stbir__edge_wrap(edge_vertical, n, stbir_info->input_h) * input_stride_bytes; + const void* input_data = (char *) stbir_info->input_data + in_buffer_row_offset; + int max_x = input_w + stbir_info->horizontal_filter_pixel_margin; + int decode = STBIR__DECODE(type, colorspace); + + int x = -stbir_info->horizontal_filter_pixel_margin; + + // special handling for STBIR_EDGE_ZERO because it needs to return an item that doesn't appear in the input, + // and we want to avoid paying overhead on every pixel if not STBIR_EDGE_ZERO + if (edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->input_h)) + { + for (; x < max_x; x++) + for (c = 0; c < channels; c++) + decode_buffer[x*channels + c] = 0; + return; + } + + switch (decode) + { + case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = ((float)((const unsigned char*)input_data)[input_pixel_index + c]) / 255; + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB): + for (; x < max_x; x++) + { + int decode_pixel_index = x * 
channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = stbir__srgb_uchar_to_linear_float[((const unsigned char*)input_data)[input_pixel_index + c]]; + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned char*)input_data)[input_pixel_index + alpha_channel]) / 255; + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = ((float)((const unsigned short*)input_data)[input_pixel_index + c]) / 65535; + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((float)((const unsigned short*)input_data)[input_pixel_index + c]) / 65535); + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned short*)input_data)[input_pixel_index + alpha_channel]) / 65535; + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / 4294967295); + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB): + for (; x < max_x; x++) + { + int decode_pixel_index 
= x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear((float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / 4294967295)); + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + decode_buffer[decode_pixel_index + alpha_channel] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + alpha_channel]) / 4294967295); + } + break; + + case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = ((const float*)input_data)[input_pixel_index + c]; + } + break; + + case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((const float*)input_data)[input_pixel_index + c]); + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + decode_buffer[decode_pixel_index + alpha_channel] = ((const float*)input_data)[input_pixel_index + alpha_channel]; + } + + break; + + default: + STBIR__UNIMPLEMENTED("Unknown type/colorspace/channels combination."); + break; + } + + if (!(stbir_info->flags & STBIR_FLAG_ALPHA_PREMULTIPLIED)) + { + for (x = -stbir_info->horizontal_filter_pixel_margin; x < max_x; x++) + { + int decode_pixel_index = x * channels; + + // If the alpha value is 0 it will clobber the color values. Make sure it's not. 
+ float alpha = decode_buffer[decode_pixel_index + alpha_channel]; +#ifndef STBIR_NO_ALPHA_EPSILON + if (stbir_info->type != STBIR_TYPE_FLOAT) { + alpha += STBIR_ALPHA_EPSILON; + decode_buffer[decode_pixel_index + alpha_channel] = alpha; + } +#endif + for (c = 0; c < channels; c++) + { + if (c == alpha_channel) + continue; + + decode_buffer[decode_pixel_index + c] *= alpha; + } + } + } + + if (edge_horizontal == STBIR_EDGE_ZERO) + { + for (x = -stbir_info->horizontal_filter_pixel_margin; x < 0; x++) + { + for (c = 0; c < channels; c++) + decode_buffer[x*channels + c] = 0; + } + for (x = input_w; x < max_x; x++) + { + for (c = 0; c < channels; c++) + decode_buffer[x*channels + c] = 0; + } + } +} + +static float* stbir__get_ring_buffer_entry(float* ring_buffer, int index, int ring_buffer_length) +{ + return &ring_buffer[index * ring_buffer_length]; +} + +static float* stbir__add_empty_ring_buffer_entry(stbir__info* stbir_info, int n) +{ + int ring_buffer_index; + float* ring_buffer; + + if (stbir_info->ring_buffer_begin_index < 0) + { + ring_buffer_index = stbir_info->ring_buffer_begin_index = 0; + stbir_info->ring_buffer_first_scanline = n; + } + else + { + ring_buffer_index = (stbir_info->ring_buffer_begin_index + (stbir_info->ring_buffer_last_scanline - stbir_info->ring_buffer_first_scanline) + 1) % stbir_info->vertical_filter_pixel_width; + STBIR__DEBUG_ASSERT(ring_buffer_index != stbir_info->ring_buffer_begin_index); + } + + ring_buffer = stbir__get_ring_buffer_entry(stbir_info->ring_buffer, ring_buffer_index, stbir_info->ring_buffer_length_bytes / sizeof(float)); + memset(ring_buffer, 0, stbir_info->ring_buffer_length_bytes); + + stbir_info->ring_buffer_last_scanline = n; + + return ring_buffer; +} + + +static void stbir__resample_horizontal_upsample(stbir__info* stbir_info, int n, float* output_buffer) +{ + int x, k; + int output_w = stbir_info->output_w; + int kernel_pixel_width = stbir_info->horizontal_filter_pixel_width; + int channels = 
stbir_info->channels; + float* decode_buffer = stbir__get_decode_buffer(stbir_info); + stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors; + float* horizontal_coefficients = stbir_info->horizontal_coefficients; + int coefficient_width = stbir_info->horizontal_coefficient_width; + + for (x = 0; x < output_w; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int out_pixel_index = x * channels; + int coefficient_group = coefficient_width * x; + int coefficient_counter = 0; + + STBIR__DEBUG_ASSERT(n1 >= n0); + STBIR__DEBUG_ASSERT(n0 >= -stbir_info->horizontal_filter_pixel_margin); + STBIR__DEBUG_ASSERT(n1 >= -stbir_info->horizontal_filter_pixel_margin); + STBIR__DEBUG_ASSERT(n0 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin); + STBIR__DEBUG_ASSERT(n1 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin); + + switch (channels) { + case 1: + for (k = n0; k <= n1; k++) + { + int in_pixel_index = k * 1; + float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++]; + STBIR__DEBUG_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + } + break; + case 2: + for (k = n0; k <= n1; k++) + { + int in_pixel_index = k * 2; + float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++]; + STBIR__DEBUG_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + } + break; + case 3: + for (k = n0; k <= n1; k++) + { + int in_pixel_index = k * 3; + float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++]; + STBIR__DEBUG_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += 
decode_buffer[in_pixel_index + 1] * coefficient; + output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient; + } + break; + case 4: + for (k = n0; k <= n1; k++) + { + int in_pixel_index = k * 4; + float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++]; + STBIR__DEBUG_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient; + output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient; + } + break; + default: + for (k = n0; k <= n1; k++) + { + int in_pixel_index = k * channels; + float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++]; + int c; + STBIR__DEBUG_ASSERT(coefficient != 0); + for (c = 0; c < channels; c++) + output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient; + } + break; + } + } +} + +static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, int n, float* output_buffer) +{ + int x, k; + int input_w = stbir_info->input_w; + int output_w = stbir_info->output_w; + int kernel_pixel_width = stbir_info->horizontal_filter_pixel_width; + int channels = stbir_info->channels; + float* decode_buffer = stbir__get_decode_buffer(stbir_info); + stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors; + float* horizontal_coefficients = stbir_info->horizontal_coefficients; + int coefficient_width = stbir_info->horizontal_coefficient_width; + int filter_pixel_margin = stbir_info->horizontal_filter_pixel_margin; + int max_x = input_w + filter_pixel_margin * 2; + + STBIR__DEBUG_ASSERT(!stbir__use_width_upsampling(stbir_info)); + + switch (channels) { + case 1: + for (x = 0; x < max_x; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = 
horizontal_contributors[x].n1; + + int in_x = x - filter_pixel_margin; + int in_pixel_index = in_x * 1; + int max_n = n1; + int coefficient_group = coefficient_width * x; + + for (k = n0; k <= max_n; k++) + { + int out_pixel_index = k * 1; + float coefficient = horizontal_coefficients[coefficient_group + k - n0]; + STBIR__DEBUG_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + } + } + break; + + case 2: + for (x = 0; x < max_x; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int in_x = x - filter_pixel_margin; + int in_pixel_index = in_x * 2; + int max_n = n1; + int coefficient_group = coefficient_width * x; + + for (k = n0; k <= max_n; k++) + { + int out_pixel_index = k * 2; + float coefficient = horizontal_coefficients[coefficient_group + k - n0]; + STBIR__DEBUG_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + } + } + break; + + case 3: + for (x = 0; x < max_x; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int in_x = x - filter_pixel_margin; + int in_pixel_index = in_x * 3; + int max_n = n1; + int coefficient_group = coefficient_width * x; + + for (k = n0; k <= max_n; k++) + { + int out_pixel_index = k * 3; + float coefficient = horizontal_coefficients[coefficient_group + k - n0]; + STBIR__DEBUG_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient; + } + } + break; + + case 4: + for (x = 0; x < max_x; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int in_x = x - 
filter_pixel_margin; + int in_pixel_index = in_x * 4; + int max_n = n1; + int coefficient_group = coefficient_width * x; + + for (k = n0; k <= max_n; k++) + { + int out_pixel_index = k * 4; + float coefficient = horizontal_coefficients[coefficient_group + k - n0]; + STBIR__DEBUG_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient; + output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient; + } + } + break; + + default: + for (x = 0; x < max_x; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int in_x = x - filter_pixel_margin; + int in_pixel_index = in_x * channels; + int max_n = n1; + int coefficient_group = coefficient_width * x; + + for (k = n0; k <= max_n; k++) + { + int c; + int out_pixel_index = k * channels; + float coefficient = horizontal_coefficients[coefficient_group + k - n0]; + STBIR__DEBUG_ASSERT(coefficient != 0); + for (c = 0; c < channels; c++) + output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient; + } + } + break; + } +} + +static void stbir__decode_and_resample_upsample(stbir__info* stbir_info, int n) +{ + // Decode the nth scanline from the source image into the decode buffer. + stbir__decode_scanline(stbir_info, n); + + // Now resample it into the ring buffer. + if (stbir__use_width_upsampling(stbir_info)) + stbir__resample_horizontal_upsample(stbir_info, n, stbir__add_empty_ring_buffer_entry(stbir_info, n)); + else + stbir__resample_horizontal_downsample(stbir_info, n, stbir__add_empty_ring_buffer_entry(stbir_info, n)); + + // Now it's sitting in the ring buffer ready to be used as source for the vertical sampling. 
+} + +static void stbir__decode_and_resample_downsample(stbir__info* stbir_info, int n) +{ + // Decode the nth scanline from the source image into the decode buffer. + stbir__decode_scanline(stbir_info, n); + + memset(stbir_info->horizontal_buffer, 0, stbir_info->output_w * stbir_info->channels * sizeof(float)); + + // Now resample it into the horizontal buffer. + if (stbir__use_width_upsampling(stbir_info)) + stbir__resample_horizontal_upsample(stbir_info, n, stbir_info->horizontal_buffer); + else + stbir__resample_horizontal_downsample(stbir_info, n, stbir_info->horizontal_buffer); + + // Now it's sitting in the horizontal buffer ready to be distributed into the ring buffers. +} + +// Get the specified scan line from the ring buffer. +static float* stbir__get_ring_buffer_scanline(int get_scanline, float* ring_buffer, int begin_index, int first_scanline, int ring_buffer_size, int ring_buffer_length) +{ + int ring_buffer_index = (begin_index + (get_scanline - first_scanline)) % ring_buffer_size; + return stbir__get_ring_buffer_entry(ring_buffer, ring_buffer_index, ring_buffer_length); +} + + +static void stbir__encode_scanline(stbir__info* stbir_info, int num_pixels, void *output_buffer, float *encode_buffer, int channels, int alpha_channel, int decode) +{ + int x; + int n; + int num_nonalpha; + stbir_uint16 nonalpha[STBIR_MAX_CHANNELS]; + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)) + { + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + float alpha = encode_buffer[pixel_index + alpha_channel]; + float reciprocal_alpha = alpha ? 1.0f / alpha : 0; + + // unrolling this produced a 1% slowdown upscaling a large RGBA linear-space image on my machine - stb + for (n = 0; n < channels; n++) + if (n != alpha_channel) + encode_buffer[pixel_index + n] *= reciprocal_alpha; + + // We added in a small epsilon to prevent the color channel from being deleted with zero alpha. 
+ // Because we only add it for integer types, it will automatically be discarded on integer + // conversion, so we don't need to subtract it back out (which would be problematic for + // numeric precision reasons). + } + } + + // build a table of all channels that need colorspace correction, so + // we don't perform colorspace correction on channels that don't need it. + for (x=0, num_nonalpha=0; x < channels; ++x) + if (x != alpha_channel || (stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE)) + nonalpha[num_nonalpha++] = x; + + #define STBIR__ROUND_INT(f) ((int) ((f)+0.5)) + #define STBIR__ROUND_UINT(f) ((stbir_uint32) ((f)+0.5)) + + #ifdef STBIR__SATURATE_INT + #define STBIR__ENCODE_LINEAR8(f) stbir__saturate8 (STBIR__ROUND_INT((f) * 255 )) + #define STBIR__ENCODE_LINEAR16(f) stbir__saturate16(STBIR__ROUND_INT((f) * 65535)) + #else + #define STBIR__ENCODE_LINEAR8(f) (unsigned char ) STBIR__ROUND_INT(stbir__saturate(f) * 255 ) + #define STBIR__ENCODE_LINEAR16(f) (unsigned short) STBIR__ROUND_INT(stbir__saturate(f) * 65535) + #endif + + switch (decode) + { + case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < channels; n++) + { + int index = pixel_index + n; + ((unsigned char*)output_buffer)[index] = STBIR__ENCODE_LINEAR8(encode_buffer[index]); + } + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < num_nonalpha; n++) + { + int index = pixel_index + nonalpha[n]; + ((unsigned char*)output_buffer)[index] = stbir__linear_to_srgb_uchar(encode_buffer[index]); + } + + if (!(stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE)) + ((unsigned char *)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR8(encode_buffer[pixel_index+alpha_channel]); + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR): + for (x=0; x < 
num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < channels; n++) + { + int index = pixel_index + n; + ((unsigned short*)output_buffer)[index] = STBIR__ENCODE_LINEAR16(encode_buffer[index]); + } + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < num_nonalpha; n++) + { + int index = pixel_index + nonalpha[n]; + ((unsigned short*)output_buffer)[index] = (unsigned short)STBIR__ROUND_INT(stbir__linear_to_srgb(stbir__saturate(encode_buffer[index])) * 65535); + } + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + ((unsigned short*)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR16(encode_buffer[pixel_index + alpha_channel]); + } + + break; + + case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < channels; n++) + { + int index = pixel_index + n; + ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__saturate(encode_buffer[index])) * 4294967295); + } + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < num_nonalpha; n++) + { + int index = pixel_index + nonalpha[n]; + ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__linear_to_srgb(stbir__saturate(encode_buffer[index]))) * 4294967295); + } + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + ((unsigned int*)output_buffer)[pixel_index + alpha_channel] = (unsigned int)STBIR__ROUND_INT(((double)stbir__saturate(encode_buffer[pixel_index + alpha_channel])) * 4294967295); + } + break; + + case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < channels; n++) + { + int index 
= pixel_index + n; + ((float*)output_buffer)[index] = encode_buffer[index]; + } + } + break; + + case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < num_nonalpha; n++) + { + int index = pixel_index + nonalpha[n]; + ((float*)output_buffer)[index] = stbir__linear_to_srgb(encode_buffer[index]); + } + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + ((float*)output_buffer)[pixel_index + alpha_channel] = encode_buffer[pixel_index + alpha_channel]; + } + break; + + default: + STBIR__UNIMPLEMENTED("Unknown type/colorspace/channels combination."); + break; + } +} + +static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, int in_first_scanline, int in_last_scanline, float in_center_of_out) +{ + int x, k; + int output_w = stbir_info->output_w; + stbir__contributors* vertical_contributors = stbir_info->vertical_contributors; + float* vertical_coefficients = stbir_info->vertical_coefficients; + int channels = stbir_info->channels; + int alpha_channel = stbir_info->alpha_channel; + int type = stbir_info->type; + int colorspace = stbir_info->colorspace; + int kernel_pixel_width = stbir_info->vertical_filter_pixel_width; + void* output_data = stbir_info->output_data; + float* encode_buffer = stbir_info->encode_buffer; + int decode = STBIR__DECODE(type, colorspace); + int coefficient_width = stbir_info->vertical_coefficient_width; + int coefficient_counter; + int contributor = n; + + float* ring_buffer = stbir_info->ring_buffer; + int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index; + int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline; + int ring_buffer_last_scanline = stbir_info->ring_buffer_last_scanline; + int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float); + + int n0,n1, output_row_start; + int coefficient_group = coefficient_width * contributor; + + n0 = 
vertical_contributors[contributor].n0; + n1 = vertical_contributors[contributor].n1; + + output_row_start = n * stbir_info->output_stride_bytes; + + STBIR__DEBUG_ASSERT(stbir__use_height_upsampling(stbir_info)); + + memset(encode_buffer, 0, output_w * sizeof(float) * channels); + + // I tried reblocking this for better cache usage of encode_buffer + // (using x_outer, k, x_inner), but it lost speed. -- stb + + coefficient_counter = 0; + switch (channels) { + case 1: + for (k = n0; k <= n1; k++) + { + int coefficient_index = coefficient_counter++; + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + for (x = 0; x < output_w; ++x) + { + int in_pixel_index = x * 1; + encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; + } + } + break; + case 2: + for (k = n0; k <= n1; k++) + { + int coefficient_index = coefficient_counter++; + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + for (x = 0; x < output_w; ++x) + { + int in_pixel_index = x * 2; + encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; + encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient; + } + } + break; + case 3: + for (k = n0; k <= n1; k++) + { + int coefficient_index = coefficient_counter++; + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + for (x = 0; x < output_w; ++x) + { + int 
in_pixel_index = x * 3; + encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; + encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient; + encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient; + } + } + break; + case 4: + for (k = n0; k <= n1; k++) + { + int coefficient_index = coefficient_counter++; + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + for (x = 0; x < output_w; ++x) + { + int in_pixel_index = x * 4; + encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; + encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient; + encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient; + encode_buffer[in_pixel_index + 3] += ring_buffer_entry[in_pixel_index + 3] * coefficient; + } + } + break; + default: + for (k = n0; k <= n1; k++) + { + int coefficient_index = coefficient_counter++; + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + for (x = 0; x < output_w; ++x) + { + int in_pixel_index = x * channels; + int c; + for (c = 0; c < channels; c++) + encode_buffer[in_pixel_index + c] += ring_buffer_entry[in_pixel_index + c] * coefficient; + } + } + break; + } + stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, encode_buffer, channels, alpha_channel, decode); +} + +static void stbir__resample_vertical_downsample(stbir__info* stbir_info, int n, int in_first_scanline, int in_last_scanline, float in_center_of_out) +{ + int x, k; 
+ int output_w = stbir_info->output_w; + int output_h = stbir_info->output_h; + stbir__contributors* vertical_contributors = stbir_info->vertical_contributors; + float* vertical_coefficients = stbir_info->vertical_coefficients; + int channels = stbir_info->channels; + int kernel_pixel_width = stbir_info->vertical_filter_pixel_width; + void* output_data = stbir_info->output_data; + float* horizontal_buffer = stbir_info->horizontal_buffer; + int coefficient_width = stbir_info->vertical_coefficient_width; + int contributor = n + stbir_info->vertical_filter_pixel_margin; + + float* ring_buffer = stbir_info->ring_buffer; + int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index; + int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline; + int ring_buffer_last_scanline = stbir_info->ring_buffer_last_scanline; + int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float); + int n0,n1; + + n0 = vertical_contributors[contributor].n0; + n1 = vertical_contributors[contributor].n1; + + STBIR__DEBUG_ASSERT(!stbir__use_height_upsampling(stbir_info)); + + for (k = n0; k <= n1; k++) + { + int coefficient_index = k - n0; + int coefficient_group = coefficient_width * contributor; + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); + + switch (channels) { + case 1: + for (x = 0; x < output_w; x++) + { + int in_pixel_index = x * 1; + ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient; + } + break; + case 2: + for (x = 0; x < output_w; x++) + { + int in_pixel_index = x * 2; + ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient; + ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient; + } + break; + case 3: + for (x = 0; x < 
output_w; x++) + { + int in_pixel_index = x * 3; + ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient; + ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient; + ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient; + } + break; + case 4: + for (x = 0; x < output_w; x++) + { + int in_pixel_index = x * 4; + ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient; + ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient; + ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient; + ring_buffer_entry[in_pixel_index + 3] += horizontal_buffer[in_pixel_index + 3] * coefficient; + } + break; + default: + for (x = 0; x < output_w; x++) + { + int in_pixel_index = x * channels; + + int c; + for (c = 0; c < channels; c++) + ring_buffer_entry[in_pixel_index + c] += horizontal_buffer[in_pixel_index + c] * coefficient; + } + break; + } + } +} + +static void stbir__buffer_loop_upsample(stbir__info* stbir_info) +{ + int y; + float scale_ratio = stbir_info->vertical_scale; + float out_scanlines_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(1/scale_ratio) * scale_ratio; + + STBIR__DEBUG_ASSERT(stbir__use_height_upsampling(stbir_info)); + + for (y = 0; y < stbir_info->output_h; y++) + { + float in_center_of_out = 0; // Center of the current out scanline in the in scanline space + int in_first_scanline = 0, in_last_scanline = 0; + + stbir__calculate_sample_range_upsample(y, out_scanlines_radius, scale_ratio, stbir_info->vertical_shift, &in_first_scanline, &in_last_scanline, &in_center_of_out); + + STBIR__DEBUG_ASSERT(in_last_scanline - in_first_scanline <= stbir_info->vertical_filter_pixel_width); + + if (stbir_info->ring_buffer_begin_index >= 0) + { + // Get rid of whatever we don't need anymore. 
+ while (in_first_scanline > stbir_info->ring_buffer_first_scanline) + { + if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline) + { + // We just popped the last scanline off the ring buffer. + // Reset it to the empty state. + stbir_info->ring_buffer_begin_index = -1; + stbir_info->ring_buffer_first_scanline = 0; + stbir_info->ring_buffer_last_scanline = 0; + break; + } + else + { + stbir_info->ring_buffer_first_scanline++; + stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->vertical_filter_pixel_width; + } + } + } + + // Load in new ones. + if (stbir_info->ring_buffer_begin_index < 0) + stbir__decode_and_resample_upsample(stbir_info, in_first_scanline); + + while (in_last_scanline > stbir_info->ring_buffer_last_scanline) + stbir__decode_and_resample_upsample(stbir_info, stbir_info->ring_buffer_last_scanline + 1); + + // Now all buffers should be ready to write a row of vertical sampling. + stbir__resample_vertical_upsample(stbir_info, y, in_first_scanline, in_last_scanline, in_center_of_out); + + STBIR_PROGRESS_REPORT((float)y / stbir_info->output_h); + } +} + +static void stbir__empty_ring_buffer(stbir__info* stbir_info, int first_necessary_scanline) +{ + int output_stride_bytes = stbir_info->output_stride_bytes; + int channels = stbir_info->channels; + int alpha_channel = stbir_info->alpha_channel; + int type = stbir_info->type; + int colorspace = stbir_info->colorspace; + int output_w = stbir_info->output_w; + void* output_data = stbir_info->output_data; + int decode = STBIR__DECODE(type, colorspace); + + float* ring_buffer = stbir_info->ring_buffer; + int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float); + + if (stbir_info->ring_buffer_begin_index >= 0) + { + // Get rid of whatever we don't need anymore. 
+ while (first_necessary_scanline > stbir_info->ring_buffer_first_scanline) + { + if (stbir_info->ring_buffer_first_scanline >= 0 && stbir_info->ring_buffer_first_scanline < stbir_info->output_h) + { + int output_row_start = stbir_info->ring_buffer_first_scanline * output_stride_bytes; + float* ring_buffer_entry = stbir__get_ring_buffer_entry(ring_buffer, stbir_info->ring_buffer_begin_index, ring_buffer_length); + stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, ring_buffer_entry, channels, alpha_channel, decode); + STBIR_PROGRESS_REPORT((float)stbir_info->ring_buffer_first_scanline / stbir_info->output_h); + } + + if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline) + { + // We just popped the last scanline off the ring buffer. + // Reset it to the empty state. + stbir_info->ring_buffer_begin_index = -1; + stbir_info->ring_buffer_first_scanline = 0; + stbir_info->ring_buffer_last_scanline = 0; + break; + } + else + { + stbir_info->ring_buffer_first_scanline++; + stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->vertical_filter_pixel_width; + } + } + } +} + +static void stbir__buffer_loop_downsample(stbir__info* stbir_info) +{ + int y; + float scale_ratio = stbir_info->vertical_scale; + int output_h = stbir_info->output_h; + float in_pixels_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(scale_ratio) / scale_ratio; + int pixel_margin = stbir_info->vertical_filter_pixel_margin; + int max_y = stbir_info->input_h + pixel_margin; + + STBIR__DEBUG_ASSERT(!stbir__use_height_upsampling(stbir_info)); + + for (y = -pixel_margin; y < max_y; y++) + { + float out_center_of_in; // Center of the current out scanline in the in scanline space + int out_first_scanline, out_last_scanline; + + stbir__calculate_sample_range_downsample(y, in_pixels_radius, scale_ratio, stbir_info->vertical_shift, &out_first_scanline, &out_last_scanline, 
&out_center_of_in); + + STBIR__DEBUG_ASSERT(out_last_scanline - out_first_scanline <= stbir_info->vertical_filter_pixel_width); + + if (out_last_scanline < 0 || out_first_scanline >= output_h) + continue; + + stbir__empty_ring_buffer(stbir_info, out_first_scanline); + + stbir__decode_and_resample_downsample(stbir_info, y); + + // Load in new ones. + if (stbir_info->ring_buffer_begin_index < 0) + stbir__add_empty_ring_buffer_entry(stbir_info, out_first_scanline); + + while (out_last_scanline > stbir_info->ring_buffer_last_scanline) + stbir__add_empty_ring_buffer_entry(stbir_info, stbir_info->ring_buffer_last_scanline + 1); + + // Now the horizontal buffer is ready to write to all ring buffer rows. + stbir__resample_vertical_downsample(stbir_info, y, out_first_scanline, out_last_scanline, out_center_of_in); + } + + stbir__empty_ring_buffer(stbir_info, stbir_info->output_h); +} + +static void stbir__setup(stbir__info *info, int input_w, int input_h, int output_w, int output_h, int channels) +{ + info->input_w = input_w; + info->input_h = input_h; + info->output_w = output_w; + info->output_h = output_h; + info->channels = channels; +} + +static void stbir__calculate_transform(stbir__info *info, float s0, float t0, float s1, float t1, float *transform) +{ + info->s0 = s0; + info->t0 = t0; + info->s1 = s1; + info->t1 = t1; + + if (transform) + { + info->horizontal_scale = transform[0]; + info->vertical_scale = transform[1]; + info->horizontal_shift = transform[2]; + info->vertical_shift = transform[3]; + } + else + { + info->horizontal_scale = ((float)info->output_w / info->input_w) / (s1 - s0); + info->vertical_scale = ((float)info->output_h / info->input_h) / (t1 - t0); + + info->horizontal_shift = s0 * info->input_w / (s1 - s0); + info->vertical_shift = t0 * info->input_h / (t1 - t0); + } +} + +static void stbir__choose_filter(stbir__info *info, stbir_filter h_filter, stbir_filter v_filter) +{ + if (h_filter == 0) + h_filter = 
stbir__use_upsampling(info->horizontal_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE; + if (v_filter == 0) + v_filter = stbir__use_upsampling(info->vertical_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE; + info->horizontal_filter = h_filter; + info->vertical_filter = v_filter; +} + +static stbir_uint32 stbir__calculate_memory(stbir__info *info) +{ + int pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale); + int filter_height = stbir__get_filter_pixel_width(info->vertical_filter, info->vertical_scale); + + info->horizontal_num_contributors = stbir__get_contributors(info->horizontal_scale, info->horizontal_filter, info->input_w, info->output_w); + info->vertical_num_contributors = stbir__get_contributors(info->vertical_scale , info->vertical_filter , info->input_h, info->output_h); + + info->horizontal_contributors_size = info->horizontal_num_contributors * sizeof(stbir__contributors); + info->horizontal_coefficients_size = stbir__get_total_horizontal_coefficients(info) * sizeof(float); + info->vertical_contributors_size = info->vertical_num_contributors * sizeof(stbir__contributors); + info->vertical_coefficients_size = stbir__get_total_vertical_coefficients(info) * sizeof(float); + info->decode_buffer_size = (info->input_w + pixel_margin * 2) * info->channels * sizeof(float); + info->horizontal_buffer_size = info->output_w * info->channels * sizeof(float); + info->ring_buffer_size = info->output_w * info->channels * filter_height * sizeof(float); + info->encode_buffer_size = info->output_w * info->channels * sizeof(float); + + STBIR_ASSERT(info->horizontal_filter != 0); + STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late + STBIR_ASSERT(info->vertical_filter != 0); + STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late + + if 
(stbir__use_height_upsampling(info)) + // The horizontal buffer is for when we're downsampling the height and we + // can't output the result of sampling the decode buffer directly into the + // ring buffers. + info->horizontal_buffer_size = 0; + else + // The encode buffer is to retain precision in the height upsampling method + // and isn't used when height downsampling. + info->encode_buffer_size = 0; + + return info->horizontal_contributors_size + info->horizontal_coefficients_size + + info->vertical_contributors_size + info->vertical_coefficients_size + + info->decode_buffer_size + info->horizontal_buffer_size + + info->ring_buffer_size + info->encode_buffer_size; +} + +static int stbir__resize_allocated(stbir__info *info, + const void* input_data, int input_stride_in_bytes, + void* output_data, int output_stride_in_bytes, + int alpha_channel, stbir_uint32 flags, stbir_datatype type, + stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace, + void* tempmem, size_t tempmem_size_in_bytes) +{ + size_t memory_required = stbir__calculate_memory(info); + + int width_stride_input = input_stride_in_bytes ? input_stride_in_bytes : info->channels * info->input_w * stbir__type_size[type]; + int width_stride_output = output_stride_in_bytes ? 
output_stride_in_bytes : info->channels * info->output_w * stbir__type_size[type]; + +#ifdef STBIR_DEBUG_OVERWRITE_TEST +#define OVERWRITE_ARRAY_SIZE 8 + unsigned char overwrite_output_before_pre[OVERWRITE_ARRAY_SIZE]; + unsigned char overwrite_tempmem_before_pre[OVERWRITE_ARRAY_SIZE]; + unsigned char overwrite_output_after_pre[OVERWRITE_ARRAY_SIZE]; + unsigned char overwrite_tempmem_after_pre[OVERWRITE_ARRAY_SIZE]; + + size_t begin_forbidden = width_stride_output * (info->output_h - 1) + info->output_w * info->channels * stbir__type_size[type]; + memcpy(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE); + memcpy(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE); + memcpy(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE); + memcpy(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE); +#endif + + STBIR_ASSERT(info->channels >= 0); + STBIR_ASSERT(info->channels <= STBIR_MAX_CHANNELS); + + if (info->channels < 0 || info->channels > STBIR_MAX_CHANNELS) + return 0; + + STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); + STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); + + if (info->horizontal_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table)) + return 0; + if (info->vertical_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table)) + return 0; + + if (alpha_channel < 0) + flags |= STBIR_FLAG_ALPHA_USES_COLORSPACE | STBIR_FLAG_ALPHA_PREMULTIPLIED; + + if (!(flags&STBIR_FLAG_ALPHA_USES_COLORSPACE) || !(flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)) + STBIR_ASSERT(alpha_channel >= 0 && alpha_channel < info->channels); + + if (alpha_channel >= info->channels) + return 0; + + STBIR_ASSERT(tempmem); + + if (!tempmem) + return 0; + + STBIR_ASSERT(tempmem_size_in_bytes >= memory_required); + + if 
(tempmem_size_in_bytes < memory_required) + return 0; + + memset(tempmem, 0, tempmem_size_in_bytes); + + info->input_data = input_data; + info->input_stride_bytes = width_stride_input; + + info->output_data = output_data; + info->output_stride_bytes = width_stride_output; + + info->alpha_channel = alpha_channel; + info->flags = flags; + info->type = type; + info->edge_horizontal = edge_horizontal; + info->edge_vertical = edge_vertical; + info->colorspace = colorspace; + + info->horizontal_coefficient_width = stbir__get_coefficient_width (info->horizontal_filter, info->horizontal_scale); + info->vertical_coefficient_width = stbir__get_coefficient_width (info->vertical_filter , info->vertical_scale ); + info->horizontal_filter_pixel_width = stbir__get_filter_pixel_width (info->horizontal_filter, info->horizontal_scale); + info->vertical_filter_pixel_width = stbir__get_filter_pixel_width (info->vertical_filter , info->vertical_scale ); + info->horizontal_filter_pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale); + info->vertical_filter_pixel_margin = stbir__get_filter_pixel_margin(info->vertical_filter , info->vertical_scale ); + + info->ring_buffer_length_bytes = info->output_w * info->channels * sizeof(float); + info->decode_buffer_pixels = info->input_w + info->horizontal_filter_pixel_margin * 2; + +#define STBIR__NEXT_MEMPTR(current, newtype) (newtype*)(((unsigned char*)current) + current##_size) + + info->horizontal_contributors = (stbir__contributors *) tempmem; + info->horizontal_coefficients = STBIR__NEXT_MEMPTR(info->horizontal_contributors, float); + info->vertical_contributors = STBIR__NEXT_MEMPTR(info->horizontal_coefficients, stbir__contributors); + info->vertical_coefficients = STBIR__NEXT_MEMPTR(info->vertical_contributors, float); + info->decode_buffer = STBIR__NEXT_MEMPTR(info->vertical_coefficients, float); + + if (stbir__use_height_upsampling(info)) + { + info->horizontal_buffer = NULL; + info->ring_buffer 
= STBIR__NEXT_MEMPTR(info->decode_buffer, float); + info->encode_buffer = STBIR__NEXT_MEMPTR(info->ring_buffer, float); + + STBIR__DEBUG_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->encode_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes); + } + else + { + info->horizontal_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float); + info->ring_buffer = STBIR__NEXT_MEMPTR(info->horizontal_buffer, float); + info->encode_buffer = NULL; + + STBIR__DEBUG_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->ring_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes); + } + +#undef STBIR__NEXT_MEMPTR + + // This signals that the ring buffer is empty + info->ring_buffer_begin_index = -1; + + stbir__calculate_filters(info, info->horizontal_contributors, info->horizontal_coefficients, info->horizontal_filter, info->horizontal_scale, info->horizontal_shift, info->input_w, info->output_w); + stbir__calculate_filters(info, info->vertical_contributors, info->vertical_coefficients, info->vertical_filter, info->vertical_scale, info->vertical_shift, info->input_h, info->output_h); + + STBIR_PROGRESS_REPORT(0); + + if (stbir__use_height_upsampling(info)) + stbir__buffer_loop_upsample(info); + else + stbir__buffer_loop_downsample(info); + + STBIR_PROGRESS_REPORT(1); + +#ifdef STBIR_DEBUG_OVERWRITE_TEST + STBIR__DEBUG_ASSERT(memcmp(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0); + STBIR__DEBUG_ASSERT(memcmp(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE) == 0); + STBIR__DEBUG_ASSERT(memcmp(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0); + STBIR__DEBUG_ASSERT(memcmp(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE) == 0); +#endif + + return 1; +} + + +static int stbir__resize_arbitrary( + void *alloc_context, + const void* input_data, int input_w, int 
input_h, int input_stride_in_bytes, + void* output_data, int output_w, int output_h, int output_stride_in_bytes, + float s0, float t0, float s1, float t1, float *transform, + int channels, int alpha_channel, stbir_uint32 flags, stbir_datatype type, + stbir_filter h_filter, stbir_filter v_filter, + stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace) +{ + stbir__info info; + int result; + size_t memory_required; + void* extra_memory; + + stbir__setup(&info, input_w, input_h, output_w, output_h, channels); + stbir__calculate_transform(&info, s0,t0,s1,t1,transform); + stbir__choose_filter(&info, h_filter, v_filter); + memory_required = stbir__calculate_memory(&info); + extra_memory = STBIR_MALLOC(memory_required, alloc_context); + + if (!extra_memory) + return 0; + + result = stbir__resize_allocated(&info, input_data, input_stride_in_bytes, + output_data, output_stride_in_bytes, + alpha_channel, flags, type, + edge_horizontal, edge_vertical, + colorspace, extra_memory, memory_required); + + STBIR_FREE(extra_memory, alloc_context); + + return result; +} + +STBIRDEF int stbir_resize_uint8( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels) +{ + return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + 0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT, + STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR); +} + +STBIRDEF int stbir_resize_float( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + float *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels) +{ + return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, 
output_h, output_stride_in_bytes,
+        0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_FLOAT, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
+        STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
+}
+/*
+ * stbir_resize_uint8_srgb
+ *
+ * Thin wrapper around stbir__resize_arbitrary for unsigned char pixel buffers.
+ * The caller's input/output buffer arguments, num_channels, alpha_channel and
+ * flags are forwarded as given. Every other argument is fixed here:
+ *   - NULL in the slot where the *_generic variants pass alloc_context,
+ *   - 0,0,1,1 in the slots where stbir_resize_region passes s0,t0,s1,t1,
+ *   - NULL in the slot where stbir_resize_subpixel passes its transform array,
+ *   - STBIR_TYPE_UINT8 as the datatype,
+ *   - STBIR_FILTER_DEFAULT in both filter slots,
+ *   - STBIR_EDGE_CLAMP in both edge-mode slots,
+ *   - STBIR_COLORSPACE_SRGB as the colorspace.
+ *
+ * Returns the int produced by stbir__resize_arbitrary, unchanged.
+ * NOTE(review): the meaning of that return value and of flags/alpha_channel is
+ * defined by stbir__resize_arbitrary, which is not visible here; confirm there.
+ */
+STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+                                     unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                     int num_channels, int alpha_channel, int flags)
+{
+    return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes,
+        0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
+        STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_SRGB);
+}
+/*
+ * stbir_resize_uint8_srgb_edgemode
+ *
+ * Same forwarding as stbir_resize_uint8_srgb (NULL alloc_context slot, 0,0,1,1
+ * region slots, NULL transform slot, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT
+ * for both filters, STBIR_COLORSPACE_SRGB), except that the caller-supplied
+ * edge_wrap_mode is passed in both edge-mode slots instead of STBIR_EDGE_CLAMP.
+ *
+ * Returns the int produced by stbir__resize_arbitrary, unchanged.
+ */
+STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+                                              unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                              int num_channels, int alpha_channel, int flags,
+                                              stbir_edge edge_wrap_mode)
+{
+    return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes,
+        0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
+        edge_wrap_mode, edge_wrap_mode, STBIR_COLORSPACE_SRGB);
+}
+/*
+ * stbir_resize_uint8_generic
+ *
+ * Wrapper for unsigned char pixel buffers in which the caller chooses the
+ * edge mode, filter, colorspace and allocation context. The single
+ * edge_wrap_mode is passed in both edge-mode slots and the single filter in
+ * both filter slots of stbir__resize_arbitrary. Fixed here: 0,0,1,1 in the
+ * region slots, NULL in the transform slot, STBIR_TYPE_UINT8 as the datatype.
+ *
+ * Returns the int produced by stbir__resize_arbitrary, unchanged.
+ */
+STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+                                         unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                         int num_channels, int alpha_channel, int flags,
+                                         stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
+                                         void *alloc_context)
+{
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes,
+        0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, filter, filter,
+        edge_wrap_mode, edge_wrap_mode, space);
+}
+/*
+ * stbir_resize_uint16_generic
+ *
+ * Identical forwarding to stbir_resize_uint8_generic, but the pixel buffers
+ * are stbir_uint16 and the datatype passed on is STBIR_TYPE_UINT16.
+ *
+ * Returns the int produced by stbir__resize_arbitrary, unchanged.
+ */
+STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+                                         stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
+                                         int num_channels, int alpha_channel, int flags,
+                                         stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
+                                         void *alloc_context)
+{
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes,
+        0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT16, filter, filter,
+        edge_wrap_mode, edge_wrap_mode, space);
+}
+
+/*
+ * stbir_resize_float_generic
+ *
+ * Identical forwarding to stbir_resize_uint8_generic, but the pixel buffers
+ * are float and the datatype passed on is STBIR_TYPE_FLOAT.
+ *
+ * Returns the int produced by stbir__resize_arbitrary, unchanged.
+ */
+STBIRDEF int stbir_resize_float_generic( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+                                         float *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
+                                         int num_channels, int alpha_channel, int flags,
+                                         stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
+                                         void *alloc_context)
+{
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes,
+        0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_FLOAT, filter, filter,
+        edge_wrap_mode, edge_wrap_mode, space);
+}
+
+/*
+ * stbir_resize
+ *
+ * Wrapper taking untyped (void *) pixel buffers plus an explicit datatype,
+ * with separate horizontal/vertical edge modes and filters. All caller
+ * arguments are forwarded to stbir__resize_arbitrary; the only values fixed
+ * here are 0,0,1,1 in the region slots (where stbir_resize_region passes
+ * s0,t0,s1,t1) and NULL in the transform slot (where stbir_resize_subpixel
+ * passes its transform array).
+ *
+ * Note the argument order differs between this signature and the call: the
+ * parameters list edge modes before filters, while the call passes
+ * filter_horizontal, filter_vertical first and then edge_mode_horizontal,
+ * edge_mode_vertical.
+ *
+ * Returns the int produced by stbir__resize_arbitrary, unchanged.
+ */
+STBIRDEF int stbir_resize( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+                           void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                           stbir_datatype datatype,
+                           int num_channels, int alpha_channel, int flags,
+                           stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
+                           stbir_filter filter_horizontal, stbir_filter filter_vertical,
+                           stbir_colorspace space, void *alloc_context)
+{
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes,
+        0,0,1,1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
+        edge_mode_horizontal, edge_mode_vertical, space);
+}
+
+/*
+ * stbir_resize_subpixel
+ *
+ * Same forwarding as stbir_resize, except that the four extra float
+ * parameters are packed into a local 4-element array which is passed in the
+ * transform slot instead of NULL. The region slots still receive 0,0,1,1.
+ *
+ * Returns the int produced by stbir__resize_arbitrary, unchanged.
+ * NOTE(review): how the callee interprets the scale/offset values is not
+ * visible in this chunk; confirm against stbir__resize_arbitrary.
+ */
+STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+                                   void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                   stbir_datatype datatype,
+                                   int num_channels, int alpha_channel, int flags,
+                                   stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
+                                   stbir_filter filter_horizontal, stbir_filter filter_vertical,
+                                   stbir_colorspace space, void *alloc_context,
+                                   float x_scale, float y_scale,
+                                   float x_offset, float y_offset)
+{
+    float transform[4];        /* layout: [0]=x_scale, [1]=y_scale, [2]=x_offset, [3]=y_offset */
+    transform[0] = x_scale;
+    transform[1] = y_scale;
+    transform[2] = x_offset;
+    transform[3] = y_offset;
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes,
+        0,0,1,1,transform,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
+        edge_mode_horizontal, edge_mode_vertical, space);
+}
+/*
+ * stbir_resize_region
+ *
+ * Same forwarding as stbir_resize, except that the caller-supplied
+ * s0,t0,s1,t1 are passed in the four slots where the other wrappers pass the
+ * constants 0,0,1,1. The transform slot receives NULL.
+ *
+ * Returns the int produced by stbir__resize_arbitrary, unchanged.
+ * NOTE(review): presumably s0,t0,s1,t1 select a sub-rectangle of the input in
+ * normalized coordinates (the other wrappers pass 0,0,1,1); confirm against
+ * stbir__resize_arbitrary.
+ */
+STBIRDEF int stbir_resize_region( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
+                                  void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
+                                  stbir_datatype datatype,
+                                  int num_channels, int alpha_channel, int flags,
+                                  stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
+                                  stbir_filter filter_horizontal, stbir_filter filter_vertical,
+                                  stbir_colorspace space, void *alloc_context,
+                                  float s0, float t0, float s1, float t1)
+{
+    return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
+        output_pixels, output_w, output_h, output_stride_in_bytes,
+        s0,t0,s1,t1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
+        edge_mode_horizontal, edge_mode_vertical, space);
+}
+
+#endif // STB_IMAGE_RESIZE_IMPLEMENTATION diff --git a/src/platform/gba/packer/tracks/cygwin1.dll b/src/platform/gba/packer/tracks/cygwin1.dll new file mode 100644 index 0000000..fc4d322 Binary files /dev/null and b/src/platform/gba/packer/tracks/cygwin1.dll differ