2014-06-24 21:53:00 +02:00
// ****************************************************************************
// * This file is part of the HqMAME project. It is distributed under *
// * GNU General Public License: http://www.gnu.org/licenses/gpl.html *
// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved *
// * *
// * Additionally and as a special exception, the author gives permission *
// * to link the code of this program with the MAME library (or with modified *
// * versions of MAME that use the same license as MAME), and distribute *
// * linked combinations including the two. You must obey the GNU General *
// * Public License in all respects for all of the code used other than MAME. *
// * If you modify this file, you may extend this exception to your version *
// * of the file, but you are not obligated to do so. If you do not wish to *
// * do so, delete this exception statement from your version. *
// ****************************************************************************
# include "xbrz.h"
# include <cassert>
# include <algorithm>
2015-01-29 21:42:32 +01:00
# include <vector>
2014-06-24 21:53:00 +02:00
namespace
{
//extract the N-th byte of "val"; N = 0 addresses the least significant byte
template <uint32_t N> inline
unsigned char getByte(uint32_t val) { return static_cast<unsigned char>((val >> (8 * N)) & 0xff); }

//channel accessors: alpha is byte 3, red byte 2, green byte 1, blue byte 0 (i.e. 0xAARRGGBB)
inline unsigned char getAlpha(uint32_t val) { return getByte<3>(val); }
inline unsigned char getRed  (uint32_t val) { return getByte<2>(val); }
inline unsigned char getGreen(uint32_t val) { return getByte<1>(val); }
inline unsigned char getBlue (uint32_t val) { return getByte<0>(val); }
//generic absolute value: yields -value for negative input and value otherwise
//only meaningful for signed types (the corresponding static_assert is disabled below)
template <class T> inline
T abs(T value)
{
    //static_assert(std::is_signed<T>::value, "abs() requires signed types");
    return value < 0 ? -value : value;
}
//bit masks selecting one 8-bit color channel each: red = bits 16-23, green = bits 8-15, blue = bits 0-7
const uint32_t redMask   = 0xffu << 16;
const uint32_t greenMask = 0xffu << 8;
const uint32_t blueMask  = 0xffu;
2015-01-29 21:42:32 +01:00
//blend color "col" over destination "dst" with opacity M / N:
//each of the four bytes becomes (col * M + dst * (N - M)) / N, rounded down
//precondition (see the disabled static_assert): 0 < M < N <= 256, otherwise the intermediate sums may overflow
template <unsigned int M, unsigned int N> inline
void alphaBlend(uint32_t& dst, uint32_t col)
{
    //static_assert(0 < M && M < N && N <= 256, "possible overflow of (col & byte1Mask) * M + (dst & byte1Mask) * (N - M)");

    const uint32_t byte1Mask = 0x000000ff;
    const uint32_t byte2Mask = 0x0000ff00;
    const uint32_t byte3Mask = 0x00ff0000;
    const uint32_t byte4Mask = 0xff000000;

    dst = (byte1Mask & (((col & byte1Mask) * M + (dst & byte1Mask) * (N - M)) / N)) | //
          (byte2Mask & (((col & byte2Mask) * M + (dst & byte2Mask) * (N - M)) / N)) | //this works because next higher 8 bits are free
          (byte3Mask & (((col & byte3Mask) * M + (dst & byte3Mask) * (N - M)) / N)) | //
          (byte4Mask & (((((col & byte4Mask) >> 8) * M + ((dst & byte4Mask) >> 8) * (N - M)) / N) << 8)); //next 8 bits are not free, so shift
    //the last row operating on a potential alpha channel costs only ~1% perf => negligible!
}
//inline
//double fastSqrt(double n)
//{
2015-01-29 21:42:32 +01:00
// __asm //speeds up xBRZ by about 9% compared to std::sqrt which internally uses the same assembler instructions but adds some "fluff"
2014-06-24 21:53:00 +02:00
// {
// fld n
// fsqrt
// }
//}
//
2015-01-29 21:42:32 +01:00
//inline
//uint32_t alphaBlend2(uint32_t pix1, uint32_t pix2, double alpha)
//{
// return (redMask & static_cast<uint32_t>((pix1 & redMask ) * alpha + (pix2 & redMask ) * (1 - alpha))) |
// (greenMask & static_cast<uint32_t>((pix1 & greenMask) * alpha + (pix2 & greenMask) * (1 - alpha))) |
// (blueMask & static_cast<uint32_t>((pix1 & blueMask ) * alpha + (pix2 & blueMask ) * (1 - alpha)));
//}
2014-06-24 21:53:00 +02:00
2015-01-29 21:42:32 +01:00
//advance a pixel pointer by a number of *bytes* (not elements); used to step from one row to the next via the pitch
uint32_t*       byteAdvance(      uint32_t* ptr, int bytes) { return reinterpret_cast<      uint32_t*>(reinterpret_cast<      char*>(ptr) + bytes); }
const uint32_t* byteAdvance(const uint32_t* ptr, int bytes) { return reinterpret_cast<const uint32_t*>(reinterpret_cast<const char*>(ptr) + bytes); }

//fill block with the given color
//  trg:   top-left pixel of the block
//  pitch: distance between the starts of two consecutive rows, in bytes
//  blockWidth, blockHeight: block size in pixels; nothing is written if either is <= 0
inline
void fillBlock(uint32_t* trg, int pitch, uint32_t col, int blockWidth, int blockHeight)
{
    //for (int y = 0; y < blockHeight; ++y, trg = byteAdvance(trg, pitch))
    //    std::fill(trg, trg + blockWidth, col);

    for (int y = 0; y < blockHeight; ++y, trg = byteAdvance(trg, pitch)) //the counters must really be incremented ("++y", not "+ +y")
        for (int x = 0; x < blockWidth; ++x)
            trg[x] = col;
}

//fill a square block of n x n pixels with the given color
inline
void fillBlock(uint32_t* trg, int pitch, uint32_t col, int n) { fillBlock(trg, pitch, col, n, n); }
//FORCE_INLINE: compiler-specific "always inline" request; degrades to plain "inline" on compilers other than MSVC/GCC-compatible ones
#ifdef _MSC_VER
    #define FORCE_INLINE __forceinline
#elif defined __GNUC__
    #define FORCE_INLINE __attribute__((always_inline)) inline
#else
    #define FORCE_INLINE inline
#endif
//clock-wise rotation in steps of 90 degrees
//the enumerators are consecutive integers starting at 0, so "rotDeg - 1" denotes the previous rotation step
enum RotationDegree
{
    ROT_0   = 0,
    ROT_90  = 1,
    ROT_180 = 2,
    ROT_270 = 3
};
//calculate input matrix coordinates after rotation at compile time
//  (I, J) = (row, col) indices, N = size of the (square) matrix
//  I_old/J_old are the coordinates the element had *before* the rotation
template <RotationDegree rotDeg, size_t I, size_t J, size_t N>
struct MatrixRotation;

//base case: no rotation, coordinates are unchanged
template <size_t I, size_t J, size_t N>
struct MatrixRotation<ROT_0, I, J, N>
{
    static const size_t I_old = I;
    static const size_t J_old = J;
};

//general case: apply one more 90 degree step on top of the result for the previous rotation (rotDeg - 1)
template <RotationDegree rotDeg, size_t I, size_t J, size_t N> //(i, j) = (row, col) indices, N = size of (square) matrix
struct MatrixRotation
{
    static const size_t I_old = N - 1 - MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>::J_old; //old coordinates before rotation!
    static const size_t J_old =         MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>::I_old; //
};
//view onto an N x N block of the output image that is addressed in rotated coordinates:
//ref<I, J>() maps the (row, col) pair through MatrixRotation<rotDeg, ...> and returns the corresponding output pixel
template <size_t N, RotationDegree rotDeg>
class OutputMatrix
{
public:
    OutputMatrix(uint32_t* out, int outWidth) : //access matrix area, top-left at position "out" for image with given width
        out_(out),
        outWidth_(outWidth) {}

    //reference to the pixel at rotated position (I, J); outWidth_ is the row stride in pixels
    template <size_t I, size_t J>
    uint32_t& ref() const
    {
        static const size_t I_old = MatrixRotation<rotDeg, I, J, N>::I_old;
        static const size_t J_old = MatrixRotation<rotDeg, I, J, N>::J_old;
        return *(out_ + J_old + I_old * outWidth_);
    }

private:
    uint32_t* out_;
    const int outWidth_;
};
//multiply a value with itself; the result has the same type as the argument
template <class T> inline
T square(T value)
{
    const T squared = value * value;
    return squared;
}
/*
inline
void rgbtoLuv ( uint32_t c , double & L , double & u , double & v )
{
//http://www.easyrgb.com/index.php?X=MATH&H=02#text2
double r = getRed ( c ) / 255.0 ;
double g = getGreen ( c ) / 255.0 ;
double b = getBlue ( c ) / 255.0 ;
if ( r > 0.04045 )
r = std : : pow ( ( ( r + 0.055 ) / 1.055 ) , 2.4 ) ;
else
r / = 12.92 ;
if ( g > 0.04045 )
g = std : : pow ( ( ( g + 0.055 ) / 1.055 ) , 2.4 ) ;
else
g / = 12.92 ;
if ( b > 0.04045 )
b = std : : pow ( ( ( b + 0.055 ) / 1.055 ) , 2.4 ) ;
else
b / = 12.92 ;
r * = 100 ;
g * = 100 ;
b * = 100 ;
double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b ;
double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b ;
double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b ;
//---------------------
2015-01-29 21:42:32 +01:00
double var_U = 4 * x / ( x + 15 * y + 3 * z ) ;
double var_V = 9 * y / ( x + 15 * y + 3 * z ) ;
2014-06-24 21:53:00 +02:00
double var_Y = y / 100 ;
if ( var_Y > 0.008856 ) var_Y = std : : pow ( var_Y , 1.0 / 3 ) ;
2015-01-29 21:42:32 +01:00
else var_Y = 7.787 * var_Y + 16.0 / 116 ;
2014-06-24 21:53:00 +02:00
const double ref_X = 95.047 ; //Observer= 2 degrees, Illuminant= D65
const double ref_Y = 100.000 ;
const double ref_Z = 108.883 ;
const double ref_U = ( 4 * ref_X ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) ) ;
const double ref_V = ( 9 * ref_Y ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) ) ;
L = ( 116 * var_Y ) - 16 ;
u = 13 * L * ( var_U - ref_U ) ;
v = 13 * L * ( var_V - ref_V ) ;
}
*/
//convert an RGB pixel to CIE L*a*b* relative to the D65 reference white
//  c:    input pixel; only the red, green and blue bytes are read
//  L:    receives the lightness, truncated to an integer
//  A, B: receive the two color-opponent components, truncated to integers
inline
void rgbtoLab(uint32_t c, unsigned char& L, signed char& A, signed char& B)
{
    //code: http://www.easyrgb.com/index.php?X=MATH
    //test: http://www.workwithcolor.com/color-converter-01.htm
    //------RGB to XYZ------
    double r = getRed  (c) / 255.0;
    double g = getGreen(c) / 255.0;
    double b = getBlue (c) / 255.0;

    //remove the sRGB gamma: every channel has to be linearized into *its own* variable
    r = r > 0.04045 ? std::pow((r + 0.055) / 1.055, 2.4) : r / 12.92;
    g = g > 0.04045 ? std::pow((g + 0.055) / 1.055, 2.4) : g / 12.92;
    b = b > 0.04045 ? std::pow((b + 0.055) / 1.055, 2.4) : b / 12.92;

    r *= 100;
    g *= 100;
    b *= 100;

    const double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
    const double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
    const double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;

    //------XYZ to Lab------
    const double refX =  95.047; //
    const double refY = 100.000; //Observer= 2 degrees, Illuminant= D65
    const double refZ = 108.883; //

    double var_X = x / refX;
    double var_Y = y / refY;
    double var_Z = z / refZ;

    //cube root above the threshold, linear segment below it
    var_X = var_X > 0.008856 ? std::pow(var_X, 1.0 / 3) : 7.787 * var_X + 4.0 / 29;
    var_Y = var_Y > 0.008856 ? std::pow(var_Y, 1.0 / 3) : 7.787 * var_Y + 4.0 / 29;
    var_Z = var_Z > 0.008856 ? std::pow(var_Z, 1.0 / 3) : 7.787 * var_Z + 4.0 / 29;

    L = static_cast<unsigned char>(116 * var_Y - 16);
    A = static_cast<  signed char>(500 * (var_X - var_Y));
    B = static_cast<  signed char>(200 * (var_Y - var_Z));
}
inline
double distLAB ( uint32_t pix1 , uint32_t pix2 )
{
unsigned char L1 = 0 ; //[0, 100]
signed char a1 = 0 ; //[-128, 127]
signed char b1 = 0 ; //[-128, 127]
rgbtoLab ( pix1 , L1 , a1 , b1 ) ;
unsigned char L2 = 0 ;
signed char a2 = 0 ;
signed char b2 = 0 ;
rgbtoLab ( pix2 , L2 , a2 , b2 ) ;
//-----------------------------
//http://www.easyrgb.com/index.php?X=DELT
//Delta E/CIE76
return std : : sqrt ( square ( 1.0 * L1 - L2 ) +
square ( 1.0 * a1 - a2 ) +
square ( 1.0 * b1 - b2 ) ) ;
}
/*
inline
void rgbtoHsl ( uint32_t c , double & h , double & s , double & l )
{
//http://www.easyrgb.com/index.php?X=MATH&H=18#text18
const int r = getRed ( c ) ;
const int g = getGreen ( c ) ;
const int b = getBlue ( c ) ;
const int varMin = numeric : : min ( r , g , b ) ;
const int varMax = numeric : : max ( r , g , b ) ;
const int delMax = varMax - varMin ;
l = ( varMax + varMin ) / 2.0 / 255.0 ;
if ( delMax = = 0 ) //gray, no chroma...
{
h = 0 ;
s = 0 ;
}
else
{
s = l < 0.5 ?
delMax / ( 1.0 * varMax + varMin ) :
delMax / ( 2.0 * 255 - varMax - varMin ) ;
double delR = ( ( varMax - r ) / 6.0 + delMax / 2.0 ) / delMax ;
double delG = ( ( varMax - g ) / 6.0 + delMax / 2.0 ) / delMax ;
double delB = ( ( varMax - b ) / 6.0 + delMax / 2.0 ) / delMax ;
if ( r = = varMax )
h = delB - delG ;
else if ( g = = varMax )
h = 1 / 3.0 + delR - delB ;
else if ( b = = varMax )
h = 2 / 3.0 + delG - delR ;
if ( h < 0 )
h + = 1 ;
if ( h > 1 )
h - = 1 ;
}
}
inline
double distHSL ( uint32_t pix1 , uint32_t pix2 , double lightningWeight )
{
double h1 = 0 ;
double s1 = 0 ;
double l1 = 0 ;
rgbtoHsl ( pix1 , h1 , s1 , l1 ) ;
double h2 = 0 ;
double s2 = 0 ;
double l2 = 0 ;
rgbtoHsl ( pix2 , h2 , s2 , l2 ) ;
//HSL is in cylindrical coordinates where L represents height, S radius, H angle,
//however we interpret the cylinder as a bi-conic solid with top/bottom radius 0, middle radius 1
assert ( 0 < = h1 & & h1 < = 1 ) ;
assert ( 0 < = h2 & & h2 < = 1 ) ;
double r1 = l1 < 0.5 ?
l1 * 2 :
2 - l1 * 2 ;
double x1 = r1 * s1 * std : : cos ( h1 * 2 * numeric : : pi ) ;
double y1 = r1 * s1 * std : : sin ( h1 * 2 * numeric : : pi ) ;
double z1 = l1 ;
double r2 = l2 < 0.5 ?
l2 * 2 :
2 - l2 * 2 ;
double x2 = r2 * s2 * std : : cos ( h2 * 2 * numeric : : pi ) ;
double y2 = r2 * s2 * std : : sin ( h2 * 2 * numeric : : pi ) ;
double z2 = l2 ;
2015-01-29 21:42:32 +01:00
return 255 * std : : sqrt ( square ( x1 - x2 ) + square ( y1 - y2 ) + square ( lightningWeight * ( z1 - z2 ) ) ) ;
2014-06-24 21:53:00 +02:00
}
*/
inline
double distRGB ( uint32_t pix1 , uint32_t pix2 )
{
const double r_diff = static_cast < int > ( getRed ( pix1 ) ) - getRed ( pix2 ) ;
const double g_diff = static_cast < int > ( getGreen ( pix1 ) ) - getGreen ( pix2 ) ;
const double b_diff = static_cast < int > ( getBlue ( pix1 ) ) - getBlue ( pix2 ) ;
//euklidean RGB distance
return std : : sqrt ( square ( r_diff ) + square ( g_diff ) + square ( b_diff ) ) ;
}
inline
double distNonLinearRGB ( uint32_t pix1 , uint32_t pix2 )
{
//non-linear rgb: http://www.compuphase.com/cmetric.htm
const double r_diff = static_cast < int > ( getRed ( pix1 ) ) - getRed ( pix2 ) ;
const double g_diff = static_cast < int > ( getGreen ( pix1 ) ) - getGreen ( pix2 ) ;
const double b_diff = static_cast < int > ( getBlue ( pix1 ) ) - getBlue ( pix2 ) ;
const double r_avg = ( static_cast < double > ( getRed ( pix1 ) ) + getRed ( pix2 ) ) / 2 ;
return std : : sqrt ( ( 2 + r_avg / 255 ) * square ( r_diff ) + 4 * square ( g_diff ) + ( 2 + ( 255 - r_avg ) / 255 ) * square ( b_diff ) ) ;
}
inline
double distYCbCr ( uint32_t pix1 , uint32_t pix2 , double lumaWeight )
{
//http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
//YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first!
const int r_diff = static_cast < int > ( getRed ( pix1 ) ) - getRed ( pix2 ) ; //we may delay division by 255 to after matrix multiplication
const int g_diff = static_cast < int > ( getGreen ( pix1 ) ) - getGreen ( pix2 ) ; //
const int b_diff = static_cast < int > ( getBlue ( pix1 ) ) - getBlue ( pix2 ) ; //substraction for int is noticeable faster than for double!
2015-01-29 21:42:32 +01:00
//const double k_b = 0.0722; //ITU-R BT.709 conversion
//const double k_r = 0.2126; //
const double k_b = 0.0593 ; //ITU-R BT.2020 conversion
const double k_r = 0.2627 ; //
2014-06-24 21:53:00 +02:00
const double k_g = 1 - k_b - k_r ;
const double scale_b = 0.5 / ( 1 - k_b ) ;
const double scale_r = 0.5 / ( 1 - k_r ) ;
const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff ; //[!], analog YCbCr!
const double c_b = scale_b * ( b_diff - y ) ;
const double c_r = scale_r * ( r_diff - y ) ;
//we skip division by 255 to have similar range like other distance functions
2015-01-29 21:42:32 +01:00
return std : : sqrt ( square ( lumaWeight * y ) + square ( c_b ) + square ( c_r ) ) ;
2014-06-24 21:53:00 +02:00
}
2015-01-29 21:42:32 +01:00
struct DistYCbCrBuffer //30% perf boost compared to distYCbCr()!
{
public :
DistYCbCrBuffer ( ) : buffer ( 256 * 256 * 256 )
{
for ( uint32_t i = 0 ; i < 256 * 256 * 256 ; + + i ) //startup time: 114 ms on Intel Core i5 (four cores)
{
const int r_diff = getByte < 2 > ( i ) * 2 - 255 ;
const int g_diff = getByte < 1 > ( i ) * 2 - 255 ;
const int b_diff = getByte < 0 > ( i ) * 2 - 255 ;
const double k_b = 0.0593 ; //ITU-R BT.2020 conversion
const double k_r = 0.2627 ; //
const double k_g = 1 - k_b - k_r ;
const double scale_b = 0.5 / ( 1 - k_b ) ;
const double scale_r = 0.5 / ( 1 - k_r ) ;
const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff ; //[!], analog YCbCr!
const double c_b = scale_b * ( b_diff - y ) ;
const double c_r = scale_r * ( r_diff - y ) ;
buffer [ i ] = static_cast < float > ( std : : sqrt ( square ( y ) + square ( c_b ) + square ( c_r ) ) ) ;
}
}
double dist ( uint32_t pix1 , uint32_t pix2 ) const
{
//if (pix1 == pix2) -> 8% perf degradation!
// return 0;
//if (pix1 > pix2)
// std::swap(pix1, pix2); -> 30% perf degradation!!!
const int r_diff = static_cast < int > ( getRed ( pix1 ) ) - getRed ( pix2 ) ;
const int g_diff = static_cast < int > ( getGreen ( pix1 ) ) - getGreen ( pix2 ) ;
const int b_diff = static_cast < int > ( getBlue ( pix1 ) ) - getBlue ( pix2 ) ;
return buffer [ ( ( ( r_diff + 255 ) / 2 ) < < 16 ) | //slightly reduce precision (division by 2) to squeeze value into single byte
( ( ( g_diff + 255 ) / 2 ) < < 8 ) |
( ( b_diff + 255 ) / 2 ) ] ;
}
private :
std : : vector < float > buffer ; //consumes 64 MB memory; using double is 2% faster, but takes 128 MB
} distYCbCrBuffer ;
2014-06-24 21:53:00 +02:00
inline
double distYUV ( uint32_t pix1 , uint32_t pix2 , double luminanceWeight )
{
//perf: it's not worthwhile to buffer the YUV-conversion, the direct code is faster by ~ 6%
//since RGB -> YUV conversion is essentially a matrix multiplication, we can calculate the RGB diff before the conversion (distributive property)
const double r_diff = static_cast < int > ( getRed ( pix1 ) ) - getRed ( pix2 ) ;
const double g_diff = static_cast < int > ( getGreen ( pix1 ) ) - getGreen ( pix2 ) ;
const double b_diff = static_cast < int > ( getBlue ( pix1 ) ) - getBlue ( pix2 ) ;
//http://en.wikipedia.org/wiki/YUV#Conversion_to.2Ffrom_RGB
const double w_b = 0.114 ;
const double w_r = 0.299 ;
const double w_g = 1 - w_r - w_b ;
const double u_max = 0.436 ;
const double v_max = 0.615 ;
const double scale_u = u_max / ( 1 - w_b ) ;
const double scale_v = v_max / ( 1 - w_r ) ;
double y = w_r * r_diff + w_g * g_diff + w_b * b_diff ; //value range: 255 * [-1, 1]
double u = scale_u * ( b_diff - y ) ; //value range: 255 * 2 * u_max * [-1, 1]
double v = scale_v * ( r_diff - y ) ; //value range: 255 * 2 * v_max * [-1, 1]
# ifndef NDEBUG
const double eps = 0.5 ;
# endif
2015-01-29 21:42:32 +01:00
assert ( abs ( y ) < = 255 + eps ) ;
assert ( abs ( u ) < = 255 * 2 * u_max + eps ) ;
assert ( abs ( v ) < = 255 * 2 * v_max + eps ) ;
2014-06-24 21:53:00 +02:00
2015-01-29 21:42:32 +01:00
return std : : sqrt ( square ( luminanceWeight * y ) + square ( u ) + square ( v ) ) ;
2014-06-24 21:53:00 +02:00
}
//strength of the blend indication for one pixel corner
//attention: BlendType must fit into the value range of 2 bit!!!
enum BlendType
{
    BLEND_NONE     = 0,
    BLEND_NORMAL   = 1, //a normal indication to blend
    BLEND_DOMINANT = 2  //a strong indication to blend
};
//blend types determined for the four pixels F, G, J, K surrounding one corner
//member order follows the kernel layout: F G in the upper row, J K in the lower row
struct BlendResult
{
    BlendType blend_f;
    BlendType blend_g;
    BlendType blend_j;
    BlendType blend_k;
};
//kernel for preprocessing step: 4 x 4 pixels, named row by row from "a" (top-left) to "p" (bottom-right)
struct Kernel_4x4
{
    uint32_t a, b, c, d; //first row
    uint32_t e, f, g, h; //second row
    uint32_t i, j, k, l; //third row
    uint32_t m, n, o, p; //fourth row
};
2015-01-29 21:42:32 +01:00
//shorthand for the color distance of the active policy; expands to names "ColorDistance" and "cfg" that must be visible at the point of use
#define cdist(lhs, rhs) ColorDistance::dist((lhs), (rhs), cfg.luminanceWeight_)
2014-06-24 21:53:00 +02:00
/*
input kernel area naming convention:
-----------------
| A | B | C | D |
----|---|---|---|
| E | F | G | H |   //evaluate the four corners between F, G, J, K
----|---|---|---|   //input pixel is at position F
| I | J | K | L |
----|---|---|---|
| M | N | O | P |
-----------------
*/
2015-01-29 21:42:32 +01:00
template < class ColorDistance >
2014-06-24 21:53:00 +02:00
FORCE_INLINE //detect blend direction
BlendResult preProcessCorners ( const Kernel_4x4 & ker , const xbrz : : ScalerCfg & cfg ) //result: F, G, J, K corners of "GradientType"
{
BlendResult result = { } ;
if ( ( ker . f = = ker . g & &
ker . j = = ker . k ) | |
( ker . f = = ker . j & &
ker . g = = ker . k ) )
return result ;
2015-01-29 21:42:32 +01:00
//auto dist = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight_); };
2014-06-24 21:53:00 +02:00
const int weight = 4 ;
2015-01-29 21:42:32 +01:00
double jg = cdist ( ker . i , ker . f ) + cdist ( ker . f , ker . c ) + cdist ( ker . n , ker . k ) + cdist ( ker . k , ker . h ) + weight * cdist ( ker . j , ker . g ) ;
double fk = cdist ( ker . e , ker . j ) + cdist ( ker . j , ker . o ) + cdist ( ker . b , ker . g ) + cdist ( ker . g , ker . l ) + weight * cdist ( ker . f , ker . k ) ;
2014-06-24 21:53:00 +02:00
if ( jg < fk ) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
{
const bool dominantGradient = cfg . dominantDirectionThreshold * jg < fk ;
if ( ker . f ! = ker . g & & ker . f ! = ker . j )
result . blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL ;
if ( ker . k ! = ker . j & & ker . k ! = ker . g )
result . blend_k = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL ;
}
else if ( fk < jg )
{
const bool dominantGradient = cfg . dominantDirectionThreshold * fk < jg ;
if ( ker . j ! = ker . f & & ker . j ! = ker . k )
result . blend_j = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL ;
if ( ker . g ! = ker . f & & ker . g ! = ker . k )
result . blend_g = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL ;
}
return result ;
}
//3 x 3 pixel neighborhood, named row by row from "a" (top-left) to "i" (bottom-right)
struct Kernel_3x3
{
    uint32_t a, b, c; //upper row
    uint32_t d, e, f; //middle row
    uint32_t g, h, i; //lower row
};
//get_x<rotDeg>(ker): pixel found at kernel position "x" once the 3x3 kernel is rotated clock-wise by rotDeg
//the primary templates return the member of the same name (used for ROT_0);
//the explicit specializations further below remap every position for ROT_90, ROT_180 and ROT_270
#define DEF_GETTER_IDENTITY(pos) \
    template <RotationDegree rotDeg> uint32_t inline get_##pos(const Kernel_3x3& ker) { return ker.pos; }
//writing "ker.##pos" is neither possible nor needed: pasting "." with an identifier does not form a valid token, plain "ker.pos" is substituted just fine
DEF_GETTER_IDENTITY(a) DEF_GETTER_IDENTITY(b) DEF_GETTER_IDENTITY(c)
DEF_GETTER_IDENTITY(d) DEF_GETTER_IDENTITY(e) DEF_GETTER_IDENTITY(f)
DEF_GETTER_IDENTITY(g) DEF_GETTER_IDENTITY(h) DEF_GETTER_IDENTITY(i)
#undef DEF_GETTER_IDENTITY

//DEF_GETTER_ROTATED(rot, pos, origin): for rotation "rot", position "pos" reads the unrotated member "origin"
#define DEF_GETTER_ROTATED(rot, pos, origin) \
    template <> inline uint32_t get_##pos<rot>(const Kernel_3x3& ker) { return ker.origin; }

DEF_GETTER_ROTATED(ROT_90, a, g) DEF_GETTER_ROTATED(ROT_90, b, d) DEF_GETTER_ROTATED(ROT_90, c, a)
DEF_GETTER_ROTATED(ROT_90, d, h) DEF_GETTER_ROTATED(ROT_90, e, e) DEF_GETTER_ROTATED(ROT_90, f, b)
DEF_GETTER_ROTATED(ROT_90, g, i) DEF_GETTER_ROTATED(ROT_90, h, f) DEF_GETTER_ROTATED(ROT_90, i, c)

DEF_GETTER_ROTATED(ROT_180, a, i) DEF_GETTER_ROTATED(ROT_180, b, h) DEF_GETTER_ROTATED(ROT_180, c, g)
DEF_GETTER_ROTATED(ROT_180, d, f) DEF_GETTER_ROTATED(ROT_180, e, e) DEF_GETTER_ROTATED(ROT_180, f, d)
DEF_GETTER_ROTATED(ROT_180, g, c) DEF_GETTER_ROTATED(ROT_180, h, b) DEF_GETTER_ROTATED(ROT_180, i, a)

DEF_GETTER_ROTATED(ROT_270, a, c) DEF_GETTER_ROTATED(ROT_270, b, f) DEF_GETTER_ROTATED(ROT_270, c, i)
DEF_GETTER_ROTATED(ROT_270, d, b) DEF_GETTER_ROTATED(ROT_270, e, e) DEF_GETTER_ROTATED(ROT_270, f, h)
DEF_GETTER_ROTATED(ROT_270, g, a) DEF_GETTER_ROTATED(ROT_270, h, d) DEF_GETTER_ROTATED(ROT_270, i, g)
#undef DEF_GETTER_ROTATED
//compress four blend types into a single byte, two bits per corner:
//  bits 0-1: top-left, bits 2-3: top-right, bits 4-5: bottom-right, bits 6-7: bottom-left
inline BlendType getTopL   (unsigned char b) { return static_cast<BlendType>(0x3 & b); }
inline BlendType getTopR   (unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 2)); }
inline BlendType getBottomR(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 4)); }
inline BlendType getBottomL(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 6)); }

//the setters only OR bits in and never clear them
inline void setTopL   (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing!
inline void setTopR   (unsigned char& b, BlendType bt) { b |= (bt << 2); }
inline void setBottomR(unsigned char& b, BlendType bt) { b |= (bt << 4); }
inline void setBottomL(unsigned char& b, BlendType bt) { b |= (bt << 6); }

//true if at least one corner carries a blend type other than BLEND_NONE (== 0)
inline bool blendingNeeded(unsigned char b) { return b != 0; }
//rotate the packed corner information so that it matches a kernel rotated by rotDeg:
//the byte is rotated left by one corner slot (2 bits) per 90 degree step; the primary template (ROT_0) returns it unchanged
template <RotationDegree rotDeg> inline
unsigned char rotateBlendInfo(unsigned char b) { return b; }
template <> inline unsigned char rotateBlendInfo<ROT_90 >(unsigned char b) { return ((b << 2) | (b >> 6)) & 0xff; }
template <> inline unsigned char rotateBlendInfo<ROT_180>(unsigned char b) { return ((b << 4) | (b >> 4)) & 0xff; }
template <> inline unsigned char rotateBlendInfo<ROT_270>(unsigned char b) { return ((b << 6) | (b >> 2)) & 0xff; }
#ifndef NDEBUG
//debug builds only: source pixel at which scaleImage() arms the debugger break
//NOTE(review): these are plain mutable globals; if scaleImage() is run on several stripes concurrently, "breakIntoDebugger" is written without synchronization - confirm this is acceptable for debug builds
int debugPixelX = -1;
int debugPixelY = 84;
bool breakIntoDebugger = false;
#endif
2015-01-29 21:42:32 +01:00
//"equal enough" test: color distance of the active policy below the configured tolerance; expands to names "ColorDistance" and "cfg" that must be visible at the point of use
#define eq(lhs, rhs) (ColorDistance::dist((lhs), (rhs), cfg.luminanceWeight_) < cfg.equalColorTolerance_)
2014-06-24 21:53:00 +02:00
/*
input kernel area naming convention:
-------------
| A | B | C |
----|---|---|
| D | E | F |   //input pixel is at position E
----|---|---|
| G | H | I |
-------------
*/
2015-01-29 21:42:32 +01:00
template < class Scaler , class ColorDistance , RotationDegree rotDeg >
2014-06-24 21:53:00 +02:00
FORCE_INLINE //perf: quite worth it!
2015-01-29 21:42:32 +01:00
void blendPixel ( const Kernel_3x3 & ker ,
2014-06-24 21:53:00 +02:00
uint32_t * target , int trgWidth ,
unsigned char blendInfo , //result of preprocessing all four corners of pixel "e"
const xbrz : : ScalerCfg & cfg )
{
# define a get_a<rotDeg>(ker)
# define b get_b<rotDeg>(ker)
# define c get_c<rotDeg>(ker)
# define d get_d<rotDeg>(ker)
# define e get_e<rotDeg>(ker)
# define f get_f<rotDeg>(ker)
# define g get_g<rotDeg>(ker)
# define h get_h<rotDeg>(ker)
# define i get_i<rotDeg>(ker)
# ifndef NDEBUG
if ( breakIntoDebugger )
__debugbreak ( ) ; //__asm int 3;
# endif
const unsigned char blend = rotateBlendInfo < rotDeg > ( blendInfo ) ;
if ( getBottomR ( blend ) > = BLEND_NORMAL )
{
2015-01-29 21:42:32 +01:00
//auto eq = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight_) < cfg.equalColorTolerance_; };
//auto dist = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight_); };
2014-06-24 21:53:00 +02:00
bool doLineBlend = true ;
if ( getBottomR ( blend ) > = BLEND_DOMINANT )
doLineBlend = true ;
//make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
else if ( getTopR ( blend ) ! = BLEND_NONE & & ! eq ( e , g ) ) //but support double-blending for 90<39> corners
doLineBlend = false ;
else if ( getBottomL ( blend ) ! = BLEND_NONE & & ! eq ( e , c ) )
doLineBlend = false ;
//no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
2015-01-29 21:42:32 +01:00
else if ( ! eq ( e , i ) & & eq ( g , h ) & & eq ( h , i ) & & eq ( i , f ) & & eq ( f , c ) )
2014-06-24 21:53:00 +02:00
doLineBlend = false ;
else
doLineBlend = true ;
2015-01-29 21:42:32 +01:00
const uint32_t px = cdist ( e , f ) < = cdist ( e , h ) ? f : h ; //choose most similar color
2014-06-24 21:53:00 +02:00
OutputMatrix < Scaler : : scale , rotDeg > out ( target , trgWidth ) ;
if ( doLineBlend )
{
2015-01-29 21:42:32 +01:00
const double fg = cdist ( f , g ) ; //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
const double hc = cdist ( h , c ) ; //
2014-06-24 21:53:00 +02:00
const bool haveShallowLine = cfg . steepDirectionThreshold * fg < = hc & & e ! = g & & d ! = g ;
const bool haveSteepLine = cfg . steepDirectionThreshold * hc < = fg & & e ! = c & & b ! = c ;
if ( haveShallowLine )
{
if ( haveSteepLine )
Scaler : : blendLineSteepAndShallow ( px , out ) ;
else
Scaler : : blendLineShallow ( px , out ) ;
}
else
{
if ( haveSteepLine )
Scaler : : blendLineSteep ( px , out ) ;
else
Scaler : : blendLineDiagonal ( px , out ) ;
}
}
else
Scaler : : blendCorner ( px , out ) ;
}
# undef a
# undef b
# undef c
# undef d
# undef e
# undef f
# undef g
# undef h
# undef i
}
2015-01-29 21:42:32 +01:00
template < class Scaler , class ColorDistance > //scaler policy: see "Scaler2x" reference implementation
2014-06-24 21:53:00 +02:00
void scaleImage ( const uint32_t * src , uint32_t * trg , int srcWidth , int srcHeight , const xbrz : : ScalerCfg & cfg , int yFirst , int yLast )
{
yFirst = std : : max ( yFirst , 0 ) ;
yLast = std : : min ( yLast , srcHeight ) ;
if ( yFirst > = yLast | | srcWidth < = 0 )
return ;
const int trgWidth = srcWidth * Scaler : : scale ;
//"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of
//"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing
const int bufferSize = srcWidth ;
unsigned char * preProcBuffer = reinterpret_cast < unsigned char * > ( trg + yLast * Scaler : : scale * trgWidth ) - bufferSize ;
std : : fill ( preProcBuffer , preProcBuffer + bufferSize , 0 ) ;
//static_assert(BLEND_NONE == 0, "");
2015-01-29 21:42:32 +01:00
//initialize preprocessing buffer for first row of current stripe: detect upper left and right corner blending
2014-06-24 21:53:00 +02:00
//this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!
if ( yFirst > 0 )
{
const int y = yFirst - 1 ;
const uint32_t * s_m1 = src + srcWidth * std : : max ( y - 1 , 0 ) ;
const uint32_t * s_0 = src + srcWidth * y ; //center line
const uint32_t * s_p1 = src + srcWidth * std : : min ( y + 1 , srcHeight - 1 ) ;
const uint32_t * s_p2 = src + srcWidth * std : : min ( y + 2 , srcHeight - 1 ) ;
for ( int x = 0 ; x < srcWidth ; + + x )
{
const int x_m1 = std : : max ( x - 1 , 0 ) ;
const int x_p1 = std : : min ( x + 1 , srcWidth - 1 ) ;
const int x_p2 = std : : min ( x + 2 , srcWidth - 1 ) ;
2015-01-29 21:42:32 +01:00
Kernel_4x4 ker = { } ; //perf: initialization is negligible
2014-06-24 21:53:00 +02:00
ker . a = s_m1 [ x_m1 ] ; //read sequentially from memory as far as possible
ker . b = s_m1 [ x ] ;
ker . c = s_m1 [ x_p1 ] ;
ker . d = s_m1 [ x_p2 ] ;
ker . e = s_0 [ x_m1 ] ;
ker . f = s_0 [ x ] ;
ker . g = s_0 [ x_p1 ] ;
ker . h = s_0 [ x_p2 ] ;
ker . i = s_p1 [ x_m1 ] ;
ker . j = s_p1 [ x ] ;
ker . k = s_p1 [ x_p1 ] ;
ker . l = s_p1 [ x_p2 ] ;
ker . m = s_p2 [ x_m1 ] ;
ker . n = s_p2 [ x ] ;
ker . o = s_p2 [ x_p1 ] ;
ker . p = s_p2 [ x_p2 ] ;
2015-01-29 21:42:32 +01:00
const BlendResult res = preProcessCorners < ColorDistance > ( ker , cfg ) ;
2014-06-24 21:53:00 +02:00
/*
preprocessing blend result :
- - - - - - - - -
| F | G | //evalute corner between F, G, J, K
- - - - | - - - | //input pixel is at position F
| J | K |
- - - - - - - - -
*/
setTopR ( preProcBuffer [ x ] , res . blend_j ) ;
2015-01-29 21:42:32 +01:00
if ( x + 1 < bufferSize )
2014-06-24 21:53:00 +02:00
setTopL ( preProcBuffer [ x + 1 ] , res . blend_k ) ;
}
}
//------------------------------------------------------------------------------------
for ( int y = yFirst ; y < yLast ; + + y )
{
uint32_t * out = trg + Scaler : : scale * y * trgWidth ; //consider MT "striped" access
const uint32_t * s_m1 = src + srcWidth * std : : max ( y - 1 , 0 ) ;
const uint32_t * s_0 = src + srcWidth * y ; //center line
const uint32_t * s_p1 = src + srcWidth * std : : min ( y + 1 , srcHeight - 1 ) ;
const uint32_t * s_p2 = src + srcWidth * std : : min ( y + 2 , srcHeight - 1 ) ;
unsigned char blend_xy1 = 0 ; //corner blending for current (x, y + 1) position
for ( int x = 0 ; x < srcWidth ; + + x , out + = Scaler : : scale )
{
# ifndef NDEBUG
breakIntoDebugger = debugPixelX = = x & & debugPixelY = = y ;
# endif
//all those bounds checks have only insignificant impact on performance!
const int x_m1 = std : : max ( x - 1 , 0 ) ; //perf: prefer array indexing to additional pointers!
const int x_p1 = std : : min ( x + 1 , srcWidth - 1 ) ;
const int x_p2 = std : : min ( x + 2 , srcWidth - 1 ) ;
2015-01-29 21:42:32 +01:00
Kernel_4x4 ker4 = { } ; //perf: initialization is negligible
ker4 . a = s_m1 [ x_m1 ] ; //read sequentially from memory as far as possible
ker4 . b = s_m1 [ x ] ;
ker4 . c = s_m1 [ x_p1 ] ;
ker4 . d = s_m1 [ x_p2 ] ;
ker4 . e = s_0 [ x_m1 ] ;
ker4 . f = s_0 [ x ] ;
ker4 . g = s_0 [ x_p1 ] ;
ker4 . h = s_0 [ x_p2 ] ;
ker4 . i = s_p1 [ x_m1 ] ;
ker4 . j = s_p1 [ x ] ;
ker4 . k = s_p1 [ x_p1 ] ;
ker4 . l = s_p1 [ x_p2 ] ;
ker4 . m = s_p2 [ x_m1 ] ;
ker4 . n = s_p2 [ x ] ;
ker4 . o = s_p2 [ x_p1 ] ;
ker4 . p = s_p2 [ x_p2 ] ;
2014-06-24 21:53:00 +02:00
//evaluate the four corners on bottom-right of current pixel
unsigned char blend_xy = 0 ; //for current (x, y) position
{
2015-01-29 21:42:32 +01:00
const BlendResult res = preProcessCorners < ColorDistance > ( ker4 , cfg ) ;
2014-06-24 21:53:00 +02:00
/*
preprocessing blend result :
- - - - - - - - -
| F | G | //evalute corner between F, G, J, K
- - - - | - - - | //current input pixel is at position F
| J | K |
- - - - - - - - -
*/
blend_xy = preProcBuffer [ x ] ;
setBottomR ( blend_xy , res . blend_f ) ; //all four corners of (x, y) have been determined at this point due to processing sequence!
setTopR ( blend_xy1 , res . blend_j ) ; //set 2nd known corner for (x, y + 1)
preProcBuffer [ x ] = blend_xy1 ; //store on current buffer position for use on next row
blend_xy1 = 0 ;
setTopL ( blend_xy1 , res . blend_k ) ; //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
2015-01-29 21:42:32 +01:00
if ( x + 1 < bufferSize ) //set 3rd known corner for (x + 1, y)
2014-06-24 21:53:00 +02:00
setBottomL ( preProcBuffer [ x + 1 ] , res . blend_g ) ;
}
//fill block of size scale * scale with the given color
2015-01-29 21:42:32 +01:00
fillBlock ( out , trgWidth * sizeof ( uint32_t ) , ker4 . f , Scaler : : scale ) ; //place *after* preprocessing step, to not overwrite the results while processing the the last pixel!
2014-06-24 21:53:00 +02:00
//blend four corners of current pixel
2015-01-29 21:42:32 +01:00
if ( blendingNeeded ( blend_xy ) ) //good 5% perf-improvement
2014-06-24 21:53:00 +02:00
{
2015-01-29 21:42:32 +01:00
Kernel_3x3 ker3 = { } ; //perf: initialization is negligible
2014-06-24 21:53:00 +02:00
2015-01-29 21:42:32 +01:00
ker3 . a = ker4 . a ;
ker3 . b = ker4 . b ;
ker3 . c = ker4 . c ;
2014-06-24 21:53:00 +02:00
2015-01-29 21:42:32 +01:00
ker3 . d = ker4 . e ;
ker3 . e = ker4 . f ;
ker3 . f = ker4 . g ;
2014-06-24 21:53:00 +02:00
2015-01-29 21:42:32 +01:00
ker3 . g = ker4 . i ;
ker3 . h = ker4 . j ;
ker3 . i = ker4 . k ;
2014-06-24 21:53:00 +02:00
2015-01-29 21:42:32 +01:00
blendPixel < Scaler , ColorDistance , ROT_0 > ( ker3 , out , trgWidth , blend_xy , cfg ) ;
blendPixel < Scaler , ColorDistance , ROT_90 > ( ker3 , out , trgWidth , blend_xy , cfg ) ;
blendPixel < Scaler , ColorDistance , ROT_180 > ( ker3 , out , trgWidth , blend_xy , cfg ) ;
blendPixel < Scaler , ColorDistance , ROT_270 > ( ker3 , out , trgWidth , blend_xy , cfg ) ;
2014-06-24 21:53:00 +02:00
}
}
}
}
2015-01-29 21:42:32 +01:00
//------------------------------------------------------------------------------------
2014-06-24 21:53:00 +02:00
//Blend patterns for the 2x scaler: every method mixes "col" into cells of a 2x2 output block.
//Notation in the comments below: (I, J) is the cell returned by out.ref<I, J>(); "M/N" names the
//template arguments passed to alphaBlend<M, N>() (presumably the weight given to "col" - alphaBlend()
//and the orientation of OutputMatrix are defined outside this struct).
struct Scaler2x
{
    static const int scale = 2;

    //        J=0   J=1
    //  I=0    .     .
    //  I=1   1/4   3/4
    template <class OutputMatrix>
    static void blendLineShallow(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<1, 0>(), col);
        alphaBlend<3, 4>(out.template ref<1, 1>(), col);
    }

    //        J=0   J=1
    //  I=0    .    1/4
    //  I=1    .    3/4
    template <class OutputMatrix>
    static void blendLineSteep(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<0, 1>(), col);
        alphaBlend<3, 4>(out.template ref<1, 1>(), col);
    }

    //        J=0   J=1
    //  I=0    .    1/4
    //  I=1   1/4   5/6
    template <class OutputMatrix>
    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<0, 1>(), col);
        alphaBlend<1, 4>(out.template ref<1, 0>(), col);
        alphaBlend<5, 6>(out.template ref<1, 1>(), col); //[!] fixes 7/8 used in xBR
    }

    //only cell (1, 1) is touched: 1/2
    template <class OutputMatrix>
    static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 2>(out.template ref<1, 1>(), col);
    }

    //models a round corner; only cell (1, 1) is touched
    template <class OutputMatrix>
    static void blendCorner(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<21, 100>(out.template ref<1, 1>(), col); //exact: 1 - pi/4 = 0.2146018366
    }
};
//Blend patterns for the 3x scaler: every method mixes "col" into cells of a 3x3 output block.
//Notation in the comments below: (I, J) is the cell returned by out.ref<I, J>(); "M/N" names the
//template arguments passed to alphaBlend<M, N>() (presumably the weight given to "col" - alphaBlend()
//is defined outside this struct); "col" marks a cell that is overwritten with the color itself.
struct Scaler3x
{
    static const int scale = 3;

    //        J=0   J=1   J=2
    //  I=0    .     .     .
    //  I=1    .     .    1/4
    //  I=2   1/4   3/4   col
    template <class OutputMatrix>
    static void blendLineShallow(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<1, 2>(), col);

        alphaBlend<1, 4>(out.template ref<2, 0>(), col);
        alphaBlend<3, 4>(out.template ref<2, 1>(), col);
        out.template ref<2, 2>() = col;
    }

    //        J=0   J=1   J=2
    //  I=0    .     .    1/4
    //  I=1    .     .    3/4
    //  I=2    .    1/4   col
    template <class OutputMatrix>
    static void blendLineSteep(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<0, 2>(), col);
        alphaBlend<3, 4>(out.template ref<1, 2>(), col);

        alphaBlend<1, 4>(out.template ref<2, 1>(), col);
        out.template ref<2, 2>() = col;
    }

    //        J=0   J=1   J=2
    //  I=0    .     .    1/4
    //  I=1    .     .    3/4
    //  I=2   1/4   3/4   col
    template <class OutputMatrix>
    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<0, 2>(), col);
        alphaBlend<3, 4>(out.template ref<1, 2>(), col);

        alphaBlend<1, 4>(out.template ref<2, 0>(), col);
        alphaBlend<3, 4>(out.template ref<2, 1>(), col);
        out.template ref<2, 2>() = col;
    }

    //        J=0   J=1   J=2
    //  I=0    .     .     .
    //  I=1    .     .    1/8
    //  I=2    .    1/8   7/8
    template <class OutputMatrix>
    static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 8>(out.template ref<1, 2>(), col);
        alphaBlend<1, 8>(out.template ref<2, 1>(), col);
        alphaBlend<7, 8>(out.template ref<2, 2>(), col);
    }

    //models a round corner; only cell (2, 2) is touched
    template <class OutputMatrix>
    static void blendCorner(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<45, 100>(out.template ref<2, 2>(), col); //exact: 0.4545939598

        //contributions deliberately left out as negligible (0.01413008627 each):
        //alphaBlend<14, 1000>(out.template ref<2, 1>(), col);
        //alphaBlend<14, 1000>(out.template ref<1, 2>(), col);
    }
};
//Blend patterns for the 4x scaler: every method mixes "col" into cells of a 4x4 output block.
//Notation in the comments below: (I, J) is the cell returned by out.ref<I, J>(); "M/N" names the
//template arguments passed to alphaBlend<M, N>() (presumably the weight given to "col" - alphaBlend()
//is defined outside this struct); "col" marks a cell that is overwritten with the color itself.
struct Scaler4x
{
    static const int scale = 4;

    //        J=0   J=1   J=2   J=3
    //  I=0    .     .     .     .
    //  I=1    .     .     .     .
    //  I=2    .     .    1/4   3/4
    //  I=3   1/4   3/4   col   col
    template <class OutputMatrix>
    static void blendLineShallow(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<2, 2>(), col);
        alphaBlend<3, 4>(out.template ref<2, 3>(), col);

        alphaBlend<1, 4>(out.template ref<3, 0>(), col);
        alphaBlend<3, 4>(out.template ref<3, 1>(), col);
        out.template ref<3, 2>() = col;
        out.template ref<3, 3>() = col;
    }

    //        J=0   J=1   J=2   J=3
    //  I=0    .     .     .    1/4
    //  I=1    .     .     .    3/4
    //  I=2    .     .    1/4   col
    //  I=3    .     .    3/4   col
    template <class OutputMatrix>
    static void blendLineSteep(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<0, 3>(), col);
        alphaBlend<3, 4>(out.template ref<1, 3>(), col);

        alphaBlend<1, 4>(out.template ref<2, 2>(), col);
        out.template ref<2, 3>() = col;

        alphaBlend<3, 4>(out.template ref<3, 2>(), col);
        out.template ref<3, 3>() = col;
    }

    //        J=0   J=1   J=2   J=3
    //  I=0    .     .     .    1/4
    //  I=1    .     .     .    3/4
    //  I=2    .     .    1/3   col
    //  I=3   1/4   3/4   col   col
    template <class OutputMatrix>
    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<0, 3>(), col);
        alphaBlend<3, 4>(out.template ref<1, 3>(), col);

        alphaBlend<1, 3>(out.template ref<2, 2>(), col); //[!] fixes 1/4 used in xBR
        out.template ref<2, 3>() = col;

        alphaBlend<1, 4>(out.template ref<3, 0>(), col);
        alphaBlend<3, 4>(out.template ref<3, 1>(), col);
        out.template ref<3, 2>() = col;
        out.template ref<3, 3>() = col;
    }

    //        J=0   J=1   J=2   J=3
    //  I=2    .     .     .    1/2
    //  I=3    .     .    1/2   col
    //(rows I=0 and I=1 are not touched)
    template <class OutputMatrix>
    static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 2>(out.template ref<2, 3>(), col);
        alphaBlend<1, 2>(out.template ref<3, 2>(), col);
        out.template ref<3, 3>() = col;
    }

    //models a round corner; touches (3, 3) and its two neighbours (3, 2), (2, 3)
    template <class OutputMatrix>
    static void blendCorner(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<68, 100>(out.template ref<3, 3>(), col); //exact: 0.6848532563
        alphaBlend< 9, 100>(out.template ref<3, 2>(), col); //0.08677704501
        alphaBlend< 9, 100>(out.template ref<2, 3>(), col); //0.08677704501
    }
};
//Blend patterns for the 5x scaler: every method mixes "col" into cells of a 5x5 output block.
//Notation in the comments below: (I, J) is the cell returned by out.ref<I, J>(); "M/N" names the
//template arguments passed to alphaBlend<M, N>() (presumably the weight given to "col" - alphaBlend()
//is defined outside this struct); "col" marks a cell that is overwritten with the color itself.
struct Scaler5x
{
    static const int scale = 5;

    //        J=0   J=1   J=2   J=3   J=4
    //  I=0    .     .     .     .     .
    //  I=1    .     .     .     .     .
    //  I=2    .     .     .     .    1/4
    //  I=3    .     .    1/4   3/4   col
    //  I=4   1/4   3/4   col   col   col
    template <class OutputMatrix>
    static void blendLineShallow(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<2, 4>(), col);

        alphaBlend<1, 4>(out.template ref<3, 2>(), col);
        alphaBlend<3, 4>(out.template ref<3, 3>(), col);
        out.template ref<3, 4>() = col;

        alphaBlend<1, 4>(out.template ref<4, 0>(), col);
        alphaBlend<3, 4>(out.template ref<4, 1>(), col);
        out.template ref<4, 2>() = col;
        out.template ref<4, 3>() = col;
        out.template ref<4, 4>() = col;
    }

    //        J=0   J=1   J=2   J=3   J=4
    //  I=0    .     .     .     .    1/4
    //  I=1    .     .     .     .    3/4
    //  I=2    .     .     .    1/4   col
    //  I=3    .     .     .    3/4   col
    //  I=4    .     .    1/4   col   col
    template <class OutputMatrix>
    static void blendLineSteep(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<0, 4>(), col);
        alphaBlend<3, 4>(out.template ref<1, 4>(), col);

        alphaBlend<1, 4>(out.template ref<2, 3>(), col);
        out.template ref<2, 4>() = col;

        alphaBlend<3, 4>(out.template ref<3, 3>(), col);
        out.template ref<3, 4>() = col;

        alphaBlend<1, 4>(out.template ref<4, 2>(), col);
        out.template ref<4, 3>() = col;
        out.template ref<4, 4>() = col;
    }

    //        J=0   J=1   J=2   J=3   J=4
    //  I=0    .     .     .     .    1/4
    //  I=1    .     .     .     .    3/4
    //  I=2    .     .     .    1/4   col
    //  I=3    .     .    1/4   2/3   col
    //  I=4   1/4   3/4   col   col   col
    template <class OutputMatrix>
    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 4>(out.template ref<0, 4>(), col);
        alphaBlend<3, 4>(out.template ref<1, 4>(), col);

        alphaBlend<1, 4>(out.template ref<2, 3>(), col);
        out.template ref<2, 4>() = col;

        alphaBlend<1, 4>(out.template ref<3, 2>(), col);
        alphaBlend<2, 3>(out.template ref<3, 3>(), col);
        out.template ref<3, 4>() = col;

        alphaBlend<1, 4>(out.template ref<4, 0>(), col);
        alphaBlend<3, 4>(out.template ref<4, 1>(), col);
        out.template ref<4, 2>() = col;
        out.template ref<4, 3>() = col;
        out.template ref<4, 4>() = col;
    }

    //        J=0   J=1   J=2   J=3   J=4
    //  I=2    .     .     .     .    1/8
    //  I=3    .     .     .    1/8   7/8
    //  I=4    .     .    1/8   7/8   col
    //(rows I=0 and I=1 are not touched)
    template <class OutputMatrix>
    static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<1, 8>(out.template ref<2, 4>(), col);

        alphaBlend<1, 8>(out.template ref<3, 3>(), col);
        alphaBlend<7, 8>(out.template ref<3, 4>(), col);

        alphaBlend<1, 8>(out.template ref<4, 2>(), col);
        alphaBlend<7, 8>(out.template ref<4, 3>(), col);
        out.template ref<4, 4>() = col;
    }

    //models a round corner; touches (4, 4) and its two neighbours (4, 3), (3, 4)
    template <class OutputMatrix>
    static void blendCorner(uint32_t col, OutputMatrix& out)
    {
        alphaBlend<86, 100>(out.template ref<4, 4>(), col); //exact: 0.8631434088
        alphaBlend<23, 100>(out.template ref<4, 3>(), col); //0.2306749731
        alphaBlend<23, 100>(out.template ref<3, 4>(), col); //0.2306749731

        //contributions deliberately left out as negligible (0.008384061834 each):
        //alphaBlend<8, 1000>(out.template ref<4, 2>(), col);
        //alphaBlend<8, 1000>(out.template ref<2, 4>(), col);
    }
};
2015-01-29 21:42:32 +01:00
//------------------------------------------------------------------------------------
//Color distance policy for pixels without alpha channel.
struct ColorDistanceRGB
{
    //Returns the distance between pix1 and pix2 as reported by distYCbCrBuffer.dist().
    //"luminanceWeight" is part of the common policy signature but is not consulted by this implementation.
    static double dist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
    {
        static_cast<void>(luminanceWeight); //intentionally unused, see above

        return distYCbCrBuffer.dist(pix1, pix2);

        //alternative kept for reference (direct calculation honoring luminanceWeight):
        //if (pix1 == pix2) //about 4% perf boost
        //    return 0;
        //return distYCbCr(pix1, pix2, luminanceWeight);
    }
};
struct ColorDistanceARGB
{
static double dist ( uint32_t pix1 , uint32_t pix2 , double luminanceWeight )
{
const double a1 = getAlpha ( pix1 ) / 255.0 ;
const double a2 = getAlpha ( pix2 ) / 255.0 ;
/*
Requirements for a color distance handling alpha channel : with a1 , a2 in [ 0 , 1 ]
1. if a1 = a2 , distance should be : a1 * distYCbCr ( )
2. if a1 = 0 , distance should be : a2 * distYCbCr ( black , white ) = a2 * 255
3. if a1 = 1 , distance should be : 255 * ( 1 - a2 ) + a2 * distYCbCr ( )
*/
return std : : min ( a1 , a2 ) * distYCbCrBuffer . dist ( pix1 , pix2 ) + 255 * abs ( a1 - a2 ) ;
//if (pix1 == pix2)
// return 0;
//return std::min(a1, a2) * distYCbCr(pix1, pix2, luminanceWeight) + 255 * abs(a1 - a2);
}
} ;
2014-06-24 21:53:00 +02:00
}
2015-01-29 21:42:32 +01:00
void xbrz : : scale ( size_t factor , const uint32_t * src , uint32_t * trg , int srcWidth , int srcHeight , ColorFormat colFmt , const xbrz : : ScalerCfg & cfg , int yFirst , int yLast )
2014-06-24 21:53:00 +02:00
{
2015-01-29 21:42:32 +01:00
switch ( colFmt )
2014-06-24 21:53:00 +02:00
{
2015-01-29 21:42:32 +01:00
case ColorFormat : : ARGB :
switch ( factor )
{
case 2 :
return scaleImage < Scaler2x , ColorDistanceARGB > ( src , trg , srcWidth , srcHeight , cfg , yFirst , yLast ) ;
case 3 :
return scaleImage < Scaler3x , ColorDistanceARGB > ( src , trg , srcWidth , srcHeight , cfg , yFirst , yLast ) ;
case 4 :
return scaleImage < Scaler4x , ColorDistanceARGB > ( src , trg , srcWidth , srcHeight , cfg , yFirst , yLast ) ;
case 5 :
return scaleImage < Scaler5x , ColorDistanceARGB > ( src , trg , srcWidth , srcHeight , cfg , yFirst , yLast ) ;
}
break ;
case ColorFormat : : RGB :
switch ( factor )
{
case 2 :
return scaleImage < Scaler2x , ColorDistanceRGB > ( src , trg , srcWidth , srcHeight , cfg , yFirst , yLast ) ;
case 3 :
return scaleImage < Scaler3x , ColorDistanceRGB > ( src , trg , srcWidth , srcHeight , cfg , yFirst , yLast ) ;
case 4 :
return scaleImage < Scaler4x , ColorDistanceRGB > ( src , trg , srcWidth , srcHeight , cfg , yFirst , yLast ) ;
case 5 :
return scaleImage < Scaler5x , ColorDistanceRGB > ( src , trg , srcWidth , srcHeight , cfg , yFirst , yLast ) ;
}
break ;
2014-06-24 21:53:00 +02:00
}
assert ( false ) ;
}
2015-01-29 21:42:32 +01:00
bool xbrz : : equalColorTest ( uint32_t col1 , uint32_t col2 , ColorFormat colFmt , double luminanceWeight , double equalColorTolerance )
2014-06-24 21:53:00 +02:00
{
2015-01-29 21:42:32 +01:00
switch ( colFmt )
{
case ColorFormat : : ARGB :
return ColorDistanceARGB : : dist ( col1 , col2 , luminanceWeight ) < equalColorTolerance ;
case ColorFormat : : RGB :
return ColorDistanceRGB : : dist ( col1 , col2 , luminanceWeight ) < equalColorTolerance ;
}
assert ( false ) ;
return false ;
2014-06-24 21:53:00 +02:00
}
void xbrz : : nearestNeighborScale ( const uint32_t * src , int srcWidth , int srcHeight , int srcPitch ,
uint32_t * trg , int trgWidth , int trgHeight , int trgPitch ,
SliceType st , int yFirst , int yLast )
{
if ( srcPitch < srcWidth * static_cast < int > ( sizeof ( uint32_t ) ) | |
trgPitch < trgWidth * static_cast < int > ( sizeof ( uint32_t ) ) )
{
assert ( false ) ;
return ;
}
switch ( st )
{
case NN_SCALE_SLICE_SOURCE :
//nearest-neighbor (going over source image - fast for upscaling, since source is read only once
yFirst = std : : max ( yFirst , 0 ) ;
yLast = std : : min ( yLast , srcHeight ) ;
if ( yFirst > = yLast | | trgWidth < = 0 | | trgHeight < = 0 ) return ;
for ( int y = yFirst ; y < yLast ; + + y )
{
//mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
// => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
//keep within for loop to support MT input slices!
const int yTrg_first = ( y * trgHeight + srcHeight - 1 ) / srcHeight ; //=ceil(y * trgHeight / srcHeight)
const int yTrg_last = ( ( y + 1 ) * trgHeight + srcHeight - 1 ) / srcHeight ; //=ceil(((y + 1) * trgHeight) / srcHeight)
const int blockHeight = yTrg_last - yTrg_first ;
if ( blockHeight > 0 )
{
const uint32_t * srcLine = byteAdvance ( src , y * srcPitch ) ;
uint32_t * trgLine = byteAdvance ( trg , yTrg_first * trgPitch ) ;
int xTrg_first = 0 ;
for ( int x = 0 ; x < srcWidth ; + + x )
{
int xTrg_last = ( ( x + 1 ) * trgWidth + srcWidth - 1 ) / srcWidth ;
const int blockWidth = xTrg_last - xTrg_first ;
if ( blockWidth > 0 )
{
xTrg_first = xTrg_last ;
fillBlock ( trgLine , trgPitch , srcLine [ x ] , blockWidth , blockHeight ) ;
trgLine + = blockWidth ;
}
}
}
}
break ;
case NN_SCALE_SLICE_TARGET :
//nearest-neighbor (going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!)
yFirst = std : : max ( yFirst , 0 ) ;
yLast = std : : min ( yLast , trgHeight ) ;
if ( yFirst > = yLast | | srcHeight < = 0 | | srcWidth < = 0 ) return ;
for ( int y = yFirst ; y < yLast ; + + y )
{
uint32_t * trgLine = byteAdvance ( trg , y * trgPitch ) ;
const int ySrc = srcHeight * y / trgHeight ;
const uint32_t * srcLine = byteAdvance ( src , ySrc * srcPitch ) ;
for ( int x = 0 ; x < trgWidth ; + + x )
{
const int xSrc = srcWidth * x / trgWidth ;
trgLine [ x ] = srcLine [ xSrc ] ;
}
}
break ;
}
}