diff options
Diffstat (limited to 'Source/WebCore/platform/graphics/filters')
8 files changed, 703 insertions, 6 deletions
diff --git a/Source/WebCore/platform/graphics/filters/FEFlood.cpp b/Source/WebCore/platform/graphics/filters/FEFlood.cpp index 3c48cf9..d832d2b 100644 --- a/Source/WebCore/platform/graphics/filters/FEFlood.cpp +++ b/Source/WebCore/platform/graphics/filters/FEFlood.cpp @@ -49,9 +49,12 @@ Color FEFlood::floodColor() const return m_floodColor; } -void FEFlood::setFloodColor(const Color& color) +bool FEFlood::setFloodColor(const Color& color) { + if (m_floodColor == color) + return false; m_floodColor = color; + return true; } float FEFlood::floodOpacity() const @@ -59,9 +62,12 @@ float FEFlood::floodOpacity() const return m_floodOpacity; } -void FEFlood::setFloodOpacity(float floodOpacity) +bool FEFlood::setFloodOpacity(float floodOpacity) { + if (m_floodOpacity == floodOpacity) + return false; m_floodOpacity = floodOpacity; + return true; } void FEFlood::apply() diff --git a/Source/WebCore/platform/graphics/filters/FEFlood.h b/Source/WebCore/platform/graphics/filters/FEFlood.h index 2e8824f..cac4153 100644 --- a/Source/WebCore/platform/graphics/filters/FEFlood.h +++ b/Source/WebCore/platform/graphics/filters/FEFlood.h @@ -34,10 +34,10 @@ public: static PassRefPtr<FEFlood> create(Filter* filter, const Color&, float); Color floodColor() const; - void setFloodColor(const Color &); + bool setFloodColor(const Color &); float floodOpacity() const; - void setFloodOpacity(float); + bool setFloodOpacity(float); virtual void apply(); virtual void dump(); diff --git a/Source/WebCore/platform/graphics/filters/FELighting.cpp b/Source/WebCore/platform/graphics/filters/FELighting.cpp index ec1ca88..13a69fd 100644 --- a/Source/WebCore/platform/graphics/filters/FELighting.cpp +++ b/Source/WebCore/platform/graphics/filters/FELighting.cpp @@ -30,6 +30,13 @@ #include "FELighting.h" #include "LightSource.h" +#include "PointLightSource.h" +#include "SpotLightSource.h" + +#if CPU(ARM_NEON) && COMPILER(GCC) +#include "FELightingNEON.h" +#include <wtf/Vector.h> +#endif namespace WebCore { @@ -301,6 +308,9 @@ bool FELighting::drawLighting(ByteArray* pixels, int width, int height) if (width >= 3 && height >= 3) { // Interior pixels +#if CPU(ARM_NEON) && COMPILER(GCC) + drawInteriorPixels(data, paintingData); +#else for (int y = 1; y < data.heightDecreasedByOne; ++y) { offset = y * data.widthMultipliedByPixelSize + cPixelSize; for (int x = 1; x < data.widthDecreasedByOne; ++x, offset += cPixelSize) { @@ -308,6 +318,7 @@ bool FELighting::drawLighting(ByteArray* pixels, int width, int height) inlineSetPixel(offset, data, paintingData, x, y, cFactor1div4, cFactor1div4, normalVector); } } +#endif } int lastPixel = data.widthMultipliedByPixelSize * height; @@ -354,6 +365,118 @@ void FELighting::apply() drawLighting(srcPixelArray, absolutePaintSize.width(), absolutePaintSize.height()); } +#if CPU(ARM_NEON) && COMPILER(GCC) + +static int getPowerCoefficients(float exponent) +{ + // Calling a powf function from the assembly code would require to save + // and reload a lot of NEON registers. Since the base is in range [0..1] + // and only 8 bit precision is required, we use our own powf function. + // This is probably not the best, but it uses only a few registers and + // gives us enough precision (modifying the exponent field directly would + // also be possible). + + // First, we limit the exponent to maximum of 64, which gives us enough + // precision. We split the exponent to an integer and fraction part, + // since a^x = (a^y)*(a^z) where x = y+z. The integer exponent of the + // power is estimated by square, and the fraction exponent of the power + // is estimated by square root assembly instructions. + int i, result; + + if (exponent < 0) + exponent = 1 / (-exponent); + + if (exponent > 63.99) + exponent = 63.99; + + exponent /= 64; + result = 0; + for (i = 11; i >= 0; --i) { + exponent *= 2; + if (exponent >= 1) { + result |= 1 << i; + exponent -= 1; + } + } + return result; +} + +void FELighting::drawInteriorPixels(LightingData& data, LightSource::PaintingData& paintingData) +{ + WTF_ALIGNED(FELightingFloatArgumentsForNeon, floatArguments, 16); + + FELightingPaintingDataForNeon neonData = { + data.pixels->data(), + data.widthDecreasedByOne - 1, + data.heightDecreasedByOne - 1, + 0, + 0, + 0, + &floatArguments, + feLightingConstantsForNeon() + }; + + // Set light source arguments. + floatArguments.constOne = 1; + + floatArguments.colorRed = m_lightingColor.red(); + floatArguments.colorGreen = m_lightingColor.green(); + floatArguments.colorBlue = m_lightingColor.blue(); + floatArguments.padding4 = 0; + + if (m_lightSource->type() == LS_POINT) { + neonData.flags |= FLAG_POINT_LIGHT; + PointLightSource* pointLightSource = static_cast<PointLightSource*>(m_lightSource.get()); + floatArguments.lightX = pointLightSource->position().x(); + floatArguments.lightY = pointLightSource->position().y(); + floatArguments.lightZ = pointLightSource->position().z(); + floatArguments.padding2 = 0; + } else if (m_lightSource->type() == LS_SPOT) { + neonData.flags |= FLAG_SPOT_LIGHT; + SpotLightSource* spotLightSource = static_cast<SpotLightSource*>(m_lightSource.get()); + floatArguments.lightX = spotLightSource->position().x(); + floatArguments.lightY = spotLightSource->position().y(); + floatArguments.lightZ = spotLightSource->position().z(); + floatArguments.padding2 = 0; + + floatArguments.directionX = paintingData.directionVector.x(); + floatArguments.directionY = paintingData.directionVector.y(); + floatArguments.directionZ = paintingData.directionVector.z(); + floatArguments.padding3 = 0; + + floatArguments.coneCutOffLimit = paintingData.coneCutOffLimit; + floatArguments.coneFullLight = paintingData.coneFullLight; + floatArguments.coneCutOffRange = paintingData.coneCutOffLimit - paintingData.coneFullLight; + neonData.coneExponent = getPowerCoefficients(spotLightSource->specularExponent()); + if (spotLightSource->specularExponent() == 1) + neonData.flags |= FLAG_CONE_EXPONENT_IS_1; + } else { + ASSERT(m_lightSource.type == LS_DISTANT); + floatArguments.lightX = paintingData.lightVector.x(); + floatArguments.lightY = paintingData.lightVector.y(); + floatArguments.lightZ = paintingData.lightVector.z(); + floatArguments.padding2 = 1; + } + + // Set lighting arguments. + floatArguments.surfaceScale = data.surfaceScale; + floatArguments.minusSurfaceScaleDividedByFour = -data.surfaceScale / 4; + if (m_lightingType == FELighting::DiffuseLighting) + floatArguments.diffuseConstant = m_diffuseConstant; + else { + neonData.flags |= FLAG_SPECULAR_LIGHT; + floatArguments.diffuseConstant = m_specularConstant; + neonData.specularExponent = getPowerCoefficients(m_specularExponent); + if (m_specularExponent == 1) + neonData.flags |= FLAG_SPECULAR_EXPONENT_IS_1; + } + if (floatArguments.diffuseConstant == 1) + neonData.flags |= FLAG_DIFFUSE_CONST_IS_1; + + neonDrawLighting(&neonData); +} +#endif // CPU(ARM_NEON) && COMPILER(GCC) + } // namespace WebCore #endif // ENABLE(FILTERS) diff --git a/Source/WebCore/platform/graphics/filters/FELighting.h b/Source/WebCore/platform/graphics/filters/FELighting.h index fa1c0aa..3dc46e9 100644 --- a/Source/WebCore/platform/graphics/filters/FELighting.h +++ b/Source/WebCore/platform/graphics/filters/FELighting.h @@ -33,6 +33,7 @@ #include "FilterEffect.h" #include "LightSource.h" #include <wtf/ByteArray.h> +#include <wtf/Platform.h> // Common base class for FEDiffuseLighting and FESpecularLighting @@ -79,6 +80,10 @@ protected: void setPixel(int offset, LightingData&, LightSource::PaintingData&, int lightX, int lightY, float factorX, float factorY, IntPoint& normalVector); +#if CPU(ARM_NEON) && COMPILER(GCC) + void drawInteriorPixels(LightingData&, LightSource::PaintingData&); +#endif + LightingType m_lightingType; RefPtr<LightSource> m_lightSource; diff --git a/Source/WebCore/platform/graphics/filters/FESpecularLighting.cpp b/Source/WebCore/platform/graphics/filters/FESpecularLighting.cpp index a20eb8c..2c7b1eb 100644 --- a/Source/WebCore/platform/graphics/filters/FESpecularLighting.cpp +++ b/Source/WebCore/platform/graphics/filters/FESpecularLighting.cpp @@ -54,9 +54,12 @@ Color FESpecularLighting::lightingColor() const return m_lightingColor; } -void FESpecularLighting::setLightingColor(const Color& lightingColor) +bool FESpecularLighting::setLightingColor(const Color& lightingColor) { + if (m_lightingColor == lightingColor) + return false; m_lightingColor = lightingColor; + return true; } float FESpecularLighting::surfaceScale() const diff --git a/Source/WebCore/platform/graphics/filters/FESpecularLighting.h b/Source/WebCore/platform/graphics/filters/FESpecularLighting.h index 9d3ea2d..9fa3add 100644 --- a/Source/WebCore/platform/graphics/filters/FESpecularLighting.h +++ b/Source/WebCore/platform/graphics/filters/FESpecularLighting.h @@ -34,7 +34,7 @@ public: virtual ~FESpecularLighting(); Color lightingColor() const; - void setLightingColor(const Color&); + bool setLightingColor(const Color&); float surfaceScale() const; bool setSurfaceScale(float); diff --git a/Source/WebCore/platform/graphics/filters/arm/FELightingNEON.cpp b/Source/WebCore/platform/graphics/filters/arm/FELightingNEON.cpp new file mode 100644 index 0000000..3807f1f --- /dev/null +++ b/Source/WebCore/platform/graphics/filters/arm/FELightingNEON.cpp @@ -0,0 +1,464 @@ +/* + * Copyright (C) 2011 University of Szeged + * Copyright (C) 2011 Zoltan Herczeg + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "FELightingNEON.h" + +#if CPU(ARM_NEON) && COMPILER(GCC) + +#include <wtf/Vector.h> + +namespace WebCore { + +// These constants are copied to the following SIMD registers: +// ALPHAX_Q ALPHAY_Q REMAPX_D REMAPY_D + +WTF_ALIGNED(short, s_FELightingConstantsForNeon[], 16) = { + // Alpha coefficients. + -2, 1, 0, -1, 2, 1, 0, -1, + 0, -1, -2, -1, 0, 1, 2, 1, + // Remapping indicies. + 0x0f0e, 0x0302, 0x0504, 0x0706, + 0x0b0a, 0x1312, 0x1514, 0x1716, +}; + +short* feLightingConstantsForNeon() +{ + return s_FELightingConstantsForNeon; +} + +#define ASSTRING(str) #str +#define TOSTRING(value) ASSTRING(value) + +#define PIXELS_OFFSET TOSTRING(0) +#define WIDTH_OFFSET TOSTRING(4) +#define HEIGHT_OFFSET TOSTRING(8) +#define FLAGS_OFFSET TOSTRING(12) +#define SPECULAR_EXPONENT_OFFSET TOSTRING(16) +#define CONE_EXPONENT_OFFSET TOSTRING(20) +#define FLOAT_ARGUMENTS_OFFSET TOSTRING(24) +#define DRAWING_CONSTANTS_OFFSET TOSTRING(28) +#define NL "\n" + +// Register allocation +#define PAINTING_DATA_R "r11" +#define RESET_WIDTH_R PAINTING_DATA_R +#define PIXELS_R "r4" +#define WIDTH_R "r5" +#define HEIGHT_R "r6" +#define FLAGS_R "r7" +#define SPECULAR_EXPONENT_R "r8" +#define CONE_EXPONENT_R "r10" +#define SCANLINE_R "r12" + +#define TMP1_Q "q0" +#define TMP1_D0 "d0" +#define TMP1_S0 "s0" +#define TMP1_S1 "s1" +#define TMP1_D1 "d1" +#define TMP1_S2 "s2" +#define TMP1_S3 "s3" +#define TMP2_Q "q1" +#define TMP2_D0 "d2" +#define TMP2_S0 "s4" +#define TMP2_S1 "s5" +#define TMP2_D1 "d3" +#define TMP2_S2 "s6" +#define TMP2_S3 "s7" +#define TMP3_Q "q2" +#define TMP3_D0 "d4" +#define TMP3_S0 "s8" +#define TMP3_S1 "s9" +#define TMP3_D1 "d5" +#define TMP3_S2 "s10" +#define TMP3_S3 "s11" + +#define COSINE_OF_ANGLE "s12" +#define POWF_INT_S "s13" +#define POWF_FRAC_S "s14" +#define SPOT_COLOR_Q "q4" + +// Because of VMIN and VMAX CONST_ZERO_S and CONST_ONE_S +// must be placed on the same side of the double vector + +// Current pixel position +#define POSITION_Q "q5" +#define POSITION_X_S "s20" +#define POSITION_Y_S "s21" +#define POSITION_Z_S "s22" +#define CONST_ZERO_HI_D "d11" +#define CONST_ZERO_S "s23" + +// ------------------------------- +// Variable arguments +// Misc arguments +#define READ1_RANGE "d12-d15" +#define READ2_RANGE "d16-d19" +#define READ3_RANGE "d20-d21" + +#define SCALE_S "s24" +#define SCALE_DIV4_S "s25" +#define DIFFUSE_CONST_S "s26" + +// Light source position +#define CONE_CUT_OFF_S "s28" +#define CONE_FULL_LIGHT_S "s29" +#define CONE_CUT_OFF_RANGE_S "s30" +#define CONST_ONE_HI_D "d15" +#define CONST_ONE_S "s31" + +#define LIGHT_Q "q8" +#define DIRECTION_Q "q9" +#define COLOR_Q "q10" +// ------------------------------- +// Constant coefficients +#define READ4_RANGE "d22-d25" +#define READ5_RANGE "d26-d27" + +#define ALPHAX_Q "q11" +#define ALPHAY_Q "q12" +#define REMAPX_D "d26" +#define REMAPY_D "d27" +// ------------------------------- + +#define ALL_ROWS_D "{d28,d29,d30}" +#define TOP_ROW_D "d28" +#define MIDDLE_ROW_D "d29" +#define BOTTOM_ROW_D "d30" + +#define GET_LENGTH(source, temp) \ + "vmul.f32 " temp##_Q ", " source##_Q ", " source##_Q NL \ + "vadd.f32 " source##_S3 ", " temp##_S0 ", " temp##_S1 NL \ + "vadd.f32 " source##_S3 ", " source##_S3 ", " temp##_S2 NL \ + "vsqrt.f32 " source##_S3 ", " source##_S3 NL + +// destination##_S3 can contain the multiply of length. +#define DOT_PRODUCT(destination, source1, source2) \ + "vmul.f32 " destination##_Q ", " source1##_Q ", " source2##_Q NL \ + "vadd.f32 " destination##_S0 ", " destination##_S0 ", " destination##_S1 NL \ + "vadd.f32 " destination##_S0 ", " destination##_S0 ", " destination##_S2 NL + +#define MULTIPLY_BY_DIFFUSE_CONST(normalVectorLength, dotProductLength) \ + "tst " FLAGS_R ", #" TOSTRING(FLAG_DIFFUSE_CONST_IS_1) NL \ + "vmuleq.f32 " TMP2_S1 ", " DIFFUSE_CONST_S ", " normalVectorLength NL \ + "vdiveq.f32 " TMP2_S1 ", " TMP2_S1 ", " dotProductLength NL \ + "vdivne.f32 " TMP2_S1 ", " normalVectorLength ", " dotProductLength NL + +#define POWF_SQR(value, exponent, current, remaining) \ + "tst " exponent ", #" ASSTRING(current) NL \ + "vmulne.f32 " value ", " value ", " POWF_INT_S NL \ + "tst " exponent ", #" ASSTRING(remaining) NL \ + "vmulne.f32 " POWF_INT_S ", " POWF_INT_S ", " POWF_INT_S NL + +#define POWF_SQRT(value, exponent, current, remaining) \ + "tst " exponent ", #" ASSTRING(remaining) NL \ + "vsqrtne.f32 " POWF_FRAC_S ", " POWF_FRAC_S NL \ + "tst " exponent ", #" ASSTRING(current) NL \ + "vmulne.f32 " value ", " value ", " POWF_FRAC_S NL + +// This simplified powf function is sufficiently accurate. +#define POWF(value, exponent) \ + "tst " exponent ", #0xfc0" NL \ + "vmovne.f32 " POWF_INT_S ", " value NL \ + "tst " exponent ", #0x03f" NL \ + "vmovne.f32 " POWF_FRAC_S ", " value NL \ + "vmov.f32 " value ", " CONST_ONE_S NL \ + \ + POWF_SQR(value, exponent, 0x040, 0xf80) \ + POWF_SQR(value, exponent, 0x080, 0xf00) \ + POWF_SQR(value, exponent, 0x100, 0xe00) \ + POWF_SQR(value, exponent, 0x200, 0xc00) \ + POWF_SQR(value, exponent, 0x400, 0x800) \ + "tst " exponent ", #0x800" NL \ + "vmulne.f32 " value ", " value ", " POWF_INT_S NL \ + \ + POWF_SQRT(value, exponent, 0x20, 0x3f) \ + POWF_SQRT(value, exponent, 0x10, 0x1f) \ + POWF_SQRT(value, exponent, 0x08, 0x0f) \ + POWF_SQRT(value, exponent, 0x04, 0x07) \ + POWF_SQRT(value, exponent, 0x02, 0x03) \ + POWF_SQRT(value, exponent, 0x01, 0x01) + +// The following algorithm is an ARM-NEON optimized version of +// the main loop found in FELighting.cpp. Since the whole code +// is redesigned to be as effective as possible (ARM specific +// thinking), it is four times faster than its C++ counterpart. + +asm ( // NOLINT +".globl " TOSTRING(neonDrawLighting) NL +TOSTRING(neonDrawLighting) ":" NL + // Because of the clever register allocation, nothing is stored on the stack + // except the saved registers. + // Stack must be aligned to 8 bytes. + "stmdb sp!, {r4-r8, r10, r11, lr}" NL + "vstmdb sp!, {d8-d15}" NL + "mov " PAINTING_DATA_R ", r0" NL + + // The following two arguments are loaded to SIMD registers. + "ldr r0, [" PAINTING_DATA_R ", #" FLOAT_ARGUMENTS_OFFSET "]" NL + "ldr r1, [" PAINTING_DATA_R ", #" DRAWING_CONSTANTS_OFFSET "]" NL + "ldr " PIXELS_R ", [" PAINTING_DATA_R ", #" PIXELS_OFFSET "]" NL + "ldr " WIDTH_R ", [" PAINTING_DATA_R ", #" WIDTH_OFFSET "]" NL + "ldr " HEIGHT_R ", [" PAINTING_DATA_R ", #" HEIGHT_OFFSET "]" NL + "ldr " FLAGS_R ", [" PAINTING_DATA_R ", #" FLAGS_OFFSET "]" NL + "ldr " SPECULAR_EXPONENT_R ", [" PAINTING_DATA_R ", #" SPECULAR_EXPONENT_OFFSET "]" NL + "ldr " CONE_EXPONENT_R ", [" PAINTING_DATA_R ", #" CONE_EXPONENT_OFFSET "]" NL + + // Load all data to the SIMD registers with the least number of instructions. + "vld1.f32 { " READ1_RANGE " }, [r0]!" NL + "vld1.f32 { " READ2_RANGE " }, [r0]!" NL + "vld1.f32 { " READ3_RANGE " }, [r0]!" NL + "vld1.s16 {" READ4_RANGE "}, [r1]!" NL + "vld1.s16 {" READ5_RANGE "}, [r1]!" NL + + // Initializing local variables. + "mov " SCANLINE_R ", " WIDTH_R ", lsl #2" NL + "add " SCANLINE_R ", " SCANLINE_R ", #8" NL + "add " PIXELS_R ", " PIXELS_R ", " SCANLINE_R NL + "add " PIXELS_R ", " PIXELS_R ", #3" NL + "mov r0, #0" NL + "vmov.f32 " CONST_ZERO_S ", r0" NL + "vmov.f32 " POSITION_Y_S ", " CONST_ONE_S NL + "tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL + "vmov.f32 " SPOT_COLOR_Q ", " COLOR_Q NL + "mov " RESET_WIDTH_R ", " WIDTH_R NL + +".mainloop:" NL + "mov r3, #3" NL + "vmov.f32 " POSITION_X_S ", " CONST_ONE_S NL + +".scanline:" NL + // The ROW registers are storing the alpha channel of the last three pixels. + // The alpha channel is stored as signed short (sint16) values. The fourth value + // is garbage. The following instructions are shifting out the unnecessary alpha + // values and load the next ones. + "ldrb r0, [" PIXELS_R ", -" SCANLINE_R "]" NL + "ldrb r1, [" PIXELS_R ", +" SCANLINE_R "]" NL + "ldrb r2, [" PIXELS_R "], #4" NL + "vext.s16 " TOP_ROW_D ", " TOP_ROW_D ", " TOP_ROW_D ", #3" NL + "vext.s16 " MIDDLE_ROW_D ", " MIDDLE_ROW_D ", " MIDDLE_ROW_D ", #3" NL + "vext.s16 " BOTTOM_ROW_D ", " BOTTOM_ROW_D ", " BOTTOM_ROW_D ", #3" NL + "vmov.s16 " TOP_ROW_D "[1], r0" NL + "vmov.s16 " MIDDLE_ROW_D "[1], r2" NL + "vmov.s16 " BOTTOM_ROW_D "[1], r1" NL + + // The two border pixels (rightmost and leftmost) are skipped when + // the next scanline is reached. It also jumps, when the algorithm + // is started, and the first free alpha values are loaded to each row. + "subs r3, r3, #1" NL + "bne .scanline" NL + + // The light vector goes to TMP1_Q. It is constant in case of distant light. + // The fourth value contains the length of the light vector. + "tst " FLAGS_R ", #" TOSTRING(FLAG_POINT_LIGHT | FLAG_SPOT_LIGHT) NL + "beq .distantLight" NL + + "vmov.s16 r3, " MIDDLE_ROW_D "[2]" NL + "vmov.f32 " POSITION_Z_S ", r3" NL + "vcvt.f32.s32 " POSITION_Z_S ", " POSITION_Z_S NL + "vmul.f32 " POSITION_Z_S ", " POSITION_Z_S ", " SCALE_S NL + + "vsub.f32 " TMP1_Q ", " LIGHT_Q ", " POSITION_Q NL + GET_LENGTH(TMP1, TMP2) + + "tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL + "bne .cosineOfAngle" NL +".visiblePixel:" NL + + // | -1 0 1 | | -1 -2 -1 | + // X = | -2 0 2 | Y = | 0 0 0 | + // | -1 0 1 | | 1 2 1 | + + // Multiply the alpha values by the X and Y matrices. + + // Moving the 8 alpha value to TMP3. + "vtbl.8 " TMP3_D0 ", " ALL_ROWS_D ", " REMAPX_D NL + "vtbl.8 " TMP3_D1 ", " ALL_ROWS_D ", " REMAPY_D NL + + "vmul.s16 " TMP2_Q ", " TMP3_Q ", " ALPHAX_Q NL + "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D1 NL + "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL + "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL + "vmov.s16 r0, " TMP2_D0 "[0]" NL + + "vmul.s16 " TMP2_Q ", " TMP3_Q ", " ALPHAY_Q NL + "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D1 NL + "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL + "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL + "vmov.s16 r1, " TMP2_D0 "[0]" NL + + // r0 and r1 contains the X and Y coordinates of the + // normal vector, respectively. + + // Calculating the spot light strength. + "tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL + "beq .endLight" NL + + "vneg.f32 " TMP3_S1 ", " COSINE_OF_ANGLE NL + "tst " FLAGS_R ", #" TOSTRING(FLAG_CONE_EXPONENT_IS_1) NL + "beq .coneExpPowf" NL +".coneExpPowfFinished:" NL + + // Smoothing the cone edge if necessary. + "vcmp.f32 " COSINE_OF_ANGLE ", " CONE_FULL_LIGHT_S NL + "fmstat" NL + "bhi .cutOff" NL +".cutOffFinished:" NL + + "vmin.f32 " TMP3_D0 ", " TMP3_D0 ", " CONST_ONE_HI_D NL + "vmul.f32 " COLOR_Q ", " SPOT_COLOR_Q ", " TMP3_D0 "[1]" NL + +".endLight:" NL + // Summarize: + // r0 and r1 contains the normalVector. + // TMP1_Q contains the light vector and its length. + // COLOR_Q contains the color of the light vector. + + // Test whether both r0 and r1 are zero (Normal vector is (0, 0, 1)). + "orrs r2, r0, r1" NL + "bne .normalVectorIsNonZero" NL + + "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_LIGHT) NL + "bne .specularLight1" NL + + // Calculate diffuse light strength. + MULTIPLY_BY_DIFFUSE_CONST(TMP1_S2, TMP1_S3) + "b .lightStrengthCalculated" NL + +".specularLight1:" NL + // Calculating specular light strength. + "vadd.f32 " TMP1_S2 ", " TMP1_S2 ", " TMP1_S3 NL + GET_LENGTH(TMP1, TMP2) + + // When the exponent is 1, we don't need to call an expensive powf function. + "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_EXPONENT_IS_1) NL + "vdiveq.f32 " TMP2_S1 ", " TMP1_S2 ", " TMP1_S3 NL + "beq .specularExpPowf" NL + + MULTIPLY_BY_DIFFUSE_CONST(TMP1_S2, TMP1_S3) + "b .lightStrengthCalculated" NL + +".normalVectorIsNonZero:" NL + // Normal vector goes to TMP2, and its length is calculated as well. + "vmov.s32 " TMP2_S0 ", r0" NL + "vcvt.f32.s32 " TMP2_S0 ", " TMP2_S0 NL + "vmul.f32 " TMP2_S0 ", " TMP2_S0 ", " SCALE_DIV4_S NL + "vmov.s32 " TMP2_S1 ", r1" NL + "vcvt.f32.s32 " TMP2_S1 ", " TMP2_S1 NL + "vmul.f32 " TMP2_S1 ", " TMP2_S1 ", " SCALE_DIV4_S NL + "vmov.f32 " TMP2_S2 ", " CONST_ONE_S NL + GET_LENGTH(TMP2, TMP3) + + "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_LIGHT) NL + "bne .specularLight2" NL + + // Calculating diffuse light strength. + DOT_PRODUCT(TMP3, TMP2, TMP1) + MULTIPLY_BY_DIFFUSE_CONST(TMP3_S0, TMP3_S3) + "b .lightStrengthCalculated" NL + +".specularLight2:" NL + // Calculating specular light strength. + "vadd.f32 " TMP1_S2 ", " TMP1_S2 ", " TMP1_S3 NL + GET_LENGTH(TMP1, TMP3) + DOT_PRODUCT(TMP3, TMP2, TMP1) + + // When the exponent is 1, we don't need to call an expensive powf function. + "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_EXPONENT_IS_1) NL + "vdiveq.f32 " TMP2_S1 ", " TMP3_S0 ", " TMP3_S3 NL + "beq .specularExpPowf" NL + MULTIPLY_BY_DIFFUSE_CONST(TMP3_S0, TMP3_S3) + +".lightStrengthCalculated:" NL + // TMP2_S1 contains the light strength. Clamp it to [0, 1] + "vmax.f32 " TMP2_D0 ", " TMP2_D0 ", " CONST_ZERO_HI_D NL + "vmin.f32 " TMP2_D0 ", " TMP2_D0 ", " CONST_ONE_HI_D NL + "vmul.f32 " TMP3_Q ", " COLOR_Q ", " TMP2_D0 "[1]" NL + "vcvt.u32.f32 " TMP3_Q ", " TMP3_Q NL + "vmov.u32 r2, r3, " TMP3_S0 ", " TMP3_S1 NL + // The color values are stored in-place. + "strb r2, [" PIXELS_R ", #-11]" NL + "strb r3, [" PIXELS_R ", #-10]" NL + "vmov.u32 r2, " TMP3_S2 NL + "strb r2, [" PIXELS_R ", #-9]" NL + + // Continue to the next pixel. +".blackPixel:" NL + "vadd.f32 " POSITION_X_S ", " CONST_ONE_S NL + "mov r3, #1" NL + "subs " WIDTH_R ", " WIDTH_R ", #1" NL + "bne .scanline" NL + + // If the end of the scanline is reached, we continue + // to the next scanline. + "vadd.f32 " POSITION_Y_S ", " CONST_ONE_S NL + "mov " WIDTH_R ", " RESET_WIDTH_R NL + "subs " HEIGHT_R ", " HEIGHT_R ", #1" NL + "bne .mainloop" NL + + // Return. + "vldmia sp!, {d8-d15}" NL + "ldmia sp!, {r4-r8, r10, r11, pc}" NL + +".distantLight:" NL + // In case of distant light, the light vector is constant, + // we simply copy it. + "vmov.f32 " TMP1_Q ", " LIGHT_Q NL + "b .visiblePixel" NL + +".cosineOfAngle:" NL + // If the pixel is outside of the cone angle, it is simply a black pixel. + DOT_PRODUCT(TMP3, TMP1, DIRECTION) + "vdiv.f32 " COSINE_OF_ANGLE ", " TMP3_S0 ", " TMP1_S3 NL + "vcmp.f32 " COSINE_OF_ANGLE ", " CONE_CUT_OFF_S NL + "fmstat" NL + "bls .visiblePixel" NL + "mov r0, #0" NL + "strh r0, [" PIXELS_R ", #-11]" NL + "strb r0, [" PIXELS_R ", #-9]" NL + "b .blackPixel" NL + +".cutOff:" NL + // Smoothing the light strength on the cone edge. + "vsub.f32 " TMP3_S0 ", " CONE_CUT_OFF_S ", " COSINE_OF_ANGLE NL + "vdiv.f32 " TMP3_S0 ", " TMP3_S0 ", " CONE_CUT_OFF_RANGE_S NL + "vmul.f32 " TMP3_S1 ", " TMP3_S1 ", " TMP3_S0 NL + "b .cutOffFinished" NL + +".coneExpPowf:" NL + POWF(TMP3_S1, CONE_EXPONENT_R) + "b .coneExpPowfFinished" NL + +".specularExpPowf:" NL + POWF(TMP2_S1, SPECULAR_EXPONENT_R) + "tst " FLAGS_R ", #" TOSTRING(FLAG_DIFFUSE_CONST_IS_1) NL + "vmuleq.f32 " TMP2_S1 ", " TMP2_S1 ", " DIFFUSE_CONST_S NL + "b .lightStrengthCalculated" NL +); // NOLINT + +} // namespace WebCore + +#endif // CPU(ARM_NEON) && COMPILER(GCC) diff --git a/Source/WebCore/platform/graphics/filters/arm/FELightingNEON.h b/Source/WebCore/platform/graphics/filters/arm/FELightingNEON.h new file mode 100644 index 0000000..d83b7fe --- /dev/null +++ b/Source/WebCore/platform/graphics/filters/arm/FELightingNEON.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2011 University of Szeged + * Copyright (C) 2011 Zoltan Herczeg + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FELightingNeon_h +#define FELightingNeon_h + +#include <wtf/Platform.h> + +#if CPU(ARM_NEON) && COMPILER(GCC) + +namespace WebCore { + +// Otherwise: Distant Light. +#define FLAG_POINT_LIGHT 0x01 +#define FLAG_SPOT_LIGHT 0x02 +#define FLAG_CONE_EXPONENT_IS_1 0x04 + +// Otherwise: Diffuse light. +#define FLAG_SPECULAR_LIGHT 0x10 +#define FLAG_DIFFUSE_CONST_IS_1 0x20 +#define FLAG_SPECULAR_EXPONENT_IS_1 0x40 + +// Must be aligned to 16 bytes. +struct FELightingFloatArgumentsForNeon { + float surfaceScale; + float minusSurfaceScaleDividedByFour; + float diffuseConstant; + float padding1; + + float coneCutOffLimit; + float coneFullLight; + float coneCutOffRange; + float constOne; + + float lightX; + float lightY; + float lightZ; + float padding2; + + float directionX; + float directionY; + float directionZ; + float padding3; + + float colorRed; + float colorGreen; + float colorBlue; + float padding4; +}; + +struct FELightingPaintingDataForNeon { + unsigned char* pixels; + int widthDecreasedByTwo; + int heightDecreasedByTwo; + // Combination of FLAG constants above. + int flags; + int specularExponent; + int coneExponent; + FELightingFloatArgumentsForNeon* floatArguments; + short* paintingConstants; +}; + +short* feLightingConstantsForNeon(); + +extern "C" { +void neonDrawLighting(FELightingPaintingDataForNeon*); +} + +} // namespace WebCore + +#endif // CPU(ARM_NEON) && COMPILER(GCC) + +#endif // FELightingNeon_h |