diff options
Diffstat (limited to 'media/mca/filterpacks/java/android/filterpacks/videoproc/BackDropperFilter.java')
-rw-r--r-- | media/mca/filterpacks/java/android/filterpacks/videoproc/BackDropperFilter.java | 976 |
1 files changed, 976 insertions, 0 deletions
diff --git a/media/mca/filterpacks/java/android/filterpacks/videoproc/BackDropperFilter.java b/media/mca/filterpacks/java/android/filterpacks/videoproc/BackDropperFilter.java new file mode 100644 index 0000000..52c9fda --- /dev/null +++ b/media/mca/filterpacks/java/android/filterpacks/videoproc/BackDropperFilter.java @@ -0,0 +1,976 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package android.filterpacks.videoproc; + +import android.filterfw.core.Filter; +import android.filterfw.core.FilterContext; +import android.filterfw.core.GenerateFieldPort; +import android.filterfw.core.GenerateFinalPort; +import android.filterfw.core.Frame; +import android.filterfw.core.GLFrame; +import android.filterfw.core.FrameFormat; +import android.filterfw.core.MutableFrameFormat; +import android.filterfw.core.Program; +import android.filterfw.core.ShaderProgram; +import android.filterfw.format.ImageFormat; +import android.opengl.GLES20; +import android.os.SystemClock; +import android.util.Log; + +import java.lang.ArrayIndexOutOfBoundsException; +import java.lang.Math; +import java.util.Arrays; +import java.nio.ByteBuffer; + +/** + * @hide + */ +public class BackDropperFilter extends Filter { + /** User-visible parameters */ + + private final int BACKGROUND_STRETCH = 0; + private final int BACKGROUND_FIT = 1; + private final int BACKGROUND_FILL_CROP = 2; + + @GenerateFieldPort(name = "backgroundFitMode", hasDefault = true) + private int mBackgroundFitMode = BACKGROUND_FILL_CROP; + @GenerateFieldPort(name = "learningDuration", hasDefault = true) + private int mLearningDuration = DEFAULT_LEARNING_DURATION; + @GenerateFieldPort(name = "learningVerifyDuration", hasDefault = true) + private int mLearningVerifyDuration = DEFAULT_LEARNING_VERIFY_DURATION; + @GenerateFieldPort(name = "acceptStddev", hasDefault = true) + private float mAcceptStddev = DEFAULT_ACCEPT_STDDEV; + @GenerateFieldPort(name = "hierLrgScale", hasDefault = true) + private float mHierarchyLrgScale = DEFAULT_HIER_LRG_SCALE; + @GenerateFieldPort(name = "hierMidScale", hasDefault = true) + private float mHierarchyMidScale = DEFAULT_HIER_MID_SCALE; + @GenerateFieldPort(name = "hierSmlScale", hasDefault = true) + private float mHierarchySmlScale = DEFAULT_HIER_SML_SCALE; + + // Dimensions of foreground / background mask. Optimum value should take into account only + // image contents, NOT dimensions of input video stream. + @GenerateFieldPort(name = "maskWidthExp", hasDefault = true) + private int mMaskWidthExp = DEFAULT_MASK_WIDTH_EXPONENT; + @GenerateFieldPort(name = "maskHeightExp", hasDefault = true) + private int mMaskHeightExp = DEFAULT_MASK_HEIGHT_EXPONENT; + + // Levels at which to compute foreground / background decision. Think of them as are deltas + // SUBTRACTED from maskWidthExp and maskHeightExp. + @GenerateFieldPort(name = "hierLrgExp", hasDefault = true) + private int mHierarchyLrgExp = DEFAULT_HIER_LRG_EXPONENT; + @GenerateFieldPort(name = "hierMidExp", hasDefault = true) + private int mHierarchyMidExp = DEFAULT_HIER_MID_EXPONENT; + @GenerateFieldPort(name = "hierSmlExp", hasDefault = true) + private int mHierarchySmlExp = DEFAULT_HIER_SML_EXPONENT; + + @GenerateFieldPort(name = "lumScale", hasDefault = true) + private float mLumScale = DEFAULT_Y_SCALE_FACTOR; + @GenerateFieldPort(name = "chromaScale", hasDefault = true) + private float mChromaScale = DEFAULT_UV_SCALE_FACTOR; + @GenerateFieldPort(name = "maskBg", hasDefault = true) + private float mMaskBg = DEFAULT_MASK_BLEND_BG; + @GenerateFieldPort(name = "maskFg", hasDefault = true) + private float mMaskFg = DEFAULT_MASK_BLEND_FG; + @GenerateFieldPort(name = "exposureChange", hasDefault = true) + private float mExposureChange = DEFAULT_EXPOSURE_CHANGE; + @GenerateFieldPort(name = "whitebalanceredChange", hasDefault = true) + private float mWhiteBalanceRedChange = DEFAULT_WHITE_BALANCE_RED_CHANGE; + @GenerateFieldPort(name = "whitebalanceblueChange", hasDefault = true) + private float mWhiteBalanceBlueChange = DEFAULT_WHITE_BALANCE_BLUE_CHANGE; + @GenerateFieldPort(name = "autowbToggle", hasDefault = true) + private int mAutoWBToggle = DEFAULT_WHITE_BALANCE_TOGGLE; + + // TODO: These are not updatable: + @GenerateFieldPort(name = "learningAdaptRate", hasDefault = true) + private float mAdaptRateLearning = DEFAULT_LEARNING_ADAPT_RATE; + @GenerateFieldPort(name = "adaptRateBg", hasDefault = true) + private float mAdaptRateBg = DEFAULT_ADAPT_RATE_BG; + @GenerateFieldPort(name = "adaptRateFg", hasDefault = true) + private float mAdaptRateFg = DEFAULT_ADAPT_RATE_FG; + @GenerateFieldPort(name = "maskVerifyRate", hasDefault = true) + private float mVerifyRate = DEFAULT_MASK_VERIFY_RATE; + @GenerateFieldPort(name = "learningDoneListener", hasDefault = true) + private LearningDoneListener mLearningDoneListener = null; + + @GenerateFieldPort(name = "useTheForce", hasDefault = true) + private boolean mUseTheForce = false; + + @GenerateFinalPort(name = "provideDebugOutputs", hasDefault = true) + private boolean mProvideDebugOutputs = false; + + // Whether to mirror the background or not. For ex, the Camera app + // would mirror the preview for the front camera + @GenerateFieldPort(name = "mirrorBg", hasDefault = true) + private boolean mMirrorBg = false; + + // The orientation of the display. This will change the flipping + // coordinates, if we were to mirror the background + @GenerateFieldPort(name = "orientation", hasDefault = true) + private int mOrientation = 0; + + /** Default algorithm parameter values, for non-shader use */ + + // Frame count for learning bg model + private static final int DEFAULT_LEARNING_DURATION = 40; + // Frame count for learning verification + private static final int DEFAULT_LEARNING_VERIFY_DURATION = 10; + // Maximum distance (in standard deviations) for considering a pixel as background + private static final float DEFAULT_ACCEPT_STDDEV = 0.85f; + // Variance threshold scale factor for large scale of hierarchy + private static final float DEFAULT_HIER_LRG_SCALE = 0.7f; + // Variance threshold scale factor for medium scale of hierarchy + private static final float DEFAULT_HIER_MID_SCALE = 0.6f; + // Variance threshold scale factor for small scale of hierarchy + private static final float DEFAULT_HIER_SML_SCALE = 0.5f; + // Width of foreground / background mask. + private static final int DEFAULT_MASK_WIDTH_EXPONENT = 8; + // Height of foreground / background mask. + private static final int DEFAULT_MASK_HEIGHT_EXPONENT = 8; + // Area over which to average for large scale (length in pixels = 2^HIERARCHY_*_EXPONENT) + private static final int DEFAULT_HIER_LRG_EXPONENT = 3; + // Area over which to average for medium scale + private static final int DEFAULT_HIER_MID_EXPONENT = 2; + // Area over which to average for small scale + private static final int DEFAULT_HIER_SML_EXPONENT = 0; + // Scale factor for luminance channel in distance calculations (larger = more significant) + private static final float DEFAULT_Y_SCALE_FACTOR = 0.40f; + // Scale factor for chroma channels in distance calculations + private static final float DEFAULT_UV_SCALE_FACTOR = 1.35f; + // Mask value to start blending away from background + private static final float DEFAULT_MASK_BLEND_BG = 0.65f; + // Mask value to start blending away from foreground + private static final float DEFAULT_MASK_BLEND_FG = 0.95f; + // Exposure stop number to change the brightness of foreground + private static final float DEFAULT_EXPOSURE_CHANGE = 1.0f; + // White balance change in Red channel for foreground + private static final float DEFAULT_WHITE_BALANCE_RED_CHANGE = 0.0f; + // White balance change in Blue channel for foreground + private static final float DEFAULT_WHITE_BALANCE_BLUE_CHANGE = 0.0f; + // Variable to control automatic white balance effect + // 0.f -> Auto WB is off; 1.f-> Auto WB is on + private static final int DEFAULT_WHITE_BALANCE_TOGGLE = 0; + + // Default rate at which to learn bg model during learning period + private static final float DEFAULT_LEARNING_ADAPT_RATE = 0.2f; + // Default rate at which to learn bg model from new background pixels + private static final float DEFAULT_ADAPT_RATE_BG = 0.0f; + // Default rate at which to learn bg model from new foreground pixels + private static final float DEFAULT_ADAPT_RATE_FG = 0.0f; + // Default rate at which to verify whether background is stable + private static final float DEFAULT_MASK_VERIFY_RATE = 0.25f; + // Default rate at which to verify whether background is stable + private static final int DEFAULT_LEARNING_DONE_THRESHOLD = 20; + + // Default 3x3 matrix, column major, for fitting background 1:1 + private static final float[] DEFAULT_BG_FIT_TRANSFORM = new float[] { + 1.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 1.0f + }; + + /** Default algorithm parameter values, for shader use */ + + // Area over which to blur binary mask values (length in pixels = 2^MASK_SMOOTH_EXPONENT) + private static final String MASK_SMOOTH_EXPONENT = "2.0"; + // Scale value for mapping variance distance to fit nicely to 0-1, 8-bit + private static final String DISTANCE_STORAGE_SCALE = "0.6"; + // Scale value for mapping variance to fit nicely to 0-1, 8-bit + private static final String VARIANCE_STORAGE_SCALE = "5.0"; + // Default scale of auto white balance parameters + private static final String DEFAULT_AUTO_WB_SCALE = "0.25"; + // Minimum variance (0-255 scale) + private static final String MIN_VARIANCE = "3.0"; + // Column-major array for 4x4 matrix converting RGB to YCbCr, JPEG definition (no pedestal) + private static final String RGB_TO_YUV_MATRIX = "0.299, -0.168736, 0.5, 0.000, " + + "0.587, -0.331264, -0.418688, 0.000, " + + "0.114, 0.5, -0.081312, 0.000, " + + "0.000, 0.5, 0.5, 1.000 "; + /** Stream names */ + + private static final String[] mInputNames = {"video", + "background"}; + + private static final String[] mOutputNames = {"video"}; + + private static final String[] mDebugOutputNames = {"debug1", + "debug2"}; + + /** Other private variables */ + + private FrameFormat mOutputFormat; + private MutableFrameFormat mMemoryFormat; + private MutableFrameFormat mMaskFormat; + private MutableFrameFormat mAverageFormat; + + private final boolean mLogVerbose; + private static final String TAG = "BackDropperFilter"; + + /** Shader source code */ + + // Shared uniforms and utility functions + private static String mSharedUtilShader = + "precision mediump float;\n" + + "uniform float fg_adapt_rate;\n" + + "uniform float bg_adapt_rate;\n" + + "const mat4 coeff_yuv = mat4(" + RGB_TO_YUV_MATRIX + ");\n" + + "const float dist_scale = " + DISTANCE_STORAGE_SCALE + ";\n" + + "const float inv_dist_scale = 1. / dist_scale;\n" + + "const float var_scale=" + VARIANCE_STORAGE_SCALE + ";\n" + + "const float inv_var_scale = 1. / var_scale;\n" + + "const float min_variance = inv_var_scale *" + MIN_VARIANCE + "/ 256.;\n" + + "const float auto_wb_scale = " + DEFAULT_AUTO_WB_SCALE + ";\n" + + "\n" + + // Variance distance in luminance between current pixel and background model + "float gauss_dist_y(float y, float mean, float variance) {\n" + + " float dist = (y - mean) * (y - mean) / variance;\n" + + " return dist;\n" + + "}\n" + + // Sum of variance distances in chroma between current pixel and background + // model + "float gauss_dist_uv(vec2 uv, vec2 mean, vec2 variance) {\n" + + " vec2 dist = (uv - mean) * (uv - mean) / variance;\n" + + " return dist.r + dist.g;\n" + + "}\n" + + // Select learning rate for pixel based on smoothed decision mask alpha + "float local_adapt_rate(float alpha) {\n" + + " return mix(bg_adapt_rate, fg_adapt_rate, alpha);\n" + + "}\n" + + "\n"; + + // Distance calculation shader. Calculates a distance metric between the foreground and the + // current background model, in both luminance and in chroma (yuv space). Distance is + // measured in variances from the mean background value. For chroma, the distance is the sum + // of the two individual color channel distances. The distances are output on the b and alpha + // channels, r and g are for debug information. + // Inputs: + // tex_sampler_0: Mip-map for foreground (live) video frame. + // tex_sampler_1: Background mean mask. + // tex_sampler_2: Background variance mask. + // subsample_level: Level on foreground frame's mip-map. + private static final String mBgDistanceShader = + "uniform sampler2D tex_sampler_0;\n" + + "uniform sampler2D tex_sampler_1;\n" + + "uniform sampler2D tex_sampler_2;\n" + + "uniform float subsample_level;\n" + + "varying vec2 v_texcoord;\n" + + "void main() {\n" + + " vec4 fg_rgb = texture2D(tex_sampler_0, v_texcoord, subsample_level);\n" + + " vec4 fg = coeff_yuv * vec4(fg_rgb.rgb, 1.);\n" + + " vec4 mean = texture2D(tex_sampler_1, v_texcoord);\n" + + " vec4 variance = inv_var_scale * texture2D(tex_sampler_2, v_texcoord);\n" + + "\n" + + " float dist_y = gauss_dist_y(fg.r, mean.r, variance.r);\n" + + " float dist_uv = gauss_dist_uv(fg.gb, mean.gb, variance.gb);\n" + + " gl_FragColor = vec4(0.5*fg.rg, dist_scale*dist_y, dist_scale*dist_uv);\n" + + "}\n"; + + // Foreground/background mask decision shader. Decides whether a frame is in the foreground or + // the background using a hierarchical threshold on the distance. Binary foreground/background + // mask is placed in the alpha channel. The RGB channels contain debug information. + private static final String mBgMaskShader = + "uniform sampler2D tex_sampler_0;\n" + + "uniform float accept_variance;\n" + + "uniform vec2 yuv_weights;\n" + + "uniform float scale_lrg;\n" + + "uniform float scale_mid;\n" + + "uniform float scale_sml;\n" + + "uniform float exp_lrg;\n" + + "uniform float exp_mid;\n" + + "uniform float exp_sml;\n" + + "varying vec2 v_texcoord;\n" + + // Decide whether pixel is foreground or background based on Y and UV + // distance and maximum acceptable variance. + // yuv_weights.x is smaller than yuv_weights.y to discount the influence of shadow + "bool is_fg(vec2 dist_yc, float accept_variance) {\n" + + " return ( dot(yuv_weights, dist_yc) >= accept_variance );\n" + + "}\n" + + "void main() {\n" + + " vec4 dist_lrg_sc = texture2D(tex_sampler_0, v_texcoord, exp_lrg);\n" + + " vec4 dist_mid_sc = texture2D(tex_sampler_0, v_texcoord, exp_mid);\n" + + " vec4 dist_sml_sc = texture2D(tex_sampler_0, v_texcoord, exp_sml);\n" + + " vec2 dist_lrg = inv_dist_scale * dist_lrg_sc.ba;\n" + + " vec2 dist_mid = inv_dist_scale * dist_mid_sc.ba;\n" + + " vec2 dist_sml = inv_dist_scale * dist_sml_sc.ba;\n" + + " vec2 norm_dist = 0.75 * dist_sml / accept_variance;\n" + // For debug viz + " bool is_fg_lrg = is_fg(dist_lrg, accept_variance * scale_lrg);\n" + + " bool is_fg_mid = is_fg_lrg || is_fg(dist_mid, accept_variance * scale_mid);\n" + + " float is_fg_sml =\n" + + " float(is_fg_mid || is_fg(dist_sml, accept_variance * scale_sml));\n" + + " float alpha = 0.5 * is_fg_sml + 0.3 * float(is_fg_mid) + 0.2 * float(is_fg_lrg);\n" + + " gl_FragColor = vec4(alpha, norm_dist, is_fg_sml);\n" + + "}\n"; + + // Automatic White Balance parameter decision shader + // Use the Gray World assumption that in a white balance corrected image, the average of R, G, B + // channel will be a common gray value. + // To match the white balance of foreground and background, the average of R, G, B channel of + // two videos should match. + // Inputs: + // tex_sampler_0: Mip-map for foreground (live) video frame. + // tex_sampler_1: Mip-map for background (playback) video frame. + // pyramid_depth: Depth of input frames' mip-maps. + private static final String mAutomaticWhiteBalance = + "uniform sampler2D tex_sampler_0;\n" + + "uniform sampler2D tex_sampler_1;\n" + + "uniform float pyramid_depth;\n" + + "uniform bool autowb_toggle;\n" + + "varying vec2 v_texcoord;\n" + + "void main() {\n" + + " vec4 mean_video = texture2D(tex_sampler_0, v_texcoord, pyramid_depth);\n"+ + " vec4 mean_bg = texture2D(tex_sampler_1, v_texcoord, pyramid_depth);\n" + + // If Auto WB is toggled off, the return texture will be a unicolor texture of value 1 + // If Auto WB is toggled on, the return texture will be a unicolor texture with + // adjustment parameters for R and B channels stored in the corresponding channel + " float green_normalizer = mean_video.g / mean_bg.g;\n"+ + " vec4 adjusted_value = vec4(mean_bg.r / mean_video.r * green_normalizer, 1., \n" + + " mean_bg.b / mean_video.b * green_normalizer, 1.) * auto_wb_scale; \n" + + " gl_FragColor = autowb_toggle ? adjusted_value : vec4(auto_wb_scale);\n" + + "}\n"; + + + // Background subtraction shader. Uses a mipmap of the binary mask map to blend smoothly between + // foreground and background + // Inputs: + // tex_sampler_0: Foreground (live) video frame. + // tex_sampler_1: Background (playback) video frame. + // tex_sampler_2: Foreground/background mask. + // tex_sampler_3: Auto white-balance factors. + private static final String mBgSubtractShader = + "uniform mat3 bg_fit_transform;\n" + + "uniform float mask_blend_bg;\n" + + "uniform float mask_blend_fg;\n" + + "uniform float exposure_change;\n" + + "uniform float whitebalancered_change;\n" + + "uniform float whitebalanceblue_change;\n" + + "uniform sampler2D tex_sampler_0;\n" + + "uniform sampler2D tex_sampler_1;\n" + + "uniform sampler2D tex_sampler_2;\n" + + "uniform sampler2D tex_sampler_3;\n" + + "varying vec2 v_texcoord;\n" + + "void main() {\n" + + " vec2 bg_texcoord = (bg_fit_transform * vec3(v_texcoord, 1.)).xy;\n" + + " vec4 bg_rgb = texture2D(tex_sampler_1, bg_texcoord);\n" + + // The foreground texture is modified by multiplying both manual and auto white balance changes in R and B + // channel and multiplying exposure change in all R, G, B channels. + " vec4 wb_auto_scale = texture2D(tex_sampler_3, v_texcoord) * exposure_change / auto_wb_scale;\n" + + " vec4 wb_manual_scale = vec4(1. + whitebalancered_change, 1., 1. + whitebalanceblue_change, 1.);\n" + + " vec4 fg_rgb = texture2D(tex_sampler_0, v_texcoord);\n" + + " vec4 fg_adjusted = fg_rgb * wb_manual_scale * wb_auto_scale;\n"+ + " vec4 mask = texture2D(tex_sampler_2, v_texcoord, \n" + + " " + MASK_SMOOTH_EXPONENT + ");\n" + + " float alpha = smoothstep(mask_blend_bg, mask_blend_fg, mask.a);\n" + + " gl_FragColor = mix(bg_rgb, fg_adjusted, alpha);\n"; + + // May the Force... Makes the foreground object translucent blue, with a bright + // blue-white outline + private static final String mBgSubtractForceShader = + " vec4 ghost_rgb = (fg_adjusted * 0.7 + vec4(0.3,0.3,0.4,0.))*0.65 + \n" + + " 0.35*bg_rgb;\n" + + " float glow_start = 0.75 * mask_blend_bg; \n"+ + " float glow_max = mask_blend_bg; \n"+ + " gl_FragColor = mask.a < glow_start ? bg_rgb : \n" + + " mask.a < glow_max ? mix(bg_rgb, vec4(0.9,0.9,1.0,1.0), \n" + + " (mask.a - glow_start) / (glow_max - glow_start) ) : \n" + + " mask.a < mask_blend_fg ? mix(vec4(0.9,0.9,1.0,1.0), ghost_rgb, \n" + + " (mask.a - glow_max) / (mask_blend_fg - glow_max) ) : \n" + + " ghost_rgb;\n" + + "}\n"; + + // Background model mean update shader. Skews the current model mean toward the most recent pixel + // value for a pixel, weighted by the learning rate and by whether the pixel is classified as + // foreground or background. + // Inputs: + // tex_sampler_0: Mip-map for foreground (live) video frame. + // tex_sampler_1: Background mean mask. + // tex_sampler_2: Foreground/background mask. + // subsample_level: Level on foreground frame's mip-map. + private static final String mUpdateBgModelMeanShader = + "uniform sampler2D tex_sampler_0;\n" + + "uniform sampler2D tex_sampler_1;\n" + + "uniform sampler2D tex_sampler_2;\n" + + "uniform float subsample_level;\n" + + "varying vec2 v_texcoord;\n" + + "void main() {\n" + + " vec4 fg_rgb = texture2D(tex_sampler_0, v_texcoord, subsample_level);\n" + + " vec4 fg = coeff_yuv * vec4(fg_rgb.rgb, 1.);\n" + + " vec4 mean = texture2D(tex_sampler_1, v_texcoord);\n" + + " vec4 mask = texture2D(tex_sampler_2, v_texcoord, \n" + + " " + MASK_SMOOTH_EXPONENT + ");\n" + + "\n" + + " float alpha = local_adapt_rate(mask.a);\n" + + " vec4 new_mean = mix(mean, fg, alpha);\n" + + " gl_FragColor = new_mean;\n" + + "}\n"; + + // Background model variance update shader. Skews the current model variance toward the most + // recent variance for the pixel, weighted by the learning rate and by whether the pixel is + // classified as foreground or background. + // Inputs: + // tex_sampler_0: Mip-map for foreground (live) video frame. + // tex_sampler_1: Background mean mask. + // tex_sampler_2: Background variance mask. + // tex_sampler_3: Foreground/background mask. + // subsample_level: Level on foreground frame's mip-map. + // TODO: to improve efficiency, use single mark for mean + variance, then merge this into + // mUpdateBgModelMeanShader. + private static final String mUpdateBgModelVarianceShader = + "uniform sampler2D tex_sampler_0;\n" + + "uniform sampler2D tex_sampler_1;\n" + + "uniform sampler2D tex_sampler_2;\n" + + "uniform sampler2D tex_sampler_3;\n" + + "uniform float subsample_level;\n" + + "varying vec2 v_texcoord;\n" + + "void main() {\n" + + " vec4 fg_rgb = texture2D(tex_sampler_0, v_texcoord, subsample_level);\n" + + " vec4 fg = coeff_yuv * vec4(fg_rgb.rgb, 1.);\n" + + " vec4 mean = texture2D(tex_sampler_1, v_texcoord);\n" + + " vec4 variance = inv_var_scale * texture2D(tex_sampler_2, v_texcoord);\n" + + " vec4 mask = texture2D(tex_sampler_3, v_texcoord, \n" + + " " + MASK_SMOOTH_EXPONENT + ");\n" + + "\n" + + " float alpha = local_adapt_rate(mask.a);\n" + + " vec4 cur_variance = (fg-mean)*(fg-mean);\n" + + " vec4 new_variance = mix(variance, cur_variance, alpha);\n" + + " new_variance = max(new_variance, vec4(min_variance));\n" + + " gl_FragColor = var_scale * new_variance;\n" + + "}\n"; + + // Background verification shader. Skews the current background verification mask towards the + // most recent frame, weighted by the learning rate. + private static final String mMaskVerifyShader = + "uniform sampler2D tex_sampler_0;\n" + + "uniform sampler2D tex_sampler_1;\n" + + "uniform float verify_rate;\n" + + "varying vec2 v_texcoord;\n" + + "void main() {\n" + + " vec4 lastmask = texture2D(tex_sampler_0, v_texcoord);\n" + + " vec4 mask = texture2D(tex_sampler_1, v_texcoord);\n" + + " float newmask = mix(lastmask.a, mask.a, verify_rate);\n" + + " gl_FragColor = vec4(0., 0., 0., newmask);\n" + + "}\n"; + + /** Shader program objects */ + + private ShaderProgram mBgDistProgram; + private ShaderProgram mBgMaskProgram; + private ShaderProgram mBgSubtractProgram; + private ShaderProgram mBgUpdateMeanProgram; + private ShaderProgram mBgUpdateVarianceProgram; + private ShaderProgram mCopyOutProgram; + private ShaderProgram mAutomaticWhiteBalanceProgram; + private ShaderProgram mMaskVerifyProgram; + private ShaderProgram copyShaderProgram; + + /** Background model storage */ + + private boolean mPingPong; + private GLFrame mBgMean[]; + private GLFrame mBgVariance[]; + private GLFrame mMaskVerify[]; + private GLFrame mDistance; + private GLFrame mAutoWB; + private GLFrame mMask; + private GLFrame mVideoInput; + private GLFrame mBgInput; + private GLFrame mMaskAverage; + + /** Overall filter state */ + + private boolean isOpen; + private int mFrameCount; + private boolean mStartLearning; + private boolean mBackgroundFitModeChanged; + private float mRelativeAspect; + private int mPyramidDepth; + private int mSubsampleLevel; + + /** Learning listener object */ + + public interface LearningDoneListener { + public void onLearningDone(BackDropperFilter filter); + } + + /** Public Filter methods */ + + public BackDropperFilter(String name) { + super(name); + + mLogVerbose = Log.isLoggable(TAG, Log.VERBOSE); + } + + @Override + public void setupPorts() { + // Inputs. + // TODO: Target should be GPU, but relaxed for now. + FrameFormat imageFormat = ImageFormat.create(ImageFormat.COLORSPACE_RGBA, + FrameFormat.TARGET_UNSPECIFIED); + for (String inputName : mInputNames) { + addMaskedInputPort(inputName, imageFormat); + } + // Normal outputs + for (String outputName : mOutputNames) { + addOutputBasedOnInput(outputName, "video"); + } + + // Debug outputs + if (mProvideDebugOutputs) { + for (String outputName : mDebugOutputNames) { + addOutputBasedOnInput(outputName, "video"); + } + } + } + + @Override + public FrameFormat getOutputFormat(String portName, FrameFormat inputFormat) { + // Create memory format based on video input. + MutableFrameFormat format = inputFormat.mutableCopy(); + // Is this a debug output port? If so, leave dimensions unspecified. + if (!Arrays.asList(mOutputNames).contains(portName)) { + format.setDimensions(FrameFormat.SIZE_UNSPECIFIED, FrameFormat.SIZE_UNSPECIFIED); + } + return format; + } + + private boolean createMemoryFormat(FrameFormat inputFormat) { + // We can't resize because that would require re-learning. + if (mMemoryFormat != null) { + return false; + } + + if (inputFormat.getWidth() == FrameFormat.SIZE_UNSPECIFIED || + inputFormat.getHeight() == FrameFormat.SIZE_UNSPECIFIED) { + throw new RuntimeException("Attempting to process input frame with unknown size"); + } + + mMaskFormat = inputFormat.mutableCopy(); + int maskWidth = (int)Math.pow(2, mMaskWidthExp); + int maskHeight = (int)Math.pow(2, mMaskHeightExp); + mMaskFormat.setDimensions(maskWidth, maskHeight); + + mPyramidDepth = Math.max(mMaskWidthExp, mMaskHeightExp); + mMemoryFormat = mMaskFormat.mutableCopy(); + int widthExp = Math.max(mMaskWidthExp, pyramidLevel(inputFormat.getWidth())); + int heightExp = Math.max(mMaskHeightExp, pyramidLevel(inputFormat.getHeight())); + mPyramidDepth = Math.max(widthExp, heightExp); + int memWidth = Math.max(maskWidth, (int)Math.pow(2, widthExp)); + int memHeight = Math.max(maskHeight, (int)Math.pow(2, heightExp)); + mMemoryFormat.setDimensions(memWidth, memHeight); + mSubsampleLevel = mPyramidDepth - Math.max(mMaskWidthExp, mMaskHeightExp); + + if (mLogVerbose) { + Log.v(TAG, "Mask frames size " + maskWidth + " x " + maskHeight); + Log.v(TAG, "Pyramid levels " + widthExp + " x " + heightExp); + Log.v(TAG, "Memory frames size " + memWidth + " x " + memHeight); + } + + mAverageFormat = inputFormat.mutableCopy(); + mAverageFormat.setDimensions(1,1); + return true; + } + + public void prepare(FilterContext context){ + if (mLogVerbose) Log.v(TAG, "Preparing BackDropperFilter!"); + + mBgMean = new GLFrame[2]; + mBgVariance = new GLFrame[2]; + mMaskVerify = new GLFrame[2]; + copyShaderProgram = ShaderProgram.createIdentity(context); + } + + private void allocateFrames(FrameFormat inputFormat, FilterContext context) { + if (!createMemoryFormat(inputFormat)) { + return; // All set. + } + if (mLogVerbose) Log.v(TAG, "Allocating BackDropperFilter frames"); + + // Create initial background model values + int numBytes = mMaskFormat.getSize(); + byte[] initialBgMean = new byte[numBytes]; + byte[] initialBgVariance = new byte[numBytes]; + byte[] initialMaskVerify = new byte[numBytes]; + for (int i = 0; i < numBytes; i++) { + initialBgMean[i] = (byte)128; + initialBgVariance[i] = (byte)10; + initialMaskVerify[i] = (byte)0; + } + + // Get frames to store background model in + for (int i = 0; i < 2; i++) { + mBgMean[i] = (GLFrame)context.getFrameManager().newFrame(mMaskFormat); + mBgMean[i].setData(initialBgMean, 0, numBytes); + + mBgVariance[i] = (GLFrame)context.getFrameManager().newFrame(mMaskFormat); + mBgVariance[i].setData(initialBgVariance, 0, numBytes); + + mMaskVerify[i] = (GLFrame)context.getFrameManager().newFrame(mMaskFormat); + mMaskVerify[i].setData(initialMaskVerify, 0, numBytes); + } + + // Get frames to store other textures in + if (mLogVerbose) Log.v(TAG, "Done allocating texture for Mean and Variance objects!"); + + mDistance = (GLFrame)context.getFrameManager().newFrame(mMaskFormat); + mMask = (GLFrame)context.getFrameManager().newFrame(mMaskFormat); + mAutoWB = (GLFrame)context.getFrameManager().newFrame(mAverageFormat); + mVideoInput = (GLFrame)context.getFrameManager().newFrame(mMemoryFormat); + mBgInput = (GLFrame)context.getFrameManager().newFrame(mMemoryFormat); + mMaskAverage = (GLFrame)context.getFrameManager().newFrame(mAverageFormat); + + // Create shader programs + mBgDistProgram = new ShaderProgram(context, mSharedUtilShader + mBgDistanceShader); + mBgDistProgram.setHostValue("subsample_level", (float)mSubsampleLevel); + + mBgMaskProgram = new ShaderProgram(context, mSharedUtilShader + mBgMaskShader); + mBgMaskProgram.setHostValue("accept_variance", mAcceptStddev * mAcceptStddev); + float[] yuvWeights = { mLumScale, mChromaScale }; + mBgMaskProgram.setHostValue("yuv_weights", yuvWeights ); + mBgMaskProgram.setHostValue("scale_lrg", mHierarchyLrgScale); + mBgMaskProgram.setHostValue("scale_mid", mHierarchyMidScale); + mBgMaskProgram.setHostValue("scale_sml", mHierarchySmlScale); + mBgMaskProgram.setHostValue("exp_lrg", (float)(mSubsampleLevel + mHierarchyLrgExp)); + mBgMaskProgram.setHostValue("exp_mid", (float)(mSubsampleLevel + mHierarchyMidExp)); + mBgMaskProgram.setHostValue("exp_sml", (float)(mSubsampleLevel + mHierarchySmlExp)); + + if (mUseTheForce) { + mBgSubtractProgram = new ShaderProgram(context, mSharedUtilShader + mBgSubtractShader + mBgSubtractForceShader); + } else { + mBgSubtractProgram = new ShaderProgram(context, mSharedUtilShader + mBgSubtractShader + "}\n"); + } + mBgSubtractProgram.setHostValue("bg_fit_transform", DEFAULT_BG_FIT_TRANSFORM); + mBgSubtractProgram.setHostValue("mask_blend_bg", mMaskBg); + mBgSubtractProgram.setHostValue("mask_blend_fg", mMaskFg); + mBgSubtractProgram.setHostValue("exposure_change", mExposureChange); + mBgSubtractProgram.setHostValue("whitebalanceblue_change", mWhiteBalanceBlueChange); + mBgSubtractProgram.setHostValue("whitebalancered_change", mWhiteBalanceRedChange); + + + mBgUpdateMeanProgram = new ShaderProgram(context, mSharedUtilShader + mUpdateBgModelMeanShader); + mBgUpdateMeanProgram.setHostValue("subsample_level", (float)mSubsampleLevel); + + mBgUpdateVarianceProgram = new ShaderProgram(context, mSharedUtilShader + mUpdateBgModelVarianceShader); + mBgUpdateVarianceProgram.setHostValue("subsample_level", (float)mSubsampleLevel); + + mCopyOutProgram = ShaderProgram.createIdentity(context); + + mAutomaticWhiteBalanceProgram = new ShaderProgram(context, mSharedUtilShader + mAutomaticWhiteBalance); + mAutomaticWhiteBalanceProgram.setHostValue("pyramid_depth", (float)mPyramidDepth); + mAutomaticWhiteBalanceProgram.setHostValue("autowb_toggle", mAutoWBToggle); + + mMaskVerifyProgram = new ShaderProgram(context, mSharedUtilShader + mMaskVerifyShader); + mMaskVerifyProgram.setHostValue("verify_rate", mVerifyRate); + + if (mLogVerbose) Log.v(TAG, "Shader width set to " + mMemoryFormat.getWidth()); + + mRelativeAspect = 1.f; + + mFrameCount = 0; + mStartLearning = true; + } + + public void process(FilterContext context) { + // Grab inputs and ready intermediate frames and outputs. + Frame video = pullInput("video"); + Frame background = pullInput("background"); + allocateFrames(video.getFormat(), context); + + // Update learning rate after initial learning period + if (mStartLearning) { + if (mLogVerbose) Log.v(TAG, "Starting learning"); + mBgUpdateMeanProgram.setHostValue("bg_adapt_rate", mAdaptRateLearning); + mBgUpdateMeanProgram.setHostValue("fg_adapt_rate", mAdaptRateLearning); + mBgUpdateVarianceProgram.setHostValue("bg_adapt_rate", mAdaptRateLearning); + mBgUpdateVarianceProgram.setHostValue("fg_adapt_rate", mAdaptRateLearning); + mFrameCount = 0; + mStartLearning = false; + } + + // Select correct pingpong buffers + int inputIndex = mPingPong ? 0 : 1; + int outputIndex = mPingPong ? 1 : 0; + mPingPong = !mPingPong; + + // Check relative aspect ratios + updateBgScaling(video, background, mBackgroundFitModeChanged); + mBackgroundFitModeChanged = false; + + // Make copies for input frames to GLFrames + + copyShaderProgram.process(video, mVideoInput); + copyShaderProgram.process(background, mBgInput); + + mVideoInput.generateMipMap(); + mVideoInput.setTextureParameter(GLES20.GL_TEXTURE_MIN_FILTER, + GLES20.GL_LINEAR_MIPMAP_NEAREST); + + mBgInput.generateMipMap(); + mBgInput.setTextureParameter(GLES20.GL_TEXTURE_MIN_FILTER, + GLES20.GL_LINEAR_MIPMAP_NEAREST); + + // Process shaders + Frame[] distInputs = { mVideoInput, mBgMean[inputIndex], mBgVariance[inputIndex] }; + mBgDistProgram.process(distInputs, mDistance); + mDistance.generateMipMap(); + mDistance.setTextureParameter(GLES20.GL_TEXTURE_MIN_FILTER, + GLES20.GL_LINEAR_MIPMAP_NEAREST); + + mBgMaskProgram.process(mDistance, mMask); + mMask.generateMipMap(); + mMask.setTextureParameter(GLES20.GL_TEXTURE_MIN_FILTER, + GLES20.GL_LINEAR_MIPMAP_NEAREST); + + Frame[] autoWBInputs = { mVideoInput, mBgInput }; + mAutomaticWhiteBalanceProgram.process(autoWBInputs, mAutoWB); + + if (mFrameCount <= mLearningDuration) { + // During learning + pushOutput("video", video); + + if (mFrameCount == mLearningDuration - mLearningVerifyDuration) { + copyShaderProgram.process(mMask, mMaskVerify[outputIndex]); + + mBgUpdateMeanProgram.setHostValue("bg_adapt_rate", mAdaptRateBg); + mBgUpdateMeanProgram.setHostValue("fg_adapt_rate", mAdaptRateFg); + mBgUpdateVarianceProgram.setHostValue("bg_adapt_rate", mAdaptRateBg); + mBgUpdateVarianceProgram.setHostValue("fg_adapt_rate", mAdaptRateFg); + + + } else if (mFrameCount > mLearningDuration - mLearningVerifyDuration) { + // In the learning verification stage, compute background masks and a weighted average + // with weights grow exponentially with time + Frame[] maskVerifyInputs = {mMaskVerify[inputIndex], mMask}; + mMaskVerifyProgram.process(maskVerifyInputs, mMaskVerify[outputIndex]); + mMaskVerify[outputIndex].generateMipMap(); + mMaskVerify[outputIndex].setTextureParameter(GLES20.GL_TEXTURE_MIN_FILTER, + GLES20.GL_LINEAR_MIPMAP_NEAREST); + } + + if (mFrameCount == mLearningDuration) { + // In the last verification frame, verify if the verification mask is almost blank + // If not, restart learning + copyShaderProgram.process(mMaskVerify[outputIndex], mMaskAverage); + ByteBuffer mMaskAverageByteBuffer = mMaskAverage.getData(); + byte[] mask_average = mMaskAverageByteBuffer.array(); + int bi = (int)(mask_average[3] & 0xFF); + if (mLogVerbose) Log.v(TAG, String.format("Mask_average is %d", bi)); + + if (bi >= DEFAULT_LEARNING_DONE_THRESHOLD) { + mStartLearning = true; // Restart learning + } else { + if (mLogVerbose) Log.v(TAG, "Learning done"); + if (mLearningDoneListener != null) { + mLearningDoneListener.onLearningDone(this); + } + } + } + } else { + Frame output = context.getFrameManager().newFrame(video.getFormat()); + Frame[] subtractInputs = { video, background, mMask, mAutoWB }; + mBgSubtractProgram.process(subtractInputs, output); + pushOutput("video", output); + output.release(); + } + + // Compute mean and variance of the background + if (mFrameCount < mLearningDuration - mLearningVerifyDuration || + mAdaptRateBg > 0.0 || mAdaptRateFg > 0.0) { + Frame[] meanUpdateInputs = { mVideoInput, mBgMean[inputIndex], mMask }; + mBgUpdateMeanProgram.process(meanUpdateInputs, mBgMean[outputIndex]); + mBgMean[outputIndex].generateMipMap(); + mBgMean[outputIndex].setTextureParameter(GLES20.GL_TEXTURE_MIN_FILTER, + GLES20.GL_LINEAR_MIPMAP_NEAREST); + + Frame[] varianceUpdateInputs = { + mVideoInput, mBgMean[inputIndex], mBgVariance[inputIndex], mMask + }; + mBgUpdateVarianceProgram.process(varianceUpdateInputs, mBgVariance[outputIndex]); + mBgVariance[outputIndex].generateMipMap(); + mBgVariance[outputIndex].setTextureParameter(GLES20.GL_TEXTURE_MIN_FILTER, + GLES20.GL_LINEAR_MIPMAP_NEAREST); + } + + // Provide debug output to two smaller viewers + if (mProvideDebugOutputs) { + Frame dbg1 = context.getFrameManager().newFrame(video.getFormat()); + mCopyOutProgram.process(video, dbg1); + pushOutput("debug1", dbg1); + dbg1.release(); + + Frame dbg2 = context.getFrameManager().newFrame(mMemoryFormat); + mCopyOutProgram.process(mMask, dbg2); + pushOutput("debug2", dbg2); + dbg2.release(); + } + + mFrameCount++; + + if (mLogVerbose) { + if (mFrameCount % 30 == 0) { + if (startTime == -1) { + context.getGLEnvironment().activate(); + GLES20.glFinish(); + startTime = SystemClock.elapsedRealtime(); + } else { + context.getGLEnvironment().activate(); + GLES20.glFinish(); + long endTime = SystemClock.elapsedRealtime(); + Log.v(TAG, "Avg. frame duration: " + String.format("%.2f",(endTime-startTime)/30.) + + " ms. Avg. fps: " + String.format("%.2f", 1000./((endTime-startTime)/30.)) ); + startTime = endTime; + } + } + } + } + + private long startTime = -1; + + public void close(FilterContext context) { + if (mMemoryFormat == null) { + return; + } + + if (mLogVerbose) Log.v(TAG, "Filter Closing!"); + for (int i = 0; i < 2; i++) { + mBgMean[i].release(); + mBgVariance[i].release(); + mMaskVerify[i].release(); + } + mDistance.release(); + mMask.release(); + mAutoWB.release(); + mVideoInput.release(); + mBgInput.release(); + mMaskAverage.release(); + + mMemoryFormat = null; + } + + // Relearn background model + synchronized public void relearn() { + // Let the processing thread know about learning restart + mStartLearning = true; + } + + @Override + public void fieldPortValueUpdated(String name, FilterContext context) { + // TODO: Many of these can be made ProgramPorts! + if (name.equals("backgroundFitMode")) { + mBackgroundFitModeChanged = true; + } else if (name.equals("acceptStddev")) { + mBgMaskProgram.setHostValue("accept_variance", mAcceptStddev * mAcceptStddev); + } else if (name.equals("hierLrgScale")) { + mBgMaskProgram.setHostValue("scale_lrg", mHierarchyLrgScale); + } else if (name.equals("hierMidScale")) { + mBgMaskProgram.setHostValue("scale_mid", mHierarchyMidScale); + } else if (name.equals("hierSmlScale")) { + mBgMaskProgram.setHostValue("scale_sml", mHierarchySmlScale); + } else if (name.equals("hierLrgExp")) { + mBgMaskProgram.setHostValue("exp_lrg", (float)(mSubsampleLevel + mHierarchyLrgExp)); + } else if (name.equals("hierMidExp")) { + mBgMaskProgram.setHostValue("exp_mid", (float)(mSubsampleLevel + mHierarchyMidExp)); + } else if (name.equals("hierSmlExp")) { + mBgMaskProgram.setHostValue("exp_sml", (float)(mSubsampleLevel + mHierarchySmlExp)); + } else if (name.equals("lumScale") || name.equals("chromaScale")) { + float[] yuvWeights = { mLumScale, mChromaScale }; + mBgMaskProgram.setHostValue("yuv_weights", yuvWeights ); + } else if (name.equals("maskBg")) { + mBgSubtractProgram.setHostValue("mask_blend_bg", mMaskBg); + } else if (name.equals("maskFg")) { + mBgSubtractProgram.setHostValue("mask_blend_fg", mMaskFg); + } else if (name.equals("exposureChange")) { + mBgSubtractProgram.setHostValue("exposure_change", mExposureChange); + } else if (name.equals("whitebalanceredChange")) { + mBgSubtractProgram.setHostValue("whitebalancered_change", mWhiteBalanceRedChange); + } else if (name.equals("whitebalanceblueChange")) { + mBgSubtractProgram.setHostValue("whitebalanceblue_change", mWhiteBalanceBlueChange); + } else if (name.equals("autowbToggle")){ + mAutomaticWhiteBalanceProgram.setHostValue("autowb_toggle", mAutoWBToggle); + } + } + + private void updateBgScaling(Frame video, Frame background, boolean fitModeChanged) { + float foregroundAspect = (float)video.getFormat().getWidth() / video.getFormat().getHeight(); + float backgroundAspect = (float)background.getFormat().getWidth() / background.getFormat().getHeight(); + float currentRelativeAspect = foregroundAspect/backgroundAspect; + if (currentRelativeAspect != mRelativeAspect || fitModeChanged) { + mRelativeAspect = currentRelativeAspect; + float xMin = 0.f, xWidth = 1.f, yMin = 0.f, yWidth = 1.f; + switch (mBackgroundFitMode) { + case BACKGROUND_STRETCH: + // Just map 1:1 + break; + case BACKGROUND_FIT: + if (mRelativeAspect > 1.0f) { + // Foreground is wider than background, scale down + // background in X + xMin = 0.5f - 0.5f * mRelativeAspect; + xWidth = 1.f * mRelativeAspect; + } else { + // Foreground is taller than background, scale down + // background in Y + yMin = 0.5f - 0.5f / mRelativeAspect; + yWidth = 1 / mRelativeAspect; + } + break; + case BACKGROUND_FILL_CROP: + if (mRelativeAspect > 1.0f) { + // Foreground is wider than background, crop + // background in Y + yMin = 0.5f - 0.5f / mRelativeAspect; + yWidth = 1.f / mRelativeAspect; + } else { + // Foreground is taller than background, crop + // background in X + xMin = 0.5f - 0.5f * mRelativeAspect; + xWidth = mRelativeAspect; + } + break; + } + // If mirroring is required (for ex. the camera mirrors the preview + // in the front camera) + // TODO: Backdropper does not attempt to apply any other transformation + // than just flipping. However, in the current state, it's "x-axis" is always aligned + // with the Camera's width. Hence, we need to define the mirroring based on the camera + // orientation. In the future, a cleaner design would be to cast away all the rotation + // in a separate place. + if (mMirrorBg) { + if (mLogVerbose) Log.v(TAG, "Mirroring the background!"); + // Mirroring in portrait + if (mOrientation == 0 || mOrientation == 180) { + xWidth = -xWidth; + xMin = 1.0f - xMin; + } else { + // Mirroring in landscape + yWidth = -yWidth; + yMin = 1.0f - yMin; + } + } + if (mLogVerbose) Log.v(TAG, "bgTransform: xMin, yMin, xWidth, yWidth : " + + xMin + ", " + yMin + ", " + xWidth + ", " + yWidth + + ", mRelAspRatio = " + mRelativeAspect); + // The following matrix is the transpose of the actual matrix + float[] bgTransform = {xWidth, 0.f, 0.f, + 0.f, yWidth, 0.f, + xMin, yMin, 1.f}; + mBgSubtractProgram.setHostValue("bg_fit_transform", bgTransform); + } + } + + private int pyramidLevel(int size) { + return (int)Math.floor(Math.log10(size) / Math.log10(2)) - 1; + } + +} |