Repo created
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions
4  TMessagesProj/jni/third_party/libyuv/AUTHORS  vendored  Normal file
@@ -0,0 +1,4 @@
# Names should be added to this file like so:
#   Name or Organization <email address>

Google Inc.
29  TMessagesProj/jni/third_party/libyuv/LICENSE  vendored  Normal file
@@ -0,0 +1,29 @@
Copyright 2011 The LibYuv Project Authors. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

  * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution.

  * Neither the name of Google nor the names of its contributors may
    be used to endorse or promote products derived from this software
    without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8  TMessagesProj/jni/third_party/libyuv/LICENSE_THIRD_PARTY  vendored  Normal file
@@ -0,0 +1,8 @@
This source tree contains third party source code which is governed by third
party licenses. This file contains references to files which are under other
licenses than the one provided in the LICENSE file in the root of the source
tree.

Files governed by third party licenses:
source/x86inc.asm
24  TMessagesProj/jni/third_party/libyuv/PATENTS  vendored  Normal file
@@ -0,0 +1,24 @@
Additional IP Rights Grant (Patents)

"This implementation" means the copyrightable works distributed by
Google as part of the LibYuv code package.

Google hereby grants to you a perpetual, worldwide, non-exclusive,
no-charge, irrevocable (except as stated in this section) patent
license to make, have made, use, offer to sell, sell, import,
transfer, and otherwise run, modify and propagate the contents of this
implementation of the LibYuv code package, where such license applies
only to those patent claims, both currently owned by Google and
acquired in the future, licensable by Google that are necessarily
infringed by this implementation of the LibYuv code package. This
grant does not include claims that would be infringed only as a
consequence of further modification of this implementation. If you or
your agent or exclusive licensee institute or order or agree to the
institution of patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that this
implementation of the LibYuv code package or any code incorporated
within this implementation of the LibYuv code package constitutes
direct or contributory patent infringement, or inducement of patent
infringement, then any patent rights granted to you under this License
for this implementation of the LibYuv code package shall terminate as
of the date such litigation is filed.
33  TMessagesProj/jni/third_party/libyuv/include/libyuv.h  vendored  Normal file
@@ -0,0 +1,33 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_H_
#define INCLUDE_LIBYUV_H_

#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"
#include "libyuv/convert_from.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/mjpeg_decoder.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/scale.h"
#include "libyuv/scale_argb.h"
#include "libyuv/scale_row.h"
#include "libyuv/scale_uv.h"
#include "libyuv/version.h"
#include "libyuv/video_common.h"

#endif  // INCLUDE_LIBYUV_H_
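Editor's note: as a quick sanity check of the umbrella header, a minimal consumer can be built against it. The sketch below is illustrative and not part of the commit; it assumes the LIBYUV_VERSION macro conventionally defined by the included libyuv/version.h.

#include <cstdio>

#include "libyuv.h"

int main() {
  // LIBYUV_VERSION is assumed here from the included libyuv/version.h.
  std::printf("libyuv version %d\n", LIBYUV_VERSION);
  return 0;
}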
68  TMessagesProj/jni/third_party/libyuv/include/libyuv/basic_types.h  vendored  Normal file
@@ -0,0 +1,68 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_
#define INCLUDE_LIBYUV_BASIC_TYPES_H_

#include <stddef.h>  // For size_t and NULL

#if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG)
#define INT_TYPES_DEFINED

#if defined(_MSC_VER) && (_MSC_VER < 1600)
#include <sys/types.h>  // for uintptr_t on x86
typedef unsigned __int64 uint64_t;
typedef __int64 int64_t;
typedef unsigned int uint32_t;
typedef int int32_t;
typedef unsigned short uint16_t;
typedef short int16_t;
typedef unsigned char uint8_t;
typedef signed char int8_t;
#else
#include <stdint.h>  // for uintptr_t and C99 types
#endif  // defined(_MSC_VER) && (_MSC_VER < 1600)
// Types are deprecated. Enable this macro for legacy types.
#ifdef LIBYUV_LEGACY_TYPES
typedef uint64_t uint64;
typedef int64_t int64;
typedef uint32_t uint32;
typedef int32_t int32;
typedef uint16_t uint16;
typedef int16_t int16;
typedef uint8_t uint8;
typedef int8_t int8;
#endif  // LIBYUV_LEGACY_TYPES
#endif  // INT_TYPES_DEFINED

#if !defined(LIBYUV_API)
#if defined(_WIN32) || defined(__CYGWIN__)
#if defined(LIBYUV_BUILDING_SHARED_LIBRARY)
#define LIBYUV_API __declspec(dllexport)
#elif defined(LIBYUV_USING_SHARED_LIBRARY)
#define LIBYUV_API __declspec(dllimport)
#else
#define LIBYUV_API
#endif  // LIBYUV_BUILDING_SHARED_LIBRARY
#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
    (defined(LIBYUV_BUILDING_SHARED_LIBRARY) ||                      \
     defined(LIBYUV_USING_SHARED_LIBRARY))
#define LIBYUV_API __attribute__((visibility("default")))
#else
#define LIBYUV_API
#endif  // __GNUC__
#endif  // LIBYUV_API

// TODO(fbarchard): Remove bool macros.
#define LIBYUV_BOOL int
#define LIBYUV_FALSE 0
#define LIBYUV_TRUE 1

#endif  // INCLUDE_LIBYUV_BASIC_TYPES_H_
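Editor's note: the LIBYUV_API block above selects an export annotation from two build-time macros. The following hypothetical declaration illustrates the three outcomes; it is a sketch, not code from this tree.

#include "libyuv/basic_types.h"

// Hypothetical declaration showing the macro in use; the expansion depends on
// which of the two build macros, if any, is defined:
//   -DLIBYUV_BUILDING_SHARED_LIBRARY  -> dllexport / default visibility
//   -DLIBYUV_USING_SHARED_LIBRARY     -> dllimport on Windows
//   neither defined                   -> expands to nothing (static linking)
LIBYUV_API int SomeLibraryEntryPoint(void);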
111  TMessagesProj/jni/third_party/libyuv/include/libyuv/compare.h  vendored  Normal file
@@ -0,0 +1,111 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_COMPARE_H_
#define INCLUDE_LIBYUV_COMPARE_H_

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Compute a hash for specified memory. Seed of 5381 recommended.
LIBYUV_API
uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed);

// Hamming Distance
LIBYUV_API
uint64_t ComputeHammingDistance(const uint8_t* src_a,
                                const uint8_t* src_b,
                                int count);

// Scan an opaque argb image and return fourcc based on alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
LIBYUV_API
uint32_t ARGBDetect(const uint8_t* argb,
                    int stride_argb,
                    int width,
                    int height);

// Sum Square Error - used to compute Mean Square Error or PSNR.
LIBYUV_API
uint64_t ComputeSumSquareError(const uint8_t* src_a,
                               const uint8_t* src_b,
                               int count);

LIBYUV_API
uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a, int stride_a,
                                    const uint8_t* src_b, int stride_b,
                                    int width, int height);

static const int kMaxPsnr = 128;

LIBYUV_API
double SumSquareErrorToPsnr(uint64_t sse, uint64_t count);

LIBYUV_API
double CalcFramePsnr(const uint8_t* src_a, int stride_a,
                     const uint8_t* src_b, int stride_b,
                     int width, int height);

LIBYUV_API
double I420Psnr(const uint8_t* src_y_a, int stride_y_a,
                const uint8_t* src_u_a, int stride_u_a,
                const uint8_t* src_v_a, int stride_v_a,
                const uint8_t* src_y_b, int stride_y_b,
                const uint8_t* src_u_b, int stride_u_b,
                const uint8_t* src_v_b, int stride_v_b,
                int width, int height);

LIBYUV_API
double CalcFrameSsim(const uint8_t* src_a, int stride_a,
                     const uint8_t* src_b, int stride_b,
                     int width, int height);

LIBYUV_API
double I420Ssim(const uint8_t* src_y_a, int stride_y_a,
                const uint8_t* src_u_a, int stride_u_a,
                const uint8_t* src_v_a, int stride_v_a,
                const uint8_t* src_y_b, int stride_y_b,
                const uint8_t* src_u_b, int stride_u_b,
                const uint8_t* src_v_b, int stride_v_b,
                int width, int height);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_COMPARE_H_
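Editor's note: a minimal sketch of how these declarations compose: sum-of-squared-error over a plane feeds SumSquareErrorToPsnr, and HashDjb2 takes the seed of 5381 the comment recommends. The helper names and the luma-only framing are assumptions, not part of the header.

#include <cstdint>

#include "libyuv/compare.h"

// Compare two same-sized 8-bit luma planes; stride and plane names are
// illustrative.
double LumaPsnr(const uint8_t* plane_a, const uint8_t* plane_b,
                int stride, int width, int height) {
  uint64_t sse = libyuv::ComputeSumSquareErrorPlane(plane_a, stride,
                                                    plane_b, stride,
                                                    width, height);
  // count = number of samples compared; identical planes report kMaxPsnr.
  return libyuv::SumSquareErrorToPsnr(
      sse, static_cast<uint64_t>(width) * height);
}

// Hash a buffer with the seed the header recommends.
uint32_t BufferHash(const uint8_t* data, uint64_t size) {
  return libyuv::HashDjb2(data, size, 5381);
}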
130  TMessagesProj/jni/third_party/libyuv/include/libyuv/compare_row.h  vendored  Normal file
@@ -0,0 +1,130 @@
/*
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_COMPARE_ROW_H_
#define INCLUDE_LIBYUV_COMPARE_ROW_H_

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#if defined(__pnacl__) || defined(__CLR_VER) ||            \
    (defined(__native_client__) && defined(__x86_64__)) || \
    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
    _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif  // VisualStudio >= 2012

// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif  // clang >= 3.4
#endif  // __clang__

// The following are available for Visual C and GCC:
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86))
#define HAS_HASHDJB2_SSE41
#define HAS_SUMSQUAREERROR_SSE2
#define HAS_HAMMINGDISTANCE_SSE42
#endif

// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \
    !defined(__clang__) &&                                                   \
    (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_HASHDJB2_AVX2
#define HAS_SUMSQUAREERROR_AVX2
#endif

// The following are available for GCC and clangcl:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_HAMMINGDISTANCE_SSSE3
#endif

// The following are available for GCC and clangcl:
#if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \
    (defined(__x86_64__) || defined(__i386__))
#define HAS_HAMMINGDISTANCE_AVX2
#endif

// The following are available for Neon:
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
#define HAS_HAMMINGDISTANCE_NEON
#endif

#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_HAMMINGDISTANCE_MSA
#define HAS_SUMSQUAREERROR_MSA
#endif

uint32_t HammingDistance_C(const uint8_t* src_a,
                           const uint8_t* src_b, int count);
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
                               const uint8_t* src_b, int count);
uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
                               const uint8_t* src_b, int count);
uint32_t HammingDistance_AVX2(const uint8_t* src_a,
                              const uint8_t* src_b, int count);
uint32_t HammingDistance_NEON(const uint8_t* src_a,
                              const uint8_t* src_b, int count);
uint32_t HammingDistance_MSA(const uint8_t* src_a,
                             const uint8_t* src_b, int count);
uint32_t SumSquareError_C(const uint8_t* src_a,
                          const uint8_t* src_b, int count);
uint32_t SumSquareError_SSE2(const uint8_t* src_a,
                             const uint8_t* src_b, int count);
uint32_t SumSquareError_AVX2(const uint8_t* src_a,
                             const uint8_t* src_b, int count);
uint32_t SumSquareError_NEON(const uint8_t* src_a,
                             const uint8_t* src_b, int count);
uint32_t SumSquareError_MSA(const uint8_t* src_a,
                            const uint8_t* src_b, int count);

uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);
uint32_t HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_COMPARE_ROW_H_
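Editor's note: the HAS_* macros above only advertise that a kernel was compiled in; a hedged sketch of compile-time selection follows. The shipped library additionally consults runtime CPU detection (libyuv/cpu_id.h) before taking a SIMD path, and the SIMD kernels assume counts aligned to their vector width.

#include "libyuv/compare_row.h"

// Illustrative only: compile-time dispatch between the row kernels declared
// above, with the portable C version as the fallback.
uint32_t SumSquareErrorRow(const uint8_t* a, const uint8_t* b, int count) {
#if defined(HAS_SUMSQUAREERROR_NEON)
  return libyuv::SumSquareError_NEON(a, b, count);
#elif defined(HAS_SUMSQUAREERROR_SSE2)
  return libyuv::SumSquareError_SSE2(a, b, count);
#else
  return libyuv::SumSquareError_C(a, b, count);
#endif
}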
996  TMessagesProj/jni/third_party/libyuv/include/libyuv/convert.h  vendored  Normal file
@@ -0,0 +1,996 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_CONVERT_H_
#define INCLUDE_LIBYUV_CONVERT_H_

#include "libyuv/basic_types.h"

#include "libyuv/rotate.h"  // For enum RotationMode.

// TODO(fbarchard): fix WebRTC source to include following libyuv headers:
#include "libyuv/convert_argb.h"      // For WebRTC I420ToARGB. b/620
#include "libyuv/convert_from.h"      // For WebRTC ConvertFromI420. b/620
#include "libyuv/planar_functions.h"  // For WebRTC I420Rect, CopyPlane. b/618

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Convert I444 to I420.
LIBYUV_API
int I444ToI420(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert I444 to NV12.
LIBYUV_API
int I444ToNV12(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert I444 to NV21.
LIBYUV_API
int I444ToNV21(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_vu, int dst_stride_vu,
               int width, int height);

// Convert I422 to I420.
LIBYUV_API
int I422ToI420(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert I422 to I444.
LIBYUV_API
int I422ToI444(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert I422 to I210.
LIBYUV_API
int I422ToI210(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_u, int dst_stride_u,
               uint16_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert MM21 to NV12.
LIBYUV_API
int MM21ToNV12(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_uv, int src_stride_uv,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert MM21 to I420.
LIBYUV_API
int MM21ToI420(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_uv, int src_stride_uv,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert MM21 to YUY2.
LIBYUV_API
int MM21ToYUY2(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_uv, int src_stride_uv,
               uint8_t* dst_yuy2, int dst_stride_yuy2,
               int width, int height);

// Convert MT2T to P010.
// Note that src_y and src_uv point to packed 10-bit values, so the Y plane
// will be 10 / 8 times the dimensions of the image. Also for this reason,
// src_stride_y and src_stride_uv are given in bytes.
LIBYUV_API
int MT2TToP010(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_uv, int src_stride_uv,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_uv, int dst_stride_uv,
               int width, int height);
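Editor's note: a hedged sketch of the stride arithmetic the MT2TToP010 comment implies: source strides are in bytes at 10 bits per sample. The destination-stride convention for the 16-bit P010 planes should be verified against this tree; width in uint16_t units is assumed here, and all names are illustrative.

#include <cstddef>
#include <cstdint>
#include <vector>

#include "libyuv/convert.h"

bool Mt2tToP010(const uint8_t* src_y, const uint8_t* src_uv,
                int width, int height,
                std::vector<uint16_t>* dst_y, std::vector<uint16_t>* dst_uv) {
  const int src_stride_y = width * 10 / 8;   // bytes, per the note above
  const int src_stride_uv = width * 10 / 8;  // interleaved UV rows, in bytes
  dst_y->resize(static_cast<std::size_t>(width) * height);
  dst_uv->resize(static_cast<std::size_t>(width) * ((height + 1) / 2));
  return libyuv::MT2TToP010(src_y, src_stride_y, src_uv, src_stride_uv,
                            dst_y->data(), width, dst_uv->data(), width,
                            width, height) == 0;  // 0 indicates success
}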
// Convert I422 to NV21.
LIBYUV_API
int I422ToNV21(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_vu, int dst_stride_vu,
               int width, int height);

// Copy I420 to I420.
#define I420ToI420 I420Copy
LIBYUV_API
int I420Copy(const uint8_t* src_y, int src_stride_y,
             const uint8_t* src_u, int src_stride_u,
             const uint8_t* src_v, int src_stride_v,
             uint8_t* dst_y, int dst_stride_y,
             uint8_t* dst_u, int dst_stride_u,
             uint8_t* dst_v, int dst_stride_v,
             int width, int height);

// Convert I420 to I444.
LIBYUV_API
int I420ToI444(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Copy I010 to I010.
#define I010ToI010 I010Copy
#define H010ToH010 I010Copy
LIBYUV_API
int I010Copy(const uint16_t* src_y, int src_stride_y,
             const uint16_t* src_u, int src_stride_u,
             const uint16_t* src_v, int src_stride_v,
             uint16_t* dst_y, int dst_stride_y,
             uint16_t* dst_u, int dst_stride_u,
             uint16_t* dst_v, int dst_stride_v,
             int width, int height);

// Convert 10 bit YUV to 8 bit.
#define H010ToH420 I010ToI420
LIBYUV_API
int I010ToI420(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

#define H210ToH420 I210ToI420
LIBYUV_API
int I210ToI420(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

#define H210ToH422 I210ToI422
LIBYUV_API
int I210ToI422(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

#define H410ToH420 I410ToI420
LIBYUV_API
int I410ToI420(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

#define H410ToH444 I410ToI444
LIBYUV_API
int I410ToI444(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

#define H012ToH420 I012ToI420
LIBYUV_API
int I012ToI420(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

#define H212ToH422 I212ToI422
LIBYUV_API
int I212ToI422(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

#define H412ToH444 I412ToI444
LIBYUV_API
int I412ToI444(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

#define I412ToI012 I410ToI010
#define H410ToH010 I410ToI010
#define H412ToH012 I410ToI010
LIBYUV_API
int I410ToI010(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_u, int dst_stride_u,
               uint16_t* dst_v, int dst_stride_v,
               int width, int height);

#define I212ToI012 I210ToI010
#define H210ToH010 I210ToI010
#define H212ToH012 I210ToI010
LIBYUV_API
int I210ToI010(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_u, int dst_stride_u,
               uint16_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert I010 to I410.
LIBYUV_API
int I010ToI410(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_u, int dst_stride_u,
               uint16_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert I012 to I412.
#define I012ToI412 I010ToI410

// Convert I210 to I410.
LIBYUV_API
int I210ToI410(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_u, int dst_stride_u,
               uint16_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert I212 to I412.
#define I212ToI412 I210ToI410

// Convert I010 to P010.
LIBYUV_API
int I010ToP010(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert I210 to P210.
LIBYUV_API
int I210ToP210(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert I012 to P012.
LIBYUV_API
int I012ToP012(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert I212 to P212.
LIBYUV_API
int I212ToP212(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_u, int src_stride_u,
               const uint16_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert I400 (grey) to I420.
LIBYUV_API
int I400ToI420(const uint8_t* src_y, int src_stride_y,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert I400 (grey) to NV21.
LIBYUV_API
int I400ToNV21(const uint8_t* src_y, int src_stride_y,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_vu, int dst_stride_vu,
               int width, int height);

#define J400ToJ420 I400ToI420

// Convert NV12 to I420.
LIBYUV_API
int NV12ToI420(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_uv, int src_stride_uv,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert NV21 to I420.
LIBYUV_API
int NV21ToI420(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_vu, int src_stride_vu,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert NV12 to NV24.
LIBYUV_API
int NV12ToNV24(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_uv, int src_stride_uv,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert NV16 to NV24.
LIBYUV_API
int NV16ToNV24(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_uv, int src_stride_uv,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert P010 to I010.
LIBYUV_API
int P010ToI010(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_uv, int src_stride_uv,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_u, int dst_stride_u,
               uint16_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert P012 to I012.
LIBYUV_API
int P012ToI012(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_uv, int src_stride_uv,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_u, int dst_stride_u,
               uint16_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert P010 to P410.
LIBYUV_API
int P010ToP410(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_uv, int src_stride_uv,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert P012 to P412.
#define P012ToP412 P010ToP410

// Convert P016 to P416.
#define P016ToP416 P010ToP410

// Convert P210 to P410.
LIBYUV_API
int P210ToP410(const uint16_t* src_y, int src_stride_y,
               const uint16_t* src_uv, int src_stride_uv,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert P212 to P412.
#define P212ToP412 P210ToP410

// Convert P216 to P416.
#define P216ToP416 P210ToP410

// Convert YUY2 to I420.
LIBYUV_API
int YUY2ToI420(const uint8_t* src_yuy2, int src_stride_yuy2,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert UYVY to I420.
LIBYUV_API
int UYVYToI420(const uint8_t* src_uyvy, int src_stride_uyvy,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert AYUV to NV12.
LIBYUV_API
int AYUVToNV12(const uint8_t* src_ayuv, int src_stride_ayuv,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_uv, int dst_stride_uv,
               int width, int height);

// Convert AYUV to NV21.
LIBYUV_API
int AYUVToNV21(const uint8_t* src_ayuv, int src_stride_ayuv,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_vu, int dst_stride_vu,
               int width, int height);

// Convert Android420 to I420.
LIBYUV_API
int Android420ToI420(const uint8_t* src_y, int src_stride_y,
                     const uint8_t* src_u, int src_stride_u,
                     const uint8_t* src_v, int src_stride_v,
                     int src_pixel_stride_uv,
                     uint8_t* dst_y, int dst_stride_y,
                     uint8_t* dst_u, int dst_stride_u,
                     uint8_t* dst_v, int dst_stride_v,
                     int width, int height);
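Editor's note: Android420ToI420 is the one converter above that takes a chroma pixel stride, matching android.media.Image plane descriptions. A sketch under assumed names follows; pixel stride 1 corresponds to fully planar chroma and 2 to the interleaved layout many camera HALs report, and the choice of dst strides equal to the plane widths is illustrative.

#include "libyuv/convert.h"

int AndroidImageToI420(const uint8_t* y, int y_row_stride,
                       const uint8_t* u, int u_row_stride,
                       const uint8_t* v, int v_row_stride,
                       int uv_pixel_stride,
                       uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v,
                       int width, int height) {
  const int half_width = (width + 1) / 2;  // chroma plane width for I420
  return libyuv::Android420ToI420(y, y_row_stride, u, u_row_stride,
                                  v, v_row_stride, uv_pixel_stride,
                                  dst_y, width, dst_u, half_width,
                                  dst_v, half_width, width, height);
}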
// ARGB little endian (bgra in memory) to I420.
LIBYUV_API
int ARGBToI420(const uint8_t* src_argb, int src_stride_argb,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// BGRA little endian (argb in memory) to I420.
LIBYUV_API
int BGRAToI420(const uint8_t* src_bgra, int src_stride_bgra,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// ABGR little endian (rgba in memory) to I420.
LIBYUV_API
int ABGRToI420(const uint8_t* src_abgr, int src_stride_abgr,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// RGBA little endian (abgr in memory) to I420.
LIBYUV_API
int RGBAToI420(const uint8_t* src_rgba, int src_stride_rgba,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// RGB little endian (bgr in memory) to I420.
LIBYUV_API
int RGB24ToI420(const uint8_t* src_rgb24, int src_stride_rgb24,
                uint8_t* dst_y, int dst_stride_y,
                uint8_t* dst_u, int dst_stride_u,
                uint8_t* dst_v, int dst_stride_v,
                int width, int height);

// RGB little endian (bgr in memory) to J420.
LIBYUV_API
int RGB24ToJ420(const uint8_t* src_rgb24, int src_stride_rgb24,
                uint8_t* dst_y, int dst_stride_y,
                uint8_t* dst_u, int dst_stride_u,
                uint8_t* dst_v, int dst_stride_v,
                int width, int height);

// RGB big endian (rgb in memory) to I420.
LIBYUV_API
int RAWToI420(const uint8_t* src_raw, int src_stride_raw,
              uint8_t* dst_y, int dst_stride_y,
              uint8_t* dst_u, int dst_stride_u,
              uint8_t* dst_v, int dst_stride_v,
              int width, int height);

// RGB big endian (rgb in memory) to J420.
LIBYUV_API
int RAWToJ420(const uint8_t* src_raw, int src_stride_raw,
              uint8_t* dst_y, int dst_stride_y,
              uint8_t* dst_u, int dst_stride_u,
              uint8_t* dst_v, int dst_stride_v,
              int width, int height);

// RGB16 (RGBP fourcc) little endian to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t* src_rgb565, int src_stride_rgb565,
                 uint8_t* dst_y, int dst_stride_y,
                 uint8_t* dst_u, int dst_stride_u,
                 uint8_t* dst_v, int dst_stride_v,
                 int width, int height);

// RGB15 (RGBO fourcc) little endian to I420.
LIBYUV_API
int ARGB1555ToI420(const uint8_t* src_argb1555, int src_stride_argb1555,
                   uint8_t* dst_y, int dst_stride_y,
                   uint8_t* dst_u, int dst_stride_u,
                   uint8_t* dst_v, int dst_stride_v,
                   int width, int height);

// RGB12 (R444 fourcc) little endian to I420.
LIBYUV_API
int ARGB4444ToI420(const uint8_t* src_argb4444, int src_stride_argb4444,
                   uint8_t* dst_y, int dst_stride_y,
                   uint8_t* dst_u, int dst_stride_u,
                   uint8_t* dst_v, int dst_stride_v,
                   int width, int height);

// RGB little endian (bgr in memory) to J400.
LIBYUV_API
int RGB24ToJ400(const uint8_t* src_rgb24, int src_stride_rgb24,
                uint8_t* dst_yj, int dst_stride_yj,
                int width, int height);

// RGB big endian (rgb in memory) to J400.
LIBYUV_API
int RAWToJ400(const uint8_t* src_raw, int src_stride_raw,
              uint8_t* dst_yj, int dst_stride_yj,
              int width, int height);

// src_width/height provided by capture.
// dst_width/height for clipping determine final size.
LIBYUV_API
int MJPGToI420(const uint8_t* sample, size_t sample_size,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int src_width, int src_height,
               int dst_width, int dst_height);

// JPEG to NV21.
LIBYUV_API
int MJPGToNV21(const uint8_t* sample, size_t sample_size,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_vu, int dst_stride_vu,
               int src_width, int src_height,
               int dst_width, int dst_height);

// JPEG to NV12.
LIBYUV_API
int MJPGToNV12(const uint8_t* sample, size_t sample_size,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_uv, int dst_stride_uv,
               int src_width, int src_height,
               int dst_width, int dst_height);

// Query size of MJPG in pixels.
LIBYUV_API
int MJPGSize(const uint8_t* sample, size_t sample_size,
             int* width, int* height);

// Convert camera sample to I420 with cropping, rotation and vertical flip.
// "sample_size" is needed to parse MJPG.
// "dst_stride_y" is the number of bytes in a row of the dst_y plane.
//   Normally this would be the same as dst_width, with recommended alignment
//   to 16 bytes for better efficiency.
//   If rotation of 90 or 270 is used, stride is affected. The caller should
//   allocate the I420 buffer according to rotation.
// "dst_stride_u" is the number of bytes in a row of the dst_u plane.
//   Normally this would be the same as (dst_width + 1) / 2, with
//   recommended alignment to 16 bytes for better efficiency.
//   If rotation of 90 or 270 is used, stride is affected.
// "crop_x" and "crop_y" are the starting position for cropping.
//   To center: crop_x = (src_width - dst_width) / 2,
//              crop_y = (src_height - dst_height) / 2.
// "src_width" / "src_height" is the size of src_frame in pixels.
//   "src_height" can be negative, indicating a vertically flipped image
//   source.
// "crop_width" / "crop_height" is the size to crop the src to.
//   Must be less than or equal to src_width/src_height.
//   Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
// "fourcc" is a fourcc, e.g. 'I420' or 'YUY2'.
// Returns 0 for success; -1 for invalid parameter. Non-zero for failure.
LIBYUV_API
int ConvertToI420(const uint8_t* sample, size_t sample_size,
                  uint8_t* dst_y, int dst_stride_y,
                  uint8_t* dst_u, int dst_stride_u,
                  uint8_t* dst_v, int dst_stride_v,
                  int crop_x, int crop_y,
                  int src_width, int src_height,
                  int crop_width, int crop_height,
                  enum RotationMode rotation,
                  uint32_t fourcc);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_CONVERT_H_
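Editor's note: putting the ConvertToI420 comment into practice, a centered crop uses exactly the crop_x/crop_y formulas given there. kRotate0 (from the RotationMode enum in libyuv/rotate.h) and FOURCC_YUY2 (from libyuv/video_common.h) are assumed; buffer names are illustrative.

#include "libyuv/convert.h"
#include "libyuv/video_common.h"  // assumed to provide FOURCC_YUY2

int CenterCropYuy2ToI420(const uint8_t* sample, size_t sample_size,
                         int src_width, int src_height,
                         uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v,
                         int dst_width, int dst_height) {
  // Centered crop, per the formulas in the ConvertToI420 comment.
  const int crop_x = (src_width - dst_width) / 2;
  const int crop_y = (src_height - dst_height) / 2;
  const int half_width = (dst_width + 1) / 2;
  return libyuv::ConvertToI420(sample, sample_size,
                               dst_y, dst_width,
                               dst_u, half_width,
                               dst_v, half_width,
                               crop_x, crop_y,
                               src_width, src_height,
                               dst_width, dst_height,
                               libyuv::kRotate0, libyuv::FOURCC_YUY2);
}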
2313  TMessagesProj/jni/third_party/libyuv/include/libyuv/convert_argb.h  vendored  Normal file
File diff suppressed because it is too large
203  TMessagesProj/jni/third_party/libyuv/include/libyuv/convert_from.h  vendored  Normal file
@@ -0,0 +1,203 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_
#define INCLUDE_LIBYUV_CONVERT_FROM_H_

#include "libyuv/basic_types.h"
#include "libyuv/rotate.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// See also convert.h for conversions from formats to I420.

// Convert 8 bit YUV to 10 bit.
#define H420ToH010 I420ToI010
LIBYUV_API
int I420ToI010(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_u, int dst_stride_u,
               uint16_t* dst_v, int dst_stride_v,
               int width, int height);

// Convert 8 bit YUV to 12 bit.
#define H420ToH012 I420ToI012
LIBYUV_API
int I420ToI012(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint16_t* dst_y, int dst_stride_y,
               uint16_t* dst_u, int dst_stride_u,
               uint16_t* dst_v, int dst_stride_v,
               int width, int height);

LIBYUV_API
int I420ToI422(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

LIBYUV_API
int I420ToI444(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_u, int dst_stride_u,
               uint8_t* dst_v, int dst_stride_v,
               int width, int height);

// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
LIBYUV_API
int I400Copy(const uint8_t* src_y, int src_stride_y,
             uint8_t* dst_y, int dst_stride_y,
             int width, int height);

LIBYUV_API
int I420ToNV12(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_uv, int dst_stride_uv,
               int width, int height);

LIBYUV_API
int I420ToNV21(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_vu, int dst_stride_vu,
               int width, int height);

LIBYUV_API
int I420ToYUY2(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_yuy2, int dst_stride_yuy2,
               int width, int height);

LIBYUV_API
int I420ToUYVY(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_uyvy, int dst_stride_uyvy,
               int width, int height);

// The following are from convert_argb.h
// DEPRECATED: The prototypes will be removed in future. Use convert_argb.h

// Convert I420 to ARGB.
LIBYUV_API
int I420ToARGB(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_argb, int dst_stride_argb,
               int width, int height);

// Convert I420 to ABGR.
LIBYUV_API
int I420ToABGR(const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_u, int src_stride_u,
               const uint8_t* src_v, int src_stride_v,
               uint8_t* dst_abgr, int dst_stride_abgr,
               int width, int height);

// Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
LIBYUV_API
int ConvertFromI420(const uint8_t* y, int y_stride,
                    const uint8_t* u, int u_stride,
                    const uint8_t* v, int v_stride,
                    uint8_t* dst_sample, int dst_sample_stride,
                    int width, int height,
                    uint32_t fourcc);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_CONVERT_FROM_H_
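Editor's note: a sketch of the generic exporter above: ConvertFromI420 selects the output layout from a fourcc, and a dst_sample_stride of 0 means contiguous rows per the comment on the declaration. The FOURCC() macro is assumed from libyuv/video_common.h, and the helper name is illustrative.

#include "libyuv/convert_from.h"
#include "libyuv/video_common.h"  // assumed to provide the FOURCC() macro

int ExportAsYuy2(const uint8_t* y, int y_stride,
                 const uint8_t* u, int u_stride,
                 const uint8_t* v, int v_stride,
                 uint8_t* dst_yuy2, int width, int height) {
  // 0 stride = contiguous destination rows.
  return libyuv::ConvertFromI420(y, y_stride, u, u_stride, v, v_stride,
                                 dst_yuy2, 0, width, height,
                                 FOURCC('Y', 'U', 'Y', '2'));
}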
381  TMessagesProj/jni/third_party/libyuv/include/libyuv/convert_from_argb.h  vendored  Normal file
|
|
@ -0,0 +1,381 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
|
||||
#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Copy ARGB to ARGB.
|
||||
#define ARGBToARGB ARGBCopy
|
||||
LIBYUV_API
|
||||
int ARGBCopy(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To BGRA.
|
||||
LIBYUV_API
|
||||
int ARGBToBGRA(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_bgra,
|
||||
int dst_stride_bgra,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To ABGR.
|
||||
LIBYUV_API
|
||||
int ARGBToABGR(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGBA.
|
||||
LIBYUV_API
|
||||
int ARGBToRGBA(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_rgba,
|
||||
int dst_stride_rgba,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Aliases
|
||||
#define ARGBToAB30 ABGRToAR30
|
||||
#define ABGRToAB30 ARGBToAR30
|
||||
|
||||
// Convert ABGR To AR30.
|
||||
LIBYUV_API
|
||||
int ABGRToAR30(const uint8_t* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8_t* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To AR30.
|
||||
LIBYUV_API
|
||||
int ARGBToAR30(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Aliases
|
||||
#define ABGRToRGB24 ARGBToRAW
|
||||
#define ABGRToRAW ARGBToRGB24
|
||||
|
||||
// Convert ARGB To RGB24.
|
||||
LIBYUV_API
|
||||
int ARGBToRGB24(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RAW.
|
||||
LIBYUV_API
|
||||
int ARGBToRAW(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_raw,
|
||||
int dst_stride_raw,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGB565.
|
||||
LIBYUV_API
|
||||
int ARGBToRGB565(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
|
||||
// Values in dither matrix from 0 to 7 recommended.
|
||||
// The order of the dither matrix is first byte is upper left.
|
||||
// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
|
||||
// const uint8_t(*dither)[4][4];
|
||||
LIBYUV_API
|
||||
int ARGBToRGB565Dither(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
const uint8_t* dither4x4,
|
||||
                 int width,
                 int height);

// Convert ARGB To ARGB1555.
LIBYUV_API
int ARGBToARGB1555(const uint8_t* src_argb,
                   int src_stride_argb,
                   uint8_t* dst_argb1555,
                   int dst_stride_argb1555,
                   int width,
                   int height);

// Convert ARGB To ARGB4444.
LIBYUV_API
int ARGBToARGB4444(const uint8_t* src_argb,
                   int src_stride_argb,
                   uint8_t* dst_argb4444,
                   int dst_stride_argb4444,
                   int width,
                   int height);

// Convert ARGB To I444.
LIBYUV_API
int ARGBToI444(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height);

// Convert ARGB to AR64.
LIBYUV_API
int ARGBToAR64(const uint8_t* src_argb,
               int src_stride_argb,
               uint16_t* dst_ar64,
               int dst_stride_ar64,
               int width,
               int height);

// Convert ABGR to AB64.
#define ABGRToAB64 ARGBToAR64

// Convert ARGB to AB64.
LIBYUV_API
int ARGBToAB64(const uint8_t* src_argb,
               int src_stride_argb,
               uint16_t* dst_ab64,
               int dst_stride_ab64,
               int width,
               int height);

// Convert ABGR to AR64.
#define ABGRToAR64 ARGBToAB64

// Convert ARGB To I422.
LIBYUV_API
int ARGBToI422(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height);

// Convert ARGB To I420. (also in convert.h)
LIBYUV_API
int ARGBToI420(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height);

// Convert ARGB to J420. (JPeg full range I420).
LIBYUV_API
int ARGBToJ420(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_yj,
               int dst_stride_yj,
               uint8_t* dst_uj,
               int dst_stride_uj,
               uint8_t* dst_vj,
               int dst_stride_vj,
               int width,
               int height);

// Convert ARGB to J422.
LIBYUV_API
int ARGBToJ422(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_yj,
               int dst_stride_yj,
               uint8_t* dst_uj,
               int dst_stride_uj,
               uint8_t* dst_vj,
               int dst_stride_vj,
               int width,
               int height);

// Convert ARGB to J400. (JPeg full range).
LIBYUV_API
int ARGBToJ400(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_yj,
               int dst_stride_yj,
               int width,
               int height);

// Convert ABGR to J420. (JPeg full range I420).
LIBYUV_API
int ABGRToJ420(const uint8_t* src_abgr,
               int src_stride_abgr,
               uint8_t* dst_yj,
               int dst_stride_yj,
               uint8_t* dst_uj,
               int dst_stride_uj,
               uint8_t* dst_vj,
               int dst_stride_vj,
               int width,
               int height);

// Convert ABGR to J422.
LIBYUV_API
int ABGRToJ422(const uint8_t* src_abgr,
               int src_stride_abgr,
               uint8_t* dst_yj,
               int dst_stride_yj,
               uint8_t* dst_uj,
               int dst_stride_uj,
               uint8_t* dst_vj,
               int dst_stride_vj,
               int width,
               int height);

// Convert ABGR to J400. (JPeg full range).
LIBYUV_API
int ABGRToJ400(const uint8_t* src_abgr,
               int src_stride_abgr,
               uint8_t* dst_yj,
               int dst_stride_yj,
               int width,
               int height);

// Convert RGBA to J400. (JPeg full range).
LIBYUV_API
int RGBAToJ400(const uint8_t* src_rgba,
               int src_stride_rgba,
               uint8_t* dst_yj,
               int dst_stride_yj,
               int width,
               int height);

// Convert ARGB to I400.
LIBYUV_API
int ARGBToI400(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height);

// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB)
LIBYUV_API
int ARGBToG(const uint8_t* src_argb,
            int src_stride_argb,
            uint8_t* dst_g,
            int dst_stride_g,
            int width,
            int height);

// Convert ARGB To NV12.
LIBYUV_API
int ARGBToNV12(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height);

// Convert ARGB To NV21.
LIBYUV_API
int ARGBToNV21(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_vu,
               int dst_stride_vu,
               int width,
               int height);

// Convert ABGR To NV12.
LIBYUV_API
int ABGRToNV12(const uint8_t* src_abgr,
               int src_stride_abgr,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height);

// Convert ABGR To NV21.
LIBYUV_API
int ABGRToNV21(const uint8_t* src_abgr,
               int src_stride_abgr,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_vu,
               int dst_stride_vu,
               int width,
               int height);

// Convert ARGB To YUY2.
LIBYUV_API
int ARGBToYUY2(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_yuy2,
               int dst_stride_yuy2,
               int width,
               int height);

// Convert ARGB To UYVY.
LIBYUV_API
int ARGBToUYVY(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_uyvy,
               int dst_stride_uyvy,
               int width,
               int height);

// RAW to JNV21 full range NV21
LIBYUV_API
int RAWToJNV21(const uint8_t* src_raw,
               int src_stride_raw,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_vu,
               int dst_stride_vu,
               int width,
               int height);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
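A minimal usage sketch for the conversions declared above; the helper name and the tightly packed strides (stride == width) are illustrative assumptions, not part of the header:

// Hedged sketch: convert one width x height ARGB frame to I420.
// Real callers pass their own strides; a negative height flips vertically.
#include <vector>
#include "libyuv/convert_from_argb.h"

void ArgbFrameToI420(const uint8_t* argb, int width, int height) {
  std::vector<uint8_t> y(width * height);
  std::vector<uint8_t> u((width / 2) * (height / 2));
  std::vector<uint8_t> v((width / 2) * (height / 2));
  // Returns 0 on success.
  libyuv::ARGBToI420(argb, width * 4,
                     y.data(), width,
                     u.data(), width / 2,
                     v.data(), width / 2,
                     width, height);
}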
127
TMessagesProj/jni/third_party/libyuv/include/libyuv/cpu_id.h
vendored
Normal file
@@ -0,0 +1,127 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_CPU_ID_H_
#define INCLUDE_LIBYUV_CPU_ID_H_

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Internal flag to indicate cpuid requires initialization.
static const int kCpuInitialized = 0x1;

// These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2;
static const int kCpuHasNEON = 0x4;
// 0x8 reserved for future ARM flag.

// These flags are only valid on x86 processors.
static const int kCpuHasX86 = 0x10;
static const int kCpuHasSSE2 = 0x20;
static const int kCpuHasSSSE3 = 0x40;
static const int kCpuHasSSE41 = 0x80;
static const int kCpuHasSSE42 = 0x100;  // unused at this time.
static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasF16C = 0x2000;
static const int kCpuHasGFNI = 0x4000;
static const int kCpuHasAVX512BW = 0x8000;
static const int kCpuHasAVX512VL = 0x10000;
static const int kCpuHasAVX512VNNI = 0x20000;
static const int kCpuHasAVX512VBMI = 0x40000;
static const int kCpuHasAVX512VBMI2 = 0x80000;
static const int kCpuHasAVX512VBITALG = 0x100000;
static const int kCpuHasAVX512VPOPCNTDQ = 0x200000;

// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x400000;
static const int kCpuHasMSA = 0x800000;

// These flags are only valid on LOONGARCH processors.
static const int kCpuHasLOONGARCH = 0x2000000;
static const int kCpuHasLSX = 0x4000000;
static const int kCpuHasLASX = 0x8000000;

// Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags.
LIBYUV_API
int InitCpuFlags(void);

// Detect CPU has SSE2 etc.
// Test_flag parameter should be one of kCpuHas constants above.
// Returns non-zero if instruction set is detected.
static __inline int TestCpuFlag(int test_flag) {
  LIBYUV_API extern int cpu_info_;
#ifdef __ATOMIC_RELAXED
  int cpu_info = __atomic_load_n(&cpu_info_, __ATOMIC_RELAXED);
#else
  int cpu_info = cpu_info_;
#endif
  return (!cpu_info ? InitCpuFlags() : cpu_info) & test_flag;
}

// Internal function for parsing /proc/cpuinfo.
LIBYUV_API
int ArmCpuCaps(const char* cpuinfo_name);
LIBYUV_API
int MipsCpuCaps(const char* cpuinfo_name);

// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// MaskCpuFlags(-1) to enable all cpu specific optimizations.
// MaskCpuFlags(1) to disable all cpu specific optimizations.
// MaskCpuFlags(0) to reset state so next call will auto init.
// Returns cpu_info flags.
LIBYUV_API
int MaskCpuFlags(int enable_flags);

// Sets the CPU flags to |cpu_flags|, bypassing the detection code. |cpu_flags|
// should be a valid combination of the kCpuHas constants above and include
// kCpuInitialized. Use this method when running in a sandboxed process where
// the detection code might fail (as it might access /proc/cpuinfo). In such
// cases the cpu_info can be obtained from a non sandboxed process by calling
// InitCpuFlags() and passed to the sandboxed process (via command line
// parameters, IPC...) which can then call this method to initialize the CPU
// flags.
// Notes:
// - when specifying 0 for |cpu_flags|, the auto initialization is enabled
//   again.
// - enabling CPU features that are not supported by the CPU will result in
//   undefined behavior.
// TODO(fbarchard): consider writing a helper function that translates from
// other library CPU info to libyuv CPU info and add a .md doc that explains
// CPU detection.
static __inline void SetCpuFlags(int cpu_flags) {
  LIBYUV_API extern int cpu_info_;
#ifdef __ATOMIC_RELAXED
  __atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED);
#else
  cpu_info_ = cpu_flags;
#endif
}

// Low level cpuid for X86. Returns zeros on other CPUs.
// eax is the info type that you want.
// ecx is typically the cpu number, and should normally be zero.
LIBYUV_API
void CpuId(int info_eax, int info_ecx, int* cpu_info);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_CPU_ID_H_
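A hedged sketch of the runtime-dispatch pattern the comments above describe: probe a flag with TestCpuFlag, and use MaskCpuFlags in tests to force the portable C paths. The function name and logging are illustrative only.

#include <cstdio>
#include "libyuv/cpu_id.h"

void ReportSimdSupport() {
  if (libyuv::TestCpuFlag(libyuv::kCpuHasNEON)) {
    std::printf("NEON paths available\n");
  }
  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3)) {
    std::printf("SSSE3 paths available\n");
  }
  libyuv::MaskCpuFlags(1);  // disable all cpu specific optimizations
  libyuv::MaskCpuFlags(0);  // reset so the next TestCpuFlag auto-inits
}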
1949
TMessagesProj/jni/third_party/libyuv/include/libyuv/loongson_intrinsics.h
vendored
Normal file
File diff suppressed because it is too large
246
TMessagesProj/jni/third_party/libyuv/include/libyuv/macros_msa.h
vendored
Normal file
@@ -0,0 +1,246 @@
/*
 *  Copyright 2016 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_MACROS_MSA_H_
#define INCLUDE_LIBYUV_MACROS_MSA_H_

#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include <msa.h>
#include <stdint.h>

#if (__mips_isa_rev >= 6)
#define LW(psrc)                                       \
  ({                                                   \
    const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
    uint32_t val_m;                                    \
    asm volatile("lw %[val_m], %[psrc_lw_m]  \n"       \
                 : [val_m] "=r"(val_m)                 \
                 : [psrc_lw_m] "m"(*psrc_lw_m));       \
    val_m;                                             \
  })

#if (__mips == 64)
#define LD(psrc)                                       \
  ({                                                   \
    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
    uint64_t val_m = 0;                                \
    asm volatile("ld %[val_m], %[psrc_ld_m]  \n"       \
                 : [val_m] "=r"(val_m)                 \
                 : [psrc_ld_m] "m"(*psrc_ld_m));       \
    val_m;                                             \
  })
#else  // !(__mips == 64)
#define LD(psrc)                                                         \
  ({                                                                     \
    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc);                   \
    uint32_t val0_m, val1_m;                                             \
    uint64_t val_m = 0;                                                  \
    val0_m = LW(psrc_ld_m);                                              \
    val1_m = LW(psrc_ld_m + 4);                                          \
    val_m = (uint64_t)(val1_m); /* NOLINT */                             \
    val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
    val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */           \
    val_m;                                                               \
  })
#endif  // (__mips == 64)

#define SW(val, pdst)                                   \
  ({                                                    \
    uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
    uint32_t val_m = (val);                             \
    asm volatile("sw %[val_m], %[pdst_sw_m]  \n"        \
                 : [pdst_sw_m] "=m"(*pdst_sw_m)         \
                 : [val_m] "r"(val_m));                 \
  })

#if (__mips == 64)
#define SD(val, pdst)                                   \
  ({                                                    \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
    uint64_t val_m = (val);                             \
    asm volatile("sd %[val_m], %[pdst_sd_m]  \n"        \
                 : [pdst_sd_m] "=m"(*pdst_sd_m)         \
                 : [val_m] "r"(val_m));                 \
  })
#else  // !(__mips == 64)
#define SD(val, pdst)                                        \
  ({                                                         \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */      \
    uint32_t val0_m, val1_m;                                 \
    val0_m = (uint32_t)((val)&0x00000000FFFFFFFF);           \
    val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
    SW(val0_m, pdst_sd_m);                                   \
    SW(val1_m, pdst_sd_m + 4);                               \
  })
#endif  // !(__mips == 64)
#else  // !(__mips_isa_rev >= 6)
#define LW(psrc)                                \
  ({                                            \
    uint8_t* psrc_lw_m = (uint8_t*)(psrc);      \
    uint32_t val_lw_m;                          \
                                                \
    __asm__ volatile(                           \
        "lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \
        "lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \
                                                \
        : [val_lw_m] "=&r"(val_lw_m)            \
        : [psrc_lw_m] "r"(psrc_lw_m));          \
                                                \
    val_lw_m;                                   \
  })

#if (__mips == 64)
#define LD(psrc)                                \
  ({                                            \
    uint8_t* psrc_ld_m = (uint8_t*)(psrc);      \
    uint64_t val_ld_m = 0;                      \
                                                \
    __asm__ volatile(                           \
        "ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \
        "ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \
                                                \
        : [val_ld_m] "=&r"(val_ld_m)            \
        : [psrc_ld_m] "r"(psrc_ld_m));          \
                                                \
    val_ld_m;                                   \
  })
#else  // !(__mips == 64)
#define LD(psrc)                                                         \
  ({                                                                     \
    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc);                   \
    uint32_t val0_m, val1_m;                                             \
    uint64_t val_m = 0;                                                  \
    val0_m = LW(psrc_ld_m);                                              \
    val1_m = LW(psrc_ld_m + 4);                                          \
    val_m = (uint64_t)(val1_m); /* NOLINT */                             \
    val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
    val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */           \
    val_m;                                                               \
  })
#endif  // (__mips == 64)

#define SW(val, pdst)                                   \
  ({                                                    \
    uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
    uint32_t val_m = (val);                             \
    asm volatile("usw %[val_m], %[pdst_sw_m]  \n"       \
                 : [pdst_sw_m] "=m"(*pdst_sw_m)         \
                 : [val_m] "r"(val_m));                 \
  })

#define SD(val, pdst)                                        \
  ({                                                         \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */      \
    uint32_t val0_m, val1_m;                                 \
    val0_m = (uint32_t)((val)&0x00000000FFFFFFFF);           \
    val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
    SW(val0_m, pdst_sd_m);                                   \
    SW(val1_m, pdst_sd_m + 4);                               \
  })
#endif  // (__mips_isa_rev >= 6)

// TODO(fbarchard): Consider removing __VAR_ARGS versions.
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
#define LD_UB(...) LD_B(const v16u8, __VA_ARGS__)

#define LD_H(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
#define LD_UH(...) LD_H(const v8u16, __VA_ARGS__)

#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)

#define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
#define ST_UH(...) ST_H(v8u16, __VA_ARGS__)

/* Description : Load two vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Load 16 byte elements in 'out0' from (psrc)
                 Load 16 byte elements in 'out1' from (psrc + stride)
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) \
  {                                            \
    out0 = LD_B(RTYPE, (psrc));                \
    out1 = LD_B(RTYPE, (psrc) + stride);       \
  }
#define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__)

#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
  {                                                        \
    LD_B2(RTYPE, (psrc), stride, out0, out1);              \
    LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
  }
#define LD_UB4(...) LD_B4(const v16u8, __VA_ARGS__)

/* Description : Store two vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 16 byte elements from 'in0' to (pdst)
                 Store 16 byte elements from 'in1' to (pdst + stride)
*/
#define ST_B2(RTYPE, in0, in1, pdst, stride) \
  {                                          \
    ST_B(RTYPE, in0, (pdst));                \
    ST_B(RTYPE, in1, (pdst) + stride);       \
  }
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)

#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride)   \
  {                                                      \
    ST_B2(RTYPE, in0, in1, (pdst), stride);              \
    ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
  }
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)

/* Description : Store vectors of 8 halfword elements with stride
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 8 halfword elements from 'in0' to (pdst)
                 Store 8 halfword elements from 'in1' to (pdst + stride)
*/
#define ST_H2(RTYPE, in0, in1, pdst, stride) \
  {                                          \
    ST_H(RTYPE, in0, (pdst));                \
    ST_H(RTYPE, in1, (pdst) + stride);       \
  }
#define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__)

// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly.
/* Description : Shuffle byte vector elements as per mask vector
   Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from 'in0' & 'in1' are copied selectively to
                 'out0' as per control vector 'mask0'
*/
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1)  \
  {                                                                   \
    out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
    out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \
  }
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)

/* Description : Interleave both left and right half of input vectors
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Right half of byte elements from 'in0' and 'in1' are
                 interleaved and written to 'out0'
*/
#define ILVRL_B2(RTYPE, in0, in1, out0, out1)           \
  {                                                     \
    out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
    out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
  }
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)

#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */

#endif  // INCLUDE_LIBYUV_MACROS_MSA_H_
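A minimal sketch of the LD_UB2/ST_UB2 macros above in use. This only compiles for a MIPS target built with MSA support (e.g. -mmsa); the function name is a hypothetical example, not part of the library.

#include "libyuv/macros_msa.h"

#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
void CopyTwoRows16_MSA(const uint8_t* src, int stride, uint8_t* dst) {
  v16u8 row0, row1;
  LD_UB2(src, stride, row0, row1);  // load 16 bytes from src and src + stride
  ST_UB2(row0, row1, dst, stride);  // store them to dst and dst + stride
}
#endif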
195
TMessagesProj/jni/third_party/libyuv/include/libyuv/mjpeg_decoder.h
vendored
Normal file
@@ -0,0 +1,195 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_
#define INCLUDE_LIBYUV_MJPEG_DECODER_H_

#include "libyuv/basic_types.h"

#ifdef __cplusplus
// NOTE: For a simplified public API use convert.h MJPGToI420().

struct jpeg_common_struct;
struct jpeg_decompress_struct;
struct jpeg_source_mgr;

namespace libyuv {

#ifdef __cplusplus
extern "C" {
#endif

LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size);

#ifdef __cplusplus
}  // extern "C"
#endif

static const uint32_t kUnknownDataSize = 0xFFFFFFFF;

enum JpegSubsamplingType {
  kJpegYuv420,
  kJpegYuv422,
  kJpegYuv444,
  kJpegYuv400,
  kJpegUnknown
};

struct Buffer {
  const uint8_t* data;
  int len;
};

struct BufferVector {
  Buffer* buffers;
  int len;
  int pos;
};

struct SetJmpErrorMgr;

// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
// simply independent JPEG images with a fixed huffman table (which is omitted).
// It is rarely used in video transmission, but is common as a camera capture
// format, especially in Logitech devices. This class implements a decoder for
// MJPEG frames.
//
// See http://tools.ietf.org/html/rfc2435
class LIBYUV_API MJpegDecoder {
 public:
  typedef void (*CallbackFunction)(void* opaque,
                                   const uint8_t* const* data,
                                   const int* strides,
                                   int rows);

  static const int kColorSpaceUnknown;
  static const int kColorSpaceGrayscale;
  static const int kColorSpaceRgb;
  static const int kColorSpaceYCbCr;
  static const int kColorSpaceCMYK;
  static const int kColorSpaceYCCK;

  MJpegDecoder();
  ~MJpegDecoder();

  // Loads a new frame, reads its headers, and determines the uncompressed
  // image format.
  // Returns LIBYUV_TRUE if image looks valid and format is supported.
  // If return value is LIBYUV_TRUE, then the values for all the following
  // getters are populated.
  // src_len is the size of the compressed mjpeg frame in bytes.
  LIBYUV_BOOL LoadFrame(const uint8_t* src, size_t src_len);

  // Returns width of the last loaded frame in pixels.
  int GetWidth();

  // Returns height of the last loaded frame in pixels.
  int GetHeight();

  // Returns format of the last loaded frame. The return value is one of the
  // kColorSpace* constants.
  int GetColorSpace();

  // Number of color components in the color space.
  int GetNumComponents();

  // Sample factors of the n-th component.
  int GetHorizSampFactor(int component);

  int GetVertSampFactor(int component);

  int GetHorizSubSampFactor(int component);

  int GetVertSubSampFactor(int component);

  // Public for testability.
  int GetImageScanlinesPerImcuRow();

  // Public for testability.
  int GetComponentScanlinesPerImcuRow(int component);

  // Width of a component in bytes.
  int GetComponentWidth(int component);

  // Height of a component.
  int GetComponentHeight(int component);

  // Width of a component in bytes with padding for DCTSIZE. Public for testing.
  int GetComponentStride(int component);

  // Size of a component in bytes.
  int GetComponentSize(int component);

  // Call this after LoadFrame() if you decide you don't want to decode it
  // after all.
  LIBYUV_BOOL UnloadFrame();

  // Decodes the entire image into a one-buffer-per-color-component format.
  // dst_width must match exactly. dst_height must be <= to image height; if
  // less, the image is cropped. "planes" must have size equal to at least
  // GetNumComponents() and they must point to non-overlapping buffers of size
  // at least GetComponentSize(i). The pointers in planes are incremented
  // to point to after the end of the written data.
  // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
  LIBYUV_BOOL DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height);

  // Decodes the entire image and passes the data via repeated calls to a
  // callback function. Each call will get the data for a whole number of
  // image scanlines.
  // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
  LIBYUV_BOOL DecodeToCallback(CallbackFunction fn,
                               void* opaque,
                               int dst_width,
                               int dst_height);

  // The helper function which recognizes the jpeg sub-sampling type.
  static JpegSubsamplingType JpegSubsamplingTypeHelper(
      int* subsample_x,
      int* subsample_y,
      int number_of_components);

 private:
  void AllocOutputBuffers(int num_outbufs);
  void DestroyOutputBuffers();

  LIBYUV_BOOL StartDecode();
  LIBYUV_BOOL FinishDecode();

  void SetScanlinePointers(uint8_t** data);
  LIBYUV_BOOL DecodeImcuRow();

  int GetComponentScanlinePadding(int component);

  // A buffer holding the input data for a frame.
  Buffer buf_;
  BufferVector buf_vec_;

  jpeg_decompress_struct* decompress_struct_;
  jpeg_source_mgr* source_mgr_;
  SetJmpErrorMgr* error_mgr_;

  // LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
  // GetComponentScanlinePadding() != 0.)
  LIBYUV_BOOL has_scanline_padding_;

  // Temporaries used to point to scanline outputs.
  int num_outbufs_;  // Outermost size of all arrays below.
  uint8_t*** scanlines_;
  int* scanlines_sizes_;
  // Temporary buffer used for decoding when we can't decode directly to the
  // output buffers. Large enough for just one iMCU row.
  uint8_t** databuf_;
  int* databuf_strides_;
};

}  // namespace libyuv

#endif  // __cplusplus
#endif  // INCLUDE_LIBYUV_MJPEG_DECODER_H_
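A hedged sketch of the decode flow documented above: load a frame, size one buffer per component from the getters, then decode into them. The wrapper name is illustrative.

#include <vector>
#include "libyuv/mjpeg_decoder.h"

bool DecodeMjpegFrame(const uint8_t* jpg, size_t jpg_size) {
  libyuv::MJpegDecoder decoder;
  if (!decoder.LoadFrame(jpg, jpg_size)) {
    return false;  // not a valid/supported JPEG
  }
  const int n = decoder.GetNumComponents();
  std::vector<std::vector<uint8_t>> storage(n);
  std::vector<uint8_t*> planes(n);
  for (int i = 0; i < n; ++i) {
    storage[i].resize(decoder.GetComponentSize(i));
    planes[i] = storage[i].data();
  }
  // DecodeToBuffers advances the pointers in |planes| past the written data.
  return decoder.DecodeToBuffers(planes.data(), decoder.GetWidth(),
                                 decoder.GetHeight()) == LIBYUV_TRUE;
}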
1160
TMessagesProj/jni/third_party/libyuv/include/libyuv/planar_functions.h
vendored
Normal file
File diff suppressed because it is too large
296
TMessagesProj/jni/third_party/libyuv/include/libyuv/rotate.h
vendored
Normal file
@@ -0,0 +1,296 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_ROTATE_H_
#define INCLUDE_LIBYUV_ROTATE_H_

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Supported rotation.
typedef enum RotationMode {
  kRotate0 = 0,      // No rotation.
  kRotate90 = 90,    // Rotate 90 degrees clockwise.
  kRotate180 = 180,  // Rotate 180 degrees.
  kRotate270 = 270,  // Rotate 270 degrees clockwise.

  // Deprecated.
  kRotateNone = 0,
  kRotateClockwise = 90,
  kRotateCounterClockwise = 270,
} RotationModeEnum;

// Rotate I420 frame.
LIBYUV_API
int I420Rotate(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode);

// Rotate I422 frame.
LIBYUV_API
int I422Rotate(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode);

// Rotate I444 frame.
LIBYUV_API
int I444Rotate(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode);

// Rotate I010 frame.
LIBYUV_API
int I010Rotate(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode);

// Rotate I210 frame.
LIBYUV_API
int I210Rotate(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode);

// Rotate I410 frame.
LIBYUV_API
int I410Rotate(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode);

// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
                     int src_stride_y,
                     const uint8_t* src_uv,
                     int src_stride_uv,
                     uint8_t* dst_y,
                     int dst_stride_y,
                     uint8_t* dst_u,
                     int dst_stride_u,
                     uint8_t* dst_v,
                     int dst_stride_v,
                     int width,
                     int height,
                     enum RotationMode mode);

// Convert Android420 to I420 with rotation.
// "rotation" can be 0, 90, 180 or 270.
LIBYUV_API
int Android420ToI420Rotate(const uint8_t* src_y,
                           int src_stride_y,
                           const uint8_t* src_u,
                           int src_stride_u,
                           const uint8_t* src_v,
                           int src_stride_v,
                           int src_pixel_stride_uv,
                           uint8_t* dst_y,
                           int dst_stride_y,
                           uint8_t* dst_u,
                           int dst_stride_u,
                           uint8_t* dst_v,
                           int dst_stride_v,
                           int width,
                           int height,
                           enum RotationMode rotation);

// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane(const uint8_t* src,
                int src_stride,
                uint8_t* dst,
                int dst_stride,
                int width,
                int height,
                enum RotationMode mode);

// Rotate planes by 90, 180, 270. Deprecated.
LIBYUV_API
void RotatePlane90(const uint8_t* src,
                   int src_stride,
                   uint8_t* dst,
                   int dst_stride,
                   int width,
                   int height);

LIBYUV_API
void RotatePlane180(const uint8_t* src,
                    int src_stride,
                    uint8_t* dst,
                    int dst_stride,
                    int width,
                    int height);

LIBYUV_API
void RotatePlane270(const uint8_t* src,
                    int src_stride,
                    uint8_t* dst,
                    int dst_stride,
                    int width,
                    int height);

// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane_16(const uint16_t* src,
                   int src_stride,
                   uint16_t* dst,
                   int dst_stride,
                   int width,
                   int height,
                   enum RotationMode mode);

// Rotations for when U and V are interleaved.
// These functions take one UV input pointer and
// split the data into two buffers while
// rotating them.
// width and height expected to be half size for NV12.
LIBYUV_API
int SplitRotateUV(const uint8_t* src_uv,
                  int src_stride_uv,
                  uint8_t* dst_u,
                  int dst_stride_u,
                  uint8_t* dst_v,
                  int dst_stride_v,
                  int width,
                  int height,
                  enum RotationMode mode);

LIBYUV_API
void SplitRotateUV90(const uint8_t* src,
                     int src_stride,
                     uint8_t* dst_a,
                     int dst_stride_a,
                     uint8_t* dst_b,
                     int dst_stride_b,
                     int width,
                     int height);

LIBYUV_API
void SplitRotateUV180(const uint8_t* src,
                      int src_stride,
                      uint8_t* dst_a,
                      int dst_stride_a,
                      uint8_t* dst_b,
                      int dst_stride_b,
                      int width,
                      int height);

LIBYUV_API
void SplitRotateUV270(const uint8_t* src,
                      int src_stride,
                      uint8_t* dst_a,
                      int dst_stride_a,
                      uint8_t* dst_b,
                      int dst_stride_b,
                      int width,
                      int height);

// The 90 and 270 functions are based on transposes.
// Doing a transpose with reversing the read/write
// order will result in a rotation by +- 90 degrees.
// Deprecated.
LIBYUV_API
void TransposePlane(const uint8_t* src,
                    int src_stride,
                    uint8_t* dst,
                    int dst_stride,
                    int width,
                    int height);

LIBYUV_API
void SplitTransposeUV(const uint8_t* src,
                      int src_stride,
                      uint8_t* dst_a,
                      int dst_stride_a,
                      uint8_t* dst_b,
                      int dst_stride_b,
                      int width,
                      int height);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_ROTATE_H_
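A hedged sketch of I420Rotate from the header above. Note that for kRotate90 and kRotate270 the destination planes are height x width, so the destination strides below are based on the source height; the helper name and packed buffers are illustrative.

#include <vector>
#include "libyuv/rotate.h"

void RotateI420By90(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                    int width, int height) {
  std::vector<uint8_t> dy(width * height);
  std::vector<uint8_t> du((width / 2) * (height / 2));
  std::vector<uint8_t> dv((width / 2) * (height / 2));
  // Rotated image is height wide, hence dst strides of height and height/2.
  libyuv::I420Rotate(y, width, u, width / 2, v, width / 2,
                     dy.data(), height, du.data(), height / 2,
                     dv.data(), height / 2, width, height,
                     libyuv::kRotate90);
}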
37
TMessagesProj/jni/third_party/libyuv/include/libyuv/rotate_argb.h
vendored
Normal file
@@ -0,0 +1,37 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_
#define INCLUDE_LIBYUV_ROTATE_ARGB_H_

#include "libyuv/basic_types.h"
#include "libyuv/rotate.h"  // For RotationMode.

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Rotate ARGB frame
LIBYUV_API
int ARGBRotate(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int src_width,
               int src_height,
               enum RotationMode mode);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_ROTATE_ARGB_H_
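A short hedged sketch: ARGBRotate takes the source width and height, so for a 90 degree rotation the destination image is src_height pixels wide, which is why the destination stride below is src_height * 4. The helper name is illustrative.

#include <vector>
#include "libyuv/rotate_argb.h"

void RotateArgbBy90(const uint8_t* src, int src_width, int src_height) {
  std::vector<uint8_t> dst(src_width * src_height * 4);
  libyuv::ARGBRotate(src, src_width * 4, dst.data(), src_height * 4,
                     src_width, src_height, libyuv::kRotate90);
}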
240
TMessagesProj/jni/third_party/libyuv/include/libyuv/rotate_row.h
vendored
Normal file
@@ -0,0 +1,240 @@
/*
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_
#define INCLUDE_LIBYUV_ROTATE_ROW_H_

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#if defined(__pnacl__) || defined(__CLR_VER) || \
    (defined(__native_client__) && defined(__x86_64__)) || \
    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available for Visual C 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \
    !defined(__clang__)
#define HAS_TRANSPOSEWX8_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif

// The following are available for GCC 32 or 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
#define HAS_TRANSPOSEWX8_SSSE3
#endif

// The following are available for 64 bit GCC:
#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__)
#define HAS_TRANSPOSEWX8_FAST_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif

#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_TRANSPOSEWX8_NEON
#define HAS_TRANSPOSEUVWX8_NEON
#endif

#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_TRANSPOSEWX16_MSA
#define HAS_TRANSPOSEUVWX16_MSA
#endif

#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
#define HAS_TRANSPOSEWX16_LSX
#define HAS_TRANSPOSEUVWX16_LSX
#endif

void TransposeWxH_C(const uint8_t* src,
                    int src_stride,
                    uint8_t* dst,
                    int dst_stride,
                    int width,
                    int height);

void TransposeWx8_C(const uint8_t* src,
                    int src_stride,
                    uint8_t* dst,
                    int dst_stride,
                    int width);
void TransposeWx16_C(const uint8_t* src,
                     int src_stride,
                     uint8_t* dst,
                     int dst_stride,
                     int width);
void TransposeWx8_NEON(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst,
                       int dst_stride,
                       int width);
void TransposeWx8_SSSE3(const uint8_t* src,
                        int src_stride,
                        uint8_t* dst,
                        int dst_stride,
                        int width);
void TransposeWx8_Fast_SSSE3(const uint8_t* src,
                             int src_stride,
                             uint8_t* dst,
                             int dst_stride,
                             int width);
void TransposeWx16_MSA(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst,
                       int dst_stride,
                       int width);
void TransposeWx16_LSX(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst,
                       int dst_stride,
                       int width);

void TransposeWx8_Any_NEON(const uint8_t* src,
                           int src_stride,
                           uint8_t* dst,
                           int dst_stride,
                           int width);
void TransposeWx8_Any_SSSE3(const uint8_t* src,
                            int src_stride,
                            uint8_t* dst,
                            int dst_stride,
                            int width);
void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src,
                                 int src_stride,
                                 uint8_t* dst,
                                 int dst_stride,
                                 int width);
void TransposeWx16_Any_MSA(const uint8_t* src,
                           int src_stride,
                           uint8_t* dst,
                           int dst_stride,
                           int width);
void TransposeWx16_Any_LSX(const uint8_t* src,
                           int src_stride,
                           uint8_t* dst,
                           int dst_stride,
                           int width);

void TransposeUVWxH_C(const uint8_t* src,
                      int src_stride,
                      uint8_t* dst_a,
                      int dst_stride_a,
                      uint8_t* dst_b,
                      int dst_stride_b,
                      int width,
                      int height);

void TransposeUVWx8_C(const uint8_t* src,
                      int src_stride,
                      uint8_t* dst_a,
                      int dst_stride_a,
                      uint8_t* dst_b,
                      int dst_stride_b,
                      int width);
void TransposeUVWx16_C(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst_a,
                       int dst_stride_a,
                       uint8_t* dst_b,
                       int dst_stride_b,
                       int width);
void TransposeUVWx8_SSE2(const uint8_t* src,
                         int src_stride,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         int width);
void TransposeUVWx8_NEON(const uint8_t* src,
                         int src_stride,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         int width);
void TransposeUVWx16_MSA(const uint8_t* src,
                         int src_stride,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         int width);
void TransposeUVWx16_LSX(const uint8_t* src,
                         int src_stride,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         int width);

void TransposeUVWx8_Any_SSE2(const uint8_t* src,
                             int src_stride,
                             uint8_t* dst_a,
                             int dst_stride_a,
                             uint8_t* dst_b,
                             int dst_stride_b,
                             int width);
void TransposeUVWx8_Any_NEON(const uint8_t* src,
                             int src_stride,
                             uint8_t* dst_a,
                             int dst_stride_a,
                             uint8_t* dst_b,
                             int dst_stride_b,
                             int width);
void TransposeUVWx16_Any_MSA(const uint8_t* src,
                             int src_stride,
                             uint8_t* dst_a,
                             int dst_stride_a,
                             uint8_t* dst_b,
                             int dst_stride_b,
                             int width);
void TransposeUVWx16_Any_LSX(const uint8_t* src,
                             int src_stride,
                             uint8_t* dst_a,
                             int dst_stride_a,
                             uint8_t* dst_b,
                             int dst_stride_b,
                             int width);
void TransposeWxH_16_C(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width,
                       int height);

void TransposeWx8_16_C(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width);
void TransposeWx1_16_C(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width);
#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_ROTATE_ROW_H_
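A hedged sketch of the transpose primitive these row functions expose: TransposeWxH_C writes dst[x][y] = src[y][x], which is the building block the 90/270 rotations combine with a reversed read/write order. These are internal functions, so this assumes linking against the library's row-function objects; the helper name is illustrative.

#include <vector>
#include "libyuv/rotate_row.h"

void TransposeGrayPlane(const uint8_t* src, int width, int height,
                        std::vector<uint8_t>* out) {
  out->resize(width * height);
  // Transposed plane is height pixels wide, hence dst_stride == height.
  libyuv::TransposeWxH_C(src, width, out->data(), height, width, height);
}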
5790
TMessagesProj/jni/third_party/libyuv/include/libyuv/row.h
vendored
Normal file
File diff suppressed because it is too large
320
TMessagesProj/jni/third_party/libyuv/include/libyuv/scale.h
vendored
Normal file
@@ -0,0 +1,320 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_SCALE_H_
#define INCLUDE_LIBYUV_SCALE_H_

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Supported filtering.
typedef enum FilterMode {
  kFilterNone = 0,      // Point sample; Fastest.
  kFilterLinear = 1,    // Filter horizontally only.
  kFilterBilinear = 2,  // Faster than box, but lower quality scaling down.
  kFilterBox = 3        // Highest quality.
} FilterModeEnum;

// Scale a YUV plane.
LIBYUV_API
void ScalePlane(const uint8_t* src,
                int src_stride,
                int src_width,
                int src_height,
                uint8_t* dst,
                int dst_stride,
                int dst_width,
                int dst_height,
                enum FilterMode filtering);

LIBYUV_API
void ScalePlane_16(const uint16_t* src,
                   int src_stride,
                   int src_width,
                   int src_height,
                   uint16_t* dst,
                   int dst_stride,
                   int dst_width,
                   int dst_height,
                   enum FilterMode filtering);

// Sample is expected to be in the low 12 bits.
LIBYUV_API
void ScalePlane_12(const uint16_t* src,
                   int src_stride,
                   int src_width,
                   int src_height,
                   uint16_t* dst,
                   int dst_stride,
                   int dst_width,
                   int dst_height,
                   enum FilterMode filtering);

// Scales a YUV 4:2:0 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce ever better
// quality image, at further expense of speed.
// Returns 0 if successful.

LIBYUV_API
int I420Scale(const uint8_t* src_y,
              int src_stride_y,
              const uint8_t* src_u,
              int src_stride_u,
              const uint8_t* src_v,
              int src_stride_v,
              int src_width,
              int src_height,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_u,
              int dst_stride_u,
              uint8_t* dst_v,
              int dst_stride_v,
              int dst_width,
              int dst_height,
              enum FilterMode filtering);

LIBYUV_API
int I420Scale_16(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering);

LIBYUV_API
int I420Scale_12(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering);

// Scales a YUV 4:4:4 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce ever better
// quality image, at further expense of speed.
// Returns 0 if successful.

LIBYUV_API
int I444Scale(const uint8_t* src_y,
              int src_stride_y,
              const uint8_t* src_u,
              int src_stride_u,
              const uint8_t* src_v,
              int src_stride_v,
              int src_width,
              int src_height,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_u,
              int dst_stride_u,
              uint8_t* dst_v,
              int dst_stride_v,
              int dst_width,
              int dst_height,
              enum FilterMode filtering);

LIBYUV_API
int I444Scale_16(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering);

LIBYUV_API
int I444Scale_12(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering);

// Scales a YUV 4:2:2 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce ever better
// quality image, at further expense of speed.
// Returns 0 if successful.
LIBYUV_API
int I422Scale(const uint8_t* src_y,
              int src_stride_y,
              const uint8_t* src_u,
              int src_stride_u,
              const uint8_t* src_v,
              int src_stride_v,
              int src_width,
              int src_height,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_u,
              int dst_stride_u,
              uint8_t* dst_v,
              int dst_stride_v,
              int dst_width,
              int dst_height,
              enum FilterMode filtering);

LIBYUV_API
int I422Scale_16(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering);

LIBYUV_API
int I422Scale_12(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering);

// Scales an NV12 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// kFilterBox is not supported for the UV channel and will be treated as
// bilinear.
// Returns 0 if successful.

LIBYUV_API
int NV12Scale(const uint8_t* src_y,
              int src_stride_y,
              const uint8_t* src_uv,
              int src_stride_uv,
              int src_width,
              int src_height,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_uv,
              int dst_stride_uv,
              int dst_width,
              int dst_height,
              enum FilterMode filtering);

#ifdef __cplusplus
// Legacy API. Deprecated.
LIBYUV_API
int Scale(const uint8_t* src_y,
          const uint8_t* src_u,
          const uint8_t* src_v,
          int src_stride_y,
          int src_stride_u,
          int src_stride_v,
          int src_width,
          int src_height,
          uint8_t* dst_y,
          uint8_t* dst_u,
          uint8_t* dst_v,
          int dst_stride_y,
          int dst_stride_u,
          int dst_stride_v,
          int dst_width,
          int dst_height,
          LIBYUV_BOOL interpolate);

// For testing, allow disabling of specialized scalers.
LIBYUV_API
void SetUseReferenceImpl(LIBYUV_BOOL use);
#endif  // __cplusplus

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_SCALE_H_
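A hedged sketch of I420Scale as documented above: downscale an I420 frame to half size with box filtering, the highest-quality mode. Packed strides and the helper name are illustrative assumptions.

#include <vector>
#include "libyuv/scale.h"

void HalveI420(const uint8_t* y, const uint8_t* u, const uint8_t* v,
               int w, int h) {
  const int dw = w / 2, dh = h / 2;
  std::vector<uint8_t> dy(dw * dh);
  std::vector<uint8_t> du((dw / 2) * (dh / 2));
  std::vector<uint8_t> dv((dw / 2) * (dh / 2));
  libyuv::I420Scale(y, w, u, w / 2, v, w / 2, w, h,
                    dy.data(), dw, du.data(), dw / 2, dv.data(), dw / 2,
                    dw, dh, libyuv::kFilterBox);
}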
76
TMessagesProj/jni/third_party/libyuv/include/libyuv/scale_argb.h
vendored
Normal file
@@ -0,0 +1,76 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_
#define INCLUDE_LIBYUV_SCALE_ARGB_H_

#include "libyuv/basic_types.h"
#include "libyuv/scale.h"  // For FilterMode

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

LIBYUV_API
int ARGBScale(const uint8_t* src_argb,
              int src_stride_argb,
              int src_width,
              int src_height,
              uint8_t* dst_argb,
              int dst_stride_argb,
              int dst_width,
              int dst_height,
              enum FilterMode filtering);

// Clipped scale takes destination rectangle coordinates for clip values.
LIBYUV_API
int ARGBScaleClip(const uint8_t* src_argb,
                  int src_stride_argb,
                  int src_width,
                  int src_height,
                  uint8_t* dst_argb,
                  int dst_stride_argb,
                  int dst_width,
                  int dst_height,
                  int clip_x,
                  int clip_y,
                  int clip_width,
                  int clip_height,
                  enum FilterMode filtering);

// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8_t* src_y,
                       int src_stride_y,
                       const uint8_t* src_u,
                       int src_stride_u,
                       const uint8_t* src_v,
                       int src_stride_v,
                       uint32_t src_fourcc,
                       int src_width,
                       int src_height,
                       uint8_t* dst_argb,
                       int dst_stride_argb,
                       uint32_t dst_fourcc,
                       int dst_width,
                       int dst_height,
                       int clip_x,
                       int clip_y,
                       int clip_width,
                       int clip_height,
                       enum FilterMode filtering);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_SCALE_ARGB_H_
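A hedged sketch of ARGBScaleClip from above: it scales as if producing the full dst_width x dst_height image but only writes the clip rectangle, which suits tiled or partial updates. The function name and clip choice are illustrative.

#include "libyuv/scale_argb.h"

void ScaleArgbTopHalf(const uint8_t* src, int sw, int sh,
                      uint8_t* dst, int dw, int dh) {
  libyuv::ARGBScaleClip(src, sw * 4, sw, sh, dst, dw * 4, dw, dh,
                        0, 0, dw, dh / 2,  // clip: top half only
                        libyuv::kFilterBilinear);
}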
42
TMessagesProj/jni/third_party/libyuv/include/libyuv/scale_rgb.h
vendored
Normal file
@@ -0,0 +1,42 @@
/*
 *  Copyright 2022 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_SCALE_RGB_H_
#define INCLUDE_LIBYUV_SCALE_RGB_H_

#include "libyuv/basic_types.h"
#include "libyuv/scale.h"  // For FilterMode

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// RGB can be RAW, RGB24 or YUV24.
// RGBScale scales 24 bit images by converting a row at a time to ARGB
// and using ARGB row functions to scale, then converting back to RGB.
// TODO(fbarchard): Allow input/output formats to be specified.
LIBYUV_API
int RGBScale(const uint8_t* src_rgb,
             int src_stride_rgb,
             int src_width,
             int src_height,
             uint8_t* dst_rgb,
             int dst_stride_rgb,
             int dst_width,
             int dst_height,
             enum FilterMode filtering);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_SCALE_RGB_H_
1750
TMessagesProj/jni/third_party/libyuv/include/libyuv/scale_row.h
vendored
Normal file
File diff suppressed because it is too large
51
TMessagesProj/jni/third_party/libyuv/include/libyuv/scale_uv.h
vendored
Normal file
@@ -0,0 +1,51 @@
/*
 *  Copyright 2020 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_SCALE_UV_H_
#define INCLUDE_LIBYUV_SCALE_UV_H_

#include "libyuv/basic_types.h"
#include "libyuv/scale.h"  // For FilterMode

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

LIBYUV_API
int UVScale(const uint8_t* src_uv,
            int src_stride_uv,
            int src_width,
            int src_height,
            uint8_t* dst_uv,
            int dst_stride_uv,
            int dst_width,
            int dst_height,
            enum FilterMode filtering);

// Scale a 16-bit UV image.
// This function is currently incomplete; it cannot handle all cases.
LIBYUV_API
int UVScale_16(const uint16_t* src_uv,
               int src_stride_uv,
               int src_width,
               int src_height,
               uint16_t* dst_uv,
               int dst_stride_uv,
               int dst_width,
               int dst_height,
               enum FilterMode filtering);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_SCALE_UV_H_
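A similar sketch for the interleaved UV plane of an NV12 frame; the helper
name and the half-resolution plane dimensions are assumptions based on 4:2:0
layout, where the UV plane stores one 2-byte UV pair per 2x2 luma block.

  #include "libyuv/scale_uv.h"
  int ScaleNV12UVPlane(const uint8_t* src_uv, int src_w, int src_h,
                       uint8_t* dst_uv, int dst_w, int dst_h) {
    int src_uv_w = (src_w + 1) / 2, src_uv_h = (src_h + 1) / 2;
    int dst_uv_w = (dst_w + 1) / 2, dst_uv_h = (dst_h + 1) / 2;
    return libyuv::UVScale(src_uv, src_uv_w * 2, src_uv_w, src_uv_h,
                           dst_uv, dst_uv_w * 2, dst_uv_w, dst_uv_h,
                           libyuv::kFilterBilinear);
  }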
16
TMessagesProj/jni/third_party/libyuv/include/libyuv/version.h
vendored
Normal file
@@ -0,0 +1,16 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_

#define LIBYUV_VERSION 1857

#endif  // INCLUDE_LIBYUV_VERSION_H_
222
TMessagesProj/jni/third_party/libyuv/include/libyuv/video_common.h
vendored
Normal file
@@ -0,0 +1,222 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// Common definitions for video, including fourcc and VideoFormat.

#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_
#define INCLUDE_LIBYUV_VIDEO_COMMON_H_

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

//////////////////////////////////////////////////////////////////////////////
// Definition of FourCC codes
//////////////////////////////////////////////////////////////////////////////

// Convert four characters to a FourCC code.
// Needs to be a macro, otherwise the OS X compiler complains when the
// kFormat* constants are used in a switch.
#ifdef __cplusplus
#define FOURCC(a, b, c, d)                                        \
  ((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \
   (static_cast<uint32_t>(c) << 16) | /* NOLINT */                \
   (static_cast<uint32_t>(d) << 24))  /* NOLINT */
#else
#define FOURCC(a, b, c, d)                                     \
  (((uint32_t)(a)) | ((uint32_t)(b) << 8) |       /* NOLINT */ \
   ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */
#endif

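// Worked example (editor's aside): FOURCC('I', '4', '2', '0') packs the
// characters little-endian. With 'I' = 0x49, '4' = 0x34, '2' = 0x32 and
// '0' = 0x30 this yields
//   0x49 | (0x34 << 8) | (0x32 << 16) | (0x30 << 24) = 0x30323449,
// whose bytes read back "I420" in memory order.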
// Some pages discussing FourCC codes:
// http://www.fourcc.org/yuv.php
// http://v4l2spec.bytesex.org/spec/book1.htm
// http://developer.apple.com/quicktime/icefloe/dispatch020.html
// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt

// FourCC codes grouped according to implementation efficiency.
// Primary formats should convert in 1 efficient step.
// Secondary formats are converted in 2 steps.
// Auxiliary formats call primary converters.
enum FourCC {
  // 10 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
  FOURCC_I420 = FOURCC('I', '4', '2', '0'),
  FOURCC_I422 = FOURCC('I', '4', '2', '2'),
  FOURCC_I444 = FOURCC('I', '4', '4', '4'),
  FOURCC_I400 = FOURCC('I', '4', '0', '0'),
  FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
  FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
  FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
  FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
  FOURCC_I010 = FOURCC('I', '0', '1', '0'),  // bt.601 10 bit 420
  FOURCC_I210 = FOURCC('I', '2', '1', '0'),  // bt.601 10 bit 422

  // 1 Secondary YUV format: row biplanar. Deprecated.
  FOURCC_M420 = FOURCC('M', '4', '2', '0'),

  // 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 2 10 bpc, 2 64 bpp.
  FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
  FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
  FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
  FOURCC_AR30 = FOURCC('A', 'R', '3', '0'),  // 10 bit per channel. 2101010.
  FOURCC_AB30 = FOURCC('A', 'B', '3', '0'),  // ABGR version of 10 bit
  FOURCC_AR64 = FOURCC('A', 'R', '6', '4'),  // 16 bit per channel.
  FOURCC_AB64 = FOURCC('A', 'B', '6', '4'),  // ABGR version of 16 bit
  FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
  FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
  FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
  FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'),  // rgb565 LE.
  FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'),  // argb1555 LE.
  FOURCC_R444 = FOURCC('R', '4', '4', '4'),  // argb4444 LE.

  // 1 Primary Compressed YUV format.
  FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),

  // 14 Auxiliary YUV variations: 3 with U and V planes swapped, 1 alias.
  FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
  FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
  FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
  FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'),  // Linux version of I420.
  FOURCC_J420 =
      FOURCC('J', '4', '2', '0'),  // jpeg (bt.601 full), unofficial fourcc
  FOURCC_J422 =
      FOURCC('J', '4', '2', '2'),  // jpeg (bt.601 full), unofficial fourcc
  FOURCC_J444 =
      FOURCC('J', '4', '4', '4'),  // jpeg (bt.601 full), unofficial fourcc
  FOURCC_J400 =
      FOURCC('J', '4', '0', '0'),  // jpeg (bt.601 full), unofficial fourcc
  FOURCC_F420 = FOURCC('F', '4', '2', '0'),  // bt.709 full, unofficial fourcc
  FOURCC_F422 = FOURCC('F', '4', '2', '2'),  // bt.709 full, unofficial fourcc
  FOURCC_F444 = FOURCC('F', '4', '4', '4'),  // bt.709 full, unofficial fourcc
  FOURCC_H420 = FOURCC('H', '4', '2', '0'),  // bt.709, unofficial fourcc
  FOURCC_H422 = FOURCC('H', '4', '2', '2'),  // bt.709, unofficial fourcc
  FOURCC_H444 = FOURCC('H', '4', '4', '4'),  // bt.709, unofficial fourcc
  FOURCC_U420 = FOURCC('U', '4', '2', '0'),  // bt.2020, unofficial fourcc
  FOURCC_U422 = FOURCC('U', '4', '2', '2'),  // bt.2020, unofficial fourcc
  FOURCC_U444 = FOURCC('U', '4', '4', '4'),  // bt.2020, unofficial fourcc
  FOURCC_F010 = FOURCC('F', '0', '1', '0'),  // bt.709 full range 10 bit 420
  FOURCC_H010 = FOURCC('H', '0', '1', '0'),  // bt.709 10 bit 420
  FOURCC_U010 = FOURCC('U', '0', '1', '0'),  // bt.2020 10 bit 420
  FOURCC_F210 = FOURCC('F', '2', '1', '0'),  // bt.709 full range 10 bit 422
  FOURCC_H210 = FOURCC('H', '2', '1', '0'),  // bt.709 10 bit 422
  FOURCC_U210 = FOURCC('U', '2', '1', '0'),  // bt.2020 10 bit 422
  FOURCC_P010 = FOURCC('P', '0', '1', '0'),
  FOURCC_P210 = FOURCC('P', '2', '1', '0'),

  // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
  FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'),  // Alias for I420.
  FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'),  // Alias for I422.
  FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'),  // Alias for I444.
  FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'),  // Alias for YUY2.
  FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'),  // Alias for YUY2 on Mac.
  FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'),  // Alias for UYVY.
  FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'),  // Alias for UYVY on Mac.
  FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'),  // Alias for MJPG.
  FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'),  // Alias for MJPG on Mac.
  FOURCC_BA81 = FOURCC('B', 'A', '8', '1'),  // Alias for BGGR.
  FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'),  // Alias for RAW.
  FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'),  // Alias for 24BG.
  FOURCC_CM32 = FOURCC(0, 0, 0, 32),  // Alias for BGRA kCMPixelFormat_32ARGB
  FOURCC_CM24 = FOURCC(0, 0, 0, 24),  // Alias for RAW kCMPixelFormat_24RGB
  FOURCC_L555 = FOURCC('L', '5', '5', '5'),  // Alias for RGBO.
  FOURCC_L565 = FOURCC('L', '5', '6', '5'),  // Alias for RGBP.
  FOURCC_5551 = FOURCC('5', '5', '5', '1'),  // Alias for RGBO.

  // Deprecated formats. Not supported, but defined for backward compatibility.
  FOURCC_I411 = FOURCC('I', '4', '1', '1'),
  FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
  FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
  FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
  FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
  FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
  FOURCC_H264 = FOURCC('H', '2', '6', '4'),

  // Match any fourcc.
  FOURCC_ANY = -1,
};

enum FourCCBpp {
  // Canonical fourcc codes used in our code.
  FOURCC_BPP_I420 = 12,
  FOURCC_BPP_I422 = 16,
  FOURCC_BPP_I444 = 24,
  FOURCC_BPP_I411 = 12,
  FOURCC_BPP_I400 = 8,
  FOURCC_BPP_NV21 = 12,
  FOURCC_BPP_NV12 = 12,
  FOURCC_BPP_YUY2 = 16,
  FOURCC_BPP_UYVY = 16,
  FOURCC_BPP_M420 = 12,  // deprecated
  FOURCC_BPP_Q420 = 12,
  FOURCC_BPP_ARGB = 32,
  FOURCC_BPP_BGRA = 32,
  FOURCC_BPP_ABGR = 32,
  FOURCC_BPP_RGBA = 32,
  FOURCC_BPP_AR30 = 32,
  FOURCC_BPP_AB30 = 32,
  FOURCC_BPP_AR64 = 64,
  FOURCC_BPP_AB64 = 64,
  FOURCC_BPP_24BG = 24,
  FOURCC_BPP_RAW = 24,
  FOURCC_BPP_RGBP = 16,
  FOURCC_BPP_RGBO = 16,
  FOURCC_BPP_R444 = 16,
  FOURCC_BPP_RGGB = 8,
  FOURCC_BPP_BGGR = 8,
  FOURCC_BPP_GRBG = 8,
  FOURCC_BPP_GBRG = 8,
  FOURCC_BPP_YV12 = 12,
  FOURCC_BPP_YV16 = 16,
  FOURCC_BPP_YV24 = 24,
  FOURCC_BPP_YU12 = 12,
  FOURCC_BPP_J420 = 12,
  FOURCC_BPP_J400 = 8,
  FOURCC_BPP_H420 = 12,
  FOURCC_BPP_H422 = 16,
  FOURCC_BPP_I010 = 15,
  FOURCC_BPP_I210 = 20,
  FOURCC_BPP_H010 = 15,
  FOURCC_BPP_H210 = 20,
  FOURCC_BPP_P010 = 15,
  FOURCC_BPP_P210 = 20,
  FOURCC_BPP_MJPG = 0,  // 0 means unknown.
  FOURCC_BPP_H264 = 0,
  FOURCC_BPP_IYUV = 12,
  FOURCC_BPP_YU16 = 16,
  FOURCC_BPP_YU24 = 24,
  FOURCC_BPP_YUYV = 16,
  FOURCC_BPP_YUVS = 16,
  FOURCC_BPP_HDYC = 16,
  FOURCC_BPP_2VUY = 16,
  FOURCC_BPP_JPEG = 1,
  FOURCC_BPP_DMB1 = 1,
  FOURCC_BPP_BA81 = 8,
  FOURCC_BPP_RGB3 = 24,
  FOURCC_BPP_BGR3 = 24,
  FOURCC_BPP_CM32 = 32,
  FOURCC_BPP_CM24 = 24,

  // Match any fourcc.
  FOURCC_BPP_ANY = 0,  // 0 means unknown.
};

// Converts fourcc aliases into canonical ones.
LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_VIDEO_COMMON_H_
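A dispatch sketch showing how the alias table is typically consumed before
format-specific handling; the helper and the cases it picks are illustrative
assumptions, not part of the header above.

  #include "libyuv/video_common.h"
  bool IsPacked422(uint32_t fourcc) {
    switch (libyuv::CanonicalFourCC(fourcc)) {
      case libyuv::FOURCC_YUY2:  // also reached via YUYV / yuvs
      case libyuv::FOURCC_UYVY:  // also reached via HDYC / 2vuy
        return true;
      default:
        return false;
    }
  }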
430
TMessagesProj/jni/third_party/libyuv/source/compare.cc
vendored
Normal file
@@ -0,0 +1,430 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/compare.h"

#include <float.h>
#include <math.h>
#ifdef _OPENMP
#include <omp.h>
#endif

#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// hash seed of 5381 recommended.
LIBYUV_API
uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
  const int kBlockSize = 1 << 15;  // 32768
  int remainder;
  uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) =
      HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
  if (TestCpuFlag(kCpuHasSSE41)) {
    HashDjb2_SSE = HashDjb2_SSE41;
  }
#endif
#if defined(HAS_HASHDJB2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    HashDjb2_SSE = HashDjb2_AVX2;
  }
#endif

  while (count >= (uint64_t)kBlockSize) {
    seed = HashDjb2_SSE(src, kBlockSize, seed);
    src += kBlockSize;
    count -= kBlockSize;
  }
  remainder = (int)count & ~15;
  if (remainder) {
    seed = HashDjb2_SSE(src, remainder, seed);
    src += remainder;
    count -= remainder;
  }
  remainder = (int)count & 15;
  if (remainder) {
    seed = HashDjb2_C(src, remainder, seed);
  }
  return seed;
}
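// Editor's aside, a hedged usage sketch (the helper name is hypothetical):
// djb2 is a left-to-right fold, and the block loop above threads the running
// hash through as the next seed, so hashing consecutive chunks matches one
// pass over the same bytes:
//   uint32_t whole = HashDjb2(buf, len, 5381);
//   uint32_t split = HashDjb2(buf, len / 2, 5381);
//   split = HashDjb2(buf + len / 2, len - len / 2, split);
//   assert(whole == split);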
static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    if (argb[0] != 255) {  // First byte is not Alpha of 255, so not ARGB.
      return FOURCC_BGRA;
    }
    if (argb[3] != 255) {  // Fourth byte is not Alpha of 255, so not BGRA.
      return FOURCC_ARGB;
    }
    if (argb[4] != 255) {  // Second pixel first byte is not Alpha of 255.
      return FOURCC_BGRA;
    }
    if (argb[7] != 255) {  // Second pixel fourth byte is not Alpha of 255.
      return FOURCC_ARGB;
    }
    argb += 8;
  }
  if (width & 1) {
    if (argb[0] != 255) {  // First byte is not Alpha of 255, so not ARGB.
      return FOURCC_BGRA;
    }
    if (argb[3] != 255) {  // Fourth byte is not Alpha of 255, so not BGRA.
      return FOURCC_ARGB;
    }
  }
  return 0;
}

// Scan an opaque ARGB image and return fourcc based on the alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
LIBYUV_API
uint32_t ARGBDetect(const uint8_t* argb,
                    int stride_argb,
                    int width,
                    int height) {
  uint32_t fourcc = 0;
  int h;

  // Coalesce rows.
  if (stride_argb == width * 4) {
    width *= height;
    height = 1;
    stride_argb = 0;
  }
  for (h = 0; h < height && fourcc == 0; ++h) {
    fourcc = ARGBDetectRow_C(argb, width);
    argb += stride_argb;
  }
  return fourcc;
}
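// Editor's aside on ARGBDetect: classification relies on the frame being
// opaque. The first pixel byte that is not 255 decides the answer, since it
// cannot be the alpha channel; an image where bytes 0 and 3 are 255
// everywhere stays ambiguous, so the function reports 0 rather than guessing.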
// The NEON version accumulates in 16-bit shorts, which overflow at 65536
// bytes, so the actual maximum is one loop less: 65536 - 32 bytes.

LIBYUV_API
uint64_t ComputeHammingDistance(const uint8_t* src_a,
                                const uint8_t* src_b,
                                int count) {
  const int kBlockSize = 1 << 15;  // 32768
  const int kSimdSize = 64;
  // SIMD for multiples of 64 bytes, and C for the remainder.
  int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1);
  uint64_t diff = 0;
  int i;
  uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b,
                              int count) = HammingDistance_C;
#if defined(HAS_HAMMINGDISTANCE_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    HammingDistance = HammingDistance_NEON;
  }
#endif
#if defined(HAS_HAMMINGDISTANCE_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    HammingDistance = HammingDistance_SSSE3;
  }
#endif
#if defined(HAS_HAMMINGDISTANCE_SSE42)
  if (TestCpuFlag(kCpuHasSSE42)) {
    HammingDistance = HammingDistance_SSE42;
  }
#endif
#if defined(HAS_HAMMINGDISTANCE_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    HammingDistance = HammingDistance_AVX2;
  }
#endif
#if defined(HAS_HAMMINGDISTANCE_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    HammingDistance = HammingDistance_MSA;
  }
#endif

#ifdef _OPENMP
#pragma omp parallel for reduction(+ : diff)
#endif
  for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
    diff += HammingDistance(src_a + i, src_b + i, kBlockSize);
  }
  src_a += count & ~(kBlockSize - 1);
  src_b += count & ~(kBlockSize - 1);
  if (remainder) {
    diff += HammingDistance(src_a, src_b, remainder);
    src_a += remainder;
    src_b += remainder;
  }
  remainder = count & (kSimdSize - 1);
  if (remainder) {
    diff += HammingDistance_C(src_a, src_b, remainder);
  }
  return diff;
}
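// Editor's aside, a small check of the API: if every byte of b differs from
// the corresponding byte of a in exactly one bit, the distance equals the
// byte count:
//   uint8_t a[256], b[256];
//   memset(a, 0x00, sizeof(a));
//   memset(b, 0x01, sizeof(b));  // one differing bit per byte
//   assert(ComputeHammingDistance(a, b, 256) == 256);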
// TODO(fbarchard): Refactor into row function.
LIBYUV_API
uint64_t ComputeSumSquareError(const uint8_t* src_a,
                               const uint8_t* src_b,
                               int count) {
  // SumSquareError returns values 0 to 65535 for each squared difference.
  // Up to 65536 of those can be summed and remain within a uint32_t.
  // After each block of 65536 pixels, accumulate into a uint64_t.
  const int kBlockSize = 65536;
  int remainder = count & (kBlockSize - 1) & ~31;
  uint64_t sse = 0;
  int i;
  uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b,
                             int count) = SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SumSquareError = SumSquareError_NEON;
  }
#endif
#if defined(HAS_SUMSQUAREERROR_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    // Note: only used for multiples of 16, so count is not checked.
    SumSquareError = SumSquareError_SSE2;
  }
#endif
#if defined(HAS_SUMSQUAREERROR_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    // Note: only used for multiples of 32, so count is not checked.
    SumSquareError = SumSquareError_AVX2;
  }
#endif
#if defined(HAS_SUMSQUAREERROR_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SumSquareError = SumSquareError_MSA;
  }
#endif
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sse)
#endif
  for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
    sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
  }
  src_a += count & ~(kBlockSize - 1);
  src_b += count & ~(kBlockSize - 1);
  if (remainder) {
    sse += SumSquareError(src_a, src_b, remainder);
    src_a += remainder;
    src_b += remainder;
  }
  remainder = count & 31;
  if (remainder) {
    sse += SumSquareError_C(src_a, src_b, remainder);
  }
  return sse;
}
LIBYUV_API
uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
                                    int stride_a,
                                    const uint8_t* src_b,
                                    int stride_b,
                                    int width,
                                    int height) {
  uint64_t sse = 0;
  int h;
  // Coalesce rows.
  if (stride_a == width && stride_b == width) {
    width *= height;
    height = 1;
    stride_a = stride_b = 0;
  }
  for (h = 0; h < height; ++h) {
    sse += ComputeSumSquareError(src_a, src_b, width);
    src_a += stride_a;
    src_b += stride_b;
  }
  return sse;
}

LIBYUV_API
double SumSquareErrorToPsnr(uint64_t sse, uint64_t count) {
  double psnr;
  if (sse > 0) {
    // Note: count / sse is the reciprocal of the mean squared error, so
    // this computes PSNR = 10 * log10(255^2 * count / sse).
    double mse = (double)count / (double)sse;
    psnr = 10.0 * log10(255.0 * 255.0 * mse);
  } else {
    psnr = kMaxPsnr;  // Limit to prevent divide by 0.
  }

  if (psnr > kMaxPsnr) {
    psnr = kMaxPsnr;
  }

  return psnr;
}

LIBYUV_API
double CalcFramePsnr(const uint8_t* src_a,
                     int stride_a,
                     const uint8_t* src_b,
                     int stride_b,
                     int width,
                     int height) {
  const uint64_t samples = (uint64_t)width * (uint64_t)height;
  const uint64_t sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b,
                                                  stride_b, width, height);
  return SumSquareErrorToPsnr(sse, samples);
}

LIBYUV_API
double I420Psnr(const uint8_t* src_y_a,
                int stride_y_a,
                const uint8_t* src_u_a,
                int stride_u_a,
                const uint8_t* src_v_a,
                int stride_v_a,
                const uint8_t* src_y_b,
                int stride_y_b,
                const uint8_t* src_u_b,
                int stride_u_b,
                const uint8_t* src_v_b,
                int stride_v_b,
                int width,
                int height) {
  const uint64_t sse_y = ComputeSumSquareErrorPlane(
      src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
  const int width_uv = (width + 1) >> 1;
  const int height_uv = (height + 1) >> 1;
  const uint64_t sse_u = ComputeSumSquareErrorPlane(
      src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv);
  const uint64_t sse_v = ComputeSumSquareErrorPlane(
      src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv);
  const uint64_t samples = (uint64_t)width * (uint64_t)height +
                           2 * ((uint64_t)width_uv * (uint64_t)height_uv);
  const uint64_t sse = sse_y + sse_u + sse_v;
  return SumSquareErrorToPsnr(sse, samples);
}
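// Worked example (editor's aside): with count = 10000 samples and
// sse = 10000, the mean squared error is 1.0, so
//   PSNR = 10 * log10(255^2 / 1.0) ~= 48.13 dB.
// Identical planes (sse == 0) return kMaxPsnr rather than +infinity.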
static const int64_t cc1 = 26634;   // 64^2 * (0.01 * 255)^2
static const int64_t cc2 = 239708;  // 64^2 * (0.03 * 255)^2

static double Ssim8x8_C(const uint8_t* src_a,
                        int stride_a,
                        const uint8_t* src_b,
                        int stride_b) {
  int64_t sum_a = 0;
  int64_t sum_b = 0;
  int64_t sum_sq_a = 0;
  int64_t sum_sq_b = 0;
  int64_t sum_axb = 0;

  int i;
  for (i = 0; i < 8; ++i) {
    int j;
    for (j = 0; j < 8; ++j) {
      sum_a += src_a[j];
      sum_b += src_b[j];
      sum_sq_a += src_a[j] * src_a[j];
      sum_sq_b += src_b[j] * src_b[j];
      sum_axb += src_a[j] * src_b[j];
    }

    src_a += stride_a;
    src_b += stride_b;
  }

  {
    const int64_t count = 64;
    // Scale the constants by the number of pixels.
    const int64_t c1 = (cc1 * count * count) >> 12;
    const int64_t c2 = (cc2 * count * count) >> 12;

    const int64_t sum_a_x_sum_b = sum_a * sum_b;

    const int64_t ssim_n = (2 * sum_a_x_sum_b + c1) *
                           (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);

    const int64_t sum_a_sq = sum_a * sum_a;
    const int64_t sum_b_sq = sum_b * sum_b;

    const int64_t ssim_d =
        (sum_a_sq + sum_b_sq + c1) *
        (count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);

    if (ssim_d == 0) {
      return DBL_MAX;
    }
    return (double)ssim_n / (double)ssim_d;
  }
}

// An 8x8 moving window is used, with each window starting on the 4x4 pixel
// grid. This arrangement lets the windows overlap block boundaries, which
// penalizes blocking artifacts.
LIBYUV_API
double CalcFrameSsim(const uint8_t* src_a,
                     int stride_a,
                     const uint8_t* src_b,
                     int stride_b,
                     int width,
                     int height) {
  int samples = 0;
  double ssim_total = 0;
  double (*Ssim8x8)(const uint8_t* src_a, int stride_a, const uint8_t* src_b,
                    int stride_b) = Ssim8x8_C;

  // Sample points start at each 4x4 location.
  int i;
  for (i = 0; i < height - 8; i += 4) {
    int j;
    for (j = 0; j < width - 8; j += 4) {
      ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
      samples++;
    }

    src_a += stride_a * 4;
    src_b += stride_b * 4;
  }

  ssim_total /= samples;
  return ssim_total;
}

LIBYUV_API
double I420Ssim(const uint8_t* src_y_a,
                int stride_y_a,
                const uint8_t* src_u_a,
                int stride_u_a,
                const uint8_t* src_v_a,
                int stride_v_a,
                const uint8_t* src_y_b,
                int stride_y_b,
                const uint8_t* src_u_b,
                int stride_u_b,
                const uint8_t* src_v_b,
                int stride_v_b,
                int width,
                int height) {
  const double ssim_y =
      CalcFrameSsim(src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
  const int width_uv = (width + 1) >> 1;
  const int height_uv = (height + 1) >> 1;
  const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, src_u_b, stride_u_b,
                                      width_uv, height_uv);
  const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, src_v_b, stride_v_b,
                                      width_uv, height_uv);
  return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
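A quality-gate sketch built on I420Ssim; the helper and the 0.9 threshold are
arbitrary assumptions for illustration. A score near 1.0 indicates frames
that are close to identical under the windowed model above, with the luma
plane weighted 0.8 and each chroma plane 0.1.

  #include "libyuv/compare.h"
  bool CloseEnough(const uint8_t* y_a, int sy_a, const uint8_t* u_a, int su_a,
                   const uint8_t* v_a, int sv_a, const uint8_t* y_b, int sy_b,
                   const uint8_t* u_b, int su_b, const uint8_t* v_b, int sv_b,
                   int w, int h) {
    return libyuv::I420Ssim(y_a, sy_a, u_a, su_a, v_a, sv_a, y_b, sy_b,
                            u_b, su_b, v_b, sv_b, w, h) > 0.9;
  }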
74
TMessagesProj/jni/third_party/libyuv/source/compare_common.cc
vendored
Normal file
@@ -0,0 +1,74 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Hakmem method for hamming distance.
uint32_t HammingDistance_C(const uint8_t* src_a,
                           const uint8_t* src_b,
                           int count) {
  uint32_t diff = 0u;

  int i;
  for (i = 0; i < count - 3; i += 4) {
    uint32_t x = *((const uint32_t*)src_a) ^ *((const uint32_t*)src_b);
    uint32_t u = x - ((x >> 1) & 0x55555555);
    u = ((u >> 2) & 0x33333333) + (u & 0x33333333);
    diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24);
    src_a += 4;
    src_b += 4;
  }

  for (; i < count; ++i) {
    uint32_t x = *src_a ^ *src_b;
    uint32_t u = x - ((x >> 1) & 0x55);
    u = ((u >> 2) & 0x33) + (u & 0x33);
    diff += (u + (u >> 4)) & 0x0f;
    src_a += 1;
    src_b += 1;
  }

  return diff;
}

uint32_t SumSquareError_C(const uint8_t* src_a,
                          const uint8_t* src_b,
                          int count) {
  uint32_t sse = 0u;
  int i;
  for (i = 0; i < count; ++i) {
    int diff = src_a[i] - src_b[i];
    sse += (uint32_t)(diff * diff);
  }
  return sse;
}

// hash seed of 5381 recommended.
// Internal C version of HashDjb2 with int-sized count for efficiency.
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed) {
  uint32_t hash = seed;
  int i;
  for (i = 0; i < count; ++i) {
    hash += (hash << 5) + src[i];
  }
  return hash;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
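To make the Hakmem steps in HammingDistance_C concrete (editor's aside),
trace x = 0xFF000000, which has eight differing bits:

  u = x - ((x >> 1) & 0x55555555)                 // 0xAA000000: 2 per bit pair
  u = ((u >> 2) & 0x33333333) + (u & 0x33333333)  // 0x44000000: 4 per nibble
  (u + (u >> 4)) & 0x0f0f0f0f                     // 0x08000000: 8 per byte
  (... * 0x01010101) >> 24                        // 8

The multiply by 0x01010101 sums all four byte counts into the top byte, and
the final shift extracts it.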
359
TMessagesProj/jni/third_party/libyuv/source/compare_gcc.cc
vendored
Normal file
@@ -0,0 +1,359 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))

#if defined(__x86_64__)
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
                               const uint8_t* src_b,
                               int count) {
  uint64_t diff = 0u;

  asm volatile(
      "xor %3,%3 \n"
      "xor %%r8,%%r8 \n"
      "xor %%r9,%%r9 \n"
      "xor %%r10,%%r10 \n"

      // Process 32 bytes per loop.
      LABELALIGN
      "1: \n"
      "mov (%0),%%rcx \n"
      "mov 0x8(%0),%%rdx \n"
      "xor (%1),%%rcx \n"
      "xor 0x8(%1),%%rdx \n"
      "popcnt %%rcx,%%rcx \n"
      "popcnt %%rdx,%%rdx \n"
      "mov 0x10(%0),%%rsi \n"
      "mov 0x18(%0),%%rdi \n"
      "xor 0x10(%1),%%rsi \n"
      "xor 0x18(%1),%%rdi \n"
      "popcnt %%rsi,%%rsi \n"
      "popcnt %%rdi,%%rdi \n"
      "add $0x20,%0 \n"
      "add $0x20,%1 \n"
      "add %%rcx,%3 \n"
      "add %%rdx,%%r8 \n"
      "add %%rsi,%%r9 \n"
      "add %%rdi,%%r10 \n"
      "sub $0x20,%2 \n"
      "jg 1b \n"

      "add %%r8, %3 \n"
      "add %%r9, %3 \n"
      "add %%r10, %3 \n"
      : "+r"(src_a),  // %0
        "+r"(src_b),  // %1
        "+r"(count),  // %2
        "=r"(diff)    // %3
      :
      : "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");

  return (uint32_t)(diff);
}
#else
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
                               const uint8_t* src_b,
                               int count) {
  uint32_t diff = 0u;

  asm volatile(
      // Process 16 bytes per loop.
      LABELALIGN
      "1: \n"
      "mov (%0),%%ecx \n"
      "mov 0x4(%0),%%edx \n"
      "xor (%1),%%ecx \n"
      "xor 0x4(%1),%%edx \n"
      "popcnt %%ecx,%%ecx \n"
      "add %%ecx,%3 \n"
      "popcnt %%edx,%%edx \n"
      "add %%edx,%3 \n"
      "mov 0x8(%0),%%ecx \n"
      "mov 0xc(%0),%%edx \n"
      "xor 0x8(%1),%%ecx \n"
      "xor 0xc(%1),%%edx \n"
      "popcnt %%ecx,%%ecx \n"
      "add %%ecx,%3 \n"
      "popcnt %%edx,%%edx \n"
      "add %%edx,%3 \n"
      "add $0x10,%0 \n"
      "add $0x10,%1 \n"
      "sub $0x10,%2 \n"
      "jg 1b \n"
      : "+r"(src_a),  // %0
        "+r"(src_b),  // %1
        "+r"(count),  // %2
        "+r"(diff)    // %3
      :
      : "memory", "cc", "ecx", "edx");

  return diff;
}
#endif

static const vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15,
                                 15, 15, 15, 15, 15, 15, 15, 15};
static const vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};

uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
                               const uint8_t* src_b,
                               int count) {
  uint32_t diff = 0u;

  asm volatile(
      "movdqa %4,%%xmm2 \n"
      "movdqa %5,%%xmm3 \n"
      "pxor %%xmm0,%%xmm0 \n"
      "pxor %%xmm1,%%xmm1 \n"
      "sub %0,%1 \n"

      LABELALIGN
      "1: \n"
      "movdqa (%0),%%xmm4 \n"
      "movdqa 0x10(%0), %%xmm5 \n"
      "pxor (%0,%1), %%xmm4 \n"
      "movdqa %%xmm4,%%xmm6 \n"
      "pand %%xmm2,%%xmm6 \n"
      "psrlw $0x4,%%xmm4 \n"
      "movdqa %%xmm3,%%xmm7 \n"
      "pshufb %%xmm6,%%xmm7 \n"
      "pand %%xmm2,%%xmm4 \n"
      "movdqa %%xmm3,%%xmm6 \n"
      "pshufb %%xmm4,%%xmm6 \n"
      "paddb %%xmm7,%%xmm6 \n"
      "pxor 0x10(%0,%1),%%xmm5 \n"
      "add $0x20,%0 \n"
      "movdqa %%xmm5,%%xmm4 \n"
      "pand %%xmm2,%%xmm5 \n"
      "psrlw $0x4,%%xmm4 \n"
      "movdqa %%xmm3,%%xmm7 \n"
      "pshufb %%xmm5,%%xmm7 \n"
      "pand %%xmm2,%%xmm4 \n"
      "movdqa %%xmm3,%%xmm5 \n"
      "pshufb %%xmm4,%%xmm5 \n"
      "paddb %%xmm7,%%xmm5 \n"
      "paddb %%xmm5,%%xmm6 \n"
      "psadbw %%xmm1,%%xmm6 \n"
      "paddd %%xmm6,%%xmm0 \n"
      "sub $0x20,%2 \n"
      "jg 1b \n"

      "pshufd $0xaa,%%xmm0,%%xmm1 \n"
      "paddd %%xmm1,%%xmm0 \n"
      "movd %%xmm0, %3 \n"
      : "+r"(src_a),       // %0
        "+r"(src_b),       // %1
        "+r"(count),       // %2
        "=r"(diff)         // %3
      : "m"(kNibbleMask),  // %4
        "m"(kBitCount)     // %5
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7");

  return diff;
}

#ifdef HAS_HAMMINGDISTANCE_AVX2
uint32_t HammingDistance_AVX2(const uint8_t* src_a,
                              const uint8_t* src_b,
                              int count) {
  uint32_t diff = 0u;

  asm volatile(
      "vbroadcastf128 %4,%%ymm2 \n"
      "vbroadcastf128 %5,%%ymm3 \n"
      "vpxor %%ymm0,%%ymm0,%%ymm0 \n"
      "vpxor %%ymm1,%%ymm1,%%ymm1 \n"
      "sub %0,%1 \n"

      LABELALIGN
      "1: \n"
      "vmovdqa (%0),%%ymm4 \n"
      "vmovdqa 0x20(%0), %%ymm5 \n"
      "vpxor (%0,%1), %%ymm4, %%ymm4 \n"
      "vpand %%ymm2,%%ymm4,%%ymm6 \n"
      "vpsrlw $0x4,%%ymm4,%%ymm4 \n"
      "vpshufb %%ymm6,%%ymm3,%%ymm6 \n"
      "vpand %%ymm2,%%ymm4,%%ymm4 \n"
      "vpshufb %%ymm4,%%ymm3,%%ymm4 \n"
      "vpaddb %%ymm4,%%ymm6,%%ymm6 \n"
      "vpxor 0x20(%0,%1),%%ymm5,%%ymm4 \n"
      "add $0x40,%0 \n"
      "vpand %%ymm2,%%ymm4,%%ymm5 \n"
      "vpsrlw $0x4,%%ymm4,%%ymm4 \n"
      "vpshufb %%ymm5,%%ymm3,%%ymm5 \n"
      "vpand %%ymm2,%%ymm4,%%ymm4 \n"
      "vpshufb %%ymm4,%%ymm3,%%ymm4 \n"
      "vpaddb %%ymm5,%%ymm4,%%ymm4 \n"
      "vpaddb %%ymm6,%%ymm4,%%ymm4 \n"
      "vpsadbw %%ymm1,%%ymm4,%%ymm4 \n"
      "vpaddd %%ymm0,%%ymm4,%%ymm0 \n"
      "sub $0x40,%2 \n"
      "jg 1b \n"

      "vpermq $0xb1,%%ymm0,%%ymm1 \n"
      "vpaddd %%ymm1,%%ymm0,%%ymm0 \n"
      "vpermq $0xaa,%%ymm0,%%ymm1 \n"
      "vpaddd %%ymm1,%%ymm0,%%ymm0 \n"
      "vmovd %%xmm0, %3 \n"
      "vzeroupper \n"
      : "+r"(src_a),       // %0
        "+r"(src_b),       // %1
        "+r"(count),       // %2
        "=r"(diff)         // %3
      : "m"(kNibbleMask),  // %4
        "m"(kBitCount)     // %5
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
        "xmm6");

  return diff;
}
#endif  // HAS_HAMMINGDISTANCE_AVX2

uint32_t SumSquareError_SSE2(const uint8_t* src_a,
                             const uint8_t* src_b,
                             int count) {
  uint32_t sse;
  asm volatile(
      "pxor %%xmm0,%%xmm0 \n"
      "pxor %%xmm5,%%xmm5 \n"

      LABELALIGN
      "1: \n"
      "movdqu (%0),%%xmm1 \n"
      "lea 0x10(%0),%0 \n"
      "movdqu (%1),%%xmm2 \n"
      "lea 0x10(%1),%1 \n"
      "movdqa %%xmm1,%%xmm3 \n"
      "psubusb %%xmm2,%%xmm1 \n"
      "psubusb %%xmm3,%%xmm2 \n"
      "por %%xmm2,%%xmm1 \n"
      "movdqa %%xmm1,%%xmm2 \n"
      "punpcklbw %%xmm5,%%xmm1 \n"
      "punpckhbw %%xmm5,%%xmm2 \n"
      "pmaddwd %%xmm1,%%xmm1 \n"
      "pmaddwd %%xmm2,%%xmm2 \n"
      "paddd %%xmm1,%%xmm0 \n"
      "paddd %%xmm2,%%xmm0 \n"
      "sub $0x10,%2 \n"
      "jg 1b \n"

      "pshufd $0xee,%%xmm0,%%xmm1 \n"
      "paddd %%xmm1,%%xmm0 \n"
      "pshufd $0x1,%%xmm0,%%xmm1 \n"
      "paddd %%xmm1,%%xmm0 \n"
      "movd %%xmm0,%3 \n"

      : "+r"(src_a),  // %0
        "+r"(src_b),  // %1
        "+r"(count),  // %2
        "=g"(sse)     // %3
      :
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
  return sse;
}

static const uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0};  // 33 ^ 16
static const uvec32 kHashMul0 = {
    0x0c3525e1,  // 33 ^ 15
    0xa3476dc1,  // 33 ^ 14
    0x3b4039a1,  // 33 ^ 13
    0x4f5f0981,  // 33 ^ 12
};
static const uvec32 kHashMul1 = {
    0x30f35d61,  // 33 ^ 11
    0x855cb541,  // 33 ^ 10
    0x040a9121,  // 33 ^ 9
    0x747c7101,  // 33 ^ 8
};
static const uvec32 kHashMul2 = {
    0xec41d4e1,  // 33 ^ 7
    0x4cfa3cc1,  // 33 ^ 6
    0x025528a1,  // 33 ^ 5
    0x00121881,  // 33 ^ 4
};
static const uvec32 kHashMul3 = {
    0x00008c61,  // 33 ^ 3
    0x00000441,  // 33 ^ 2
    0x00000021,  // 33 ^ 1
    0x00000001,  // 33 ^ 0
};

uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
  uint32_t hash;
  asm volatile(
      "movd %2,%%xmm0 \n"
      "pxor %%xmm7,%%xmm7 \n"
      "movdqa %4,%%xmm6 \n"

      LABELALIGN
      "1: \n"
      "movdqu (%0),%%xmm1 \n"
      "lea 0x10(%0),%0 \n"
      "pmulld %%xmm6,%%xmm0 \n"
      "movdqa %5,%%xmm5 \n"
      "movdqa %%xmm1,%%xmm2 \n"
      "punpcklbw %%xmm7,%%xmm2 \n"
      "movdqa %%xmm2,%%xmm3 \n"
      "punpcklwd %%xmm7,%%xmm3 \n"
      "pmulld %%xmm5,%%xmm3 \n"
      "movdqa %6,%%xmm5 \n"
      "movdqa %%xmm2,%%xmm4 \n"
      "punpckhwd %%xmm7,%%xmm4 \n"
      "pmulld %%xmm5,%%xmm4 \n"
      "movdqa %7,%%xmm5 \n"
      "punpckhbw %%xmm7,%%xmm1 \n"
      "movdqa %%xmm1,%%xmm2 \n"
      "punpcklwd %%xmm7,%%xmm2 \n"
      "pmulld %%xmm5,%%xmm2 \n"
      "movdqa %8,%%xmm5 \n"
      "punpckhwd %%xmm7,%%xmm1 \n"
      "pmulld %%xmm5,%%xmm1 \n"
      "paddd %%xmm4,%%xmm3 \n"
      "paddd %%xmm2,%%xmm1 \n"
      "paddd %%xmm3,%%xmm1 \n"
      "pshufd $0xe,%%xmm1,%%xmm2 \n"
      "paddd %%xmm2,%%xmm1 \n"
      "pshufd $0x1,%%xmm1,%%xmm2 \n"
      "paddd %%xmm2,%%xmm1 \n"
      "paddd %%xmm1,%%xmm0 \n"
      "sub $0x10,%1 \n"
      "jg 1b \n"
      "movd %%xmm0,%3 \n"
      : "+r"(src),        // %0
        "+r"(count),      // %1
        "+rm"(seed),      // %2
        "=g"(hash)        // %3
      : "m"(kHash16x33),  // %4
        "m"(kHashMul0),   // %5
        "m"(kHashMul1),   // %6
        "m"(kHashMul2),   // %7
        "m"(kHashMul3)    // %8
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7");
  return hash;
}
#endif  // !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
123
TMessagesProj/jni/third_party/libyuv/source/compare_mmi.cc
vendored
Normal file
@@ -0,0 +1,123 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for Mips MMI.
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)

// Hakmem method for hamming distance.
uint32_t HammingDistance_MMI(const uint8_t* src_a,
                             const uint8_t* src_b,
                             int count) {
  uint32_t diff = 0u;

  uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0;
  uint64_t c1 = 0x5555555555555555;
  uint64_t c2 = 0x3333333333333333;
  uint64_t c3 = 0x0f0f0f0f0f0f0f0f;
  uint32_t c4 = 0x01010101;
  uint64_t s1 = 1, s2 = 2, s3 = 4;
  __asm__ volatile(
      "1: \n\t"
      "ldc1 %[ta], 0(%[src_a]) \n\t"
      "ldc1 %[tb], 0(%[src_b]) \n\t"
      "xor %[temp], %[ta], %[tb] \n\t"
      "psrlw %[temp1], %[temp], %[s1] \n\t"     // temp1 = x >> 1
      "and %[temp1], %[temp1], %[c1] \n\t"      // temp1 &= c1
      "psubw %[temp1], %[temp], %[temp1] \n\t"  // x - temp1
      "and %[temp], %[temp1], %[c2] \n\t"       // t = (u & c2)
      "psrlw %[temp1], %[temp1], %[s2] \n\t"    // u >> 2
      "and %[temp1], %[temp1], %[c2] \n\t"      // (u >> 2) & c2
      "paddw %[temp1], %[temp1], %[temp] \n\t"  // t1 = t1 + t
      "psrlw %[temp], %[temp1], %[s3] \n\t"     // u >> 4
      "paddw %[temp1], %[temp1], %[temp] \n\t"  // u + (u >> 4)
      "and %[temp1], %[temp1], %[c3] \n\t"      // & c3
      "dmfc1 $t0, %[temp1] \n\t"
      "dsrl32 $t0, $t0, 0 \n\t"
      "mul $t0, $t0, %[c4] \n\t"
      "dsrl $t0, $t0, 24 \n\t"
      "dadd %[diff], %[diff], $t0 \n\t"
      "dmfc1 $t0, %[temp1] \n\t"
      "mul $t0, $t0, %[c4] \n\t"
      "dsrl $t0, $t0, 24 \n\t"
      "dadd %[diff], %[diff], $t0 \n\t"
      "daddiu %[src_a], %[src_a], 8 \n\t"
      "daddiu %[src_b], %[src_b], 8 \n\t"
      "addiu %[count], %[count], -8 \n\t"
      "bgtz %[count], 1b \n\t"
      "nop \n\t"
      : [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b),
        [count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp),
        [temp1] "+f"(temp1)
      : [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1),
        [s2] "f"(s2), [s3] "f"(s3)
      : "memory");
  return diff;
}

uint32_t SumSquareError_MMI(const uint8_t* src_a,
                            const uint8_t* src_b,
                            int count) {
  uint32_t sse = 0u;
  uint32_t sse_hi = 0u, sse_lo = 0u;

  uint64_t src1, src2;
  uint64_t diff, diff_hi, diff_lo;
  uint64_t sse_sum, sse_tmp;

  const uint64_t mask = 0x0ULL;

  __asm__ volatile(
      "xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t"

      "1: \n\t"
      "ldc1 %[src1], 0x00(%[src_a]) \n\t"
      "ldc1 %[src2], 0x00(%[src_b]) \n\t"
      "pasubub %[diff], %[src1], %[src2] \n\t"
      "punpcklbh %[diff_lo], %[diff], %[mask] \n\t"
      "punpckhbh %[diff_hi], %[diff], %[mask] \n\t"
      "pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t"
      "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
      "pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t"
      "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"

      "daddiu %[src_a], %[src_a], 0x08 \n\t"
      "daddiu %[src_b], %[src_b], 0x08 \n\t"
      "daddiu %[count], %[count], -0x08 \n\t"
      "bnez %[count], 1b \n\t"

      "mfc1 %[sse_lo], %[sse_sum] \n\t"
      "mfhc1 %[sse_hi], %[sse_sum] \n\t"
      "daddu %[sse], %[sse_hi], %[sse_lo] \n\t"
      : [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1),
        [src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi),
        [sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp),
        [sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo)
      : [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count),
        [mask] "f"(mask)
      : "memory");

  return sse;
}

#endif  // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
97
TMessagesProj/jni/third_party/libyuv/source/compare_msa.cc
vendored
Normal file
@@ -0,0 +1,97 @@
/*
 *  Copyright 2017 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

// This module is for GCC MSA.
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include "libyuv/macros_msa.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

uint32_t HammingDistance_MSA(const uint8_t* src_a,
                             const uint8_t* src_b,
                             int count) {
  uint32_t diff = 0u;
  int i;
  v16u8 src0, src1, src2, src3;
  v2i64 vec0 = {0}, vec1 = {0};

  for (i = 0; i < count; i += 32) {
    src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
    src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
    src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
    src0 ^= src2;
    src1 ^= src3;
    vec0 += __msa_pcnt_d((v2i64)src0);
    vec1 += __msa_pcnt_d((v2i64)src1);
    src_a += 32;
    src_b += 32;
  }

  vec0 += vec1;
  diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0);
  diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2);
  return diff;
}

uint32_t SumSquareError_MSA(const uint8_t* src_a,
                            const uint8_t* src_b,
                            int count) {
  uint32_t sse = 0u;
  int i;
  v16u8 src0, src1, src2, src3;
  v8i16 vec0, vec1, vec2, vec3;
  v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0};
  v2i64 tmp0;

  for (i = 0; i < count; i += 32) {
    src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
    src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
    src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
    vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
    vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
    vec2 = (v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
    vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
    vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0);
    vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1);
    vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2);
    vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3);
    reg0 = __msa_dpadd_s_w(reg0, vec0, vec0);
    reg1 = __msa_dpadd_s_w(reg1, vec1, vec1);
    reg2 = __msa_dpadd_s_w(reg2, vec2, vec2);
    reg3 = __msa_dpadd_s_w(reg3, vec3, vec3);
    src_a += 32;
    src_b += 32;
  }

  reg0 += reg1;
  reg2 += reg3;
  reg0 += reg2;
  tmp0 = __msa_hadd_s_d(reg0, reg0);
  sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0);
  sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2);
  return sse;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
96
TMessagesProj/jni/third_party/libyuv/source/compare_neon.cc
vendored
Normal file
@@ -0,0 +1,96 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
    !defined(__aarch64__)

// 256 bits at a time.
// Uses a short accumulator, which restricts count to 131 KB.
uint32_t HammingDistance_NEON(const uint8_t* src_a,
                              const uint8_t* src_b,
                              int count) {
  uint32_t diff;

  asm volatile(
      "vmov.u16 q4, #0 \n"  // accumulator

      "1: \n"
      "vld1.8 {q0, q1}, [%0]! \n"
      "vld1.8 {q2, q3}, [%1]! \n"
      "veor.32 q0, q0, q2 \n"
      "veor.32 q1, q1, q3 \n"
      "vcnt.i8 q0, q0 \n"
      "vcnt.i8 q1, q1 \n"
      "subs %2, %2, #32 \n"
      "vadd.u8 q0, q0, q1 \n"  // 16 byte counts
      "vpadal.u8 q4, q0 \n"    // 8 shorts
      "bgt 1b \n"

      "vpaddl.u16 q0, q4 \n"  // 4 ints
      "vpadd.u32 d0, d0, d1 \n"
      "vpadd.u32 d0, d0, d0 \n"
      "vmov.32 %3, d0[0] \n"

      : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff)
      :
      : "cc", "q0", "q1", "q2", "q3", "q4");
  return diff;
}

uint32_t SumSquareError_NEON(const uint8_t* src_a,
                             const uint8_t* src_b,
                             int count) {
  uint32_t sse;
  asm volatile(
      "vmov.u8 q8, #0 \n"
      "vmov.u8 q10, #0 \n"
      "vmov.u8 q9, #0 \n"
      "vmov.u8 q11, #0 \n"

      "1: \n"
      "vld1.8 {q0}, [%0]! \n"
      "vld1.8 {q1}, [%1]! \n"
      "subs %2, %2, #16 \n"
      "vsubl.u8 q2, d0, d2 \n"
      "vsubl.u8 q3, d1, d3 \n"
      "vmlal.s16 q8, d4, d4 \n"
      "vmlal.s16 q9, d6, d6 \n"
      "vmlal.s16 q10, d5, d5 \n"
      "vmlal.s16 q11, d7, d7 \n"
      "bgt 1b \n"

      "vadd.u32 q8, q8, q9 \n"
      "vadd.u32 q10, q10, q11 \n"
      "vadd.u32 q11, q8, q10 \n"
      "vpaddl.u32 q1, q11 \n"
      "vadd.u64 d0, d2, d3 \n"
      "vmov.32 %3, d0[0] \n"
      : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse)
      :
      : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
  return sse;
}

#endif  // defined(__ARM_NEON__) && !defined(__aarch64__)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
94
TMessagesProj/jni/third_party/libyuv/source/compare_neon64.cc
vendored
Normal file
@@ -0,0 +1,94 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

// 256 bits at a time.
// Uses a short accumulator, which restricts count to 131 KB.
uint32_t HammingDistance_NEON(const uint8_t* src_a,
                              const uint8_t* src_b,
                              int count) {
  uint32_t diff;
  asm volatile(
      "movi v4.8h, #0 \n"

      "1: \n"
      "ld1 {v0.16b, v1.16b}, [%0], #32 \n"
      "ld1 {v2.16b, v3.16b}, [%1], #32 \n"
      "eor v0.16b, v0.16b, v2.16b \n"
      "prfm pldl1keep, [%0, 448] \n"  // prefetch 7 lines ahead
      "eor v1.16b, v1.16b, v3.16b \n"
      "cnt v0.16b, v0.16b \n"
      "prfm pldl1keep, [%1, 448] \n"
      "cnt v1.16b, v1.16b \n"
      "subs %w2, %w2, #32 \n"
      "add v0.16b, v0.16b, v1.16b \n"
      "uadalp v4.8h, v0.16b \n"
      "b.gt 1b \n"

      "uaddlv s4, v4.8h \n"
      "fmov %w3, s4 \n"
      : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff)
      :
      : "cc", "v0", "v1", "v2", "v3", "v4");
  return diff;
}

uint32_t SumSquareError_NEON(const uint8_t* src_a,
                             const uint8_t* src_b,
                             int count) {
  uint32_t sse;
  asm volatile(
      "eor v16.16b, v16.16b, v16.16b \n"
      "eor v18.16b, v18.16b, v18.16b \n"
      "eor v17.16b, v17.16b, v17.16b \n"
      "eor v19.16b, v19.16b, v19.16b \n"

      "1: \n"
      "ld1 {v0.16b}, [%0], #16 \n"
      "ld1 {v1.16b}, [%1], #16 \n"
      "subs %w2, %w2, #16 \n"
      "usubl v2.8h, v0.8b, v1.8b \n"
      "usubl2 v3.8h, v0.16b, v1.16b \n"
      "prfm pldl1keep, [%0, 448] \n"  // prefetch 7 lines ahead
      "smlal v16.4s, v2.4h, v2.4h \n"
      "smlal v17.4s, v3.4h, v3.4h \n"
      "prfm pldl1keep, [%1, 448] \n"
      "smlal2 v18.4s, v2.8h, v2.8h \n"
      "smlal2 v19.4s, v3.8h, v3.8h \n"
      "b.gt 1b \n"

      "add v16.4s, v16.4s, v17.4s \n"
      "add v18.4s, v18.4s, v19.4s \n"
      "add v19.4s, v16.4s, v18.4s \n"
      "addv s0, v19.4s \n"
      "fmov %w3, s0 \n"
      : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse)
      :
      : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
  return sse;
}

#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
241
TMessagesProj/jni/third_party/libyuv/source/compare_win.cc
vendored
Normal file
241
TMessagesProj/jni/third_party/libyuv/source/compare_win.cc
vendored
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#include "libyuv/compare_row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h> // For __popcnt
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for 32 bit Visual C x86
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
|
||||
!defined(__clang__) && defined(_M_IX86)
|
||||
|
||||
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t diff = 0u;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < count - 3; i += 4) {
|
||||
uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT
|
||||
src_a += 4;
|
||||
src_b += 4;
|
||||
diff += __popcnt(x);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
__declspec(naked) uint32_t
|
||||
SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
pxor xmm0, xmm0
|
||||
pxor xmm5, xmm5
|
||||
|
||||
wloop:
|
||||
movdqu xmm1, [eax]
|
||||
lea eax, [eax + 16]
|
||||
movdqu xmm2, [edx]
|
||||
lea edx, [edx + 16]
|
||||
movdqa xmm3, xmm1 // abs trick
|
||||
psubusb xmm1, xmm2
|
||||
psubusb xmm2, xmm3
|
||||
por xmm1, xmm2
|
||||
movdqa xmm2, xmm1
|
||||
punpcklbw xmm1, xmm5
|
||||
punpckhbw xmm2, xmm5
|
||||
pmaddwd xmm1, xmm1
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm0, xmm1
|
||||
paddd xmm0, xmm2
|
||||
sub ecx, 16
|
||||
jg wloop
|
||||
|
||||
pshufd xmm1, xmm0, 0xee
|
||||
paddd xmm0, xmm1
|
||||
pshufd xmm1, xmm0, 0x01
|
||||
paddd xmm0, xmm1
|
||||
movd eax, xmm0
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAS_SUMSQUAREERROR_AVX2
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable : 4752)
__declspec(naked) uint32_t
    SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) {
  __asm {
    mov        eax, [esp + 4]    // src_a
    mov        edx, [esp + 8]    // src_b
    mov        ecx, [esp + 12]   // count
    vpxor      ymm0, ymm0, ymm0  // sum
    vpxor      ymm5, ymm5, ymm5  // constant 0 for unpck
    sub        edx, eax

  wloop:
    vmovdqu    ymm1, [eax]
    vmovdqu    ymm2, [eax + edx]
    lea        eax, [eax + 32]
    vpsubusb   ymm3, ymm1, ymm2  // abs difference trick
    vpsubusb   ymm2, ymm2, ymm1
    vpor       ymm1, ymm2, ymm3
    vpunpcklbw ymm2, ymm1, ymm5  // u16. mutates order.
    vpunpckhbw ymm1, ymm1, ymm5
    vpmaddwd   ymm2, ymm2, ymm2  // square + hadd to u32.
    vpmaddwd   ymm1, ymm1, ymm1
    vpaddd     ymm0, ymm0, ymm1
    vpaddd     ymm0, ymm0, ymm2
    sub        ecx, 32
    jg         wloop

    vpshufd    ymm1, ymm0, 0xee  // 3, 2 + 1, 0 both lanes.
    vpaddd     ymm0, ymm0, ymm1
    vpshufd    ymm1, ymm0, 0x01  // 1 + 0 both lanes.
    vpaddd     ymm0, ymm0, ymm1
    vpermq     ymm1, ymm0, 0x02  // high + low lane.
    vpaddd     ymm0, ymm0, ymm1
    vmovd      eax, xmm0
    vzeroupper
    ret
  }
}
#endif  // HAS_SUMSQUAREERROR_AVX2

uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0};  // 33 ^ 16
uvec32 kHashMul0 = {
    0x0c3525e1,  // 33 ^ 15
    0xa3476dc1,  // 33 ^ 14
    0x3b4039a1,  // 33 ^ 13
    0x4f5f0981,  // 33 ^ 12
};
uvec32 kHashMul1 = {
    0x30f35d61,  // 33 ^ 11
    0x855cb541,  // 33 ^ 10
    0x040a9121,  // 33 ^ 9
    0x747c7101,  // 33 ^ 8
};
uvec32 kHashMul2 = {
    0xec41d4e1,  // 33 ^ 7
    0x4cfa3cc1,  // 33 ^ 6
    0x025528a1,  // 33 ^ 5
    0x00121881,  // 33 ^ 4
};
uvec32 kHashMul3 = {
    0x00008c61,  // 33 ^ 3
    0x00000441,  // 33 ^ 2
    0x00000021,  // 33 ^ 1
    0x00000001,  // 33 ^ 0
};

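// For reference, the scalar hash these constants vectorize is djb2,
// hash = hash * 33 + byte, unrolled 16 bytes at a time. A sketch of the
// scalar form (illustrative; the portable fallback lives elsewhere in the
// library):
//
//   uint32_t hash = seed;
//   for (int i = 0; i < count; ++i) {
//     hash = hash * 33 + src[i];
//   }
//
// kHash16x33 advances the running hash by 16 positions in one multiply, and
// kHashMul0..kHashMul3 weight the 16 new bytes by descending powers of 33.
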
__declspec(naked) uint32_t
    HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
  __asm {
    mov        eax, [esp + 4]    // src
    mov        ecx, [esp + 8]    // count
    movd       xmm0, [esp + 12]  // seed

    pxor       xmm7, xmm7        // constant 0 for unpck
    movdqa     xmm6, xmmword ptr kHash16x33

  wloop:
    movdqu     xmm1, [eax]       // src[0-15]
    lea        eax, [eax + 16]
    pmulld     xmm0, xmm6        // hash *= 33 ^ 16
    movdqa     xmm5, xmmword ptr kHashMul0
    movdqa     xmm2, xmm1
    punpcklbw  xmm2, xmm7        // src[0-7]
    movdqa     xmm3, xmm2
    punpcklwd  xmm3, xmm7        // src[0-3]
    pmulld     xmm3, xmm5
    movdqa     xmm5, xmmword ptr kHashMul1
    movdqa     xmm4, xmm2
    punpckhwd  xmm4, xmm7        // src[4-7]
    pmulld     xmm4, xmm5
    movdqa     xmm5, xmmword ptr kHashMul2
    punpckhbw  xmm1, xmm7        // src[8-15]
    movdqa     xmm2, xmm1
    punpcklwd  xmm2, xmm7        // src[8-11]
    pmulld     xmm2, xmm5
    movdqa     xmm5, xmmword ptr kHashMul3
    punpckhwd  xmm1, xmm7        // src[12-15]
    pmulld     xmm1, xmm5
    paddd      xmm3, xmm4        // add 16 results
    paddd      xmm1, xmm2
    paddd      xmm1, xmm3

    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
    paddd      xmm1, xmm2
    pshufd     xmm2, xmm1, 0x01
    paddd      xmm1, xmm2
    paddd      xmm0, xmm1
    sub        ecx, 16
    jg         wloop

    movd       eax, xmm0  // return hash
    ret
  }
}

// Visual C 2012 required for AVX2.
#ifdef HAS_HASHDJB2_AVX2
__declspec(naked) uint32_t
    HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) {
  __asm {
    mov        eax, [esp + 4]    // src
    mov        ecx, [esp + 8]    // count
    vmovd      xmm0, [esp + 12]  // seed

  wloop:
    vpmovzxbd  xmm3, [eax]       // src[0-3]
    vpmulld    xmm0, xmm0, xmmword ptr kHash16x33  // hash *= 33 ^ 16
    vpmovzxbd  xmm4, [eax + 4]   // src[4-7]
    vpmulld    xmm3, xmm3, xmmword ptr kHashMul0
    vpmovzxbd  xmm2, [eax + 8]   // src[8-11]
    vpmulld    xmm4, xmm4, xmmword ptr kHashMul1
    vpmovzxbd  xmm1, [eax + 12]  // src[12-15]
    vpmulld    xmm2, xmm2, xmmword ptr kHashMul2
    lea        eax, [eax + 16]
    vpmulld    xmm1, xmm1, xmmword ptr kHashMul3
    vpaddd     xmm3, xmm3, xmm4  // add 16 results
    vpaddd     xmm1, xmm1, xmm2
    vpaddd     xmm1, xmm1, xmm3
    vpshufd    xmm2, xmm1, 0x0e  // upper 2 dwords
    vpaddd     xmm1, xmm1, xmm2
    vpshufd    xmm2, xmm1, 0x01
    vpaddd     xmm1, xmm1, xmm2
    vpaddd     xmm0, xmm0, xmm1
    sub        ecx, 16
    jg         wloop

    vmovd      eax, xmm0  // return hash
    vzeroupper
    ret
  }
}
#endif  // HAS_HASHDJB2_AVX2

#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
3620
TMessagesProj/jni/third_party/libyuv/source/convert.cc
vendored
Normal file
File diff suppressed because it is too large
7988
TMessagesProj/jni/third_party/libyuv/source/convert_argb.cc
vendored
Normal file
File diff suppressed because it is too large
858
TMessagesProj/jni/third_party/libyuv/source/convert_from.cc
vendored
Normal file
@ -0,0 +1,858 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/convert_from.h"

#include "libyuv/basic_types.h"
#include "libyuv/convert.h"  // For I420Copy
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/row.h"
#include "libyuv/scale.h"  // For ScalePlane()
#include "libyuv/video_common.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

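// SUBSAMPLE halves a dimension with round-up while preserving the sign used
// for mirrored (negative) sizes. Two illustrative evaluations:
//
//   SUBSAMPLE(5, 1, 1)   // (5 + 1) >> 1 == 3
//   SUBSAMPLE(-5, 1, 1)  // -((5 + 1) >> 1) == -3
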
// I420 To any I4xx YUV format with mirroring.
// TODO(fbarchard): Consider kFilterNone for Y, or CopyPlane

static int I420ToI4xx(const uint8_t* src_y,
                      int src_stride_y,
                      const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_y,
                      int dst_stride_y,
                      uint8_t* dst_u,
                      int dst_stride_u,
                      uint8_t* dst_v,
                      int dst_stride_v,
                      int src_y_width,
                      int src_y_height,
                      int dst_uv_width,
                      int dst_uv_height) {
  const int dst_y_width = Abs(src_y_width);
  const int dst_y_height = Abs(src_y_height);
  const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1);
  const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1);
  if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 ||
      dst_uv_height <= 0) {
    return -1;
  }
  if (dst_y) {
    ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
               dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
  }
  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
             dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
  ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
             dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
  return 0;
}

// Convert 8 bit YUV to 10 bit.
LIBYUV_API
int I420ToI010(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  // Convert Y plane.
  Convert8To16Plane(src_y, src_stride_y, dst_y, dst_stride_y, 1024, width,
                    height);
  // Convert UV planes.
  Convert8To16Plane(src_u, src_stride_u, dst_u, dst_stride_u, 1024, halfwidth,
                    halfheight);
  Convert8To16Plane(src_v, src_stride_v, dst_v, dst_stride_v, 1024, halfwidth,
                    halfheight);
  return 0;
}

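// The scale argument selects the output depth: 1024 here yields 10-bit
// (255 maps to 1023), and 4096 in I420ToI012 below yields 12-bit. A hedged
// model of the per-pixel mapping, assuming the row kernel replicates low
// bits the way full-range depth conversions normally do:
//
//   dst = (uint16_t)((src * (scale * 0x0101)) >> 16);  // scale 1024: 255 -> 1023
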
// Convert 8 bit YUV to 12 bit.
LIBYUV_API
int I420ToI012(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  // Convert Y plane.
  Convert8To16Plane(src_y, src_stride_y, dst_y, dst_stride_y, 4096, width,
                    height);
  // Convert UV planes.
  Convert8To16Plane(src_u, src_stride_u, dst_u, dst_stride_u, 4096, halfwidth,
                    halfheight);
  Convert8To16Plane(src_v, src_stride_v, dst_v, dst_stride_v, 4096, halfwidth,
                    halfheight);
  return 0;
}

// 420 chroma is 1/2 width, 1/2 height
// 422 chroma is 1/2 width, 1x height
LIBYUV_API
int I420ToI422(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  const int dst_uv_width = (Abs(width) + 1) >> 1;
  const int dst_uv_height = Abs(height);
  return I420ToI4xx(src_y, src_stride_y, src_u, src_stride_u, src_v,
                    src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
                    dst_v, dst_stride_v, width, height, dst_uv_width,
                    dst_uv_height);
}

// 420 chroma is 1/2 width, 1/2 height
// 444 chroma is 1x width, 1x height
LIBYUV_API
int I420ToI444(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  const int dst_uv_width = Abs(width);
  const int dst_uv_height = Abs(height);
  return I420ToI4xx(src_y, src_stride_y, src_u, src_stride_u, src_v,
                    src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
                    dst_v, dst_stride_v, width, height, dst_uv_width,
                    dst_uv_height);
}

// 420 chroma to 444 chroma, 10/12 bit version
LIBYUV_API
int I010ToI410(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  if (width == 0 || height == 0) {
    return -1;
  }

  if (dst_y) {
    ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
                  Abs(width), Abs(height), kFilterBilinear);
  }
  ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1),
                SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width),
                Abs(height), kFilterBilinear);
  ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1),
                SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width),
                Abs(height), kFilterBilinear);
  return 0;
}

// 422 chroma to 444 chroma, 10/12 bit version
LIBYUV_API
int I210ToI410(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  if (width == 0 || height == 0) {
    return -1;
  }

  if (dst_y) {
    ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
                  Abs(width), Abs(height), kFilterBilinear);
  }
  ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
                dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
  ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
                dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
  return 0;
}

// 422 chroma is 1/2 width, 1x height
// 444 chroma is 1x width, 1x height
LIBYUV_API
int I422ToI444(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  if (width == 0 || height == 0) {
    return -1;
  }

  if (dst_y) {
    ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
               Abs(width), Abs(height), kFilterBilinear);
  }
  ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
             dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
  ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
             dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
  return 0;
}

// Copy to I400. Source can be I420,422,444,400,NV12,NV21
LIBYUV_API
int I400Copy(const uint8_t* src_y,
             int src_stride_y,
             uint8_t* dst_y,
             int dst_stride_y,
             int width,
             int height) {
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}

LIBYUV_API
int I422ToYUY2(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_yuy2,
               int dst_stride_yuy2,
               int width,
               int height) {
  int y;
  void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
                        const uint8_t* src_v, uint8_t* dst_yuy2, int width) =
      I422ToYUY2Row_C;
  if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
    dst_stride_yuy2 = -dst_stride_yuy2;
  }
  // Coalesce rows.
  if (src_stride_y == width && src_stride_u * 2 == width &&
      src_stride_v * 2 == width && dst_stride_yuy2 == width * 2) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
  }
#if defined(HAS_I422TOYUY2ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      I422ToYUY2Row = I422ToYUY2Row_SSE2;
    }
  }
#endif
#if defined(HAS_I422TOYUY2ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToYUY2Row = I422ToYUY2Row_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      I422ToYUY2Row = I422ToYUY2Row_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOYUY2ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToYUY2Row = I422ToYUY2Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_yuy2 += dst_stride_yuy2;
  }
  return 0;
}

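// YUY2 packs a 4:2:2 pixel pair into four bytes in the order Y0 U Y1 V,
// which is why dst_stride_yuy2 is compared against width * 2 when coalescing.
// An illustrative pair (hypothetical values): two mid-grey pixels with
// neutral chroma serialize as
//
//   uint8_t yuy2_pair[4] = {0x80, 0x80, 0x80, 0x80};  // Y0, U, Y1, V
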
LIBYUV_API
int I420ToYUY2(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_yuy2,
               int dst_stride_yuy2,
               int width,
               int height) {
  int y;
  void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
                        const uint8_t* src_v, uint8_t* dst_yuy2, int width) =
      I422ToYUY2Row_C;
  if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
    dst_stride_yuy2 = -dst_stride_yuy2;
  }
#if defined(HAS_I422TOYUY2ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      I422ToYUY2Row = I422ToYUY2Row_SSE2;
    }
  }
#endif
#if defined(HAS_I422TOYUY2ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToYUY2Row = I422ToYUY2Row_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      I422ToYUY2Row = I422ToYUY2Row_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOYUY2ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToYUY2Row = I422ToYUY2Row_NEON;
    }
  }
#endif
#if defined(HAS_I422TOYUY2ROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      I422ToYUY2Row = I422ToYUY2Row_MSA;
    }
  }
#endif
#if defined(HAS_I422TOYUY2ROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    I422ToYUY2Row = I422ToYUY2Row_Any_LASX;
    if (IS_ALIGNED(width, 32)) {
      I422ToYUY2Row = I422ToYUY2Row_LASX;
    }
  }
#endif

  for (y = 0; y < height - 1; y += 2) {
    I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
    I422ToYUY2Row(src_y + src_stride_y, src_u, src_v,
                  dst_yuy2 + dst_stride_yuy2, width);
    src_y += src_stride_y * 2;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_yuy2 += dst_stride_yuy2 * 2;
  }
  if (height & 1) {
    I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
  }
  return 0;
}

LIBYUV_API
int I422ToUYVY(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_uyvy,
               int dst_stride_uyvy,
               int width,
               int height) {
  int y;
  void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
                        const uint8_t* src_v, uint8_t* dst_uyvy, int width) =
      I422ToUYVYRow_C;
  if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
    dst_stride_uyvy = -dst_stride_uyvy;
  }
  // Coalesce rows.
  if (src_stride_y == width && src_stride_u * 2 == width &&
      src_stride_v * 2 == width && dst_stride_uyvy == width * 2) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
  }
#if defined(HAS_I422TOUYVYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      I422ToUYVYRow = I422ToUYVYRow_SSE2;
    }
  }
#endif
#if defined(HAS_I422TOUYVYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      I422ToUYVYRow = I422ToUYVYRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOUYVYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToUYVYRow = I422ToUYVYRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOUYVYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      I422ToUYVYRow = I422ToUYVYRow_MSA;
    }
  }
#endif
#if defined(HAS_I422TOUYVYROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
    if (IS_ALIGNED(width, 32)) {
      I422ToUYVYRow = I422ToUYVYRow_LASX;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_uyvy += dst_stride_uyvy;
  }
  return 0;
}

LIBYUV_API
int I420ToUYVY(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_uyvy,
               int dst_stride_uyvy,
               int width,
               int height) {
  int y;
  void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
                        const uint8_t* src_v, uint8_t* dst_uyvy, int width) =
      I422ToUYVYRow_C;
  if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
    dst_stride_uyvy = -dst_stride_uyvy;
  }
#if defined(HAS_I422TOUYVYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      I422ToUYVYRow = I422ToUYVYRow_SSE2;
    }
  }
#endif
#if defined(HAS_I422TOUYVYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      I422ToUYVYRow = I422ToUYVYRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOUYVYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToUYVYRow = I422ToUYVYRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOUYVYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      I422ToUYVYRow = I422ToUYVYRow_MSA;
    }
  }
#endif
#if defined(HAS_I422TOUYVYROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
    if (IS_ALIGNED(width, 32)) {
      I422ToUYVYRow = I422ToUYVYRow_LASX;
    }
  }
#endif

  for (y = 0; y < height - 1; y += 2) {
    I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
    I422ToUYVYRow(src_y + src_stride_y, src_u, src_v,
                  dst_uyvy + dst_stride_uyvy, width);
    src_y += src_stride_y * 2;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_uyvy += dst_stride_uyvy * 2;
  }
  if (height & 1) {
    I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
  }
  return 0;
}

LIBYUV_API
int I420ToNV12(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int halfwidth = (width + 1) / 2;
  int halfheight = (height + 1) / 2;
  if (!src_y || !src_u || !src_v || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  MergeUVPlane(src_u, src_stride_u, src_v, src_stride_v, dst_uv, dst_stride_uv,
               halfwidth, halfheight);
  return 0;
}

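// NV12 is the Y plane followed by one half-height plane of interleaved U/V
// bytes. A hedged allocation sketch for a caller-owned destination
// (hypothetical 640x480 frame; UV rows are width bytes wide):
//
//   int w = 640, h = 480;
//   uint8_t* nv12 = (uint8_t*)malloc((size_t)w * h + (size_t)w * ((h + 1) / 2));
//   I420ToNV12(src_y, w, src_u, (w + 1) / 2, src_v, (w + 1) / 2,
//              nv12, w, nv12 + w * h, w, w, h);
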
LIBYUV_API
int I420ToNV21(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_vu,
               int dst_stride_vu,
               int width,
               int height) {
  return I420ToNV12(src_y, src_stride_y, src_v, src_stride_v, src_u,
                    src_stride_u, dst_y, dst_stride_y, dst_vu, dst_stride_vu,
                    width, height);
}

// Convert I420 to specified format
LIBYUV_API
int ConvertFromI420(const uint8_t* y,
                    int y_stride,
                    const uint8_t* u,
                    int u_stride,
                    const uint8_t* v,
                    int v_stride,
                    uint8_t* dst_sample,
                    int dst_sample_stride,
                    int width,
                    int height,
                    uint32_t fourcc) {
  uint32_t format = CanonicalFourCC(fourcc);
  int r = 0;
  if (!y || !u || !v || !dst_sample || width <= 0 || height == 0) {
    return -1;
  }
  switch (format) {
    // Single plane formats
    case FOURCC_YUY2:
      r = I420ToYUY2(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 2, width,
                     height);
      break;
    case FOURCC_UYVY:
      r = I420ToUYVY(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 2, width,
                     height);
      break;
    case FOURCC_RGBP:
      r = I420ToRGB565(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                       dst_sample_stride ? dst_sample_stride : width * 2, width,
                       height);
      break;
    case FOURCC_RGBO:
      r = I420ToARGB1555(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                         dst_sample_stride ? dst_sample_stride : width * 2,
                         width, height);
      break;
    case FOURCC_R444:
      r = I420ToARGB4444(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                         dst_sample_stride ? dst_sample_stride : width * 2,
                         width, height);
      break;
    case FOURCC_24BG:
      r = I420ToRGB24(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                      dst_sample_stride ? dst_sample_stride : width * 3, width,
                      height);
      break;
    case FOURCC_RAW:
      r = I420ToRAW(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                    dst_sample_stride ? dst_sample_stride : width * 3, width,
                    height);
      break;
    case FOURCC_ARGB:
      r = I420ToARGB(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4, width,
                     height);
      break;
    case FOURCC_BGRA:
      r = I420ToBGRA(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4, width,
                     height);
      break;
    case FOURCC_ABGR:
      r = I420ToABGR(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4, width,
                     height);
      break;
    case FOURCC_RGBA:
      r = I420ToRGBA(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4, width,
                     height);
      break;
    case FOURCC_AR30:
      r = I420ToAR30(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4, width,
                     height);
      break;
    case FOURCC_I400:
      r = I400Copy(y, y_stride, dst_sample,
                   dst_sample_stride ? dst_sample_stride : width, width,
                   height);
      break;
    case FOURCC_NV12: {
      int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
      uint8_t* dst_uv = dst_sample + dst_y_stride * height;
      r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width, dst_uv,
                     dst_sample_stride ? dst_sample_stride : width, width,
                     height);
      break;
    }
    case FOURCC_NV21: {
      int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
      uint8_t* dst_vu = dst_sample + dst_y_stride * height;
      r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width, dst_vu,
                     dst_sample_stride ? dst_sample_stride : width, width,
                     height);
      break;
    }
    // Triplanar formats
    case FOURCC_I420:
    case FOURCC_YV12: {
      dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
      int halfstride = (dst_sample_stride + 1) / 2;
      int halfheight = (height + 1) / 2;
      uint8_t* dst_u;
      uint8_t* dst_v;
      if (format == FOURCC_YV12) {
        dst_v = dst_sample + dst_sample_stride * height;
        dst_u = dst_v + halfstride * halfheight;
      } else {
        dst_u = dst_sample + dst_sample_stride * height;
        dst_v = dst_u + halfstride * halfheight;
      }
      r = I420Copy(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                   dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
                   width, height);
      break;
    }
    case FOURCC_I422:
    case FOURCC_YV16: {
      dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
      int halfstride = (dst_sample_stride + 1) / 2;
      uint8_t* dst_u;
      uint8_t* dst_v;
      if (format == FOURCC_YV16) {
        dst_v = dst_sample + dst_sample_stride * height;
        dst_u = dst_v + halfstride * height;
      } else {
        dst_u = dst_sample + dst_sample_stride * height;
        dst_v = dst_u + halfstride * height;
      }
      r = I420ToI422(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
                     width, height);
      break;
    }
    case FOURCC_I444:
    case FOURCC_YV24: {
      dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
      uint8_t* dst_u;
      uint8_t* dst_v;
      if (format == FOURCC_YV24) {
        dst_v = dst_sample + dst_sample_stride * height;
        dst_u = dst_v + dst_sample_stride * height;
      } else {
        dst_u = dst_sample + dst_sample_stride * height;
        dst_v = dst_u + dst_sample_stride * height;
      }
      r = I420ToI444(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride, dst_u, dst_sample_stride, dst_v,
                     dst_sample_stride, width, height);
      break;
    }
    // Formats not supported - MJPG, biplanar, some rgb formats.
    default:
      return -1;  // unknown fourcc - return failure code.
  }
  return r;
}

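// A hedged usage sketch (hypothetical buffers): pack an I420 frame into a
// caller-allocated RGB24 buffer through the fourcc dispatcher, letting the
// zero stride default to width * 3 as the FOURCC_24BG case above does:
//
//   int r = ConvertFromI420(y, w, u, (w + 1) / 2, v, (w + 1) / 2,
//                           rgb24, 0, w, h, FOURCC_24BG);
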
#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
3000
TMessagesProj/jni/third_party/libyuv/source/convert_from_argb.cc
vendored
Normal file
File diff suppressed because it is too large
602
TMessagesProj/jni/third_party/libyuv/source/convert_jpeg.cc
vendored
Normal file
@ -0,0 +1,602 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"

#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#ifdef HAVE_JPEG
struct I420Buffers {
  uint8_t* y;
  int y_stride;
  uint8_t* u;
  int u_stride;
  uint8_t* v;
  int v_stride;
  int w;
  int h;
};

static void JpegCopyI420(void* opaque,
                         const uint8_t* const* data,
                         const int* strides,
                         int rows) {
  I420Buffers* dest = (I420Buffers*)(opaque);
  I420Copy(data[0], strides[0], data[1], strides[1], data[2], strides[2],
           dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
           dest->v_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->u += ((rows + 1) >> 1) * dest->u_stride;
  dest->v += ((rows + 1) >> 1) * dest->v_stride;
  dest->h -= rows;
}

static void JpegI422ToI420(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  I420Buffers* dest = (I420Buffers*)(opaque);
  I422ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
             dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
             dest->v_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->u += ((rows + 1) >> 1) * dest->u_stride;
  dest->v += ((rows + 1) >> 1) * dest->v_stride;
  dest->h -= rows;
}

static void JpegI444ToI420(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  I420Buffers* dest = (I420Buffers*)(opaque);
  I444ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
             dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
             dest->v_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->u += ((rows + 1) >> 1) * dest->u_stride;
  dest->v += ((rows + 1) >> 1) * dest->v_stride;
  dest->h -= rows;
}

static void JpegI400ToI420(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  I420Buffers* dest = (I420Buffers*)(opaque);
  I400ToI420(data[0], strides[0], dest->y, dest->y_stride, dest->u,
             dest->u_stride, dest->v, dest->v_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->u += ((rows + 1) >> 1) * dest->u_stride;
  dest->v += ((rows + 1) >> 1) * dest->v_stride;
  dest->h -= rows;
}

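// Each callback above converts one band of decoded rows, then advances the
// destination pointers and decrements the remaining height, so
// DecodeToCallback can stream a frame through in slices. A custom sink would
// follow the same idiom (hedged sketch, hypothetical name):
//
//   static void MySink(void* opaque, const uint8_t* const* data,
//                      const int* strides, int rows) {
//     I420Buffers* dest = (I420Buffers*)opaque;
//     /* consume rows of data[0..2] here */
//     dest->y += rows * dest->y_stride;
//     dest->h -= rows;
//   }
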
// Query size of MJPG in pixels.
LIBYUV_API
int MJPGSize(const uint8_t* src_mjpg,
             size_t src_size_mjpg,
             int* width,
             int* height) {
  MJpegDecoder mjpeg_decoder;
  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
  if (ret) {
    *width = mjpeg_decoder.GetWidth();
    *height = mjpeg_decoder.GetHeight();
  }
  mjpeg_decoder.UnloadFrame();
  return ret ? 0 : -1;  // -1 for runtime failure.
}

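// A hedged usage sketch: probe the compressed frame for its dimensions
// before sizing the destination planes (hypothetical buffer names):
//
//   int w = 0, h = 0;
//   if (MJPGSize(jpg, jpg_size, &w, &h) == 0) {
//     // allocate w x h planes, then decode with MJPGToI420 below
//   }
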
// MJPG (Motion JPeg) to I420
// TODO(fbarchard): review src_width and src_height requirement. dst_width and
// dst_height may be enough.
LIBYUV_API
int MJPGToI420(const uint8_t* src_mjpg,
               size_t src_size_mjpg,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int src_width,
               int src_height,
               int dst_width,
               int dst_height) {
  if (src_size_mjpg == kUnknownDataSize) {
    // ERROR: MJPEG frame size unknown
    return -1;
  }

  // TODO(fbarchard): Port MJpeg to C.
  MJpegDecoder mjpeg_decoder;
  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
  if (ret && (mjpeg_decoder.GetWidth() != src_width ||
              mjpeg_decoder.GetHeight() != src_height)) {
    // ERROR: MJPEG frame has unexpected dimensions
    mjpeg_decoder.UnloadFrame();
    return 1;  // runtime failure
  }
  if (ret) {
    I420Buffers bufs = {dst_y, dst_stride_y, dst_u, dst_stride_u,
                        dst_v, dst_stride_v, dst_width, dst_height};
    // YUV420
    if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
        mjpeg_decoder.GetNumComponents() == 3 &&
        mjpeg_decoder.GetVertSampFactor(0) == 2 &&
        mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
        mjpeg_decoder.GetVertSampFactor(1) == 1 &&
        mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
        mjpeg_decoder.GetVertSampFactor(2) == 1 &&
        mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dst_width,
                                           dst_height);
      // YUV422
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceYCbCr &&
               mjpeg_decoder.GetNumComponents() == 3 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dst_width,
                                           dst_height);
      // YUV444
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceYCbCr &&
               mjpeg_decoder.GetNumComponents() == 3 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dst_width,
                                           dst_height);
      // YUV400
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceGrayscale &&
               mjpeg_decoder.GetNumComponents() == 1 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dst_width,
                                           dst_height);
    } else {
      // TODO(fbarchard): Implement conversion for any other
      // colorspace/subsample factors that occur in practice. ERROR: Unable to
      // convert MJPEG frame because format is not supported
      mjpeg_decoder.UnloadFrame();
      return 1;
    }
  }
  return ret ? 0 : 1;
}

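// Continuing the sketch above (hypothetical buffers): decode at the probed
// size, with source and destination dimensions equal so no scaling occurs:
//
//   int r = MJPGToI420(jpg, jpg_size, dst_y, w, dst_u, (w + 1) / 2,
//                      dst_v, (w + 1) / 2, w, h, w, h);
//   // r == 0 on success; 1 indicates a runtime decode failure.
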
struct NV21Buffers {
  uint8_t* y;
  int y_stride;
  uint8_t* vu;
  int vu_stride;
  int w;
  int h;
};

static void JpegI420ToNV21(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  NV21Buffers* dest = (NV21Buffers*)(opaque);
  I420ToNV21(data[0], strides[0], data[1], strides[1], data[2], strides[2],
             dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
  dest->h -= rows;
}

static void JpegI422ToNV21(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  NV21Buffers* dest = (NV21Buffers*)(opaque);
  I422ToNV21(data[0], strides[0], data[1], strides[1], data[2], strides[2],
             dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
  dest->h -= rows;
}

static void JpegI444ToNV21(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  NV21Buffers* dest = (NV21Buffers*)(opaque);
  I444ToNV21(data[0], strides[0], data[1], strides[1], data[2], strides[2],
             dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
  dest->h -= rows;
}

static void JpegI400ToNV21(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  NV21Buffers* dest = (NV21Buffers*)(opaque);
  I400ToNV21(data[0], strides[0], dest->y, dest->y_stride, dest->vu,
             dest->vu_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
  dest->h -= rows;
}

// MJPG (Motion JPeg) to NV21
LIBYUV_API
int MJPGToNV21(const uint8_t* src_mjpg,
               size_t src_size_mjpg,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_vu,
               int dst_stride_vu,
               int src_width,
               int src_height,
               int dst_width,
               int dst_height) {
  if (src_size_mjpg == kUnknownDataSize) {
    // ERROR: MJPEG frame size unknown
    return -1;
  }

  // TODO(fbarchard): Port MJpeg to C.
  MJpegDecoder mjpeg_decoder;
  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
  if (ret && (mjpeg_decoder.GetWidth() != src_width ||
              mjpeg_decoder.GetHeight() != src_height)) {
    // ERROR: MJPEG frame has unexpected dimensions
    mjpeg_decoder.UnloadFrame();
    return 1;  // runtime failure
  }
  if (ret) {
    NV21Buffers bufs = {dst_y, dst_stride_y, dst_vu,
                        dst_stride_vu, dst_width, dst_height};
    // YUV420
    if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
        mjpeg_decoder.GetNumComponents() == 3 &&
        mjpeg_decoder.GetVertSampFactor(0) == 2 &&
        mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
        mjpeg_decoder.GetVertSampFactor(1) == 1 &&
        mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
        mjpeg_decoder.GetVertSampFactor(2) == 1 &&
        mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToNV21, &bufs, dst_width,
                                           dst_height);
      // YUV422
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceYCbCr &&
               mjpeg_decoder.GetNumComponents() == 3 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToNV21, &bufs, dst_width,
                                           dst_height);
      // YUV444
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceYCbCr &&
               mjpeg_decoder.GetNumComponents() == 3 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToNV21, &bufs, dst_width,
                                           dst_height);
      // YUV400
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceGrayscale &&
               mjpeg_decoder.GetNumComponents() == 1 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToNV21, &bufs, dst_width,
                                           dst_height);
    } else {
      // Unknown colorspace.
      mjpeg_decoder.UnloadFrame();
      return 1;
    }
  }
  return ret ? 0 : 1;
}

static void JpegI420ToNV12(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  NV21Buffers* dest = (NV21Buffers*)(opaque);
  // Use NV21 with VU swapped.
  I420ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
             dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
  dest->h -= rows;
}

static void JpegI422ToNV12(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  NV21Buffers* dest = (NV21Buffers*)(opaque);
  // Use NV21 with VU swapped.
  I422ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
             dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
  dest->h -= rows;
}

static void JpegI444ToNV12(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  NV21Buffers* dest = (NV21Buffers*)(opaque);
  // Use NV21 with VU swapped.
  I444ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
             dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
  dest->h -= rows;
}

static void JpegI400ToNV12(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  NV21Buffers* dest = (NV21Buffers*)(opaque);
  // Use NV21 since there is no UV plane.
  I400ToNV21(data[0], strides[0], dest->y, dest->y_stride, dest->vu,
             dest->vu_stride, dest->w, rows);
  dest->y += rows * dest->y_stride;
  dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
  dest->h -= rows;
}

// MJPG (Motion JPEG) to NV12.
LIBYUV_API
int MJPGToNV12(const uint8_t* sample,
               size_t sample_size,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int src_width,
               int src_height,
               int dst_width,
               int dst_height) {
  if (sample_size == kUnknownDataSize) {
    // ERROR: MJPEG frame size unknown
    return -1;
  }

  // TODO(fbarchard): Port MJpeg to C.
  MJpegDecoder mjpeg_decoder;
  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
  if (ret && (mjpeg_decoder.GetWidth() != src_width ||
              mjpeg_decoder.GetHeight() != src_height)) {
    // ERROR: MJPEG frame has unexpected dimensions
    mjpeg_decoder.UnloadFrame();
    return 1;  // runtime failure
  }
  if (ret) {
    // Use NV21Buffers but with UV instead of VU.
    NV21Buffers bufs = {dst_y, dst_stride_y, dst_uv,
                        dst_stride_uv, dst_width, dst_height};
    // YUV420
    if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
        mjpeg_decoder.GetNumComponents() == 3 &&
        mjpeg_decoder.GetVertSampFactor(0) == 2 &&
        mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
        mjpeg_decoder.GetVertSampFactor(1) == 1 &&
        mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
        mjpeg_decoder.GetVertSampFactor(2) == 1 &&
        mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToNV12, &bufs, dst_width,
                                           dst_height);
      // YUV422
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceYCbCr &&
               mjpeg_decoder.GetNumComponents() == 3 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToNV12, &bufs, dst_width,
                                           dst_height);
      // YUV444
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceYCbCr &&
               mjpeg_decoder.GetNumComponents() == 3 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToNV12, &bufs, dst_width,
                                           dst_height);
      // YUV400
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceGrayscale &&
               mjpeg_decoder.GetNumComponents() == 1 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToNV12, &bufs, dst_width,
                                           dst_height);
    } else {
      // Unknown colorspace.
      mjpeg_decoder.UnloadFrame();
      return 1;
    }
  }
  return ret ? 0 : 1;
}

struct ARGBBuffers {
  uint8_t* argb;
  int argb_stride;
  int w;
  int h;
};

static void JpegI420ToARGB(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
  I420ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
             dest->argb, dest->argb_stride, dest->w, rows);
  dest->argb += rows * dest->argb_stride;
  dest->h -= rows;
}

static void JpegI422ToARGB(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
  I422ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
             dest->argb, dest->argb_stride, dest->w, rows);
  dest->argb += rows * dest->argb_stride;
  dest->h -= rows;
}

static void JpegI444ToARGB(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
  I444ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
             dest->argb, dest->argb_stride, dest->w, rows);
  dest->argb += rows * dest->argb_stride;
  dest->h -= rows;
}

static void JpegI400ToARGB(void* opaque,
                           const uint8_t* const* data,
                           const int* strides,
                           int rows) {
  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
  I400ToARGB(data[0], strides[0], dest->argb, dest->argb_stride, dest->w, rows);
  dest->argb += rows * dest->argb_stride;
  dest->h -= rows;
}

// MJPG (Motion JPeg) to ARGB
// TODO(fbarchard): review src_width and src_height requirement. dst_width and
// dst_height may be enough.
LIBYUV_API
int MJPGToARGB(const uint8_t* src_mjpg,
               size_t src_size_mjpg,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int src_width,
               int src_height,
               int dst_width,
               int dst_height) {
  if (src_size_mjpg == kUnknownDataSize) {
    // ERROR: MJPEG frame size unknown
    return -1;
  }

  // TODO(fbarchard): Port MJpeg to C.
  MJpegDecoder mjpeg_decoder;
  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
  if (ret && (mjpeg_decoder.GetWidth() != src_width ||
              mjpeg_decoder.GetHeight() != src_height)) {
    // ERROR: MJPEG frame has unexpected dimensions
    mjpeg_decoder.UnloadFrame();
    return 1;  // runtime failure
  }
  if (ret) {
    ARGBBuffers bufs = {dst_argb, dst_stride_argb, dst_width, dst_height};
    // YUV420
    if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
        mjpeg_decoder.GetNumComponents() == 3 &&
        mjpeg_decoder.GetVertSampFactor(0) == 2 &&
        mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
        mjpeg_decoder.GetVertSampFactor(1) == 1 &&
        mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
        mjpeg_decoder.GetVertSampFactor(2) == 1 &&
        mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dst_width,
                                           dst_height);
      // YUV422
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceYCbCr &&
               mjpeg_decoder.GetNumComponents() == 3 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dst_width,
                                           dst_height);
      // YUV444
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceYCbCr &&
               mjpeg_decoder.GetNumComponents() == 3 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dst_width,
                                           dst_height);
      // YUV400
    } else if (mjpeg_decoder.GetColorSpace() ==
                   MJpegDecoder::kColorSpaceGrayscale &&
               mjpeg_decoder.GetNumComponents() == 1 &&
               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
               mjpeg_decoder.GetHorizSampFactor(0) == 1) {
      ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dst_width,
                                           dst_height);
    } else {
      // TODO(fbarchard): Implement conversion for any other
      // colorspace/subsample factors that occur in practice. ERROR: Unable to
      // convert MJPEG frame because format is not supported
      mjpeg_decoder.UnloadFrame();
      return 1;
    }
  }
  return ret ? 0 : 1;
}

#endif  // HAVE_JPEG

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
382
TMessagesProj/jni/third_party/libyuv/source/convert_to_argb.cc
vendored
Normal file
@ -0,0 +1,382 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/convert_argb.h"

#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// src_width is used for source stride computation
// src_height is used to compute location of planes, and indicate inversion
// sample_size is measured in bytes and is the size of the frame.
// With MJPEG it is the compressed size of the frame.

// TODO(fbarchard): Add the following:
// H010ToARGB
// I010ToARGB

LIBYUV_API
|
||||
int ConvertToARGB(const uint8_t* sample,
|
||||
size_t sample_size,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int crop_x,
|
||||
int crop_y,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int crop_width,
|
||||
int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32_t fourcc) {
|
||||
uint32_t format = CanonicalFourCC(fourcc);
|
||||
int aligned_src_width = (src_width + 1) & ~1;
|
||||
const uint8_t* src;
|
||||
const uint8_t* src_uv;
|
||||
int abs_src_height = (src_height < 0) ? -src_height : src_height;
|
||||
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
int r = 0;
|
||||
|
||||
// One pass rotation is available for some formats. For the rest, convert
|
||||
// to ARGB (with optional vertical flipping) into a temporary ARGB buffer,
|
||||
// and then rotate the ARGB to the final destination buffer.
|
||||
// For in-place conversion, if destination dst_argb is same as source sample,
|
||||
// also enable temporary buffer.
|
||||
LIBYUV_BOOL need_buf =
|
||||
(rotation && format != FOURCC_ARGB) || dst_argb == sample;
|
||||
uint8_t* dest_argb = dst_argb;
|
||||
int dest_dst_stride_argb = dst_stride_argb;
|
||||
uint8_t* rotate_buffer = NULL;
|
||||
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
|
||||
if (dst_argb == NULL || sample == NULL || src_width <= 0 || crop_width <= 0 ||
|
||||
src_height == 0 || crop_height == 0) {
|
||||
return -1;
|
||||
}
|
||||
if (src_height < 0) {
|
||||
inv_crop_height = -inv_crop_height;
|
||||
}
|
||||
|
||||
if (need_buf) {
|
||||
int argb_size = crop_width * 4 * abs_crop_height;
|
||||
rotate_buffer = (uint8_t*)malloc(argb_size); /* NOLINT */
|
||||
if (!rotate_buffer) {
|
||||
return 1; // Out of memory runtime error.
|
||||
}
|
||||
dst_argb = rotate_buffer;
|
||||
dst_stride_argb = crop_width * 4;
|
||||
}
|
||||
|
||||
switch (format) {
|
||||
// Single plane formats
|
||||
case FOURCC_YUY2:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = YUY2ToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_UYVY:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = UYVYToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_24BG:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RGB24ToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RAW:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RAWToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ARGB:
|
||||
if (!need_buf && !rotation) {
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ARGBToARGB(src, src_width * 4, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
}
|
||||
break;
|
||||
case FOURCC_BGRA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = BGRAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ABGR:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ABGRToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = RGBAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_AR30:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = AR30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_AB30:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = AB30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBP:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = RGB565ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBO:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB1555ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_R444:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB4444ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_I400:
|
||||
src = sample + src_width * crop_y + crop_x;
|
||||
r = I400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_J400:
|
||||
src = sample + src_width * crop_y + crop_x;
|
||||
r = J400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
|
||||
// Biplanar formats
|
||||
case FOURCC_NV12:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv =
|
||||
sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x;
|
||||
r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb,
|
||||
dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_NV21:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv =
|
||||
sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x;
|
||||
// Call NV12 but with u and v parameters swapped.
|
||||
r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb,
|
||||
dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
// Triplanar formats
|
||||
case FOURCC_I420:
|
||||
case FOURCC_YV12: {
|
||||
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8_t* src_u;
|
||||
const uint8_t* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
if (format == FOURCC_YV12) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
}
|
||||
r = I420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_J420: {
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8_t* src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
const uint8_t* src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_H420: {
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8_t* src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
const uint8_t* src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
r = H420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_U420: {
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8_t* src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
const uint8_t* src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
r = U420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_I422:
|
||||
case FOURCC_YV16: {
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8_t* src_u;
|
||||
const uint8_t* src_v;
|
||||
if (format == FOURCC_YV16) {
|
||||
src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
|
||||
crop_x / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height + halfwidth * crop_y +
|
||||
crop_x / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
}
|
||||
r = I422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_J422: {
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8_t* src_u =
|
||||
sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2;
|
||||
const uint8_t* src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
r = J422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_H422: {
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8_t* src_u =
|
||||
sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2;
|
||||
const uint8_t* src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
r = H422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
    case FOURCC_U422: {
      int halfwidth = (src_width + 1) / 2;
      const uint8_t* src_y = sample + src_width * crop_y + crop_x;
      const uint8_t* src_u =
          sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2;
      const uint8_t* src_v = sample + src_width * abs_src_height +
                             halfwidth * (abs_src_height + crop_y) + crop_x / 2;
      r = U422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
                     dst_argb, dst_stride_argb, crop_width, inv_crop_height);
      break;
    }

    case FOURCC_I444:
    case FOURCC_YV24: {
      const uint8_t* src_y = sample + src_width * crop_y + crop_x;
      const uint8_t* src_u;
      const uint8_t* src_v;
      if (format == FOURCC_YV24) {
        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      } else {
        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      }
      r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
                     dst_argb, dst_stride_argb, crop_width, inv_crop_height);
      break;
    }

    case FOURCC_J444: {
      const uint8_t* src_y = sample + src_width * crop_y + crop_x;
      const uint8_t* src_u;
      const uint8_t* src_v;
      src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
      src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      r = J444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
                     dst_argb, dst_stride_argb, crop_width, inv_crop_height);
      break;
    }

    case FOURCC_H444: {
      const uint8_t* src_y = sample + src_width * crop_y + crop_x;
      const uint8_t* src_u;
      const uint8_t* src_v;
      src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
      src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      r = H444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
                     dst_argb, dst_stride_argb, crop_width, inv_crop_height);
      break;
    }

    case FOURCC_U444: {
      const uint8_t* src_y = sample + src_width * crop_y + crop_x;
      const uint8_t* src_u;
      const uint8_t* src_v;
      src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
      src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      r = U444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
                     dst_argb, dst_stride_argb, crop_width, inv_crop_height);
      break;
    }

#ifdef HAVE_JPEG
    case FOURCC_MJPG:
      r = MJPGToARGB(sample, sample_size, dst_argb, dst_stride_argb, src_width,
                     abs_src_height, crop_width, inv_crop_height);
      break;
#endif
    default:
      r = -1;  // unknown fourcc - return failure code.
  }

  if (need_buf) {
    if (!r) {
      r = ARGBRotate(dst_argb, dst_stride_argb, dest_argb, dest_dst_stride_argb,
                     crop_width, abs_crop_height, rotation);
    }
    free(rotate_buffer);
  } else if (rotation) {
    src = sample + (src_width * crop_y + crop_x) * 4;
    r = ARGBRotate(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
                   inv_crop_height, rotation);
  }

  return r;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
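ConvertToARGB is the generic entry point: it canonicalizes the FOURCC, routes through a temporary buffer when rotation or in-place operation demands it, and dispatches to the per-format converter in the switch. A hedged usage sketch, assuming an NV21 camera frame (names and sizes here are assumptions):

// Hedged sketch: full-frame NV21 -> ARGB, no crop, no rotation.
#include <vector>
#include "libyuv/convert_argb.h"

int Nv21ToArgbFrame(const uint8_t* nv21, size_t nv21_size) {
  const int kW = 640, kH = 480;  // assumed geometry
  std::vector<uint8_t> argb(kW * kH * 4);
  return libyuv::ConvertToARGB(nv21, nv21_size, argb.data(), kW * 4,
                               /*crop_x=*/0, /*crop_y=*/0, kW, kH,
                               /*crop_width=*/kW, /*crop_height=*/kH,
                               libyuv::kRotate0, libyuv::FOURCC_NV21);
}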
280
TMessagesProj/jni/third_party/libyuv/source/convert_to_i420.cc
vendored
Normal file
@@ -0,0 +1,280 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>

#include "libyuv/convert.h"

#include "libyuv/video_common.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Convert camera sample to I420 with cropping, rotation and vertical flip.
// src_width is used for source stride computation
// src_height is used to compute location of planes, and indicate inversion
// sample_size is measured in bytes and is the size of the frame.
//   With MJPEG it is the compressed size of the frame.
LIBYUV_API
int ConvertToI420(const uint8_t* sample,
                  size_t sample_size,
                  uint8_t* dst_y,
                  int dst_stride_y,
                  uint8_t* dst_u,
                  int dst_stride_u,
                  uint8_t* dst_v,
                  int dst_stride_v,
                  int crop_x,
                  int crop_y,
                  int src_width,
                  int src_height,
                  int crop_width,
                  int crop_height,
                  enum RotationMode rotation,
                  uint32_t fourcc) {
  uint32_t format = CanonicalFourCC(fourcc);
  int aligned_src_width = (src_width + 1) & ~1;
  const uint8_t* src;
  const uint8_t* src_uv;
  const int abs_src_height = (src_height < 0) ? -src_height : src_height;
  // TODO(nisse): Why allow crop_height < 0?
  const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
  int r = 0;
  LIBYUV_BOOL need_buf =
      (rotation && format != FOURCC_I420 && format != FOURCC_NV12 &&
       format != FOURCC_NV21 && format != FOURCC_YV12) ||
      dst_y == sample;
  uint8_t* tmp_y = dst_y;
  uint8_t* tmp_u = dst_u;
  uint8_t* tmp_v = dst_v;
  int tmp_y_stride = dst_stride_y;
  int tmp_u_stride = dst_stride_u;
  int tmp_v_stride = dst_stride_v;
  uint8_t* rotate_buffer = NULL;
  const int inv_crop_height =
      (src_height < 0) ? -abs_crop_height : abs_crop_height;

  if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
      crop_width <= 0 || src_height == 0 || crop_height == 0) {
    return -1;
  }

  // One pass rotation is available for some formats. For the rest, convert
  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
  // and then rotate the I420 to the final destination buffer.
  // For in-place conversion, if destination dst_y is same as source sample,
  // also enable temporary buffer.
  if (need_buf) {
    int y_size = crop_width * abs_crop_height;
    int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
    rotate_buffer = (uint8_t*)malloc(y_size + uv_size * 2); /* NOLINT */
    if (!rotate_buffer) {
      return 1;  // Out of memory runtime error.
    }
    dst_y = rotate_buffer;
    dst_u = dst_y + y_size;
    dst_v = dst_u + uv_size;
    dst_stride_y = crop_width;
    dst_stride_u = dst_stride_v = ((crop_width + 1) / 2);
  }

  switch (format) {
    // Single plane formats
    case FOURCC_YUY2: {  // TODO(fbarchard): Find better odd crop fix.
      uint8_t* u = (crop_x & 1) ? dst_v : dst_u;
      uint8_t* v = (crop_x & 1) ? dst_u : dst_v;
      int stride_u = (crop_x & 1) ? dst_stride_v : dst_stride_u;
      int stride_v = (crop_x & 1) ? dst_stride_u : dst_stride_v;
      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
      r = YUY2ToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, u,
                     stride_u, v, stride_v, crop_width, inv_crop_height);
      break;
    }
    case FOURCC_UYVY: {
      uint8_t* u = (crop_x & 1) ? dst_v : dst_u;
      uint8_t* v = (crop_x & 1) ? dst_u : dst_v;
      int stride_u = (crop_x & 1) ? dst_stride_v : dst_stride_u;
      int stride_v = (crop_x & 1) ? dst_stride_u : dst_stride_v;
      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
      r = UYVYToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, u,
                     stride_u, v, stride_v, crop_width, inv_crop_height);
      break;
    }
    case FOURCC_RGBP:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
                       dst_stride_u, dst_v, dst_stride_v, crop_width,
                       inv_crop_height);
      break;
    case FOURCC_RGBO:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = ARGB1555ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
                         dst_stride_u, dst_v, dst_stride_v, crop_width,
                         inv_crop_height);
      break;
    case FOURCC_R444:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = ARGB4444ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
                         dst_stride_u, dst_v, dst_stride_v, crop_width,
                         inv_crop_height);
      break;
    case FOURCC_24BG:
      src = sample + (src_width * crop_y + crop_x) * 3;
      r = RGB24ToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u,
                      dst_stride_u, dst_v, dst_stride_v, crop_width,
                      inv_crop_height);
      break;
    case FOURCC_RAW:
      src = sample + (src_width * crop_y + crop_x) * 3;
      r = RAWToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u,
                    dst_stride_u, dst_v, dst_stride_v, crop_width,
                    inv_crop_height);
      break;
    case FOURCC_ARGB:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
                     dst_stride_u, dst_v, dst_stride_v, crop_width,
                     inv_crop_height);
      break;
    case FOURCC_BGRA:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = BGRAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
                     dst_stride_u, dst_v, dst_stride_v, crop_width,
                     inv_crop_height);
      break;
    case FOURCC_ABGR:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
                     dst_stride_u, dst_v, dst_stride_v, crop_width,
                     inv_crop_height);
      break;
    case FOURCC_RGBA:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = RGBAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
                     dst_stride_u, dst_v, dst_stride_v, crop_width,
                     inv_crop_height);
      break;
    // TODO(fbarchard): Add AR30 and AB30
    case FOURCC_I400:
      src = sample + src_width * crop_y + crop_x;
      r = I400ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u,
                     dst_v, dst_stride_v, crop_width, inv_crop_height);
      break;
    // Biplanar formats
    case FOURCC_NV12:
      src = sample + (src_width * crop_y + crop_x);
      src_uv = sample + (src_width * abs_src_height) +
               ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
      r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y,
                           dst_stride_y, dst_u, dst_stride_u, dst_v,
                           dst_stride_v, crop_width, inv_crop_height, rotation);
      break;
    case FOURCC_NV21:
      src = sample + (src_width * crop_y + crop_x);
      src_uv = sample + (src_width * abs_src_height) +
               ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
      // Call NV12 but with dst_u and dst_v parameters swapped.
      r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y,
                           dst_stride_y, dst_v, dst_stride_v, dst_u,
                           dst_stride_u, crop_width, inv_crop_height, rotation);
      break;
    // Triplanar formats
    case FOURCC_I420:
    case FOURCC_YV12: {
      const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
      const uint8_t* src_u;
      const uint8_t* src_v;
      int halfwidth = (src_width + 1) / 2;
      int halfheight = (abs_src_height + 1) / 2;
      if (format == FOURCC_YV12) {
        src_v = sample + src_width * abs_src_height + halfwidth * (crop_y / 2) +
                (crop_x / 2);
        src_u = sample + src_width * abs_src_height +
                halfwidth * (halfheight + (crop_y / 2)) + (crop_x / 2);
      } else {
        src_u = sample + src_width * abs_src_height + halfwidth * (crop_y / 2) +
                (crop_x / 2);
        src_v = sample + src_width * abs_src_height +
                halfwidth * (halfheight + (crop_y / 2)) + (crop_x / 2);
      }
      r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
                     dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
                     dst_stride_v, crop_width, inv_crop_height, rotation);
      break;
    }
    case FOURCC_I422:
    case FOURCC_YV16: {
      const uint8_t* src_y = sample + src_width * crop_y + crop_x;
      const uint8_t* src_u;
      const uint8_t* src_v;
      int halfwidth = (src_width + 1) / 2;
      if (format == FOURCC_YV16) {
        src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
                (crop_x / 2);
        src_u = sample + src_width * abs_src_height +
                halfwidth * (abs_src_height + crop_y) + (crop_x / 2);
      } else {
        src_u = sample + src_width * abs_src_height + halfwidth * crop_y +
                (crop_x / 2);
        src_v = sample + src_width * abs_src_height +
                halfwidth * (abs_src_height + crop_y) + (crop_x / 2);
      }
      r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
                     dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
                     dst_stride_v, crop_width, inv_crop_height);
      break;
    }
    case FOURCC_I444:
    case FOURCC_YV24: {
      const uint8_t* src_y = sample + src_width * crop_y + crop_x;
      const uint8_t* src_u;
      const uint8_t* src_v;
      if (format == FOURCC_YV24) {
        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      } else {
        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      }
      r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width,
                     dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
                     dst_stride_v, crop_width, inv_crop_height);
      break;
    }
#ifdef HAVE_JPEG
    case FOURCC_MJPG:
      r = MJPGToI420(sample, sample_size, dst_y, dst_stride_y, dst_u,
                     dst_stride_u, dst_v, dst_stride_v, src_width,
                     abs_src_height, crop_width, inv_crop_height);
      break;
#endif
    default:
      r = -1;  // unknown fourcc - return failure code.
  }

  if (need_buf) {
    if (!r) {
      r = I420Rotate(dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
                     dst_stride_v, tmp_y, tmp_y_stride, tmp_u, tmp_u_stride,
                     tmp_v, tmp_v_stride, crop_width, abs_crop_height,
                     rotation);
    }
    free(rotate_buffer);
  }

  return r;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
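ConvertToI420 mirrors the ARGB path but writes three planes; note how the YUY2/UYVY cases swap the U and V destinations when crop_x is odd so chroma siting stays correct. A hedged usage sketch, again with assumed geometry:

// Hedged sketch: packed YUY2 -> planar I420. Buffer names are illustrative.
#include <vector>
#include "libyuv/convert.h"

int Yuy2ToI420Frame(const uint8_t* yuy2, size_t yuy2_size) {
  const int kW = 640, kH = 480;  // assumed geometry
  const int kHalfW = (kW + 1) / 2, kHalfH = (kH + 1) / 2;
  std::vector<uint8_t> y(kW * kH), u(kHalfW * kHalfH), v(kHalfW * kHalfH);
  return libyuv::ConvertToI420(yuy2, yuy2_size, y.data(), kW, u.data(), kHalfW,
                               v.data(), kHalfW, 0, 0, kW, kH, kW, kH,
                               libyuv::kRotate0, libyuv::FOURCC_YUY2);
}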
300
TMessagesProj/jni/third_party/libyuv/source/cpu_id.cc
vendored
Normal file
@@ -0,0 +1,300 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/cpu_id.h"

#if defined(_MSC_VER)
#include <intrin.h>  // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
    !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
    defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h>  // For _xgetbv()
#endif

// For ArmCpuCaps() but unittested on all platforms
#include <stdio.h>  // For fopen()
#include <string.h>

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid additional check.
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && \
    !defined(__clang__)
#define SAFEBUFFERS __declspec(safebuffers)
#else
#define SAFEBUFFERS
#endif

// cpu_info_ variable for SIMD instruction sets detected.
LIBYUV_API int cpu_info_ = 0;

// TODO(fbarchard): Consider using int for cpuid so casting is not needed.
// Low level cpuid for X86.
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
     defined(__x86_64__)) && \
    !defined(__pnacl__) && !defined(__CLR_VER)
LIBYUV_API
void CpuId(int info_eax, int info_ecx, int* cpu_info) {
#if defined(_MSC_VER)
// Visual C version uses intrinsic or inline x86 assembly.
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
  __cpuidex(cpu_info, info_eax, info_ecx);
#elif defined(_M_IX86)
  __asm {
    mov eax, info_eax
    mov ecx, info_ecx
    mov edi, cpu_info
    cpuid
    mov [edi], eax
    mov [edi + 4], ebx
    mov [edi + 8], ecx
    mov [edi + 12], edx
  }
#else  // Visual C but not x86
  if (info_ecx == 0) {
    __cpuid(cpu_info, info_eax);
  } else {
    cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0u;
  }
#endif
// GCC version uses inline x86 assembly.
#else  // defined(_MSC_VER)
  int info_ebx, info_edx;
  asm volatile(
#if defined(__i386__) && defined(__PIC__)
      // Preserve ebx for fpic 32 bit.
      "mov %%ebx, %%edi                          \n"
      "cpuid                                     \n"
      "xchg %%edi, %%ebx                         \n"
      : "=D"(info_ebx),
#else
      "cpuid                                     \n"
      : "=b"(info_ebx),
#endif  //  defined( __i386__) && defined(__PIC__)
        "+a"(info_eax), "+c"(info_ecx), "=d"(info_edx));
  cpu_info[0] = info_eax;
  cpu_info[1] = info_ebx;
  cpu_info[2] = info_ecx;
  cpu_info[3] = info_edx;
#endif  // defined(_MSC_VER)
}
#else  // (defined(_M_IX86) || defined(_M_X64) ...
LIBYUV_API
void CpuId(int eax, int ecx, int* cpu_info) {
  (void)eax;
  (void)ecx;
  cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
}
#endif

// For VS2010 and earlier emit can be used:
//   _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
//  __asm {
//    xor        ecx, ecx    // xcr 0
//    xgetbv
//    mov        xcr0, eax
//  }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
     defined(__x86_64__)) && \
    !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
static int GetXCR0() {
  int xcr0 = 0;
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
  xcr0 = (int)_xgetbv(0);  // VS2010 SP1 required.  NOLINT
#elif defined(__i386__) || defined(__x86_64__)
  asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");
#endif  // defined(__i386__) || defined(__x86_64__)
  return xcr0;
}
#else
// xgetbv unavailable to query for OSSave support.  Return 0.
#define GetXCR0() 0
#endif  // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif

// Based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
  char cpuinfo_line[512];
  FILE* f = fopen(cpuinfo_name, "r");
  if (!f) {
    // Assume Neon if /proc/cpuinfo is unavailable.
    // This will occur for Chrome sandbox for Pepper or Render process.
    return kCpuHasNEON;
  }
  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
    if (memcmp(cpuinfo_line, "Features", 8) == 0) {
      char* p = strstr(cpuinfo_line, " neon");
      if (p && (p[5] == ' ' || p[5] == '\n')) {
        fclose(f);
        return kCpuHasNEON;
      }
      // aarch64 uses asimd for Neon.
      p = strstr(cpuinfo_line, " asimd");
      if (p) {
        fclose(f);
        return kCpuHasNEON;
      }
    }
  }
  fclose(f);
  return 0;
}

// TODO(fbarchard): Consider read_msa_ir().
LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) {
  char cpuinfo_line[512];
  int flag = 0x0;
  FILE* f = fopen(cpuinfo_name, "r");
  if (!f) {
    // Assume nothing if /proc/cpuinfo is unavailable.
    // This will occur for Chrome sandbox for Pepper or Render process.
    return 0;
  }
  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
    if (memcmp(cpuinfo_line, "cpu model", 9) == 0) {
      // Workaround early kernel without MSA in ASEs line.
      if (strstr(cpuinfo_line, "Loongson-2K")) {
        flag |= kCpuHasMSA;
      }
    }
    if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
      if (strstr(cpuinfo_line, "msa")) {
        flag |= kCpuHasMSA;
      }
      // ASEs is the last line, so we can break here.
      break;
    }
  }
  fclose(f);
  return flag;
}

// TODO(fbarchard): Consider read_loongarch_ir().
#define LOONGARCH_CFG2 0x2
#define LOONGARCH_CFG2_LSX (1 << 6)
#define LOONGARCH_CFG2_LASX (1 << 7)

#if defined(__loongarch__)
LIBYUV_API SAFEBUFFERS int LoongarchCpuCaps(void) {
  int flag = 0x0;
  uint32_t cfg2 = 0;

  __asm__ volatile("cpucfg %0, %1 \n\t" : "+&r"(cfg2) : "r"(LOONGARCH_CFG2));

  if (cfg2 & LOONGARCH_CFG2_LSX)
    flag |= kCpuHasLSX;

  if (cfg2 & LOONGARCH_CFG2_LASX)
    flag |= kCpuHasLASX;
  return flag;
}
#endif

static SAFEBUFFERS int GetCpuFlags(void) {
  int cpu_info = 0;
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
    (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
     defined(_M_IX86))
  int cpu_info0[4] = {0, 0, 0, 0};
  int cpu_info1[4] = {0, 0, 0, 0};
  int cpu_info7[4] = {0, 0, 0, 0};
  CpuId(0, 0, cpu_info0);
  CpuId(1, 0, cpu_info1);
  if (cpu_info0[0] >= 7) {
    CpuId(7, 0, cpu_info7);
  }
  cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
             ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
             ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
             ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
             ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0);

  // AVX requires OS saves YMM registers.
  if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) &&  // AVX and OSXSave
      ((GetXCR0() & 6) == 6)) {  // Test OS saves YMM registers
    cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
                ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
                ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);

    // Detect AVX512bw
    if ((GetXCR0() & 0xe0) == 0xe0) {
      cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0;
      cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0;
      cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0;
      cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
      cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0;
      cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
      cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0;
      cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0;
    }
  }
#endif
#if defined(__mips__) && defined(__linux__)
  cpu_info = MipsCpuCaps("/proc/cpuinfo");
  cpu_info |= kCpuHasMIPS;
#endif
#if defined(__loongarch__) && defined(__linux__)
  cpu_info = LoongarchCpuCaps();
  cpu_info |= kCpuHasLOONGARCH;
#endif
#if defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon.  NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
  cpu_info = kCpuHasNEON;
// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
// flag in it.
// So for aarch64, neon enabling is hard coded here.
#endif
#if defined(__aarch64__)
  cpu_info = kCpuHasNEON;
#else
  // Linux arm parse text file for neon detect.
  cpu_info = ArmCpuCaps("/proc/cpuinfo");
#endif
  cpu_info |= kCpuHasARM;
#endif  // __arm__
  cpu_info |= kCpuInitialized;
  return cpu_info;
}

// Note that use of this function is not thread safe.
LIBYUV_API
int MaskCpuFlags(int enable_flags) {
  int cpu_info = GetCpuFlags() & enable_flags;
  SetCpuFlags(cpu_info);
  return cpu_info;
}

LIBYUV_API
int InitCpuFlags(void) {
  return MaskCpuFlags(-1);
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
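GetCpuFlags caches detection in cpu_info_, and MaskCpuFlags lets callers force a subset (for example MaskCpuFlags(0) disables all SIMD paths when benchmarking the C fallbacks). A hedged sketch using TestCpuFlag, which libyuv declares in cpu_id.h alongside these functions (treat the exact header contents as an assumption here):

// Hedged sketch: query detected SIMD features at runtime.
#include <stdio.h>
#include "libyuv/cpu_id.h"

void PrintSimdCaps(void) {
  // The first TestCpuFlag call lazily runs InitCpuFlags()/GetCpuFlags().
  printf("SSSE3: %d\n", libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) ? 1 : 0);
  printf("AVX2:  %d\n", libyuv::TestCpuFlag(libyuv::kCpuHasAVX2) ? 1 : 0);
  printf("NEON:  %d\n", libyuv::TestCpuFlag(libyuv::kCpuHasNEON) ? 1 : 0);
  libyuv::MaskCpuFlags(0);  // e.g. disable SIMD to A/B test the C paths.
}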
581
TMessagesProj/jni/third_party/libyuv/source/mjpeg_decoder.cc
vendored
Normal file
@@ -0,0 +1,581 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/mjpeg_decoder.h"

#ifdef HAVE_JPEG
#include <assert.h>

#if !defined(__pnacl__) && !defined(__CLR_VER) && \
    !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
// Must be included before jpeglib.
#include <setjmp.h>
#define HAVE_SETJMP

#if defined(_MSC_VER)
// disable warning 4324: structure was padded due to __declspec(align())
#pragma warning(disable : 4324)
#endif

#endif

#include <stdio.h>  // For jpeglib.h.

// C++ build requires extern C for jpeg internals.
#ifdef __cplusplus
extern "C" {
#endif

#include <jpeglib.h>

#ifdef __cplusplus
}  // extern "C"
#endif

#include "libyuv/planar_functions.h"  // For CopyPlane().

namespace libyuv {

#ifdef HAVE_SETJMP
struct SetJmpErrorMgr {
  jpeg_error_mgr base;  // Must be at the top
  jmp_buf setjmp_buffer;
};
#endif

const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;

// Methods that are passed to jpeglib.
boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
void init_source(jpeg_decompress_struct* cinfo);
void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes);  // NOLINT
void term_source(jpeg_decompress_struct* cinfo);
void ErrorHandler(jpeg_common_struct* cinfo);
void OutputHandler(jpeg_common_struct* cinfo);

MJpegDecoder::MJpegDecoder()
    : has_scanline_padding_(LIBYUV_FALSE),
      num_outbufs_(0),
      scanlines_(NULL),
      scanlines_sizes_(NULL),
      databuf_(NULL),
      databuf_strides_(NULL) {
  decompress_struct_ = new jpeg_decompress_struct;
  source_mgr_ = new jpeg_source_mgr;
#ifdef HAVE_SETJMP
  error_mgr_ = new SetJmpErrorMgr;
  decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
  // Override standard exit()-based error handler.
  error_mgr_->base.error_exit = &ErrorHandler;
  error_mgr_->base.output_message = &OutputHandler;
#endif
  decompress_struct_->client_data = NULL;
  source_mgr_->init_source = &init_source;
  source_mgr_->fill_input_buffer = &fill_input_buffer;
  source_mgr_->skip_input_data = &skip_input_data;
  source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
  source_mgr_->term_source = &term_source;
  jpeg_create_decompress(decompress_struct_);
  decompress_struct_->src = source_mgr_;
  buf_vec_.buffers = &buf_;
  buf_vec_.len = 1;
}

MJpegDecoder::~MJpegDecoder() {
  jpeg_destroy_decompress(decompress_struct_);
  delete decompress_struct_;
  delete source_mgr_;
#ifdef HAVE_SETJMP
  delete error_mgr_;
#endif
  DestroyOutputBuffers();
}

LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
  if (!ValidateJpeg(src, src_len)) {
    return LIBYUV_FALSE;
  }

  buf_.data = src;
  buf_.len = (int)src_len;
  buf_vec_.pos = 0;
  decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
  if (setjmp(error_mgr_->setjmp_buffer)) {
    // We called jpeg_read_header, it experienced an error, and we called
    // longjmp() and rewound the stack to here. Return error.
    return LIBYUV_FALSE;
  }
#endif
  if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
    // ERROR: Bad MJPEG header
    return LIBYUV_FALSE;
  }
  AllocOutputBuffers(GetNumComponents());
  for (int i = 0; i < num_outbufs_; ++i) {
    int scanlines_size = GetComponentScanlinesPerImcuRow(i);
    if (scanlines_sizes_[i] != scanlines_size) {
      if (scanlines_[i]) {
        delete[] scanlines_[i];
      }
      scanlines_[i] = new uint8_t*[scanlines_size];
      scanlines_sizes_[i] = scanlines_size;
    }

    // We allocate padding for the final scanline to pad it up to DCTSIZE bytes
    // to avoid memory errors, since jpeglib only reads full MCUs blocks. For
    // the preceding scanlines, the padding is not needed/wanted because the
    // following addresses will already be valid (they are the initial bytes of
    // the next scanline) and will be overwritten when jpeglib writes out that
    // next scanline.
    int databuf_stride = GetComponentStride(i);
    int databuf_size = scanlines_size * databuf_stride;
    if (databuf_strides_[i] != databuf_stride) {
      if (databuf_[i]) {
        delete[] databuf_[i];
      }
databuf_[i] = new uint8_t[databuf_size];
|
||||
databuf_strides_[i] = databuf_stride;
|
||||
}
|
||||
|
||||
if (GetComponentStride(i) != GetComponentWidth(i)) {
|
||||
has_scanline_padding_ = LIBYUV_TRUE;
|
||||
}
|
||||
}
|
||||
return LIBYUV_TRUE;
|
||||
}
|
||||
|
||||
static int DivideAndRoundUp(int numerator, int denominator) {
|
||||
return (numerator + denominator - 1) / denominator;
|
||||
}
|
||||
|
||||
static int DivideAndRoundDown(int numerator, int denominator) {
|
||||
return numerator / denominator;
|
||||
}
|
||||
|
||||
// Returns width of the last loaded frame.
|
||||
int MJpegDecoder::GetWidth() {
|
||||
return decompress_struct_->image_width;
|
||||
}
|
||||
|
||||
// Returns height of the last loaded frame.
|
||||
int MJpegDecoder::GetHeight() {
|
||||
return decompress_struct_->image_height;
|
||||
}
|
||||
|
||||
// Returns format of the last loaded frame. The return value is one of the
|
||||
// kColorSpace* constants.
|
||||
int MJpegDecoder::GetColorSpace() {
|
||||
return decompress_struct_->jpeg_color_space;
|
||||
}
|
||||
|
||||
// Number of color components in the color space.
|
||||
int MJpegDecoder::GetNumComponents() {
|
||||
return decompress_struct_->num_components;
|
||||
}
|
||||
|
||||
// Sample factors of the n-th component.
|
||||
int MJpegDecoder::GetHorizSampFactor(int component) {
|
||||
return decompress_struct_->comp_info[component].h_samp_factor;
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetVertSampFactor(int component) {
|
||||
return decompress_struct_->comp_info[component].v_samp_factor;
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetHorizSubSampFactor(int component) {
|
||||
return decompress_struct_->max_h_samp_factor / GetHorizSampFactor(component);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetVertSubSampFactor(int component) {
|
||||
return decompress_struct_->max_v_samp_factor / GetVertSampFactor(component);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetImageScanlinesPerImcuRow() {
|
||||
return decompress_struct_->max_v_samp_factor * DCTSIZE;
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
|
||||
int vs = GetVertSubSampFactor(component);
|
||||
return DivideAndRoundUp(GetImageScanlinesPerImcuRow(), vs);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetComponentWidth(int component) {
|
||||
int hs = GetHorizSubSampFactor(component);
|
||||
return DivideAndRoundUp(GetWidth(), hs);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetComponentHeight(int component) {
|
||||
int vs = GetVertSubSampFactor(component);
|
||||
return DivideAndRoundUp(GetHeight(), vs);
|
||||
}
|
||||
|
||||
// Get width in bytes padded out to a multiple of DCTSIZE
|
||||
int MJpegDecoder::GetComponentStride(int component) {
|
||||
return (GetComponentWidth(component) + DCTSIZE - 1) & ~(DCTSIZE - 1);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetComponentSize(int component) {
|
||||
return GetComponentWidth(component) * GetComponentHeight(component);
|
||||
}
|
||||
|
||||
LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
|
||||
#ifdef HAVE_SETJMP
|
||||
if (setjmp(error_mgr_->setjmp_buffer)) {
|
||||
// We called jpeg_abort_decompress, it experienced an error, and we called
|
||||
// longjmp() and rewound the stack to here. Return error.
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#endif
|
||||
jpeg_abort_decompress(decompress_struct_);
|
||||
return LIBYUV_TRUE;
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(uint8_t** planes,
|
||||
int dst_width,
|
||||
int dst_height) {
|
||||
if (dst_width != GetWidth() || dst_height > GetHeight()) {
|
||||
// ERROR: Bad dimensions
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#ifdef HAVE_SETJMP
|
||||
if (setjmp(error_mgr_->setjmp_buffer)) {
|
||||
// We called into jpeglib, it experienced an error sometime during this
|
||||
// function call, and we called longjmp() and rewound the stack to here.
|
||||
// Return error.
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#endif
|
||||
if (!StartDecode()) {
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
SetScanlinePointers(databuf_);
|
||||
int lines_left = dst_height;
|
||||
// Compute amount of lines to skip to implement vertical crop.
|
||||
// TODO(fbarchard): Ensure skip is a multiple of maximum component
|
||||
// subsample. ie 2
|
||||
int skip = (GetHeight() - dst_height) / 2;
|
||||
if (skip > 0) {
|
||||
// There is no API to skip lines in the output data, so we read them
|
||||
// into the temp buffer.
|
||||
while (skip >= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
skip -= GetImageScanlinesPerImcuRow();
|
||||
}
|
||||
if (skip > 0) {
|
||||
// Have a partial iMCU row left over to skip. Must read it and then
|
||||
// copy the parts we want into the destination.
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
// TODO(fbarchard): Compute skip to avoid this
|
||||
assert(skip % GetVertSubSampFactor(i) == 0);
|
||||
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
|
||||
int scanlines_to_copy =
|
||||
GetComponentScanlinesPerImcuRow(i) - rows_to_skip;
|
||||
int data_to_skip = rows_to_skip * GetComponentStride(i);
|
||||
CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i), planes[i],
|
||||
GetComponentWidth(i), GetComponentWidth(i),
|
||||
scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
lines_left -= (GetImageScanlinesPerImcuRow() - skip);
|
||||
}
|
||||
}
|
||||
|
||||
// Read full MCUs but cropped horizontally
|
||||
for (; lines_left > GetImageScanlinesPerImcuRow();
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
|
||||
CopyPlane(databuf_[i], GetComponentStride(i), planes[i],
|
||||
GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
}
|
||||
|
||||
if (lines_left > 0) {
|
||||
// Have a partial iMCU row left over to decode.
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int scanlines_to_copy =
|
||||
DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
|
||||
CopyPlane(databuf_[i], GetComponentStride(i), planes[i],
|
||||
GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
}
|
||||
return FinishDecode();
|
||||
}
|
||||
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn,
|
||||
void* opaque,
|
||||
int dst_width,
|
||||
int dst_height) {
|
||||
if (dst_width != GetWidth() || dst_height > GetHeight()) {
|
||||
// ERROR: Bad dimensions
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#ifdef HAVE_SETJMP
|
||||
if (setjmp(error_mgr_->setjmp_buffer)) {
|
||||
// We called into jpeglib, it experienced an error sometime during this
|
||||
// function call, and we called longjmp() and rewound the stack to here.
|
||||
// Return error.
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#endif
|
||||
if (!StartDecode()) {
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
SetScanlinePointers(databuf_);
|
||||
int lines_left = dst_height;
|
||||
// TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
|
||||
int skip = (GetHeight() - dst_height) / 2;
|
||||
if (skip > 0) {
|
||||
while (skip >= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
skip -= GetImageScanlinesPerImcuRow();
|
||||
}
|
||||
if (skip > 0) {
|
||||
// Have a partial iMCU row left over to skip.
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
// TODO(fbarchard): Compute skip to avoid this
|
||||
assert(skip % GetVertSubSampFactor(i) == 0);
|
||||
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
|
||||
int data_to_skip = rows_to_skip * GetComponentStride(i);
|
||||
// Change our own data buffer pointers so we can pass them to the
|
||||
// callback.
|
||||
databuf_[i] += data_to_skip;
|
||||
}
|
||||
int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
|
||||
(*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
|
||||
// Now change them back.
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
|
||||
int data_to_skip = rows_to_skip * GetComponentStride(i);
|
||||
databuf_[i] -= data_to_skip;
|
||||
}
|
||||
lines_left -= scanlines_to_copy;
|
||||
}
|
||||
}
|
||||
// Read full MCUs until we get to the crop point.
|
||||
for (; lines_left >= GetImageScanlinesPerImcuRow();
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
(*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
|
||||
}
|
||||
if (lines_left > 0) {
|
||||
// Have a partial iMCU row left over to decode.
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
(*fn)(opaque, databuf_, databuf_strides_, lines_left);
|
||||
}
|
||||
return FinishDecode();
|
||||
}
|
||||
|
||||
void init_source(j_decompress_ptr cinfo) {
|
||||
fill_input_buffer(cinfo);
|
||||
}
|
||||
|
||||
boolean fill_input_buffer(j_decompress_ptr cinfo) {
|
||||
BufferVector* buf_vec = reinterpret_cast<BufferVector*>(cinfo->client_data);
|
||||
if (buf_vec->pos >= buf_vec->len) {
|
||||
// ERROR: No more data
|
||||
return FALSE;
|
||||
}
|
||||
cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data;
|
||||
cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len;
|
||||
++buf_vec->pos;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
|
||||
jpeg_source_mgr* src = cinfo->src;
|
||||
size_t bytes = (size_t)num_bytes;
|
||||
if (bytes > src->bytes_in_buffer) {
|
||||
src->next_input_byte = nullptr;
|
||||
src->bytes_in_buffer = 0;
|
||||
} else {
|
||||
src->next_input_byte += bytes;
|
||||
src->bytes_in_buffer -= bytes;
|
||||
}
|
||||
}
|
||||
|
||||
void term_source(j_decompress_ptr cinfo) {
|
||||
(void)cinfo; // Nothing to do.
|
||||
}
|
||||
|
||||
#ifdef HAVE_SETJMP
|
||||
void ErrorHandler(j_common_ptr cinfo) {
|
||||
// This is called when a jpeglib command experiences an error. Unfortunately
|
||||
// jpeglib's error handling model is not very flexible, because it expects the
|
||||
// error handler to not return--i.e., it wants the program to terminate. To
|
||||
// recover from errors we use setjmp() as shown in their example. setjmp() is
|
||||
// C's implementation for the "call with current continuation" functionality
|
||||
// seen in some functional programming languages.
|
||||
// A formatted message can be output, but is unsafe for release.
|
||||
#ifdef DEBUG
|
||||
char buf[JMSG_LENGTH_MAX];
|
||||
(*cinfo->err->format_message)(cinfo, buf);
|
||||
// ERROR: Error in jpeglib: buf
|
||||
#endif
|
||||
|
||||
SetJmpErrorMgr* mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err);
|
||||
// This rewinds the call stack to the point of the corresponding setjmp()
|
||||
// and causes it to return (for a second time) with value 1.
|
||||
longjmp(mgr->setjmp_buffer, 1);
|
||||
}

// Suppress fprintf warnings.
void OutputHandler(j_common_ptr cinfo) {
  (void)cinfo;
}

#endif  // HAVE_SETJMP

void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
  if (num_outbufs != num_outbufs_) {
    // We could perhaps optimize this case to resize the output buffers without
    // necessarily having to delete and recreate each one, but it's not worth
    // it.
    DestroyOutputBuffers();

    scanlines_ = new uint8_t**[num_outbufs];
    scanlines_sizes_ = new int[num_outbufs];
    databuf_ = new uint8_t*[num_outbufs];
    databuf_strides_ = new int[num_outbufs];

    for (int i = 0; i < num_outbufs; ++i) {
      scanlines_[i] = NULL;
      scanlines_sizes_[i] = 0;
      databuf_[i] = NULL;
      databuf_strides_[i] = 0;
    }

    num_outbufs_ = num_outbufs;
  }
}

void MJpegDecoder::DestroyOutputBuffers() {
  for (int i = 0; i < num_outbufs_; ++i) {
    delete[] scanlines_[i];
    delete[] databuf_[i];
  }
  delete[] scanlines_;
  delete[] databuf_;
  delete[] scanlines_sizes_;
  delete[] databuf_strides_;
  scanlines_ = NULL;
  databuf_ = NULL;
  scanlines_sizes_ = NULL;
  databuf_strides_ = NULL;
  num_outbufs_ = 0;
}

// Using JDCT_IFAST and disabling block smoothing improve performance
// substantially.
LIBYUV_BOOL MJpegDecoder::StartDecode() {
  decompress_struct_->raw_data_out = TRUE;
  decompress_struct_->dct_method = JDCT_IFAST;  // JDCT_ISLOW is default
  decompress_struct_->dither_mode = JDITHER_NONE;
  // Not applicable to 'raw':
  decompress_struct_->do_fancy_upsampling = (boolean)(LIBYUV_FALSE);
  // Only for buffered mode:
  decompress_struct_->enable_2pass_quant = (boolean)(LIBYUV_FALSE);
  // Blocky but fast:
  decompress_struct_->do_block_smoothing = (boolean)(LIBYUV_FALSE);

  if (!jpeg_start_decompress(decompress_struct_)) {
    // ERROR: Couldn't start JPEG decompressor
    return LIBYUV_FALSE;
  }
  return LIBYUV_TRUE;
}

LIBYUV_BOOL MJpegDecoder::FinishDecode() {
  // jpeglib considers it an error if we finish without decoding the whole
  // image, so we call "abort" rather than "finish".
  jpeg_abort_decompress(decompress_struct_);
  return LIBYUV_TRUE;
}

void MJpegDecoder::SetScanlinePointers(uint8_t** data) {
  for (int i = 0; i < num_outbufs_; ++i) {
    uint8_t* data_i = data[i];
    for (int j = 0; j < scanlines_sizes_[i]; ++j) {
      scanlines_[i][j] = data_i;
      data_i += GetComponentStride(i);
    }
  }
}

inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
  return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
         jpeg_read_raw_data(decompress_struct_, scanlines_,
                            GetImageScanlinesPerImcuRow());
}
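
// Illustrative sketch (assumed; the real loop lives in the Decode* methods):
// with raw_data_out set in StartDecode(), jpeglib hands out pixels one iMCU
// row at a time, so decoding walks the image in iMCU-row steps.
//   for (int y = 0; y < GetHeight(); y += GetImageScanlinesPerImcuRow()) {
//     if (!DecodeImcuRow()) {
//       FinishDecode();
//       return LIBYUV_FALSE;
//     }
//   }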

// Helper function that recognizes the jpeg sub-sampling type.
JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
    int* subsample_x,
    int* subsample_y,
    int number_of_components) {
  if (number_of_components == 3) {  // Color images.
    if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 &&
        subsample_y[1] == 2 && subsample_x[2] == 2 && subsample_y[2] == 2) {
      return kJpegYuv420;
    }
    if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 &&
        subsample_y[1] == 1 && subsample_x[2] == 2 && subsample_y[2] == 1) {
      return kJpegYuv422;
    }
    if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 1 &&
        subsample_y[1] == 1 && subsample_x[2] == 1 && subsample_y[2] == 1) {
      return kJpegYuv444;
    }
  } else if (number_of_components == 1) {  // Grey-scale images.
    if (subsample_x[0] == 1 && subsample_y[0] == 1) {
      return kJpegYuv400;
    }
  }
  return kJpegUnknown;
}
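
// Worked example (values assumed): a 4:2:0 JPEG reports per-component
// subsample factors {1, 2, 2} in both x and y, so:
//   int sx[3] = {1, 2, 2};
//   int sy[3] = {1, 2, 2};
//   // JpegSubsamplingTypeHelper(sx, sy, 3) returns kJpegYuv420.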

}  // namespace libyuv
#endif  // HAVE_JPEG
71
TMessagesProj/jni/third_party/libyuv/source/mjpeg_validate.cc
vendored
Normal file
@@ -0,0 +1,71 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/mjpeg_decoder.h"

#include <string.h>  // For memchr.

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Helper function to scan for EOI marker (0xff 0xd9).
static LIBYUV_BOOL ScanEOI(const uint8_t* src_mjpg, size_t src_size_mjpg) {
  if (src_size_mjpg >= 2) {
    const uint8_t* end = src_mjpg + src_size_mjpg - 1;
    const uint8_t* it = src_mjpg;
    while (it < end) {
      // TODO(fbarchard): scan for 0xd9 instead.
      it = (const uint8_t*)(memchr(it, 0xff, end - it));
      if (it == NULL) {
        break;
      }
      if (it[1] == 0xd9) {
        return LIBYUV_TRUE;  // Success: Valid jpeg.
      }
      ++it;  // Skip over current 0xff.
    }
  }
  // ERROR: Invalid jpeg: end code (EOI) not found. Size: src_size_mjpg
  return LIBYUV_FALSE;
}

// Helper function to validate that the jpeg appears intact.
LIBYUV_BOOL ValidateJpeg(const uint8_t* src_mjpg, size_t src_size_mjpg) {
  // Maximum size that ValidateJpeg will consider valid.
  const size_t kMaxJpegSize = 0x7fffffffull;
  const size_t kBackSearchSize = 1024;
  if (src_size_mjpg < 64 || src_size_mjpg > kMaxJpegSize || !src_mjpg) {
    // ERROR: Invalid jpeg size: src_size_mjpg
    return LIBYUV_FALSE;
  }
  // SOI marker
  if (src_mjpg[0] != 0xff || src_mjpg[1] != 0xd8 || src_mjpg[2] != 0xff) {
    // ERROR: Invalid jpeg initial start code
    return LIBYUV_FALSE;
  }

  // Look for the End Of Image (EOI) marker near the end of the buffer.
  if (src_size_mjpg > kBackSearchSize) {
    if (ScanEOI(src_mjpg + src_size_mjpg - kBackSearchSize, kBackSearchSize)) {
      return LIBYUV_TRUE;  // Success: Valid jpeg.
    }
    // Reduce search size for forward search.
    src_size_mjpg = src_size_mjpg - kBackSearchSize + 1;
  }
  // Step over SOI marker and scan for EOI.
  return ScanEOI(src_mjpg + 2, src_size_mjpg - 2);
}
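
// Illustrative usage (assumed caller, not part of this file):
//   if (ValidateJpeg(frame, frame_size)) {
//     // Buffer starts with an SOI marker (0xff 0xd8) and an EOI marker
//     // (0xff 0xd9) was found, so it is reasonable to hand it to a decoder.
//   }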

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
5498
TMessagesProj/jni/third_party/libyuv/source/planar_functions.cc
vendored
Normal file
File diff suppressed because it is too large
1183
TMessagesProj/jni/third_party/libyuv/source/rotate.cc
vendored
Normal file
File diff suppressed because it is too large
79
TMessagesProj/jni/third_party/libyuv/source/rotate_any.cc
vendored
Normal file
@@ -0,0 +1,79 @@
/*
 *  Copyright 2015 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate.h"
#include "libyuv/rotate_row.h"

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#define TANY(NAMEANY, TPOS_SIMD, MASK)                                        \
  void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst,              \
               int dst_stride, int width) {                                   \
    int r = width & MASK;                                                     \
    int n = width - r;                                                        \
    if (n > 0) {                                                              \
      TPOS_SIMD(src, src_stride, dst, dst_stride, n);                         \
    }                                                                         \
    TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r); \
  }
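
// For reference, TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7) expands to
// the following wrapper (a sketch of the generated code, not an addition):
// the SIMD kernel handles the largest multiple-of-8 span of columns and the
// C kernel finishes the 0-7 leftover columns.
//   void TransposeWx8_Any_NEON(const uint8_t* src, int src_stride,
//                              uint8_t* dst, int dst_stride, int width) {
//     int r = width & 7;   // leftover columns
//     int n = width - r;   // columns the SIMD kernel can handle
//     if (n > 0) {
//       TransposeWx8_NEON(src, src_stride, dst, dst_stride, n);
//     }
//     TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);
//   }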

#ifdef HAS_TRANSPOSEWX8_NEON
TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_SSSE3
TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
#endif
#ifdef HAS_TRANSPOSEWX16_MSA
TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15)
#endif
#ifdef HAS_TRANSPOSEWX16_LSX
TANY(TransposeWx16_Any_LSX, TransposeWx16_LSX, 15)
#endif
#undef TANY

#define TUVANY(NAMEANY, TPOS_SIMD, MASK)                                       \
  void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a,             \
               int dst_stride_a, uint8_t* dst_b, int dst_stride_b,             \
               int width) {                                                    \
    int r = width & MASK;                                                      \
    int n = width - r;                                                         \
    if (n > 0) {                                                               \
      TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, n); \
    }                                                                          \
    TransposeUVWx8_C(src + n * 2, src_stride, dst_a + n * dst_stride_a,        \
                     dst_stride_a, dst_b + n * dst_stride_b, dst_stride_b, r); \
  }

#ifdef HAS_TRANSPOSEUVWX8_NEON
TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX8_SSE2
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX16_MSA
TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX16_LSX
TUVANY(TransposeUVWx16_Any_LSX, TransposeUVWx16_LSX, 7)
#endif
#undef TUVANY

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
244
TMessagesProj/jni/third_party/libyuv/source/rotate_argb.cc
vendored
Normal file
@@ -0,0 +1,244 @@
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_argb.h"

#include "libyuv/convert.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/row.h"
#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

static int ARGBTranspose(const uint8_t* src_argb,
                         int src_stride_argb,
                         uint8_t* dst_argb,
                         int dst_stride_argb,
                         int width,
                         int height) {
  int i;
  int src_pixel_step = src_stride_argb >> 2;
  void (*ScaleARGBRowDownEven)(
      const uint8_t* src_argb, ptrdiff_t src_stride_argb, int src_step,
      uint8_t* dst_argb, int dst_width) = ScaleARGBRowDownEven_C;
  // Check stride is a multiple of 4.
  if (src_stride_argb & 3) {
    return -1;
  }
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_SSE2;
    if (IS_ALIGNED(height, 4)) {  // Width of dest.
      ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_NEON;
    if (IS_ALIGNED(height, 4)) {  // Width of dest.
      ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MSA;
    if (IS_ALIGNED(height, 4)) {  // Width of dest.
      ScaleARGBRowDownEven = ScaleARGBRowDownEven_MSA;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_LSX;
    if (IS_ALIGNED(height, 4)) {  // Width of dest.
      ScaleARGBRowDownEven = ScaleARGBRowDownEven_LSX;
    }
  }
#endif

  for (i = 0; i < width; ++i) {  // column of source to row of dest.
    ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
    dst_argb += dst_stride_argb;
    src_argb += 4;
  }
  return 0;
}

static int ARGBRotate90(const uint8_t* src_argb,
                        int src_stride_argb,
                        uint8_t* dst_argb,
                        int dst_stride_argb,
                        int width,
                        int height) {
  // Rotate by 90 is an ARGBTranspose with the source read
  // from bottom to top. So set the source pointer to the end
  // of the buffer and flip the sign of the source stride.
  src_argb += src_stride_argb * (height - 1);
  src_stride_argb = -src_stride_argb;
  return ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                       width, height);
}

static int ARGBRotate270(const uint8_t* src_argb,
                         int src_stride_argb,
                         uint8_t* dst_argb,
                         int dst_stride_argb,
                         int width,
                         int height) {
  // Rotate by 270 is an ARGBTranspose with the destination written
  // from bottom to top. So set the destination pointer to the end
  // of the buffer and flip the sign of the destination stride.
  dst_argb += dst_stride_argb * (width - 1);
  dst_stride_argb = -dst_stride_argb;
  return ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                       width, height);
}

static int ARGBRotate180(const uint8_t* src_argb,
                         int src_stride_argb,
                         uint8_t* dst_argb,
                         int dst_stride_argb,
                         int width,
                         int height) {
  // Swap first and last row and mirror the content. Uses a temporary row.
  align_buffer_64(row, width * 4);
  const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1);
  uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1);
  int half_height = (height + 1) >> 1;
  int y;
  void (*ARGBMirrorRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
      ARGBMirrorRow_C;
  void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
      CopyRow_C;
#if defined(HAS_ARGBMIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBMirrorRow = ARGBMirrorRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMirrorRow = ARGBMirrorRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMirrorRow = ARGBMirrorRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      ARGBMirrorRow = ARGBMirrorRow_MSA;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
    if (IS_ALIGNED(width, 16)) {
      ARGBMirrorRow = ARGBMirrorRow_LASX;
    }
  }
#endif
#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
  }
#endif
#if defined(HAS_COPYROW_AVX)
  if (TestCpuFlag(kCpuHasAVX)) {
    CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
  }
#endif

  // Odd height will harmlessly mirror the middle row twice.
  for (y = 0; y < half_height; ++y) {
    ARGBMirrorRow(src_argb, row, width);      // Mirror first row into a buffer
    ARGBMirrorRow(src_bot, dst_argb, width);  // Mirror last row into first row
    CopyRow(row, dst_bot, width * 4);         // Copy first mirrored row into last
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
    src_bot -= src_stride_argb;
    dst_bot -= dst_stride_argb;
  }
  free_aligned_buffer_64(row);
  return 0;
}

LIBYUV_API
int ARGBRotate(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int width,
               int height,
               enum RotationMode mode) {
  if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return ARGBCopy(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                      width, height);
    case kRotate90:
      return ARGBRotate90(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                          width, height);
    case kRotate270:
      return ARGBRotate270(src_argb, src_stride_argb, dst_argb,
                           dst_stride_argb, width, height);
    case kRotate180:
      return ARGBRotate180(src_argb, src_stride_argb, dst_argb,
                           dst_stride_argb, width, height);
    default:
      break;
  }
  return -1;
}
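
// Illustrative usage (assumed buffers, not part of this file): a 90- or
// 270-degree rotation swaps the image dimensions, so the destination is
// height x width and its stride is based on the source height.
//   uint8_t* dst = new uint8_t[width * height * 4];  // rotated frame
//   ARGBRotate(src, width * 4, dst, height * 4, width, height, kRotate90);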

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
172
TMessagesProj/jni/third_party/libyuv/source/rotate_common.cc
vendored
Normal file
@@ -0,0 +1,172 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

void TransposeWx8_C(const uint8_t* src,
                    int src_stride,
                    uint8_t* dst,
                    int dst_stride,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[0] = src[0 * src_stride];
    dst[1] = src[1 * src_stride];
    dst[2] = src[2 * src_stride];
    dst[3] = src[3 * src_stride];
    dst[4] = src[4 * src_stride];
    dst[5] = src[5 * src_stride];
    dst[6] = src[6 * src_stride];
    dst[7] = src[7 * src_stride];
    ++src;
    dst += dst_stride;
  }
}

void TransposeUVWx8_C(const uint8_t* src,
                      int src_stride,
                      uint8_t* dst_a,
                      int dst_stride_a,
                      uint8_t* dst_b,
                      int dst_stride_b,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[0] = src[0 * src_stride + 0];
    dst_b[0] = src[0 * src_stride + 1];
    dst_a[1] = src[1 * src_stride + 0];
    dst_b[1] = src[1 * src_stride + 1];
    dst_a[2] = src[2 * src_stride + 0];
    dst_b[2] = src[2 * src_stride + 1];
    dst_a[3] = src[3 * src_stride + 0];
    dst_b[3] = src[3 * src_stride + 1];
    dst_a[4] = src[4 * src_stride + 0];
    dst_b[4] = src[4 * src_stride + 1];
    dst_a[5] = src[5 * src_stride + 0];
    dst_b[5] = src[5 * src_stride + 1];
    dst_a[6] = src[6 * src_stride + 0];
    dst_b[6] = src[6 * src_stride + 1];
    dst_a[7] = src[7 * src_stride + 0];
    dst_b[7] = src[7 * src_stride + 1];
    src += 2;
    dst_a += dst_stride_a;
    dst_b += dst_stride_b;
  }
}

void TransposeWxH_C(const uint8_t* src,
                    int src_stride,
                    uint8_t* dst,
                    int dst_stride,
                    int width,
                    int height) {
  int i;
  for (i = 0; i < width; ++i) {
    int j;
    for (j = 0; j < height; ++j) {
      dst[i * dst_stride + j] = src[j * src_stride + i];
    }
  }
}

void TransposeUVWxH_C(const uint8_t* src,
                      int src_stride,
                      uint8_t* dst_a,
                      int dst_stride_a,
                      uint8_t* dst_b,
                      int dst_stride_b,
                      int width,
                      int height) {
  int i;
  for (i = 0; i < width * 2; i += 2) {
    int j;
    for (j = 0; j < height; ++j) {
      dst_a[((i >> 1) * dst_stride_a) + j] = src[i + (j * src_stride)];
      dst_b[((i >> 1) * dst_stride_b) + j] = src[i + (j * src_stride) + 1];
    }
  }
}

void TransposeWx8_16_C(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[0] = src[0 * src_stride];
    dst[1] = src[1 * src_stride];
    dst[2] = src[2 * src_stride];
    dst[3] = src[3 * src_stride];
    dst[4] = src[4 * src_stride];
    dst[5] = src[5 * src_stride];
    dst[6] = src[6 * src_stride];
    dst[7] = src[7 * src_stride];
    ++src;
    dst += dst_stride;
  }
}

void TransposeUVWx8_16_C(const uint16_t* src,
                         int src_stride,
                         uint16_t* dst_a,
                         int dst_stride_a,
                         uint16_t* dst_b,
                         int dst_stride_b,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[0] = src[0 * src_stride + 0];
    dst_b[0] = src[0 * src_stride + 1];
    dst_a[1] = src[1 * src_stride + 0];
    dst_b[1] = src[1 * src_stride + 1];
    dst_a[2] = src[2 * src_stride + 0];
    dst_b[2] = src[2 * src_stride + 1];
    dst_a[3] = src[3 * src_stride + 0];
    dst_b[3] = src[3 * src_stride + 1];
    dst_a[4] = src[4 * src_stride + 0];
    dst_b[4] = src[4 * src_stride + 1];
    dst_a[5] = src[5 * src_stride + 0];
    dst_b[5] = src[5 * src_stride + 1];
    dst_a[6] = src[6 * src_stride + 0];
    dst_b[6] = src[6 * src_stride + 1];
    dst_a[7] = src[7 * src_stride + 0];
    dst_b[7] = src[7 * src_stride + 1];
    src += 2;
    dst_a += dst_stride_a;
    dst_b += dst_stride_b;
  }
}

void TransposeWxH_16_C(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width,
                       int height) {
  int i;
  for (i = 0; i < width; ++i) {
    int j;
    for (j = 0; j < height; ++j) {
      dst[i * dst_stride + j] = src[j * src_stride + i];
    }
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
373
TMessagesProj/jni/third_party/libyuv/source/rotate_gcc.cc
vendored
Normal file
@@ -0,0 +1,373 @@
/*
 *  Copyright 2015 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))

// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
#if defined(HAS_TRANSPOSEWX8_SSSE3)
void TransposeWx8_SSSE3(const uint8_t* src,
                        int src_stride,
                        uint8_t* dst,
                        int dst_stride,
                        int width) {
  asm volatile(
      // Read in the data from the source pointer.
      // First round of bit swap.
      LABELALIGN
      "1: \n"
      "movq (%0),%%xmm0 \n"
      "movq (%0,%3),%%xmm1 \n"
      "lea (%0,%3,2),%0 \n"
      "punpcklbw %%xmm1,%%xmm0 \n"
      "movq (%0),%%xmm2 \n"
      "movdqa %%xmm0,%%xmm1 \n"
      "palignr $0x8,%%xmm1,%%xmm1 \n"
      "movq (%0,%3),%%xmm3 \n"
      "lea (%0,%3,2),%0 \n"
      "punpcklbw %%xmm3,%%xmm2 \n"
      "movdqa %%xmm2,%%xmm3 \n"
      "movq (%0),%%xmm4 \n"
      "palignr $0x8,%%xmm3,%%xmm3 \n"
      "movq (%0,%3),%%xmm5 \n"
      "lea (%0,%3,2),%0 \n"
      "punpcklbw %%xmm5,%%xmm4 \n"
      "movdqa %%xmm4,%%xmm5 \n"
      "movq (%0),%%xmm6 \n"
      "palignr $0x8,%%xmm5,%%xmm5 \n"
      "movq (%0,%3),%%xmm7 \n"
      "lea (%0,%3,2),%0 \n"
      "punpcklbw %%xmm7,%%xmm6 \n"
      "neg %3 \n"
      "movdqa %%xmm6,%%xmm7 \n"
      "lea 0x8(%0,%3,8),%0 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      "neg %3 \n"
      // Second round of bit swap.
      "punpcklwd %%xmm2,%%xmm0 \n"
      "punpcklwd %%xmm3,%%xmm1 \n"
      "movdqa %%xmm0,%%xmm2 \n"
      "movdqa %%xmm1,%%xmm3 \n"
      "palignr $0x8,%%xmm2,%%xmm2 \n"
      "palignr $0x8,%%xmm3,%%xmm3 \n"
      "punpcklwd %%xmm6,%%xmm4 \n"
      "punpcklwd %%xmm7,%%xmm5 \n"
      "movdqa %%xmm4,%%xmm6 \n"
      "movdqa %%xmm5,%%xmm7 \n"
      "palignr $0x8,%%xmm6,%%xmm6 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      // Third round of bit swap.
      // Write to the destination pointer.
      "punpckldq %%xmm4,%%xmm0 \n"
      "movq %%xmm0,(%1) \n"
      "movdqa %%xmm0,%%xmm4 \n"
      "palignr $0x8,%%xmm4,%%xmm4 \n"
      "movq %%xmm4,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm6,%%xmm2 \n"
      "movdqa %%xmm2,%%xmm6 \n"
      "movq %%xmm2,(%1) \n"
      "palignr $0x8,%%xmm6,%%xmm6 \n"
      "punpckldq %%xmm5,%%xmm1 \n"
      "movq %%xmm6,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "movdqa %%xmm1,%%xmm5 \n"
      "movq %%xmm1,(%1) \n"
      "palignr $0x8,%%xmm5,%%xmm5 \n"
      "movq %%xmm5,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm7,%%xmm3 \n"
      "movq %%xmm3,(%1) \n"
      "movdqa %%xmm3,%%xmm7 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      "sub $0x8,%2 \n"
      "movq %%xmm7,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "jg 1b \n"
      : "+r"(src),    // %0
        "+r"(dst),    // %1
        "+r"(width)   // %2
      : "r"((intptr_t)(src_stride)),  // %3
        "r"((intptr_t)(dst_stride))   // %4
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7");
}
#endif  // defined(HAS_TRANSPOSEWX8_SSSE3)

// Transpose 16x8. 64 bit.
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
void TransposeWx8_Fast_SSSE3(const uint8_t* src,
                             int src_stride,
                             uint8_t* dst,
                             int dst_stride,
                             int width) {
  asm volatile(
      // Read in the data from the source pointer.
      // First round of bit swap.
      LABELALIGN
      "1: \n"
      "movdqu (%0),%%xmm0 \n"
      "movdqu (%0,%3),%%xmm1 \n"
      "lea (%0,%3,2),%0 \n"
      "movdqa %%xmm0,%%xmm8 \n"
      "punpcklbw %%xmm1,%%xmm0 \n"
      "punpckhbw %%xmm1,%%xmm8 \n"
      "movdqu (%0),%%xmm2 \n"
      "movdqa %%xmm0,%%xmm1 \n"
      "movdqa %%xmm8,%%xmm9 \n"
      "palignr $0x8,%%xmm1,%%xmm1 \n"
      "palignr $0x8,%%xmm9,%%xmm9 \n"
      "movdqu (%0,%3),%%xmm3 \n"
      "lea (%0,%3,2),%0 \n"
      "movdqa %%xmm2,%%xmm10 \n"
      "punpcklbw %%xmm3,%%xmm2 \n"
      "punpckhbw %%xmm3,%%xmm10 \n"
      "movdqa %%xmm2,%%xmm3 \n"
      "movdqa %%xmm10,%%xmm11 \n"
      "movdqu (%0),%%xmm4 \n"
      "palignr $0x8,%%xmm3,%%xmm3 \n"
      "palignr $0x8,%%xmm11,%%xmm11 \n"
      "movdqu (%0,%3),%%xmm5 \n"
      "lea (%0,%3,2),%0 \n"
      "movdqa %%xmm4,%%xmm12 \n"
      "punpcklbw %%xmm5,%%xmm4 \n"
      "punpckhbw %%xmm5,%%xmm12 \n"
      "movdqa %%xmm4,%%xmm5 \n"
      "movdqa %%xmm12,%%xmm13 \n"
      "movdqu (%0),%%xmm6 \n"
      "palignr $0x8,%%xmm5,%%xmm5 \n"
      "palignr $0x8,%%xmm13,%%xmm13 \n"
      "movdqu (%0,%3),%%xmm7 \n"
      "lea (%0,%3,2),%0 \n"
      "movdqa %%xmm6,%%xmm14 \n"
      "punpcklbw %%xmm7,%%xmm6 \n"
      "punpckhbw %%xmm7,%%xmm14 \n"
      "neg %3 \n"
      "movdqa %%xmm6,%%xmm7 \n"
      "movdqa %%xmm14,%%xmm15 \n"
      "lea 0x10(%0,%3,8),%0 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      "palignr $0x8,%%xmm15,%%xmm15 \n"
      "neg %3 \n"
      // Second round of bit swap.
      "punpcklwd %%xmm2,%%xmm0 \n"
      "punpcklwd %%xmm3,%%xmm1 \n"
      "movdqa %%xmm0,%%xmm2 \n"
      "movdqa %%xmm1,%%xmm3 \n"
      "palignr $0x8,%%xmm2,%%xmm2 \n"
      "palignr $0x8,%%xmm3,%%xmm3 \n"
      "punpcklwd %%xmm6,%%xmm4 \n"
      "punpcklwd %%xmm7,%%xmm5 \n"
      "movdqa %%xmm4,%%xmm6 \n"
      "movdqa %%xmm5,%%xmm7 \n"
      "palignr $0x8,%%xmm6,%%xmm6 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      "punpcklwd %%xmm10,%%xmm8 \n"
      "punpcklwd %%xmm11,%%xmm9 \n"
      "movdqa %%xmm8,%%xmm10 \n"
      "movdqa %%xmm9,%%xmm11 \n"
      "palignr $0x8,%%xmm10,%%xmm10 \n"
      "palignr $0x8,%%xmm11,%%xmm11 \n"
      "punpcklwd %%xmm14,%%xmm12 \n"
      "punpcklwd %%xmm15,%%xmm13 \n"
      "movdqa %%xmm12,%%xmm14 \n"
      "movdqa %%xmm13,%%xmm15 \n"
      "palignr $0x8,%%xmm14,%%xmm14 \n"
      "palignr $0x8,%%xmm15,%%xmm15 \n"
      // Third round of bit swap.
      // Write to the destination pointer.
      "punpckldq %%xmm4,%%xmm0 \n"
      "movq %%xmm0,(%1) \n"
      "movdqa %%xmm0,%%xmm4 \n"
      "palignr $0x8,%%xmm4,%%xmm4 \n"
      "movq %%xmm4,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm6,%%xmm2 \n"
      "movdqa %%xmm2,%%xmm6 \n"
      "movq %%xmm2,(%1) \n"
      "palignr $0x8,%%xmm6,%%xmm6 \n"
      "punpckldq %%xmm5,%%xmm1 \n"
      "movq %%xmm6,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "movdqa %%xmm1,%%xmm5 \n"
      "movq %%xmm1,(%1) \n"
      "palignr $0x8,%%xmm5,%%xmm5 \n"
      "movq %%xmm5,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm7,%%xmm3 \n"
      "movq %%xmm3,(%1) \n"
      "movdqa %%xmm3,%%xmm7 \n"
      "palignr $0x8,%%xmm7,%%xmm7 \n"
      "movq %%xmm7,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm12,%%xmm8 \n"
      "movq %%xmm8,(%1) \n"
      "movdqa %%xmm8,%%xmm12 \n"
      "palignr $0x8,%%xmm12,%%xmm12 \n"
      "movq %%xmm12,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm14,%%xmm10 \n"
      "movdqa %%xmm10,%%xmm14 \n"
      "movq %%xmm10,(%1) \n"
      "palignr $0x8,%%xmm14,%%xmm14 \n"
      "punpckldq %%xmm13,%%xmm9 \n"
      "movq %%xmm14,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "movdqa %%xmm9,%%xmm13 \n"
      "movq %%xmm9,(%1) \n"
      "palignr $0x8,%%xmm13,%%xmm13 \n"
      "movq %%xmm13,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "punpckldq %%xmm15,%%xmm11 \n"
      "movq %%xmm11,(%1) \n"
      "movdqa %%xmm11,%%xmm15 \n"
      "palignr $0x8,%%xmm15,%%xmm15 \n"
      "sub $0x10,%2 \n"
      "movq %%xmm15,(%1,%4) \n"
      "lea (%1,%4,2),%1 \n"
      "jg 1b \n"
      : "+r"(src),    // %0
        "+r"(dst),    // %1
        "+r"(width)   // %2
      : "r"((intptr_t)(src_stride)),  // %3
        "r"((intptr_t)(dst_stride))   // %4
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
        "xmm15");
}
#endif  // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)

// Transpose UV 8x8. 64 bit.
#if defined(HAS_TRANSPOSEUVWX8_SSE2)
void TransposeUVWx8_SSE2(const uint8_t* src,
                         int src_stride,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         int width) {
  asm volatile(
      // Read in the data from the source pointer.
      // First round of bit swap.
      LABELALIGN
      "1: \n"
      "movdqu (%0),%%xmm0 \n"
      "movdqu (%0,%4),%%xmm1 \n"
      "lea (%0,%4,2),%0 \n"
      "movdqa %%xmm0,%%xmm8 \n"
      "punpcklbw %%xmm1,%%xmm0 \n"
      "punpckhbw %%xmm1,%%xmm8 \n"
      "movdqa %%xmm8,%%xmm1 \n"
      "movdqu (%0),%%xmm2 \n"
      "movdqu (%0,%4),%%xmm3 \n"
      "lea (%0,%4,2),%0 \n"
      "movdqa %%xmm2,%%xmm8 \n"
      "punpcklbw %%xmm3,%%xmm2 \n"
      "punpckhbw %%xmm3,%%xmm8 \n"
      "movdqa %%xmm8,%%xmm3 \n"
      "movdqu (%0),%%xmm4 \n"
      "movdqu (%0,%4),%%xmm5 \n"
      "lea (%0,%4,2),%0 \n"
      "movdqa %%xmm4,%%xmm8 \n"
      "punpcklbw %%xmm5,%%xmm4 \n"
      "punpckhbw %%xmm5,%%xmm8 \n"
      "movdqa %%xmm8,%%xmm5 \n"
      "movdqu (%0),%%xmm6 \n"
      "movdqu (%0,%4),%%xmm7 \n"
      "lea (%0,%4,2),%0 \n"
      "movdqa %%xmm6,%%xmm8 \n"
      "punpcklbw %%xmm7,%%xmm6 \n"
      "neg %4 \n"
      "lea 0x10(%0,%4,8),%0 \n"
      "punpckhbw %%xmm7,%%xmm8 \n"
      "movdqa %%xmm8,%%xmm7 \n"
      "neg %4 \n"
      // Second round of bit swap.
      "movdqa %%xmm0,%%xmm8 \n"
      "movdqa %%xmm1,%%xmm9 \n"
      "punpckhwd %%xmm2,%%xmm8 \n"
      "punpckhwd %%xmm3,%%xmm9 \n"
      "punpcklwd %%xmm2,%%xmm0 \n"
      "punpcklwd %%xmm3,%%xmm1 \n"
      "movdqa %%xmm8,%%xmm2 \n"
      "movdqa %%xmm9,%%xmm3 \n"
      "movdqa %%xmm4,%%xmm8 \n"
      "movdqa %%xmm5,%%xmm9 \n"
      "punpckhwd %%xmm6,%%xmm8 \n"
      "punpckhwd %%xmm7,%%xmm9 \n"
      "punpcklwd %%xmm6,%%xmm4 \n"
      "punpcklwd %%xmm7,%%xmm5 \n"
      "movdqa %%xmm8,%%xmm6 \n"
      "movdqa %%xmm9,%%xmm7 \n"
      // Third round of bit swap.
      // Write to the destination pointer.
      "movdqa %%xmm0,%%xmm8 \n"
      "punpckldq %%xmm4,%%xmm0 \n"
      "movlpd %%xmm0,(%1) \n"  // Write back U channel
      "movhpd %%xmm0,(%2) \n"  // Write back V channel
      "punpckhdq %%xmm4,%%xmm8 \n"
      "movlpd %%xmm8,(%1,%5) \n"
      "lea (%1,%5,2),%1 \n"
      "movhpd %%xmm8,(%2,%6) \n"
      "lea (%2,%6,2),%2 \n"
      "movdqa %%xmm2,%%xmm8 \n"
      "punpckldq %%xmm6,%%xmm2 \n"
      "movlpd %%xmm2,(%1) \n"
      "movhpd %%xmm2,(%2) \n"
      "punpckhdq %%xmm6,%%xmm8 \n"
      "movlpd %%xmm8,(%1,%5) \n"
      "lea (%1,%5,2),%1 \n"
      "movhpd %%xmm8,(%2,%6) \n"
      "lea (%2,%6,2),%2 \n"
      "movdqa %%xmm1,%%xmm8 \n"
      "punpckldq %%xmm5,%%xmm1 \n"
      "movlpd %%xmm1,(%1) \n"
      "movhpd %%xmm1,(%2) \n"
      "punpckhdq %%xmm5,%%xmm8 \n"
      "movlpd %%xmm8,(%1,%5) \n"
      "lea (%1,%5,2),%1 \n"
      "movhpd %%xmm8,(%2,%6) \n"
      "lea (%2,%6,2),%2 \n"
      "movdqa %%xmm3,%%xmm8 \n"
      "punpckldq %%xmm7,%%xmm3 \n"
      "movlpd %%xmm3,(%1) \n"
      "movhpd %%xmm3,(%2) \n"
      "punpckhdq %%xmm7,%%xmm8 \n"
      "sub $0x8,%3 \n"
      "movlpd %%xmm8,(%1,%5) \n"
      "lea (%1,%5,2),%1 \n"
      "movhpd %%xmm8,(%2,%6) \n"
      "lea (%2,%6,2),%2 \n"
      "jg 1b \n"
      : "+r"(src),    // %0
        "+r"(dst_a),  // %1
        "+r"(dst_b),  // %2
        "+r"(width)   // %3
      : "r"((intptr_t)(src_stride)),    // %4
        "r"((intptr_t)(dst_stride_a)),  // %5
        "r"((intptr_t)(dst_stride_b))   // %6
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7", "xmm8", "xmm9");
}
#endif  // defined(HAS_TRANSPOSEUVWX8_SSE2)
#endif  // defined(__x86_64__) || defined(__i386__)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
243
TMessagesProj/jni/third_party/libyuv/source/rotate_lsx.cc
vendored
Normal file
@@ -0,0 +1,243 @@
/*
 *  Copyright 2022 The LibYuv Project Authors. All rights reserved.
 *
 *  Copyright (c) 2022 Loongson Technology Corporation Limited
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_row.h"

#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
#include "libyuv/loongson_intrinsics.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#define ILVLH_B(in0, in1, in2, in3, out0, out1, out2, out3)   \
  {                                                           \
    DUP2_ARG2(__lsx_vilvl_b, in1, in0, in3, in2, out0, out2); \
    DUP2_ARG2(__lsx_vilvh_b, in1, in0, in3, in2, out1, out3); \
  }

#define ILVLH_H(in0, in1, in2, in3, out0, out1, out2, out3)   \
  {                                                           \
    DUP2_ARG2(__lsx_vilvl_h, in1, in0, in3, in2, out0, out2); \
    DUP2_ARG2(__lsx_vilvh_h, in1, in0, in3, in2, out1, out3); \
  }

#define ILVLH_W(in0, in1, in2, in3, out0, out1, out2, out3)   \
  {                                                           \
    DUP2_ARG2(__lsx_vilvl_w, in1, in0, in3, in2, out0, out2); \
    DUP2_ARG2(__lsx_vilvh_w, in1, in0, in3, in2, out1, out3); \
  }

#define ILVLH_D(in0, in1, in2, in3, out0, out1, out2, out3)   \
  {                                                           \
    DUP2_ARG2(__lsx_vilvl_d, in1, in0, in3, in2, out0, out2); \
    DUP2_ARG2(__lsx_vilvh_d, in1, in0, in3, in2, out1, out3); \
  }
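
// For reference (semantics assumed from the LSX vilvl/vilvh intrinsics):
// ILVLH_B(a, b, c, d, o0, o1, o2, o3) interleaves bytes pairwise, so
//   o0 = {a0,b0,a1,b1,...,a7,b7} and o1 = {a8,b8,...,a15,b15},
// with c/d filling o2/o3 the same way. The _H/_W/_D variants do the same at
// halfword/word/doubleword granularity; chaining B->H->W->D below builds the
// 16x16 byte transpose.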

#define LSX_ST_4(_dst0, _dst1, _dst2, _dst3, _dst, _stride, _stride2, \
                 _stride3, _stride4)                                  \
  {                                                                   \
    __lsx_vst(_dst0, _dst, 0);                                        \
    __lsx_vstx(_dst1, _dst, _stride);                                 \
    __lsx_vstx(_dst2, _dst, _stride2);                                \
    __lsx_vstx(_dst3, _dst, _stride3);                                \
    _dst += _stride4;                                                 \
  }

#define LSX_ST_2(_dst0, _dst1, _dst, _stride, _stride2) \
  {                                                     \
    __lsx_vst(_dst0, _dst, 0);                          \
    __lsx_vstx(_dst1, _dst, _stride);                   \
    _dst += _stride2;                                   \
  }

void TransposeWx16_C(const uint8_t* src,
                     int src_stride,
                     uint8_t* dst,
                     int dst_stride,
                     int width) {
  TransposeWx8_C(src, src_stride, dst, dst_stride, width);
  TransposeWx8_C((src + 8 * src_stride), src_stride, (dst + 8), dst_stride,
                 width);
}

void TransposeUVWx16_C(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst_a,
                       int dst_stride_a,
                       uint8_t* dst_b,
                       int dst_stride_b,
                       int width) {
  TransposeUVWx8_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
                   width);
  TransposeUVWx8_C((src + 8 * src_stride), src_stride, (dst_a + 8),
                   dst_stride_a, (dst_b + 8), dst_stride_b, width);
}

void TransposeWx16_LSX(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst,
                       int dst_stride,
                       int width) {
  int x;
  int len = width / 16;
  uint8_t* s;
  int src_stride2 = src_stride << 1;
  int src_stride3 = src_stride + src_stride2;
  int src_stride4 = src_stride2 << 1;
  int dst_stride2 = dst_stride << 1;
  int dst_stride3 = dst_stride + dst_stride2;
  int dst_stride4 = dst_stride2 << 1;
  __m128i src0, src1, src2, src3, dst0, dst1, dst2, dst3;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
  __m128i res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;

  for (x = 0; x < len; x++) {
    s = (uint8_t*)src;
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg0, reg1, reg2, reg3);
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg4, reg5, reg6, reg7);
    ILVLH_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3);
    ILVLH_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7);
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg0, reg1, reg2, reg3);
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg4, reg5, reg6, reg7);
    res8 = __lsx_vilvl_w(reg4, reg0);
    res9 = __lsx_vilvh_w(reg4, reg0);
    ILVLH_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
    LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
             dst_stride4);
    res8 = __lsx_vilvl_w(reg5, reg1);
    res9 = __lsx_vilvh_w(reg5, reg1);
    ILVLH_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
    LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
             dst_stride4);
    res8 = __lsx_vilvl_w(reg6, reg2);
    res9 = __lsx_vilvh_w(reg6, reg2);
    ILVLH_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
    LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
             dst_stride4);
    res8 = __lsx_vilvl_w(reg7, reg3);
    res9 = __lsx_vilvh_w(reg7, reg3);
    ILVLH_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
    LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
             dst_stride4);
    src += 16;
  }
}

void TransposeUVWx16_LSX(const uint8_t* src,
                         int src_stride,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         int width) {
  int x;
  int len = width / 8;
  uint8_t* s;
  int src_stride2 = src_stride << 1;
  int src_stride3 = src_stride + src_stride2;
  int src_stride4 = src_stride2 << 1;
  int dst_stride_a2 = dst_stride_a << 1;
  int dst_stride_b2 = dst_stride_b << 1;
  __m128i src0, src1, src2, src3, dst0, dst1, dst2, dst3;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
  __m128i res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;

  for (x = 0; x < len; x++) {
    s = (uint8_t*)src;
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg0, reg1, reg2, reg3);
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg4, reg5, reg6, reg7);
    ILVLH_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3);
    ILVLH_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7);
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg0, reg1, reg2, reg3);
    src0 = __lsx_vld(s, 0);
    src1 = __lsx_vldx(s, src_stride);
    src2 = __lsx_vldx(s, src_stride2);
    src3 = __lsx_vldx(s, src_stride3);
    s += src_stride4;
    ILVLH_B(src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3);
    ILVLH_H(tmp0, tmp2, tmp1, tmp3, reg4, reg5, reg6, reg7);
    res8 = __lsx_vilvl_w(reg4, reg0);
    res9 = __lsx_vilvh_w(reg4, reg0);
    ILVLH_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
    LSX_ST_2(dst0, dst2, dst_a, dst_stride_a, dst_stride_a2);
    LSX_ST_2(dst1, dst3, dst_b, dst_stride_b, dst_stride_b2);
    res8 = __lsx_vilvl_w(reg5, reg1);
    res9 = __lsx_vilvh_w(reg5, reg1);
    ILVLH_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
    LSX_ST_2(dst0, dst2, dst_a, dst_stride_a, dst_stride_a2);
    LSX_ST_2(dst1, dst3, dst_b, dst_stride_b, dst_stride_b2);
    res8 = __lsx_vilvl_w(reg6, reg2);
    res9 = __lsx_vilvh_w(reg6, reg2);
    ILVLH_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
    LSX_ST_2(dst0, dst2, dst_a, dst_stride_a, dst_stride_a2);
    LSX_ST_2(dst1, dst3, dst_b, dst_stride_b, dst_stride_b2);
    res8 = __lsx_vilvl_w(reg7, reg3);
    res9 = __lsx_vilvh_w(reg7, reg3);
    ILVLH_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
    LSX_ST_2(dst0, dst2, dst_a, dst_stride_a, dst_stride_a2);
    LSX_ST_2(dst1, dst3, dst_b, dst_stride_b, dst_stride_b2);
    src += 16;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
291
TMessagesProj/jni/third_party/libyuv/source/rotate_mmi.cc
vendored
Normal file
@@ -0,0 +1,291 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for Mips MMI.
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)

void TransposeWx8_MMI(const uint8_t* src,
                      int src_stride,
                      uint8_t* dst,
                      int dst_stride,
                      int width) {
  uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
  uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13;
  uint8_t* src_tmp = nullptr;

  __asm__ volatile(
      "1: \n\t"
      "ldc1 %[tmp12], 0x00(%[src]) \n\t"
      "dadd %[src_tmp], %[src], %[src_stride] \n\t"
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp0 = (00 10 01 11 02 12 03 13) */
      "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
      /* tmp1 = (04 14 05 15 06 16 07 17) */
      "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp2 = (20 30 21 31 22 32 23 33) */
      "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
      /* tmp3 = (24 34 25 35 26 36 27 37) */
      "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"

      /* tmp4 = (00 10 20 30 01 11 21 31) */
      "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t"
      /* tmp5 = (02 12 22 32 03 13 23 33) */
      "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t"
      /* tmp6 = (04 14 24 34 05 15 25 35) */
      "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t"
      /* tmp7 = (06 16 26 36 07 17 27 37) */
      "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp0 = (40 50 41 51 42 52 43 53) */
      "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
      /* tmp1 = (44 54 45 55 46 56 47 57) */
      "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp2 = (60 70 61 71 62 72 63 73) */
      "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
      /* tmp3 = (64 74 65 75 66 76 67 77) */
      "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"

      /* tmp8 = (40 50 60 70 41 51 61 71) */
      "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t"
      /* tmp9 = (42 52 62 72 43 53 63 73) */
      "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t"
      /* tmp10 = (44 54 64 74 45 55 65 75) */
      "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t"
      /* tmp11 = (46 56 66 76 47 57 67 77) */
      "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t"

      /* tmp0 = (00 10 20 30 40 50 60 70) */
      "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t"
      /* tmp1 = (01 11 21 31 41 51 61 71) */
      "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"

      /* tmp0 = (02 12 22 32 42 52 62 72) */
      "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t"
      /* tmp1 = (03 13 23 33 43 53 63 73) */
      "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"

      /* tmp0 = (04 14 24 34 44 54 64 74) */
      "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t"
      /* tmp1 = (05 15 25 35 45 55 65 75) */
      "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"

      /* tmp0 = (06 16 26 36 46 56 66 76) */
      "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t"
      /* tmp1 = (07 17 27 37 47 57 67 77) */
      "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"

      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "daddi %[src], %[src], 0x08 \n\t"
      "daddi %[width], %[width], -0x08 \n\t"
      "bnez %[width], 1b \n\t"

      : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
        [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5),
        [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8),
        [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11),
        [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst] "+&r"(dst),
        [src_tmp] "+&r"(src_tmp)
      : [src] "r"(src), [width] "r"(width), [src_stride] "r"(src_stride),
        [dst_stride] "r"(dst_stride)
      : "memory");
}

void TransposeUVWx8_MMI(const uint8_t* src,
                        int src_stride,
                        uint8_t* dst_a,
                        int dst_stride_a,
                        uint8_t* dst_b,
                        int dst_stride_b,
                        int width) {
  uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
  uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13;
  uint8_t* src_tmp = nullptr;

  __asm__ volatile(
      "1: \n\t"
      /* tmp12 = (u00 v00 u01 v01 u02 v02 u03 v03) */
      "ldc1 %[tmp12], 0x00(%[src]) \n\t"
      "dadd %[src_tmp], %[src], %[src_stride] \n\t"
      /* tmp13 = (u10 v10 u11 v11 u12 v12 u13 v13) */
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp0 = (u00 u10 v00 v10 u01 u11 v01 v11) */
      "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
      /* tmp1 = (u02 u12 v02 v12 u03 u13 v03 v13) */
      "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      /* tmp12 = (u20 v20 u21 v21 u22 v22 u23 v23) */
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      /* tmp13 = (u30 v30 u31 v31 u32 v32 u33 v33) */
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp2 = (u20 u30 v20 v30 u21 u31 v21 v31) */
      "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
      /* tmp3 = (u22 u32 v22 v32 u23 u33 v23 v33) */
      "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"

      /* tmp4 = (u00 u10 u20 u30 v00 v10 v20 v30) */
      "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t"
      /* tmp5 = (u01 u11 u21 u31 v01 v11 v21 v31) */
      "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t"
      /* tmp6 = (u02 u12 u22 u32 v02 v12 v22 v32) */
      "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t"
      /* tmp7 = (u03 u13 u23 u33 v03 v13 v23 v33) */
      "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      /* tmp12 = (u40 v40 u41 v41 u42 v42 u43 v43) */
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      /* tmp13 = (u50 v50 u51 v51 u52 v52 u53 v53) */
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp0 = (u40 u50 v40 v50 u41 u51 v41 v51) */
      "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
      /* tmp1 = (u42 u52 v42 v52 u43 u53 v43 v53) */
      "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      /* tmp12 = (u60 v60 u61 v61 u62 v62 u63 v63) */
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      /* tmp13 = (u70 v70 u71 v71 u72 v72 u73 v73) */
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp2 = (u60 u70 v60 v70 u61 u71 v61 v71) */
      "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
      /* tmp3 = (u62 u72 v62 v72 u63 u73 v63 v73) */
      "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"

      /* tmp8 = (u40 u50 u60 u70 v40 v50 v60 v70) */
      "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t"
      /* tmp9 = (u41 u51 u61 u71 v41 v51 v61 v71) */
      "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t"
      /* tmp10 = (u42 u52 u62 u72 v42 v52 v62 v72) */
      "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t"
      /* tmp11 = (u43 u53 u63 u73 v43 v53 v63 v73) */
      "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t"

      /* tmp0 = (u00 u10 u20 u30 u40 u50 u60 u70) */
      "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t"
      /* tmp1 = (v00 v10 v20 v30 v40 v50 v60 v70) */
      "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"

      /* tmp0 = (u01 u11 u21 u31 u41 u51 u61 u71) */
      "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t"
      /* tmp1 = (v01 v11 v21 v31 v41 v51 v61 v71) */
      "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t"
      "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
      "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"

      /* tmp0 = (u02 u12 u22 u32 u42 u52 u62 u72) */
      "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t"
      /* tmp1 = (v02 v12 v22 v32 v42 v52 v62 v72) */
      "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t"
      "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
      "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"

      /* tmp0 = (u03 u13 u23 u33 u43 u53 u63 u73) */
      "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t"
      /* tmp1 = (v03 v13 v23 v33 v43 v53 v63 v73) */
      "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t"
      "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
      "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"

      "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
      "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
      "daddiu %[src], %[src], 0x08 \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"

      : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
        [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5),
        [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8),
        [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11),
        [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst_a] "+&r"(dst_a),
        [dst_b] "+&r"(dst_b), [src_tmp] "+&r"(src_tmp)
      : [src] "r"(src), [width] "r"(width), [dst_stride_a] "r"(dst_stride_a),
        [dst_stride_b] "r"(dst_stride_b), [src_stride] "r"(src_stride)
      : "memory");
}

#endif  // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
250
TMessagesProj/jni/third_party/libyuv/source/rotate_msa.cc
vendored
Normal file
@@ -0,0 +1,250 @@
/*
 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_row.h"

// This module is for GCC MSA
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include "libyuv/macros_msa.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#define ILVRL_B(in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                         \
    out0 = (v16u8)__msa_ilvr_b((v16i8)in1, (v16i8)in0);     \
    out1 = (v16u8)__msa_ilvl_b((v16i8)in1, (v16i8)in0);     \
    out2 = (v16u8)__msa_ilvr_b((v16i8)in3, (v16i8)in2);     \
    out3 = (v16u8)__msa_ilvl_b((v16i8)in3, (v16i8)in2);     \
  }

#define ILVRL_H(in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                         \
    out0 = (v16u8)__msa_ilvr_h((v8i16)in1, (v8i16)in0);     \
    out1 = (v16u8)__msa_ilvl_h((v8i16)in1, (v8i16)in0);     \
    out2 = (v16u8)__msa_ilvr_h((v8i16)in3, (v8i16)in2);     \
    out3 = (v16u8)__msa_ilvl_h((v8i16)in3, (v8i16)in2);     \
  }

#define ILVRL_W(in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                         \
    out0 = (v16u8)__msa_ilvr_w((v4i32)in1, (v4i32)in0);     \
    out1 = (v16u8)__msa_ilvl_w((v4i32)in1, (v4i32)in0);     \
    out2 = (v16u8)__msa_ilvr_w((v4i32)in3, (v4i32)in2);     \
    out3 = (v16u8)__msa_ilvl_w((v4i32)in3, (v4i32)in2);     \
  }

#define ILVRL_D(in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                         \
    out0 = (v16u8)__msa_ilvr_d((v2i64)in1, (v2i64)in0);     \
    out1 = (v16u8)__msa_ilvl_d((v2i64)in1, (v2i64)in0);     \
    out2 = (v16u8)__msa_ilvr_d((v2i64)in3, (v2i64)in2);     \
    out3 = (v16u8)__msa_ilvl_d((v2i64)in3, (v2i64)in2);     \
  }

// Wx16 transposes are composed from two Wx8 passes: rows 8..15 land
// 8 bytes further into each output row.
void TransposeWx16_C(const uint8_t* src,
                     int src_stride,
                     uint8_t* dst,
                     int dst_stride,
                     int width) {
  TransposeWx8_C(src, src_stride, dst, dst_stride, width);
  TransposeWx8_C((src + 8 * src_stride), src_stride, (dst + 8), dst_stride,
                 width);
}

void TransposeUVWx16_C(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst_a,
                       int dst_stride_a,
                       uint8_t* dst_b,
                       int dst_stride_b,
                       int width) {
  TransposeUVWx8_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
                   width);
  TransposeUVWx8_C((src + 8 * src_stride), src_stride, (dst_a + 8),
                   dst_stride_a, (dst_b + 8), dst_stride_b, width);
}

void TransposeWx16_MSA(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst,
                       int dst_stride,
                       int width) {
  int x;
  const uint8_t* s;
  v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3;
  v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
  v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;

  for (x = 0; x < width; x += 16) {
    s = src;
    // Rows 0-3.
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
    ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
    // Rows 4-7.
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
    ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
    ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3);
    ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7);
    // Rows 8-11.
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
    ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
    // Rows 12-15.
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
    ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
    res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0);
    res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0);
    ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
    ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
    dst += dst_stride * 4;
    res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1);
    res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1);
    ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
    ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
    dst += dst_stride * 4;
    res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2);
    res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2);
    ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
    ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
    dst += dst_stride * 4;
    res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3);
    res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3);
    ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
    ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
    src += 16;
    dst += dst_stride * 4;
  }
}

void TransposeUVWx16_MSA(const uint8_t* src,
                         int src_stride,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         int width) {
  int x;
  const uint8_t* s;
  v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3;
  v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
  v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;

  for (x = 0; x < width; x += 8) {
    s = src;
    // Rows 0-3.
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
    ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
    // Rows 4-7.
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
    ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
    ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3);
    ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7);
    // Rows 8-11.
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
    ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
    // Rows 12-15.
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    s += src_stride;
    ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
    ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
    res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0);
    res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0);
    ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
    ST_UB2(dst0, dst2, dst_a, dst_stride_a);
    ST_UB2(dst1, dst3, dst_b, dst_stride_b);
    dst_a += dst_stride_a * 2;
    dst_b += dst_stride_b * 2;
    res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1);
    res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1);
    ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
    ST_UB2(dst0, dst2, dst_a, dst_stride_a);
    ST_UB2(dst1, dst3, dst_b, dst_stride_b);
    dst_a += dst_stride_a * 2;
    dst_b += dst_stride_b * 2;
    res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2);
    res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2);
    ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
    ST_UB2(dst0, dst2, dst_a, dst_stride_a);
    ST_UB2(dst1, dst3, dst_b, dst_stride_b);
    dst_a += dst_stride_a * 2;
    dst_b += dst_stride_b * 2;
    res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3);
    res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3);
    ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
    ST_UB2(dst0, dst2, dst_a, dst_stride_a);
    ST_UB2(dst1, dst3, dst_b, dst_stride_b);
    src += 16;
    dst_a += dst_stride_a * 2;
    dst_b += dst_stride_b * 2;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
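The ILVRL_* wrappers above all lean on the MSA even/odd interleave instructions. As a reference point, here is a scalar model of the byte variant, assuming the architectural ilvr.b/ilvl.b lane order; this is a sketch for orientation, not the intrinsic itself:

#include <stdint.h>

// Scalar model of the MSA byte interleaves used by ILVRL_B, following
// the definition wd[2i] = wt[i], wd[2i+1] = ws[i]: "r" interleaves the
// low (right) halves of the two inputs, "l" the high (left) halves.
static void ilvrl_b_model(const uint8_t ws[16], const uint8_t wt[16],
                          uint8_t ilvr[16], uint8_t ilvl[16]) {
  for (int i = 0; i < 8; ++i) {
    ilvr[2 * i + 0] = wt[i];
    ilvr[2 * i + 1] = ws[i];
    ilvl[2 * i + 0] = wt[8 + i];
    ilvl[2 * i + 1] = ws[8 + i];
  }
}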
418
TMessagesProj/jni/third_party/libyuv/source/rotate_neon.cc
vendored
Normal file
@@ -0,0 +1,418 @@
/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_row.h"
#include "libyuv/row.h"

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
    !defined(__aarch64__)

static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13,
                                        2, 6, 10, 14, 3, 7, 11, 15};

void TransposeWx8_NEON(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst,
                       int dst_stride,
                       int width) {
  const uint8_t* src_temp;
  asm volatile(
      // Loops are on blocks of 8. The loop will stop when the counter
      // gets to or below 0; starting the counter at w-8 allows for this.
      "sub %5, #8 \n"

      // Handle 8x8 blocks. This should be the majority of the plane.
      "1: \n"
      "mov %0, %1 \n"

      "vld1.8 {d0}, [%0], %2 \n"
      "vld1.8 {d1}, [%0], %2 \n"
      "vld1.8 {d2}, [%0], %2 \n"
      "vld1.8 {d3}, [%0], %2 \n"
      "vld1.8 {d4}, [%0], %2 \n"
      "vld1.8 {d5}, [%0], %2 \n"
      "vld1.8 {d6}, [%0], %2 \n"
      "vld1.8 {d7}, [%0] \n"

      "vtrn.8 d1, d0 \n"
      "vtrn.8 d3, d2 \n"
      "vtrn.8 d5, d4 \n"
      "vtrn.8 d7, d6 \n"

      "vtrn.16 d1, d3 \n"
      "vtrn.16 d0, d2 \n"
      "vtrn.16 d5, d7 \n"
      "vtrn.16 d4, d6 \n"

      "vtrn.32 d1, d5 \n"
      "vtrn.32 d0, d4 \n"
      "vtrn.32 d3, d7 \n"
      "vtrn.32 d2, d6 \n"

      "vrev16.8 q0, q0 \n"
      "vrev16.8 q1, q1 \n"
      "vrev16.8 q2, q2 \n"
      "vrev16.8 q3, q3 \n"

      "mov %0, %3 \n"

      "vst1.8 {d1}, [%0], %4 \n"
      "vst1.8 {d0}, [%0], %4 \n"
      "vst1.8 {d3}, [%0], %4 \n"
      "vst1.8 {d2}, [%0], %4 \n"
      "vst1.8 {d5}, [%0], %4 \n"
      "vst1.8 {d4}, [%0], %4 \n"
      "vst1.8 {d7}, [%0], %4 \n"
      "vst1.8 {d6}, [%0] \n"

      "add %1, #8 \n"  // src += 8
      "add %3, %3, %4, lsl #3 \n"  // dst += 8 * dst_stride
      "subs %5, #8 \n"  // w -= 8
      "bge 1b \n"

      // Add 8 back to the counter. If the result is 0 there are
      // no residuals.
      "adds %5, #8 \n"
      "beq 4f \n"

      // Some residual, so between 1 and 7 lines left to transpose.
      "cmp %5, #2 \n"
      "blt 3f \n"

      "cmp %5, #4 \n"
      "blt 2f \n"

      // 4x8 block
      "mov %0, %1 \n"
      "vld1.32 {d0[0]}, [%0], %2 \n"
      "vld1.32 {d0[1]}, [%0], %2 \n"
      "vld1.32 {d1[0]}, [%0], %2 \n"
      "vld1.32 {d1[1]}, [%0], %2 \n"
      "vld1.32 {d2[0]}, [%0], %2 \n"
      "vld1.32 {d2[1]}, [%0], %2 \n"
      "vld1.32 {d3[0]}, [%0], %2 \n"
      "vld1.32 {d3[1]}, [%0] \n"

      "mov %0, %3 \n"

      "vld1.8 {q3}, [%6] \n"

      "vtbl.8 d4, {d0, d1}, d6 \n"
      "vtbl.8 d5, {d0, d1}, d7 \n"
      "vtbl.8 d0, {d2, d3}, d6 \n"
      "vtbl.8 d1, {d2, d3}, d7 \n"

      // TODO(frkoenig): Rework shuffle above to
      // write out with 4 instead of 8 writes.
      "vst1.32 {d4[0]}, [%0], %4 \n"
      "vst1.32 {d4[1]}, [%0], %4 \n"
      "vst1.32 {d5[0]}, [%0], %4 \n"
      "vst1.32 {d5[1]}, [%0] \n"

      "add %0, %3, #4 \n"
      "vst1.32 {d0[0]}, [%0], %4 \n"
      "vst1.32 {d0[1]}, [%0], %4 \n"
      "vst1.32 {d1[0]}, [%0], %4 \n"
      "vst1.32 {d1[1]}, [%0] \n"

      "add %1, #4 \n"  // src += 4
      "add %3, %3, %4, lsl #2 \n"  // dst += 4 * dst_stride
      "subs %5, #4 \n"  // w -= 4
      "beq 4f \n"

      // Some residual; check whether it includes a 2x8 block or less.
      "cmp %5, #2 \n"
      "blt 3f \n"

      // 2x8 block
      "2: \n"
      "mov %0, %1 \n"
      "vld1.16 {d0[0]}, [%0], %2 \n"
      "vld1.16 {d1[0]}, [%0], %2 \n"
      "vld1.16 {d0[1]}, [%0], %2 \n"
      "vld1.16 {d1[1]}, [%0], %2 \n"
      "vld1.16 {d0[2]}, [%0], %2 \n"
      "vld1.16 {d1[2]}, [%0], %2 \n"
      "vld1.16 {d0[3]}, [%0], %2 \n"
      "vld1.16 {d1[3]}, [%0] \n"

      "vtrn.8 d0, d1 \n"

      "mov %0, %3 \n"

      "vst1.64 {d0}, [%0], %4 \n"
      "vst1.64 {d1}, [%0] \n"

      "add %1, #2 \n"  // src += 2
      "add %3, %3, %4, lsl #1 \n"  // dst += 2 * dst_stride
      "subs %5, #2 \n"  // w -= 2
      "beq 4f \n"

      // 1x8 block
      "3: \n"
      "vld1.8 {d0[0]}, [%1], %2 \n"
      "vld1.8 {d0[1]}, [%1], %2 \n"
      "vld1.8 {d0[2]}, [%1], %2 \n"
      "vld1.8 {d0[3]}, [%1], %2 \n"
      "vld1.8 {d0[4]}, [%1], %2 \n"
      "vld1.8 {d0[5]}, [%1], %2 \n"
      "vld1.8 {d0[6]}, [%1], %2 \n"
      "vld1.8 {d0[7]}, [%1] \n"

      "vst1.64 {d0}, [%3] \n"

      "4: \n"

      : "=&r"(src_temp),  // %0
        "+r"(src),  // %1
        "+r"(src_stride),  // %2
        "+r"(dst),  // %3
        "+r"(dst_stride),  // %4
        "+r"(width)  // %5
      : "r"(&kVTbl4x4Transpose)  // %6
      : "memory", "cc", "q0", "q1", "q2", "q3");
}

static const uvec8 kVTbl4x4TransposeDi = {0, 8, 1, 9, 2, 10, 3, 11,
                                          4, 12, 5, 13, 6, 14, 7, 15};

void TransposeUVWx8_NEON(const uint8_t* src,
                         int src_stride,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         int width) {
  const uint8_t* src_temp;
  asm volatile(
      // Loops are on blocks of 8. The loop will stop when the counter
      // gets to or below 0; starting the counter at w-8 allows for this.
      "sub %7, #8 \n"

      // Handle 8x8 blocks. This should be the majority of the plane.
      "1: \n"
      "mov %0, %1 \n"

      "vld2.8 {d0, d1}, [%0], %2 \n"
      "vld2.8 {d2, d3}, [%0], %2 \n"
      "vld2.8 {d4, d5}, [%0], %2 \n"
      "vld2.8 {d6, d7}, [%0], %2 \n"
      "vld2.8 {d16, d17}, [%0], %2 \n"
      "vld2.8 {d18, d19}, [%0], %2 \n"
      "vld2.8 {d20, d21}, [%0], %2 \n"
      "vld2.8 {d22, d23}, [%0] \n"

      "vtrn.8 q1, q0 \n"
      "vtrn.8 q3, q2 \n"
      "vtrn.8 q9, q8 \n"
      "vtrn.8 q11, q10 \n"

      "vtrn.16 q1, q3 \n"
      "vtrn.16 q0, q2 \n"
      "vtrn.16 q9, q11 \n"
      "vtrn.16 q8, q10 \n"

      "vtrn.32 q1, q9 \n"
      "vtrn.32 q0, q8 \n"
      "vtrn.32 q3, q11 \n"
      "vtrn.32 q2, q10 \n"

      "vrev16.8 q0, q0 \n"
      "vrev16.8 q1, q1 \n"
      "vrev16.8 q2, q2 \n"
      "vrev16.8 q3, q3 \n"
      "vrev16.8 q8, q8 \n"
      "vrev16.8 q9, q9 \n"
      "vrev16.8 q10, q10 \n"
      "vrev16.8 q11, q11 \n"

      "mov %0, %3 \n"

      "vst1.8 {d2}, [%0], %4 \n"
      "vst1.8 {d0}, [%0], %4 \n"
      "vst1.8 {d6}, [%0], %4 \n"
      "vst1.8 {d4}, [%0], %4 \n"
      "vst1.8 {d18}, [%0], %4 \n"
      "vst1.8 {d16}, [%0], %4 \n"
      "vst1.8 {d22}, [%0], %4 \n"
      "vst1.8 {d20}, [%0] \n"

      "mov %0, %5 \n"

      "vst1.8 {d3}, [%0], %6 \n"
      "vst1.8 {d1}, [%0], %6 \n"
      "vst1.8 {d7}, [%0], %6 \n"
      "vst1.8 {d5}, [%0], %6 \n"
      "vst1.8 {d19}, [%0], %6 \n"
      "vst1.8 {d17}, [%0], %6 \n"
      "vst1.8 {d23}, [%0], %6 \n"
      "vst1.8 {d21}, [%0] \n"

      "add %1, #8*2 \n"  // src += 8*2
      "add %3, %3, %4, lsl #3 \n"  // dst_a += 8 * dst_stride_a
      "add %5, %5, %6, lsl #3 \n"  // dst_b += 8 * dst_stride_b
      "subs %7, #8 \n"  // w -= 8
      "bge 1b \n"

      // Add 8 back to the counter. If the result is 0 there are
      // no residuals.
      "adds %7, #8 \n"
      "beq 4f \n"

      // Some residual, so between 1 and 7 lines left to transpose.
      "cmp %7, #2 \n"
      "blt 3f \n"

      "cmp %7, #4 \n"
      "blt 2f \n"

      // TODO(frkoenig): Clean this up
      // 4x8 block
      "mov %0, %1 \n"
      "vld1.64 {d0}, [%0], %2 \n"
      "vld1.64 {d1}, [%0], %2 \n"
      "vld1.64 {d2}, [%0], %2 \n"
      "vld1.64 {d3}, [%0], %2 \n"
      "vld1.64 {d4}, [%0], %2 \n"
      "vld1.64 {d5}, [%0], %2 \n"
      "vld1.64 {d6}, [%0], %2 \n"
      "vld1.64 {d7}, [%0] \n"

      "vld1.8 {q15}, [%8] \n"

      "vtrn.8 q0, q1 \n"
      "vtrn.8 q2, q3 \n"

      "vtbl.8 d16, {d0, d1}, d30 \n"
      "vtbl.8 d17, {d0, d1}, d31 \n"
      "vtbl.8 d18, {d2, d3}, d30 \n"
      "vtbl.8 d19, {d2, d3}, d31 \n"
      "vtbl.8 d20, {d4, d5}, d30 \n"
      "vtbl.8 d21, {d4, d5}, d31 \n"
      "vtbl.8 d22, {d6, d7}, d30 \n"
      "vtbl.8 d23, {d6, d7}, d31 \n"

      "mov %0, %3 \n"

      "vst1.32 {d16[0]}, [%0], %4 \n"
      "vst1.32 {d16[1]}, [%0], %4 \n"
      "vst1.32 {d17[0]}, [%0], %4 \n"
      "vst1.32 {d17[1]}, [%0], %4 \n"

      "add %0, %3, #4 \n"
      "vst1.32 {d20[0]}, [%0], %4 \n"
      "vst1.32 {d20[1]}, [%0], %4 \n"
      "vst1.32 {d21[0]}, [%0], %4 \n"
      "vst1.32 {d21[1]}, [%0] \n"

      "mov %0, %5 \n"

      "vst1.32 {d18[0]}, [%0], %6 \n"
      "vst1.32 {d18[1]}, [%0], %6 \n"
      "vst1.32 {d19[0]}, [%0], %6 \n"
      "vst1.32 {d19[1]}, [%0], %6 \n"

      "add %0, %5, #4 \n"
      "vst1.32 {d22[0]}, [%0], %6 \n"
      "vst1.32 {d22[1]}, [%0], %6 \n"
      "vst1.32 {d23[0]}, [%0], %6 \n"
      "vst1.32 {d23[1]}, [%0] \n"

      "add %1, #4*2 \n"  // src += 4 * 2
      "add %3, %3, %4, lsl #2 \n"  // dst_a += 4 * dst_stride_a
      "add %5, %5, %6, lsl #2 \n"  // dst_b += 4 * dst_stride_b
      "subs %7, #4 \n"  // w -= 4
      "beq 4f \n"

      // Some residual; check whether it includes a 2x8 block or less.
      "cmp %7, #2 \n"
      "blt 3f \n"

      // 2x8 block
      "2: \n"
      "mov %0, %1 \n"
      "vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
      "vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
      "vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
      "vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
      "vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
      "vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
      "vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
      "vld2.16 {d1[3], d3[3]}, [%0] \n"

      "vtrn.8 d0, d1 \n"
      "vtrn.8 d2, d3 \n"

      "mov %0, %3 \n"

      "vst1.64 {d0}, [%0], %4 \n"
      "vst1.64 {d2}, [%0] \n"

      "mov %0, %5 \n"

      "vst1.64 {d1}, [%0], %6 \n"
      "vst1.64 {d3}, [%0] \n"

      "add %1, #2*2 \n"  // src += 2 * 2
      "add %3, %3, %4, lsl #1 \n"  // dst_a += 2 * dst_stride_a
      "add %5, %5, %6, lsl #1 \n"  // dst_b += 2 * dst_stride_b
      "subs %7, #2 \n"  // w -= 2
      "beq 4f \n"

      // 1x8 block
      "3: \n"
      "vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
      "vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
      "vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
      "vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
      "vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
      "vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
      "vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
      "vld2.8 {d0[7], d1[7]}, [%1] \n"

      "vst1.64 {d0}, [%3] \n"
      "vst1.64 {d1}, [%5] \n"

      "4: \n"

      : "=&r"(src_temp),  // %0
        "+r"(src),  // %1
        "+r"(src_stride),  // %2
        "+r"(dst_a),  // %3
        "+r"(dst_stride_a),  // %4
        "+r"(dst_b),  // %5
        "+r"(dst_stride_b),  // %6
        "+r"(width)  // %7
      : "r"(&kVTbl4x4TransposeDi)  // %8
      : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
}
#endif  // defined(__ARM_NEON__) && !defined(__aarch64__)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
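Both NEON routines above follow the same residual ladder: full 8-wide blocks in the main loop, then at most one 4-wide, one 2-wide, and one 1-wide pass. A C sketch of that control flow, with illustrative names and the block bodies elided:

// Control-flow sketch of the residual ladder (illustrative only).
static void TransposeLadder_Sketch(int width) {
  int w = width - 8;
  for (; w >= 0; w -= 8) {
    // 8x8 block: the common case.
  }
  w += 8;
  if (w == 0) return;  // no residual
  if (w >= 4) {
    // 4x8 block
    w -= 4;
    if (w == 0) return;
  }
  if (w >= 2) {
    // 2x8 block
    w -= 2;
    if (w == 0) return;
  }
  // 1x8 block handles the final column.
}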
443
TMessagesProj/jni/third_party/libyuv/source/rotate_neon64.cc
vendored
Normal file
@@ -0,0 +1,443 @@
/*
 * Copyright 2014 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_row.h"
#include "libyuv/row.h"

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13,
                                        2, 6, 10, 14, 3, 7, 11, 15};

void TransposeWx8_NEON(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst,
                       int dst_stride,
                       int width) {
  const uint8_t* src_temp;
  asm volatile(
      // Loops are on blocks of 8. The loop will stop when the counter
      // gets to or below 0; starting the counter at w-8 allows for this.
      "sub %w3, %w3, #8 \n"

      // Handle 8x8 blocks. This should be the majority of the plane.
      "1: \n"
      "mov %0, %1 \n"

      "ld1 {v0.8b}, [%0], %5 \n"
      "ld1 {v1.8b}, [%0], %5 \n"
      "ld1 {v2.8b}, [%0], %5 \n"
      "ld1 {v3.8b}, [%0], %5 \n"
      "ld1 {v4.8b}, [%0], %5 \n"
      "ld1 {v5.8b}, [%0], %5 \n"
      "ld1 {v6.8b}, [%0], %5 \n"
      "ld1 {v7.8b}, [%0] \n"
      "mov %0, %1 \n"

      "trn2 v16.8b, v0.8b, v1.8b \n"
      "prfm pldl1keep, [%0, 448] \n"  // prefetch 7 lines ahead
      "trn1 v17.8b, v0.8b, v1.8b \n"
      "add %0, %0, %5 \n"
      "trn2 v18.8b, v2.8b, v3.8b \n"
      "prfm pldl1keep, [%0, 448] \n"  // row 1
      "trn1 v19.8b, v2.8b, v3.8b \n"
      "add %0, %0, %5 \n"
      "trn2 v20.8b, v4.8b, v5.8b \n"
      "prfm pldl1keep, [%0, 448] \n"  // row 2
      "trn1 v21.8b, v4.8b, v5.8b \n"
      "add %0, %0, %5 \n"
      "trn2 v22.8b, v6.8b, v7.8b \n"
      "prfm pldl1keep, [%0, 448] \n"  // row 3
      "trn1 v23.8b, v6.8b, v7.8b \n"
      "add %0, %0, %5 \n"

      "trn2 v3.4h, v17.4h, v19.4h \n"
      "prfm pldl1keep, [%0, 448] \n"  // row 4
      "trn1 v1.4h, v17.4h, v19.4h \n"
      "add %0, %0, %5 \n"
      "trn2 v2.4h, v16.4h, v18.4h \n"
      "prfm pldl1keep, [%0, 448] \n"  // row 5
      "trn1 v0.4h, v16.4h, v18.4h \n"
      "add %0, %0, %5 \n"
      "trn2 v7.4h, v21.4h, v23.4h \n"
      "prfm pldl1keep, [%0, 448] \n"  // row 6
      "trn1 v5.4h, v21.4h, v23.4h \n"
      "add %0, %0, %5 \n"
      "trn2 v6.4h, v20.4h, v22.4h \n"
      "prfm pldl1keep, [%0, 448] \n"  // row 7
      "trn1 v4.4h, v20.4h, v22.4h \n"

      "trn2 v21.2s, v1.2s, v5.2s \n"
      "trn1 v17.2s, v1.2s, v5.2s \n"
      "trn2 v20.2s, v0.2s, v4.2s \n"
      "trn1 v16.2s, v0.2s, v4.2s \n"
      "trn2 v23.2s, v3.2s, v7.2s \n"
      "trn1 v19.2s, v3.2s, v7.2s \n"
      "trn2 v22.2s, v2.2s, v6.2s \n"
      "trn1 v18.2s, v2.2s, v6.2s \n"

      "mov %0, %2 \n"

      "st1 {v17.8b}, [%0], %6 \n"
      "st1 {v16.8b}, [%0], %6 \n"
      "st1 {v19.8b}, [%0], %6 \n"
      "st1 {v18.8b}, [%0], %6 \n"
      "st1 {v21.8b}, [%0], %6 \n"
      "st1 {v20.8b}, [%0], %6 \n"
      "st1 {v23.8b}, [%0], %6 \n"
      "st1 {v22.8b}, [%0] \n"

      "add %1, %1, #8 \n"  // src += 8
      "add %2, %2, %6, lsl #3 \n"  // dst += 8 * dst_stride
      "subs %w3, %w3, #8 \n"  // w -= 8
      "b.ge 1b \n"

      // Add 8 back to the counter. If the result is 0 there are
      // no residuals.
      "adds %w3, %w3, #8 \n"
      "b.eq 4f \n"

      // Some residual, so between 1 and 7 lines left to transpose.
      "cmp %w3, #2 \n"
      "b.lt 3f \n"

      "cmp %w3, #4 \n"
      "b.lt 2f \n"

      // 4x8 block
      "mov %0, %1 \n"
      "ld1 {v0.s}[0], [%0], %5 \n"
      "ld1 {v0.s}[1], [%0], %5 \n"
      "ld1 {v0.s}[2], [%0], %5 \n"
      "ld1 {v0.s}[3], [%0], %5 \n"
      "ld1 {v1.s}[0], [%0], %5 \n"
      "ld1 {v1.s}[1], [%0], %5 \n"
      "ld1 {v1.s}[2], [%0], %5 \n"
      "ld1 {v1.s}[3], [%0] \n"

      "mov %0, %2 \n"

      "ld1 {v2.16b}, [%4] \n"

      "tbl v3.16b, {v0.16b}, v2.16b \n"
      "tbl v0.16b, {v1.16b}, v2.16b \n"

      // TODO(frkoenig): Rework shuffle above to
      // write out with 4 instead of 8 writes.
      "st1 {v3.s}[0], [%0], %6 \n"
      "st1 {v3.s}[1], [%0], %6 \n"
      "st1 {v3.s}[2], [%0], %6 \n"
      "st1 {v3.s}[3], [%0] \n"

      "add %0, %2, #4 \n"
      "st1 {v0.s}[0], [%0], %6 \n"
      "st1 {v0.s}[1], [%0], %6 \n"
      "st1 {v0.s}[2], [%0], %6 \n"
      "st1 {v0.s}[3], [%0] \n"

      "add %1, %1, #4 \n"  // src += 4
      "add %2, %2, %6, lsl #2 \n"  // dst += 4 * dst_stride
      "subs %w3, %w3, #4 \n"  // w -= 4
      "b.eq 4f \n"

      // Some residual; check whether it includes a 2x8 block or less.
      "cmp %w3, #2 \n"
      "b.lt 3f \n"

      // 2x8 block
      "2: \n"
      "mov %0, %1 \n"
      "ld1 {v0.h}[0], [%0], %5 \n"
      "ld1 {v1.h}[0], [%0], %5 \n"
      "ld1 {v0.h}[1], [%0], %5 \n"
      "ld1 {v1.h}[1], [%0], %5 \n"
      "ld1 {v0.h}[2], [%0], %5 \n"
      "ld1 {v1.h}[2], [%0], %5 \n"
      "ld1 {v0.h}[3], [%0], %5 \n"
      "ld1 {v1.h}[3], [%0] \n"

      "trn2 v2.8b, v0.8b, v1.8b \n"
      "trn1 v3.8b, v0.8b, v1.8b \n"

      "mov %0, %2 \n"

      "st1 {v3.8b}, [%0], %6 \n"
      "st1 {v2.8b}, [%0] \n"

      "add %1, %1, #2 \n"  // src += 2
      "add %2, %2, %6, lsl #1 \n"  // dst += 2 * dst_stride
      "subs %w3, %w3, #2 \n"  // w -= 2
      "b.eq 4f \n"

      // 1x8 block
      "3: \n"
      "ld1 {v0.b}[0], [%1], %5 \n"
      "ld1 {v0.b}[1], [%1], %5 \n"
      "ld1 {v0.b}[2], [%1], %5 \n"
      "ld1 {v0.b}[3], [%1], %5 \n"
      "ld1 {v0.b}[4], [%1], %5 \n"
      "ld1 {v0.b}[5], [%1], %5 \n"
      "ld1 {v0.b}[6], [%1], %5 \n"
      "ld1 {v0.b}[7], [%1] \n"

      "st1 {v0.8b}, [%2] \n"

      "4: \n"

      : "=&r"(src_temp),  // %0
        "+r"(src),  // %1
        "+r"(dst),  // %2
        "+r"(width)  // %3
      : "r"(&kVTbl4x4Transpose),  // %4
        "r"((ptrdiff_t)src_stride),  // %5
        "r"((ptrdiff_t)dst_stride)  // %6
      : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
        "v17", "v18", "v19", "v20", "v21", "v22", "v23");
}

static const uint8_t kVTbl4x4TransposeDi[32] = {
    0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
    1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};

void TransposeUVWx8_NEON(const uint8_t* src,
                         int src_stride,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         int width) {
  const uint8_t* src_temp;
  asm volatile(
      // Loops are on blocks of 8. The loop will stop when the counter
      // gets to or below 0; starting the counter at w-8 allows for this.
      "sub %w4, %w4, #8 \n"

      // Handle 8x8 blocks. This should be the majority of the plane.
      "1: \n"
      "mov %0, %1 \n"

      "ld1 {v0.16b}, [%0], %5 \n"
      "ld1 {v1.16b}, [%0], %5 \n"
      "ld1 {v2.16b}, [%0], %5 \n"
      "ld1 {v3.16b}, [%0], %5 \n"
      "ld1 {v4.16b}, [%0], %5 \n"
      "ld1 {v5.16b}, [%0], %5 \n"
      "ld1 {v6.16b}, [%0], %5 \n"
      "ld1 {v7.16b}, [%0] \n"
      "mov %0, %1 \n"

      "trn1 v16.16b, v0.16b, v1.16b \n"
      "trn2 v17.16b, v0.16b, v1.16b \n"
      "trn1 v18.16b, v2.16b, v3.16b \n"
      "trn2 v19.16b, v2.16b, v3.16b \n"
      "trn1 v20.16b, v4.16b, v5.16b \n"
      "trn2 v21.16b, v4.16b, v5.16b \n"
      "trn1 v22.16b, v6.16b, v7.16b \n"
      "trn2 v23.16b, v6.16b, v7.16b \n"

      "trn1 v0.8h, v16.8h, v18.8h \n"
      "trn2 v1.8h, v16.8h, v18.8h \n"
      "trn1 v2.8h, v20.8h, v22.8h \n"
      "trn2 v3.8h, v20.8h, v22.8h \n"
      "trn1 v4.8h, v17.8h, v19.8h \n"
      "trn2 v5.8h, v17.8h, v19.8h \n"
      "trn1 v6.8h, v21.8h, v23.8h \n"
      "trn2 v7.8h, v21.8h, v23.8h \n"

      "trn1 v16.4s, v0.4s, v2.4s \n"
      "trn2 v17.4s, v0.4s, v2.4s \n"
      "trn1 v18.4s, v1.4s, v3.4s \n"
      "trn2 v19.4s, v1.4s, v3.4s \n"
      "trn1 v20.4s, v4.4s, v6.4s \n"
      "trn2 v21.4s, v4.4s, v6.4s \n"
      "trn1 v22.4s, v5.4s, v7.4s \n"
      "trn2 v23.4s, v5.4s, v7.4s \n"

      "mov %0, %2 \n"

      "st1 {v16.d}[0], [%0], %6 \n"
      "st1 {v18.d}[0], [%0], %6 \n"
      "st1 {v17.d}[0], [%0], %6 \n"
      "st1 {v19.d}[0], [%0], %6 \n"
      "st1 {v16.d}[1], [%0], %6 \n"
      "st1 {v18.d}[1], [%0], %6 \n"
      "st1 {v17.d}[1], [%0], %6 \n"
      "st1 {v19.d}[1], [%0] \n"

      "mov %0, %3 \n"

      "st1 {v20.d}[0], [%0], %7 \n"
      "st1 {v22.d}[0], [%0], %7 \n"
      "st1 {v21.d}[0], [%0], %7 \n"
      "st1 {v23.d}[0], [%0], %7 \n"
      "st1 {v20.d}[1], [%0], %7 \n"
      "st1 {v22.d}[1], [%0], %7 \n"
      "st1 {v21.d}[1], [%0], %7 \n"
      "st1 {v23.d}[1], [%0] \n"

      "add %1, %1, #16 \n"  // src += 8*2
      "add %2, %2, %6, lsl #3 \n"  // dst_a += 8 * dst_stride_a
      "add %3, %3, %7, lsl #3 \n"  // dst_b += 8 * dst_stride_b
      "subs %w4, %w4, #8 \n"  // w -= 8
      "b.ge 1b \n"

      // Add 8 back to the counter. If the result is 0 there are
      // no residuals.
      "adds %w4, %w4, #8 \n"
      "b.eq 4f \n"

      // Some residual, so between 1 and 7 lines left to transpose.
      "cmp %w4, #2 \n"
      "b.lt 3f \n"

      "cmp %w4, #4 \n"
      "b.lt 2f \n"

      // TODO(frkoenig): Clean this up
      // 4x8 block
      "mov %0, %1 \n"
      "ld1 {v0.8b}, [%0], %5 \n"
      "ld1 {v1.8b}, [%0], %5 \n"
      "ld1 {v2.8b}, [%0], %5 \n"
      "ld1 {v3.8b}, [%0], %5 \n"
      "ld1 {v4.8b}, [%0], %5 \n"
      "ld1 {v5.8b}, [%0], %5 \n"
      "ld1 {v6.8b}, [%0], %5 \n"
      "ld1 {v7.8b}, [%0] \n"

      "ld1 {v30.16b}, [%8], #16 \n"
      "ld1 {v31.16b}, [%8] \n"

      "tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n"
      "tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n"
      "tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n"
      "tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n"

      "mov %0, %2 \n"

      "st1 {v16.s}[0], [%0], %6 \n"
      "st1 {v16.s}[1], [%0], %6 \n"
      "st1 {v16.s}[2], [%0], %6 \n"
      "st1 {v16.s}[3], [%0], %6 \n"

      "add %0, %2, #4 \n"
      "st1 {v18.s}[0], [%0], %6 \n"
      "st1 {v18.s}[1], [%0], %6 \n"
      "st1 {v18.s}[2], [%0], %6 \n"
      "st1 {v18.s}[3], [%0] \n"

      "mov %0, %3 \n"

      "st1 {v17.s}[0], [%0], %7 \n"
      "st1 {v17.s}[1], [%0], %7 \n"
      "st1 {v17.s}[2], [%0], %7 \n"
      "st1 {v17.s}[3], [%0], %7 \n"

      "add %0, %3, #4 \n"
      "st1 {v19.s}[0], [%0], %7 \n"
      "st1 {v19.s}[1], [%0], %7 \n"
      "st1 {v19.s}[2], [%0], %7 \n"
      "st1 {v19.s}[3], [%0] \n"

      "add %1, %1, #8 \n"  // src += 4 * 2
      "add %2, %2, %6, lsl #2 \n"  // dst_a += 4 * dst_stride_a
      "add %3, %3, %7, lsl #2 \n"  // dst_b += 4 * dst_stride_b
      "subs %w4, %w4, #4 \n"  // w -= 4
      "b.eq 4f \n"

      // Some residual; check whether it includes a 2x8 block or less.
      "cmp %w4, #2 \n"
      "b.lt 3f \n"

      // 2x8 block
      "2: \n"
      "mov %0, %1 \n"
      "ld2 {v0.h, v1.h}[0], [%0], %5 \n"
      "ld2 {v2.h, v3.h}[0], [%0], %5 \n"
      "ld2 {v0.h, v1.h}[1], [%0], %5 \n"
      "ld2 {v2.h, v3.h}[1], [%0], %5 \n"
      "ld2 {v0.h, v1.h}[2], [%0], %5 \n"
      "ld2 {v2.h, v3.h}[2], [%0], %5 \n"
      "ld2 {v0.h, v1.h}[3], [%0], %5 \n"
      "ld2 {v2.h, v3.h}[3], [%0] \n"

      "trn1 v4.8b, v0.8b, v2.8b \n"
      "trn2 v5.8b, v0.8b, v2.8b \n"
      "trn1 v6.8b, v1.8b, v3.8b \n"
      "trn2 v7.8b, v1.8b, v3.8b \n"

      "mov %0, %2 \n"

      "st1 {v4.d}[0], [%0], %6 \n"
      "st1 {v6.d}[0], [%0] \n"

      "mov %0, %3 \n"

      "st1 {v5.d}[0], [%0], %7 \n"
      "st1 {v7.d}[0], [%0] \n"

      "add %1, %1, #4 \n"  // src += 2 * 2
      "add %2, %2, %6, lsl #1 \n"  // dst_a += 2 * dst_stride_a
      "add %3, %3, %7, lsl #1 \n"  // dst_b += 2 * dst_stride_b
      "subs %w4, %w4, #2 \n"  // w -= 2
      "b.eq 4f \n"

      // 1x8 block
      "3: \n"
      "ld2 {v0.b, v1.b}[0], [%1], %5 \n"
      "ld2 {v0.b, v1.b}[1], [%1], %5 \n"
      "ld2 {v0.b, v1.b}[2], [%1], %5 \n"
      "ld2 {v0.b, v1.b}[3], [%1], %5 \n"
      "ld2 {v0.b, v1.b}[4], [%1], %5 \n"
      "ld2 {v0.b, v1.b}[5], [%1], %5 \n"
      "ld2 {v0.b, v1.b}[6], [%1], %5 \n"
      "ld2 {v0.b, v1.b}[7], [%1] \n"

      "st1 {v0.d}[0], [%2] \n"
      "st1 {v1.d}[0], [%3] \n"

      "4: \n"

      : "=&r"(src_temp),  // %0
        "+r"(src),  // %1
        "+r"(dst_a),  // %2
        "+r"(dst_b),  // %3
        "+r"(width)  // %4
      : "r"((ptrdiff_t)src_stride),  // %5
        "r"((ptrdiff_t)dst_stride_a),  // %6
        "r"((ptrdiff_t)dst_stride_b),  // %7
        "r"(&kVTbl4x4TransposeDi)  // %8
      : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
        "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
}
#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
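The AArch64 path builds its transposes from trn1/trn2, which split each pair of lanes between two results. A scalar model of the 8-byte form, sketching the architectural behavior rather than the intrinsics:

#include <stdint.h>

// Scalar model of AArch64 trn1/trn2 on 8-lane byte vectors: trn1 keeps
// the even-indexed lane of each pair, trn2 the odd-indexed lane.
static void trn_model(const uint8_t a[8], const uint8_t b[8],
                      uint8_t t1[8], uint8_t t2[8]) {
  for (int i = 0; i < 4; ++i) {
    t1[2 * i + 0] = a[2 * i];      // trn1 vd, a, b
    t1[2 * i + 1] = b[2 * i];
    t2[2 * i + 0] = a[2 * i + 1];  // trn2 vd, a, b
    t2[2 * i + 1] = b[2 * i + 1];
  }
}

Applying this at byte, halfword, then word granularity is what turns eight loaded rows into eight transposed columns in the kernels above.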
253
TMessagesProj/jni/third_party/libyuv/source/rotate_win.cc
vendored
Normal file
@@ -0,0 +1,253 @@
/*
 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for 32 bit Visual C x86
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
    !defined(__clang__) && defined(_M_IX86)

__declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src,
                                          int src_stride,
                                          uint8_t* dst,
                                          int dst_stride,
                                          int width) {
  __asm {
    push edi
    push esi
    push ebp
    mov eax, [esp + 12 + 4]  // src
    mov edi, [esp + 12 + 8]  // src_stride
    mov edx, [esp + 12 + 12]  // dst
    mov esi, [esp + 12 + 16]  // dst_stride
    mov ecx, [esp + 12 + 20]  // width

    // Read in the data from the source pointer.
    // First round of bit swap.
    align 4
  convertloop:
    movq xmm0, qword ptr [eax]
    lea ebp, [eax + 8]
    movq xmm1, qword ptr [eax + edi]
    lea eax, [eax + 2 * edi]
    punpcklbw xmm0, xmm1
    movq xmm2, qword ptr [eax]
    movdqa xmm1, xmm0
    palignr xmm1, xmm1, 8
    movq xmm3, qword ptr [eax + edi]
    lea eax, [eax + 2 * edi]
    punpcklbw xmm2, xmm3
    movdqa xmm3, xmm2
    movq xmm4, qword ptr [eax]
    palignr xmm3, xmm3, 8
    movq xmm5, qword ptr [eax + edi]
    punpcklbw xmm4, xmm5
    lea eax, [eax + 2 * edi]
    movdqa xmm5, xmm4
    movq xmm6, qword ptr [eax]
    palignr xmm5, xmm5, 8
    movq xmm7, qword ptr [eax + edi]
    punpcklbw xmm6, xmm7
    mov eax, ebp
    movdqa xmm7, xmm6
    palignr xmm7, xmm7, 8
    // Second round of bit swap.
    punpcklwd xmm0, xmm2
    punpcklwd xmm1, xmm3
    movdqa xmm2, xmm0
    movdqa xmm3, xmm1
    palignr xmm2, xmm2, 8
    palignr xmm3, xmm3, 8
    punpcklwd xmm4, xmm6
    punpcklwd xmm5, xmm7
    movdqa xmm6, xmm4
    movdqa xmm7, xmm5
    palignr xmm6, xmm6, 8
    palignr xmm7, xmm7, 8
    // Third round of bit swap.
    // Write to the destination pointer.
    punpckldq xmm0, xmm4
    movq qword ptr [edx], xmm0
    movdqa xmm4, xmm0
    palignr xmm4, xmm4, 8
    movq qword ptr [edx + esi], xmm4
    lea edx, [edx + 2 * esi]
    punpckldq xmm2, xmm6
    movdqa xmm6, xmm2
    palignr xmm6, xmm6, 8
    movq qword ptr [edx], xmm2
    punpckldq xmm1, xmm5
    movq qword ptr [edx + esi], xmm6
    lea edx, [edx + 2 * esi]
    movdqa xmm5, xmm1
    movq qword ptr [edx], xmm1
    palignr xmm5, xmm5, 8
    punpckldq xmm3, xmm7
    movq qword ptr [edx + esi], xmm5
    lea edx, [edx + 2 * esi]
    movq qword ptr [edx], xmm3
    movdqa xmm7, xmm3
    palignr xmm7, xmm7, 8
    sub ecx, 8
    movq qword ptr [edx + esi], xmm7
    lea edx, [edx + 2 * esi]
    jg convertloop

    pop ebp
    pop esi
    pop edi
    ret
  }
}

__declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src,
                                           int src_stride,
                                           uint8_t* dst_a,
                                           int dst_stride_a,
                                           uint8_t* dst_b,
                                           int dst_stride_b,
                                           int w) {
  __asm {
    push ebx
    push esi
    push edi
    push ebp
    mov eax, [esp + 16 + 4]  // src
    mov edi, [esp + 16 + 8]  // src_stride
    mov edx, [esp + 16 + 12]  // dst_a
    mov esi, [esp + 16 + 16]  // dst_stride_a
    mov ebx, [esp + 16 + 20]  // dst_b
    mov ebp, [esp + 16 + 24]  // dst_stride_b
    mov ecx, esp
    sub esp, 4 + 16
    and esp, ~15
    mov [esp + 16], ecx
    mov ecx, [ecx + 16 + 28]  // w

    align 4
    // Read in the data from the source pointer.
    // First round of bit swap.
  convertloop:
    movdqu xmm0, [eax]
    movdqu xmm1, [eax + edi]
    lea eax, [eax + 2 * edi]
    movdqa xmm7, xmm0  // use xmm7 as temp register.
    punpcklbw xmm0, xmm1
    punpckhbw xmm7, xmm1
    movdqa xmm1, xmm7
    movdqu xmm2, [eax]
    movdqu xmm3, [eax + edi]
    lea eax, [eax + 2 * edi]
    movdqa xmm7, xmm2
    punpcklbw xmm2, xmm3
    punpckhbw xmm7, xmm3
    movdqa xmm3, xmm7
    movdqu xmm4, [eax]
    movdqu xmm5, [eax + edi]
    lea eax, [eax + 2 * edi]
    movdqa xmm7, xmm4
    punpcklbw xmm4, xmm5
    punpckhbw xmm7, xmm5
    movdqa xmm5, xmm7
    movdqu xmm6, [eax]
    movdqu xmm7, [eax + edi]
    lea eax, [eax + 2 * edi]
    movdqu [esp], xmm5  // backup xmm5
    neg edi
    movdqa xmm5, xmm6  // use xmm5 as temp register.
    punpcklbw xmm6, xmm7
    punpckhbw xmm5, xmm7
    movdqa xmm7, xmm5
    lea eax, [eax + 8 * edi + 16]
    neg edi
    // Second round of bit swap.
    movdqa xmm5, xmm0
    punpcklwd xmm0, xmm2
    punpckhwd xmm5, xmm2
    movdqa xmm2, xmm5
    movdqa xmm5, xmm1
    punpcklwd xmm1, xmm3
    punpckhwd xmm5, xmm3
    movdqa xmm3, xmm5
    movdqa xmm5, xmm4
    punpcklwd xmm4, xmm6
    punpckhwd xmm5, xmm6
    movdqa xmm6, xmm5
    movdqu xmm5, [esp]  // restore xmm5
    movdqu [esp], xmm6  // backup xmm6
    movdqa xmm6, xmm5  // use xmm6 as temp register.
    punpcklwd xmm5, xmm7
    punpckhwd xmm6, xmm7
    movdqa xmm7, xmm6

    // Third round of bit swap.
    // Write to the destination pointer.
    movdqa xmm6, xmm0
    punpckldq xmm0, xmm4
    punpckhdq xmm6, xmm4
    movdqa xmm4, xmm6
    movdqu xmm6, [esp]  // restore xmm6
    movlpd qword ptr [edx], xmm0
    movhpd qword ptr [ebx], xmm0
    movlpd qword ptr [edx + esi], xmm4
    lea edx, [edx + 2 * esi]
    movhpd qword ptr [ebx + ebp], xmm4
    lea ebx, [ebx + 2 * ebp]
    movdqa xmm0, xmm2  // use xmm0 as the temp register.
    punpckldq xmm2, xmm6
    movlpd qword ptr [edx], xmm2
    movhpd qword ptr [ebx], xmm2
    punpckhdq xmm0, xmm6
    movlpd qword ptr [edx + esi], xmm0
    lea edx, [edx + 2 * esi]
    movhpd qword ptr [ebx + ebp], xmm0
    lea ebx, [ebx + 2 * ebp]
    movdqa xmm0, xmm1  // use xmm0 as the temp register.
    punpckldq xmm1, xmm5
    movlpd qword ptr [edx], xmm1
    movhpd qword ptr [ebx], xmm1
    punpckhdq xmm0, xmm5
    movlpd qword ptr [edx + esi], xmm0
    lea edx, [edx + 2 * esi]
    movhpd qword ptr [ebx + ebp], xmm0
    lea ebx, [ebx + 2 * ebp]
    movdqa xmm0, xmm3  // use xmm0 as the temp register.
    punpckldq xmm3, xmm7
    movlpd qword ptr [edx], xmm3
    movhpd qword ptr [ebx], xmm3
    punpckhdq xmm0, xmm7
    sub ecx, 8
    movlpd qword ptr [edx + esi], xmm0
    lea edx, [edx + 2 * esi]
    movhpd qword ptr [ebx + ebp], xmm0
    lea ebx, [ebx + 2 * ebp]
    jg convertloop

    mov esp, [esp + 16]
    pop ebp
    pop edi
    pop esi
    pop ebx
    ret
  }
}

#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
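The three "rounds of bit swap" in these routines are interleave stages whose net effect is a plain 8x8 byte transpose. For reference, the scalar equivalent (an illustrative helper, not part of the library):

#include <stdint.h>

// What the punpck/palignr rounds compute, stated directly: element
// (i, j) of the source block moves to (j, i) of the destination.
static void Transpose8x8_Sketch(const uint8_t* src, int src_stride,
                                uint8_t* dst, int dst_stride) {
  for (int i = 0; i < 8; ++i) {
    for (int j = 0; j < 8; ++j) {
      dst[j * dst_stride + i] = src[i * src_stride + j];
    }
  }
}

The SIMD versions reach the same result in O(log n) passes by interleaving at 8-, 16-, and then 32-bit granularity instead of moving bytes one at a time.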
2364
TMessagesProj/jni/third_party/libyuv/source/row_any.cc
vendored
Normal file
File diff suppressed because it is too large
4550
TMessagesProj/jni/third_party/libyuv/source/row_common.cc
vendored
Normal file
File diff suppressed because it is too large
9713
TMessagesProj/jni/third_party/libyuv/source/row_gcc.cc
vendored
Normal file
File diff suppressed because it is too large
2302
TMessagesProj/jni/third_party/libyuv/source/row_lasx.cc
vendored
Normal file
File diff suppressed because it is too large
1857
TMessagesProj/jni/third_party/libyuv/source/row_lsx.cc
vendored
Normal file
File diff suppressed because it is too large
7842
TMessagesProj/jni/third_party/libyuv/source/row_mmi.cc
vendored
Normal file
File diff suppressed because it is too large
3597
TMessagesProj/jni/third_party/libyuv/source/row_msa.cc
vendored
Normal file
File diff suppressed because it is too large
3969
TMessagesProj/jni/third_party/libyuv/source/row_neon.cc
vendored
Normal file
File diff suppressed because it is too large
4520
TMessagesProj/jni/third_party/libyuv/source/row_neon64.cc
vendored
Normal file
File diff suppressed because it is too large
6441
TMessagesProj/jni/third_party/libyuv/source/row_win.cc
vendored
Normal file
File diff suppressed because it is too large
2579
TMessagesProj/jni/third_party/libyuv/source/scale.cc
vendored
Normal file
File diff suppressed because it is too large
1062
TMessagesProj/jni/third_party/libyuv/source/scale_any.cc
vendored
Normal file
File diff suppressed because it is too large
1129
TMessagesProj/jni/third_party/libyuv/source/scale_argb.cc
vendored
Normal file
File diff suppressed because it is too large
1998
TMessagesProj/jni/third_party/libyuv/source/scale_common.cc
vendored
Normal file
File diff suppressed because it is too large
2953
TMessagesProj/jni/third_party/libyuv/source/scale_gcc.cc
vendored
Normal file
File diff suppressed because it is too large
739
TMessagesProj/jni/third_party/libyuv/source/scale_lsx.cc
vendored
Normal file
@@ -0,0 +1,739 @@
/*
|
||||
* Copyright 2022 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2022 Loongson Technology Corporation Limited
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "libyuv/scale_row.h"
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
|
||||
#include "libyuv/loongson_intrinsics.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define LOAD_DATA(_src, _in, _out) \
|
||||
{ \
|
||||
int _tmp1, _tmp2, _tmp3, _tmp4; \
|
||||
DUP4_ARG2(__lsx_vpickve2gr_w, _in, 0, _in, 1, _in, 2, _in, 3, _tmp1, \
|
||||
_tmp2, _tmp3, _tmp4); \
|
||||
_out = __lsx_vinsgr2vr_w(_out, _src[_tmp1], 0); \
|
||||
_out = __lsx_vinsgr2vr_w(_out, _src[_tmp2], 1); \
|
||||
_out = __lsx_vinsgr2vr_w(_out, _src[_tmp3], 2); \
|
||||
_out = __lsx_vinsgr2vr_w(_out, _src[_tmp4], 3); \
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2_LSX(const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
int len = dst_width / 4;
|
||||
(void)src_stride;
|
||||
__m128i src0, src1, dst0;
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
|
||||
dst0 = __lsx_vpickod_w(src1, src0);
|
||||
__lsx_vst(dst0, dst_argb, 0);
|
||||
src_argb += 32;
|
||||
dst_argb += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Linear_LSX(const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
int len = dst_width / 4;
|
||||
(void)src_stride;
|
||||
__m128i src0, src1, tmp0, tmp1, dst0;
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
|
||||
tmp0 = __lsx_vpickev_w(src1, src0);
|
||||
tmp1 = __lsx_vpickod_w(src1, src0);
|
||||
dst0 = __lsx_vavgr_bu(tmp1, tmp0);
|
||||
__lsx_vst(dst0, dst_argb, 0);
|
||||
src_argb += 32;
|
||||
dst_argb += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Box_LSX(const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
int len = dst_width / 4;
|
||||
const uint8_t* s = src_argb;
|
||||
const uint8_t* t = src_argb + src_stride;
|
||||
__m128i src0, src1, src2, src3, tmp0, tmp1, tmp2, tmp3, dst0;
|
||||
__m128i reg0, reg1, reg2, reg3;
|
||||
__m128i shuff = {0x0703060205010400, 0x0F0B0E0A0D090C08};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP2_ARG2(__lsx_vld, s, 0, s, 16, src0, src1);
|
||||
DUP2_ARG2(__lsx_vld, t, 0, t, 16, src2, src3);
|
||||
DUP4_ARG3(__lsx_vshuf_b, src0, src0, shuff, src1, src1, shuff, src2, src2,
|
||||
shuff, src3, src3, shuff, tmp0, tmp1, tmp2, tmp3);
|
||||
DUP4_ARG2(__lsx_vhaddw_hu_bu, tmp0, tmp0, tmp1, tmp1, tmp2, tmp2, tmp3,
|
||||
tmp3, reg0, reg1, reg2, reg3);
|
||||
DUP2_ARG2(__lsx_vsadd_hu, reg0, reg2, reg1, reg3, reg0, reg1);
|
||||
dst0 = __lsx_vsrarni_b_h(reg1, reg0, 2);
|
||||
__lsx_vst(dst0, dst_argb, 0);
|
||||
s += 32;
|
||||
t += 32;
|
||||
dst_argb += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBRowDownEven_LSX(const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int32_t src_stepx,
|
||||
uint8_t* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
int len = dst_width / 4;
|
||||
int32_t stepx = src_stepx << 2;
|
||||
(void)src_stride;
|
||||
__m128i dst0, dst1, dst2, dst3;
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
dst0 = __lsx_vldrepl_w(src_argb, 0);
|
||||
src_argb += stepx;
|
||||
dst1 = __lsx_vldrepl_w(src_argb, 0);
|
||||
src_argb += stepx;
|
||||
dst2 = __lsx_vldrepl_w(src_argb, 0);
|
||||
src_argb += stepx;
|
||||
dst3 = __lsx_vldrepl_w(src_argb, 0);
|
||||
src_argb += stepx;
|
||||
__lsx_vstelm_w(dst0, dst_argb, 0, 0);
|
||||
__lsx_vstelm_w(dst1, dst_argb, 4, 0);
|
||||
__lsx_vstelm_w(dst2, dst_argb, 8, 0);
|
||||
__lsx_vstelm_w(dst3, dst_argb, 12, 0);
|
||||
dst_argb += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8_t* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
int len = dst_width / 4;
|
||||
int32_t stepx = src_stepx * 4;
|
||||
const uint8_t* next_argb = src_argb + src_stride;
|
||||
__m128i src0, src1, src2, src3;
|
||||
__m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
__m128i reg0, reg1, dst0;
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
tmp0 = __lsx_vldrepl_d(src_argb, 0);
|
||||
src_argb += stepx;
|
||||
tmp1 = __lsx_vldrepl_d(src_argb, 0);
|
||||
src_argb += stepx;
|
||||
tmp2 = __lsx_vldrepl_d(src_argb, 0);
|
||||
src_argb += stepx;
|
||||
tmp3 = __lsx_vldrepl_d(src_argb, 0);
|
||||
src_argb += stepx;
|
||||
tmp4 = __lsx_vldrepl_d(next_argb, 0);
|
||||
next_argb += stepx;
|
||||
tmp5 = __lsx_vldrepl_d(next_argb, 0);
|
||||
next_argb += stepx;
|
||||
tmp6 = __lsx_vldrepl_d(next_argb, 0);
|
||||
next_argb += stepx;
|
||||
tmp7 = __lsx_vldrepl_d(next_argb, 0);
|
||||
next_argb += stepx;
|
||||
DUP4_ARG2(__lsx_vilvl_d, tmp1, tmp0, tmp3, tmp2, tmp5, tmp4, tmp7, tmp6,
|
||||
src0, src1, src2, src3);
|
||||
DUP2_ARG2(__lsx_vaddwev_h_bu, src0, src2, src1, src3, tmp0, tmp2);
|
||||
DUP2_ARG2(__lsx_vaddwod_h_bu, src0, src2, src1, src3, tmp1, tmp3);
|
||||
DUP2_ARG2(__lsx_vpackev_w, tmp1, tmp0, tmp3, tmp2, reg0, reg1);
|
||||
DUP2_ARG2(__lsx_vpackod_w, tmp1, tmp0, tmp3, tmp2, tmp4, tmp5);
|
||||
DUP2_ARG2(__lsx_vadd_h, reg0, tmp4, reg1, tmp5, reg0, reg1);
|
||||
dst0 = __lsx_vsrarni_b_h(reg1, reg0, 2);
|
||||
dst0 = __lsx_vshuf4i_b(dst0, 0xD8);
|
||||
__lsx_vst(dst0, dst_argb, 0);
|
||||
dst_argb += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2_LSX(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
int len = dst_width / 32;
|
||||
__m128i src0, src1, src2, src3, dst0, dst1;
|
||||
(void)src_stride;
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
|
||||
src0, src1, src2, src3);
|
||||
DUP2_ARG2(__lsx_vpickod_b, src1, src0, src3, src2, dst0, dst1);
|
||||
__lsx_vst(dst0, dst, 0);
|
||||
__lsx_vst(dst1, dst, 16);
|
||||
src_ptr += 64;
|
||||
dst += 32;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_LSX(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
int len = dst_width / 32;
|
||||
__m128i src0, src1, src2, src3;
|
||||
__m128i tmp0, tmp1, tmp2, tmp3, dst0, dst1;
|
||||
(void)src_stride;
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
|
||||
src0, src1, src2, src3);
|
||||
DUP2_ARG2(__lsx_vpickev_b, src1, src0, src3, src2, tmp0, tmp2);
|
||||
DUP2_ARG2(__lsx_vpickod_b, src1, src0, src3, src2, tmp1, tmp3);
|
||||
DUP2_ARG2(__lsx_vavgr_bu, tmp0, tmp1, tmp2, tmp3, dst0, dst1);
|
||||
__lsx_vst(dst0, dst, 0);
|
||||
__lsx_vst(dst1, dst, 16);
|
||||
src_ptr += 64;
|
||||
dst += 32;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_LSX(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
int len = dst_width / 32;
|
||||
const uint8_t* src_nex = src_ptr + src_stride;
|
||||
__m128i src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
__m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
__m128i dst0, dst1;
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
|
||||
src0, src1, src2, src3);
|
||||
DUP4_ARG2(__lsx_vld, src_nex, 0, src_nex, 16, src_nex, 32, src_nex, 48,
|
||||
src4, src5, src6, src7);
|
||||
DUP4_ARG2(__lsx_vaddwev_h_bu, src0, src4, src1, src5, src2, src6, src3,
|
||||
src7, tmp0, tmp2, tmp4, tmp6);
|
||||
DUP4_ARG2(__lsx_vaddwod_h_bu, src0, src4, src1, src5, src2, src6, src3,
|
||||
src7, tmp1, tmp3, tmp5, tmp7);
|
||||
DUP4_ARG2(__lsx_vadd_h, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
|
||||
tmp0, tmp1, tmp2, tmp3);
|
||||
DUP2_ARG3(__lsx_vsrarni_b_h, tmp1, tmp0, 2, tmp3, tmp2, 2, dst0, dst1);
|
||||
__lsx_vst(dst0, dst, 0);
|
||||
__lsx_vst(dst1, dst, 16);
|
||||
src_ptr += 64;
|
||||
src_nex += 64;
|
||||
dst += 32;
|
||||
}
|
||||
}

void ScaleRowDown4_LSX(const uint8_t* src_ptr,
                       ptrdiff_t src_stride,
                       uint8_t* dst,
                       int dst_width) {
  int x;
  int len = dst_width / 16;
  __m128i src0, src1, src2, src3, tmp0, tmp1, dst0;
  (void)src_stride;

  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              src0, src1, src2, src3);
    DUP2_ARG2(__lsx_vpickev_b, src1, src0, src3, src2, tmp0, tmp1);
    dst0 = __lsx_vpickod_b(tmp1, tmp0);
    __lsx_vst(dst0, dst, 0);
    src_ptr += 64;
    dst += 16;
  }
}

void ScaleRowDown4Box_LSX(const uint8_t* src_ptr,
                          ptrdiff_t src_stride,
                          uint8_t* dst,
                          int dst_width) {
  int x;
  int len = dst_width / 16;
  const uint8_t* ptr1 = src_ptr + src_stride;
  const uint8_t* ptr2 = ptr1 + src_stride;
  const uint8_t* ptr3 = ptr2 + src_stride;
  __m128i src0, src1, src2, src3, src4, src5, src6, src7;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, dst0;

  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vld, ptr1, 0, ptr1, 16, ptr1, 32, ptr1, 48, src4, src5,
              src6, src7);
    DUP4_ARG2(__lsx_vaddwev_h_bu, src0, src4, src1, src5, src2, src6, src3,
              src7, tmp0, tmp2, tmp4, tmp6);
    DUP4_ARG2(__lsx_vaddwod_h_bu, src0, src4, src1, src5, src2, src6, src3,
              src7, tmp1, tmp3, tmp5, tmp7);
    DUP4_ARG2(__lsx_vadd_h, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
              reg0, reg1, reg2, reg3);
    DUP4_ARG2(__lsx_vld, ptr2, 0, ptr2, 16, ptr2, 32, ptr2, 48, src0, src1,
              src2, src3);
    DUP4_ARG2(__lsx_vld, ptr3, 0, ptr3, 16, ptr3, 32, ptr3, 48, src4, src5,
              src6, src7);
    DUP4_ARG2(__lsx_vaddwev_h_bu, src0, src4, src1, src5, src2, src6, src3,
              src7, tmp0, tmp2, tmp4, tmp6);
    DUP4_ARG2(__lsx_vaddwod_h_bu, src0, src4, src1, src5, src2, src6, src3,
              src7, tmp1, tmp3, tmp5, tmp7);
    DUP4_ARG2(__lsx_vadd_h, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
              reg4, reg5, reg6, reg7);
    DUP4_ARG2(__lsx_vadd_h, reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7,
              reg0, reg1, reg2, reg3);
    DUP4_ARG2(__lsx_vhaddw_wu_hu, reg0, reg0, reg1, reg1, reg2, reg2, reg3,
              reg3, reg0, reg1, reg2, reg3);
    DUP2_ARG3(__lsx_vsrarni_h_w, reg1, reg0, 4, reg3, reg2, 4, tmp0, tmp1);
    dst0 = __lsx_vpickev_b(tmp1, tmp0);
    __lsx_vst(dst0, dst, 0);
    src_ptr += 64;
    ptr1 += 64;
    ptr2 += 64;
    ptr3 += 64;
    dst += 16;
  }
}

void ScaleRowDown38_LSX(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  int x, len;
  __m128i src0, src1, tmp0;
  __m128i shuff = {0x13100E0B08060300, 0x000000001E1B1816};

  assert(dst_width % 3 == 0);
  len = dst_width / 12;
  (void)src_stride;

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src0, src1);
    tmp0 = __lsx_vshuf_b(src1, src0, shuff);
    __lsx_vstelm_d(tmp0, dst, 0, 0);
    __lsx_vstelm_w(tmp0, dst, 8, 2);
    src_ptr += 32;
    dst += 12;
  }
}

void ScaleRowDown38_2_Box_LSX(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst_ptr,
                              int dst_width) {
  int x, len;
  const uint8_t* src_nex = src_ptr + src_stride;
  __m128i src0, src1, src2, src3, dst0;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, reg2, reg3;
  __m128i shuff = {0x0A08160604120200, 0x000000001E0E0C1A};
  __m128i const_0x2AAA = __lsx_vreplgr2vr_h(0x2AAA);
  __m128i const_0x4000 = __lsx_vreplgr2vr_w(0x4000);

  assert((dst_width % 3 == 0) && (dst_width > 0));
  len = dst_width / 12;

  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_nex, 0, src_nex, 16,
              src0, src1, src2, src3);
    DUP2_ARG2(__lsx_vaddwev_h_bu, src0, src2, src1, src3, tmp0, tmp2);
    DUP2_ARG2(__lsx_vaddwod_h_bu, src0, src2, src1, src3, tmp1, tmp3);
    DUP2_ARG2(__lsx_vpickev_h, tmp2, tmp0, tmp3, tmp1, reg0, reg1);
    DUP2_ARG2(__lsx_vpackod_h, tmp1, tmp0, tmp3, tmp2, reg2, reg3);
    tmp4 = __lsx_vpickev_w(reg3, reg2);
    tmp5 = __lsx_vadd_h(reg0, reg1);
    tmp6 = __lsx_vadd_h(tmp5, tmp4);
    tmp7 = __lsx_vmuh_h(tmp6, const_0x2AAA);
    tmp0 = __lsx_vpickod_w(reg3, reg2);
    tmp1 = __lsx_vhaddw_wu_hu(tmp0, tmp0);
    tmp2 = __lsx_vmul_w(tmp1, const_0x4000);
    dst0 = __lsx_vshuf_b(tmp2, tmp7, shuff);
    __lsx_vstelm_d(dst0, dst_ptr, 0, 0);
    __lsx_vstelm_w(dst0, dst_ptr, 8, 2);
    src_ptr += 32;
    src_nex += 32;
    dst_ptr += 12;
  }
}
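
// Fixed-point note for the 3/8 box filters above: two of every three output
// pixels average 3 columns over 2 rows (6 source pixels), and
// 0x2AAA ~ 65536 / 6, so a Q16 multiply-high by 0x2AAA approximates sum / 6.
// The third output pixel averages only 2 columns over 2 rows (4 pixels);
// 0x4000 = 65536 / 4, so the 32-bit multiply followed by the final byte
// shuffle approximates sum / 4 the same way.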

void ScaleRowDown38_3_Box_LSX(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst_ptr,
                              int dst_width) {
  int x, len;
  const uint8_t* ptr1 = src_ptr + src_stride;
  const uint8_t* ptr2 = ptr1 + src_stride;
  __m128i src0, src1, src2, src3, src4, src5;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, reg2, reg3, dst0;
  __m128i zero = __lsx_vldi(0);
  __m128i shuff = {0x0A08160604120200, 0x000000001E0E0C1A};
  __m128i const_0x1C71 = __lsx_vreplgr2vr_h(0x1C71);
  __m128i const_0x2AAA = __lsx_vreplgr2vr_w(0x2AAA);

  assert((dst_width % 3 == 0) && (dst_width > 0));
  len = dst_width / 12;

  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, ptr1, 0, ptr1, 16, src0,
              src1, src2, src3);
    DUP2_ARG2(__lsx_vld, ptr2, 0, ptr2, 16, src4, src5);
    DUP2_ARG2(__lsx_vaddwev_h_bu, src0, src2, src1, src3, tmp0, tmp2);
    DUP2_ARG2(__lsx_vaddwod_h_bu, src0, src2, src1, src3, tmp1, tmp3);
    DUP2_ARG2(__lsx_vpackev_b, zero, src4, zero, src5, tmp4, tmp6);
    DUP2_ARG2(__lsx_vpackod_b, zero, src4, zero, src5, tmp5, tmp7);
    DUP4_ARG2(__lsx_vadd_h, tmp0, tmp4, tmp1, tmp5, tmp2, tmp6, tmp3, tmp7,
              tmp0, tmp1, tmp2, tmp3);
    DUP2_ARG2(__lsx_vpickev_h, tmp2, tmp0, tmp3, tmp1, reg0, reg1);
    DUP2_ARG2(__lsx_vpackod_h, tmp1, tmp0, tmp3, tmp2, reg2, reg3);
    tmp4 = __lsx_vpickev_w(reg3, reg2);
    tmp5 = __lsx_vadd_h(reg0, reg1);
    tmp6 = __lsx_vadd_h(tmp5, tmp4);
    tmp7 = __lsx_vmuh_h(tmp6, const_0x1C71);
    tmp0 = __lsx_vpickod_w(reg3, reg2);
    tmp1 = __lsx_vhaddw_wu_hu(tmp0, tmp0);
    tmp2 = __lsx_vmul_w(tmp1, const_0x2AAA);
    dst0 = __lsx_vshuf_b(tmp2, tmp7, shuff);
    __lsx_vstelm_d(dst0, dst_ptr, 0, 0);
    __lsx_vstelm_w(dst0, dst_ptr, 8, 2);
    src_ptr += 32;
    ptr1 += 32;
    ptr2 += 32;
    dst_ptr += 12;
  }
}

void ScaleAddRow_LSX(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  int x;
  int len = src_width / 16;
  __m128i src0, tmp0, tmp1, dst0, dst1;
  __m128i zero = __lsx_vldi(0);

  assert(src_width > 0);

  for (x = 0; x < len; x++) {
    src0 = __lsx_vld(src_ptr, 0);
    DUP2_ARG2(__lsx_vld, dst_ptr, 0, dst_ptr, 16, dst0, dst1);
    tmp0 = __lsx_vilvl_b(zero, src0);
    tmp1 = __lsx_vilvh_b(zero, src0);
    DUP2_ARG2(__lsx_vadd_h, dst0, tmp0, dst1, tmp1, dst0, dst1);
    __lsx_vst(dst0, dst_ptr, 0);
    __lsx_vst(dst1, dst_ptr, 16);
    src_ptr += 16;
    dst_ptr += 16;
  }
}

void ScaleFilterCols_LSX(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         int dst_width,
                         int x,
                         int dx) {
  int j;
  int len = dst_width / 16;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
  __m128i vec0, vec1, dst0;
  __m128i vec_x = __lsx_vreplgr2vr_w(x);
  __m128i vec_dx = __lsx_vreplgr2vr_w(dx);
  __m128i const1 = __lsx_vreplgr2vr_w(0xFFFF);
  __m128i const2 = __lsx_vreplgr2vr_w(0x40);
  __m128i const_tmp = {0x0000000100000000, 0x0000000300000002};

  vec0 = __lsx_vmul_w(vec_dx, const_tmp);
  vec1 = __lsx_vslli_w(vec_dx, 2);
  vec_x = __lsx_vadd_w(vec_x, vec0);

  for (j = 0; j < len; j++) {
    tmp0 = __lsx_vsrai_w(vec_x, 16);
    tmp4 = __lsx_vand_v(vec_x, const1);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    tmp1 = __lsx_vsrai_w(vec_x, 16);
    tmp5 = __lsx_vand_v(vec_x, const1);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    tmp2 = __lsx_vsrai_w(vec_x, 16);
    tmp6 = __lsx_vand_v(vec_x, const1);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    tmp3 = __lsx_vsrai_w(vec_x, 16);
    tmp7 = __lsx_vand_v(vec_x, const1);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    DUP4_ARG2(__lsx_vsrai_w, tmp4, 9, tmp5, 9, tmp6, 9, tmp7, 9, tmp4, tmp5,
              tmp6, tmp7);
    LOAD_DATA(src_ptr, tmp0, reg0);
    LOAD_DATA(src_ptr, tmp1, reg1);
    LOAD_DATA(src_ptr, tmp2, reg2);
    LOAD_DATA(src_ptr, tmp3, reg3);
    DUP4_ARG2(__lsx_vaddi_wu, tmp0, 1, tmp1, 1, tmp2, 1, tmp3, 1, tmp0, tmp1,
              tmp2, tmp3);
    LOAD_DATA(src_ptr, tmp0, reg4);
    LOAD_DATA(src_ptr, tmp1, reg5);
    LOAD_DATA(src_ptr, tmp2, reg6);
    LOAD_DATA(src_ptr, tmp3, reg7);
    DUP4_ARG2(__lsx_vsub_w, reg4, reg0, reg5, reg1, reg6, reg2, reg7, reg3,
              reg4, reg5, reg6, reg7);
    DUP4_ARG2(__lsx_vmul_w, reg4, tmp4, reg5, tmp5, reg6, tmp6, reg7, tmp7,
              reg4, reg5, reg6, reg7);
    DUP4_ARG2(__lsx_vadd_w, reg4, const2, reg5, const2, reg6, const2, reg7,
              const2, reg4, reg5, reg6, reg7);
    DUP4_ARG2(__lsx_vsrai_w, reg4, 7, reg5, 7, reg6, 7, reg7, 7, reg4, reg5,
              reg6, reg7);
    DUP4_ARG2(__lsx_vadd_w, reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7,
              reg0, reg1, reg2, reg3);
    DUP2_ARG2(__lsx_vpickev_h, reg1, reg0, reg3, reg2, tmp0, tmp1);
    dst0 = __lsx_vpickev_b(tmp1, tmp0);
    __lsx_vst(dst0, dst_ptr, 0);
    dst_ptr += 16;
  }
}
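
// Illustrative scalar sketch (not from the libyuv sources): ScaleFilterCols
// walks the source in 16.16 fixed point. The top 16 bits of x select the
// left source pixel, and the low 16 bits, reduced to a 7-bit fraction f,
// blend it with its right neighbor, mirroring the vector math above.
static void ScaleFilterCols_ScalarSketch(uint8_t* dst_ptr,
                                         const uint8_t* src_ptr,
                                         int dst_width,
                                         int x,
                                         int dx) {
  int j;
  for (j = 0; j < dst_width; ++j) {
    int xi = x >> 16;            // integer source position
    int f = (x & 0xFFFF) >> 9;   // 7-bit blend fraction, as in the LSX path
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[j] = (uint8_t)(a + (((b - a) * f + 0x40) >> 7));
    x += dx;
  }
}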

void ScaleARGBCols_LSX(uint8_t* dst_argb,
                       const uint8_t* src_argb,
                       int dst_width,
                       int x,
                       int dx) {
  const uint32_t* src = (const uint32_t*)src_argb;
  uint32_t* dst = (uint32_t*)dst_argb;
  int j;
  int len = dst_width / 4;
  __m128i tmp0, tmp1, tmp2, dst0;
  __m128i vec_x = __lsx_vreplgr2vr_w(x);
  __m128i vec_dx = __lsx_vreplgr2vr_w(dx);
  __m128i const_tmp = {0x0000000100000000, 0x0000000300000002};

  tmp0 = __lsx_vmul_w(vec_dx, const_tmp);
  tmp1 = __lsx_vslli_w(vec_dx, 2);
  vec_x = __lsx_vadd_w(vec_x, tmp0);

  for (j = 0; j < len; j++) {
    tmp2 = __lsx_vsrai_w(vec_x, 16);
    vec_x = __lsx_vadd_w(vec_x, tmp1);
    LOAD_DATA(src, tmp2, dst0);
    __lsx_vst(dst0, dst, 0);
    dst += 4;
  }
}
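
// Illustrative scalar sketch (not from the libyuv sources): ScaleARGBCols is
// nearest-neighbor column scaling. Each 32-bit ARGB pixel is gathered from
// source position x >> 16, and x advances by dx per output pixel.
static void ScaleARGBCols_ScalarSketch(uint32_t* dst,
                                       const uint32_t* src,
                                       int dst_width,
                                       int x,
                                       int dx) {
  int j;
  for (j = 0; j < dst_width; ++j) {
    dst[j] = src[x >> 16];  // pick the nearest source pixel
    x += dx;
  }
}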

void ScaleARGBFilterCols_LSX(uint8_t* dst_argb,
                             const uint8_t* src_argb,
                             int dst_width,
                             int x,
                             int dx) {
  const uint32_t* src = (const uint32_t*)src_argb;
  int j;
  int len = dst_width / 8;
  __m128i src0, src1, src2, src3;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
  __m128i vec0, vec1, dst0, dst1;
  __m128i vec_x = __lsx_vreplgr2vr_w(x);
  __m128i vec_dx = __lsx_vreplgr2vr_w(dx);
  __m128i const_tmp = {0x0000000100000000, 0x0000000300000002};
  __m128i const_7f = __lsx_vldi(0x7F);

  vec0 = __lsx_vmul_w(vec_dx, const_tmp);
  vec1 = __lsx_vslli_w(vec_dx, 2);
  vec_x = __lsx_vadd_w(vec_x, vec0);

  for (j = 0; j < len; j++) {
    tmp0 = __lsx_vsrai_w(vec_x, 16);
    reg0 = __lsx_vsrai_w(vec_x, 9);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    tmp1 = __lsx_vsrai_w(vec_x, 16);
    reg1 = __lsx_vsrai_w(vec_x, 9);
    vec_x = __lsx_vadd_w(vec_x, vec1);
    DUP2_ARG2(__lsx_vand_v, reg0, const_7f, reg1, const_7f, reg0, reg1);
    DUP2_ARG2(__lsx_vshuf4i_b, reg0, 0, reg1, 0, reg0, reg1);
    DUP2_ARG2(__lsx_vxor_v, reg0, const_7f, reg1, const_7f, reg2, reg3);
    DUP2_ARG2(__lsx_vilvl_b, reg0, reg2, reg1, reg3, reg4, reg6);
    DUP2_ARG2(__lsx_vilvh_b, reg0, reg2, reg1, reg3, reg5, reg7);
    LOAD_DATA(src, tmp0, src0);
    LOAD_DATA(src, tmp1, src1);
    DUP2_ARG2(__lsx_vaddi_wu, tmp0, 1, tmp1, 1, tmp0, tmp1);
    LOAD_DATA(src, tmp0, src2);
    LOAD_DATA(src, tmp1, src3);
    DUP2_ARG2(__lsx_vilvl_b, src2, src0, src3, src1, tmp4, tmp6);
    DUP2_ARG2(__lsx_vilvh_b, src2, src0, src3, src1, tmp5, tmp7);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp4, reg4, tmp5, reg5, tmp6, reg6, tmp7, reg7,
              tmp0, tmp1, tmp2, tmp3);
    DUP2_ARG3(__lsx_vsrani_b_h, tmp1, tmp0, 7, tmp3, tmp2, 7, dst0, dst1);
    __lsx_vst(dst0, dst_argb, 0);
    __lsx_vst(dst1, dst_argb, 16);
    dst_argb += 32;
  }
}

void ScaleRowDown34_LSX(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  int x;
  (void)src_stride;
  __m128i src0, src1, src2, src3;
  __m128i dst0, dst1, dst2;
  __m128i shuff0 = {0x0908070504030100, 0x141311100F0D0C0B};
  __m128i shuff1 = {0x0F0D0C0B09080705, 0x1918171514131110};
  __m128i shuff2 = {0x141311100F0D0C0B, 0x1F1D1C1B19181715};

  assert((dst_width % 3 == 0) && (dst_width > 0));

  for (x = 0; x < dst_width; x += 48) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              src0, src1, src2, src3);
    DUP2_ARG3(__lsx_vshuf_b, src1, src0, shuff0, src2, src1, shuff1, dst0,
              dst1);
    dst2 = __lsx_vshuf_b(src3, src2, shuff2);
    __lsx_vst(dst0, dst, 0);
    __lsx_vst(dst1, dst, 16);
    __lsx_vst(dst2, dst, 32);
    src_ptr += 64;
    dst += 48;
  }
}

void ScaleRowDown34_0_Box_LSX(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* d,
                              int dst_width) {
  const uint8_t* src_nex = src_ptr + src_stride;
  int x;
  __m128i src0, src1, src2, src3, src4, src5, src6, src7;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
  __m128i tmp10, tmp11, dst0, dst1, dst2;
  __m128i const0 = {0x0103030101010103, 0x0101010303010101};
  __m128i const1 = {0x0301010101030301, 0x0103030101010103};
  __m128i const2 = {0x0101010303010101, 0x0301010101030301};
  __m128i shuff0 = {0x0504030202010100, 0x0A09090807060605};
  __m128i shuff1 = {0x0F0E0E0D0D0C0B0A, 0x1514131212111110};
  __m128i shuff2 = {0x0A09090807060605, 0x0F0E0E0D0D0C0B0A};
  __m128i shift0 = {0x0002000200010002, 0x0001000200020001};
  __m128i shift1 = {0x0002000100020002, 0x0002000200010002};
  __m128i shift2 = {0x0001000200020001, 0x0002000100020002};

  assert((dst_width % 3 == 0) && (dst_width > 0));

  for (x = 0; x < dst_width; x += 48) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vld, src_nex, 0, src_nex, 16, src_nex, 32, src_nex, 48,
              src4, src5, src6, src7);
    DUP4_ARG3(__lsx_vshuf_b, src0, src0, shuff0, src1, src0, shuff1, src1,
              src1, shuff2, src2, src2, shuff0, tmp0, tmp1, tmp2, tmp3);
    DUP4_ARG3(__lsx_vshuf_b, src3, src2, shuff1, src3, src3, shuff2, src4,
              src4, shuff0, src5, src4, shuff1, tmp4, tmp5, tmp6, tmp7);
    DUP4_ARG3(__lsx_vshuf_b, src5, src5, shuff2, src6, src6, shuff0, src7,
              src6, shuff1, src7, src7, shuff2, tmp8, tmp9, tmp10, tmp11);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp0, const0, tmp1, const1, tmp2, const2, tmp3,
              const0, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp4, const1, tmp5, const2, tmp6, const0, tmp7,
              const1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp8, const2, tmp9, const0, tmp10, const1,
              tmp11, const2, tmp0, tmp1, tmp2, tmp3);
    DUP4_ARG2(__lsx_vsrar_h, src0, shift0, src1, shift1, src2, shift2, src3,
              shift0, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vsrar_h, src4, shift1, src5, shift2, src6, shift0, src7,
              shift1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vsrar_h, tmp0, shift2, tmp1, shift0, tmp2, shift1, tmp3,
              shift2, tmp0, tmp1, tmp2, tmp3);
    DUP4_ARG2(__lsx_vslli_h, src0, 1, src1, 1, src2, 1, src3, 1, tmp5, tmp6,
              tmp7, tmp8);
    DUP2_ARG2(__lsx_vslli_h, src4, 1, src5, 1, tmp9, tmp10);
    DUP4_ARG2(__lsx_vadd_h, src0, tmp5, src1, tmp6, src2, tmp7, src3, tmp8,
              src0, src1, src2, src3);
    DUP2_ARG2(__lsx_vadd_h, src4, tmp9, src5, tmp10, src4, src5);
    DUP4_ARG2(__lsx_vadd_h, src0, src6, src1, src7, src2, tmp0, src3, tmp1,
              src0, src1, src2, src3);
    DUP2_ARG2(__lsx_vadd_h, src4, tmp2, src5, tmp3, src4, src5);
    DUP2_ARG3(__lsx_vsrarni_b_h, src1, src0, 2, src3, src2, 2, dst0, dst1);
    dst2 = __lsx_vsrarni_b_h(src5, src4, 2);
    __lsx_vst(dst0, d, 0);
    __lsx_vst(dst1, d, 16);
    __lsx_vst(dst2, d, 32);
    src_ptr += 64;
    src_nex += 64;
    d += 48;
  }
}

void ScaleRowDown34_1_Box_LSX(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* d,
                              int dst_width) {
  const uint8_t* src_nex = src_ptr + src_stride;
  int x;
  __m128i src0, src1, src2, src3, src4, src5, src6, src7;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
  __m128i tmp10, tmp11, dst0, dst1, dst2;
  __m128i const0 = {0x0103030101010103, 0x0101010303010101};
  __m128i const1 = {0x0301010101030301, 0x0103030101010103};
  __m128i const2 = {0x0101010303010101, 0x0301010101030301};
  __m128i shuff0 = {0x0504030202010100, 0x0A09090807060605};
  __m128i shuff1 = {0x0F0E0E0D0D0C0B0A, 0x1514131212111110};
  __m128i shuff2 = {0x0A09090807060605, 0x0F0E0E0D0D0C0B0A};
  __m128i shift0 = {0x0002000200010002, 0x0001000200020001};
  __m128i shift1 = {0x0002000100020002, 0x0002000200010002};
  __m128i shift2 = {0x0001000200020001, 0x0002000100020002};

  assert((dst_width % 3 == 0) && (dst_width > 0));

  for (x = 0; x < dst_width; x += 48) {
    DUP4_ARG2(__lsx_vld, src_ptr, 0, src_ptr, 16, src_ptr, 32, src_ptr, 48,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vld, src_nex, 0, src_nex, 16, src_nex, 32, src_nex, 48,
              src4, src5, src6, src7);
    DUP4_ARG3(__lsx_vshuf_b, src0, src0, shuff0, src1, src0, shuff1, src1,
              src1, shuff2, src2, src2, shuff0, tmp0, tmp1, tmp2, tmp3);
    DUP4_ARG3(__lsx_vshuf_b, src3, src2, shuff1, src3, src3, shuff2, src4,
              src4, shuff0, src5, src4, shuff1, tmp4, tmp5, tmp6, tmp7);
    DUP4_ARG3(__lsx_vshuf_b, src5, src5, shuff2, src6, src6, shuff0, src7,
              src6, shuff1, src7, src7, shuff2, tmp8, tmp9, tmp10, tmp11);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp0, const0, tmp1, const1, tmp2, const2, tmp3,
              const0, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp4, const1, tmp5, const2, tmp6, const0, tmp7,
              const1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vdp2_h_bu, tmp8, const2, tmp9, const0, tmp10, const1,
              tmp11, const2, tmp0, tmp1, tmp2, tmp3);
    DUP4_ARG2(__lsx_vsrar_h, src0, shift0, src1, shift1, src2, shift2, src3,
              shift0, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vsrar_h, src4, shift1, src5, shift2, src6, shift0, src7,
              shift1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vsrar_h, tmp0, shift2, tmp1, shift0, tmp2, shift1, tmp3,
              shift2, tmp0, tmp1, tmp2, tmp3);
    DUP4_ARG2(__lsx_vadd_h, src0, src6, src1, src7, src2, tmp0, src3, tmp1,
              src0, src1, src2, src3);
    DUP2_ARG2(__lsx_vadd_h, src4, tmp2, src5, tmp3, src4, src5);
    DUP2_ARG3(__lsx_vsrarni_b_h, src1, src0, 1, src3, src2, 1, dst0, dst1);
    dst2 = __lsx_vsrarni_b_h(src5, src4, 1);
    __lsx_vst(dst0, d, 0);
    __lsx_vst(dst1, d, 16);
    __lsx_vst(dst2, d, 32);
    src_ptr += 64;
    src_nex += 64;
    d += 48;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
1168
TMessagesProj/jni/third_party/libyuv/source/scale_mmi.cc
vendored
Normal file
File diff suppressed because it is too large
949
TMessagesProj/jni/third_party/libyuv/source/scale_msa.cc
vendored
Normal file
@@ -0,0 +1,949 @@
/*
 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "libyuv/scale_row.h"

// This module is for GCC MSA
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include "libyuv/macros_msa.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#define LOAD_INDEXED_DATA(srcp, indx0, out0) \
  {                                          \
    out0[0] = srcp[indx0[0]];                \
    out0[1] = srcp[indx0[1]];                \
    out0[2] = srcp[indx0[2]];                \
    out0[3] = srcp[indx0[3]];                \
  }
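
// Note on the macro above: MSA has no vector gather instruction, so
// LOAD_INDEXED_DATA emulates a 4-element gather with four scalar indexed
// loads, one per lane of indx0. The column-scaling loops below fetch source
// pixels through it one lane at a time.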
|
||||
|
||||
void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
v16u8 src0, src1, dst0;
|
||||
(void)src_stride;
|
||||
|
||||
for (x = 0; x < dst_width; x += 4) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
|
||||
dst0 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
|
||||
ST_UB(dst0, dst_argb);
|
||||
src_argb += 32;
|
||||
dst_argb += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
v16u8 src0, src1, vec0, vec1, dst0;
|
||||
(void)src_stride;
|
||||
|
||||
for (x = 0; x < dst_width; x += 4) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
|
||||
vec0 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0);
|
||||
vec1 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
|
||||
dst0 = (v16u8)__msa_aver_u_b((v16u8)vec0, (v16u8)vec1);
|
||||
ST_UB(dst0, dst_argb);
|
||||
src_argb += 32;
|
||||
dst_argb += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
const uint8_t* s = src_argb;
|
||||
const uint8_t* t = src_argb + src_stride;
|
||||
v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
|
||||
v8u16 reg0, reg1, reg2, reg3;
|
||||
v16i8 shuffler = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15};
|
||||
|
||||
for (x = 0; x < dst_width; x += 4) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)t, 0);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)t, 16);
|
||||
vec0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src0, (v16i8)src0);
|
||||
vec1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1);
|
||||
vec2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src2, (v16i8)src2);
|
||||
vec3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src3, (v16i8)src3);
|
||||
reg0 = __msa_hadd_u_h(vec0, vec0);
|
||||
reg1 = __msa_hadd_u_h(vec1, vec1);
|
||||
reg2 = __msa_hadd_u_h(vec2, vec2);
|
||||
reg3 = __msa_hadd_u_h(vec3, vec3);
|
||||
reg0 += reg2;
|
||||
reg1 += reg3;
|
||||
reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 2);
|
||||
reg1 = (v8u16)__msa_srari_h((v8i16)reg1, 2);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
|
||||
ST_UB(dst0, dst_argb);
|
||||
s += 32;
|
||||
t += 32;
|
||||
dst_argb += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int32_t src_stepx,
|
||||
uint8_t* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
int32_t stepx = src_stepx * 4;
|
||||
int32_t data0, data1, data2, data3;
|
||||
(void)src_stride;
|
||||
|
||||
for (x = 0; x < dst_width; x += 4) {
|
||||
data0 = LW(src_argb);
|
||||
data1 = LW(src_argb + stepx);
|
||||
data2 = LW(src_argb + stepx * 2);
|
||||
data3 = LW(src_argb + stepx * 3);
|
||||
SW(data0, dst_argb);
|
||||
SW(data1, dst_argb + 4);
|
||||
SW(data2, dst_argb + 8);
|
||||
SW(data3, dst_argb + 12);
|
||||
src_argb += stepx * 4;
|
||||
dst_argb += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8_t* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
const uint8_t* nxt_argb = src_argb + src_stride;
|
||||
int32_t stepx = src_stepx * 4;
|
||||
int64_t data0, data1, data2, data3;
|
||||
v16u8 src0 = {0}, src1 = {0}, src2 = {0}, src3 = {0};
|
||||
v16u8 vec0, vec1, vec2, vec3;
|
||||
v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
|
||||
v16u8 dst0;
|
||||
|
||||
for (x = 0; x < dst_width; x += 4) {
|
||||
data0 = LD(src_argb);
|
||||
data1 = LD(src_argb + stepx);
|
||||
data2 = LD(src_argb + stepx * 2);
|
||||
data3 = LD(src_argb + stepx * 3);
|
||||
src0 = (v16u8)__msa_insert_d((v2i64)src0, 0, data0);
|
||||
src0 = (v16u8)__msa_insert_d((v2i64)src0, 1, data1);
|
||||
src1 = (v16u8)__msa_insert_d((v2i64)src1, 0, data2);
|
||||
src1 = (v16u8)__msa_insert_d((v2i64)src1, 1, data3);
|
||||
data0 = LD(nxt_argb);
|
||||
data1 = LD(nxt_argb + stepx);
|
||||
data2 = LD(nxt_argb + stepx * 2);
|
||||
data3 = LD(nxt_argb + stepx * 3);
|
||||
src2 = (v16u8)__msa_insert_d((v2i64)src2, 0, data0);
|
||||
src2 = (v16u8)__msa_insert_d((v2i64)src2, 1, data1);
|
||||
src3 = (v16u8)__msa_insert_d((v2i64)src3, 0, data2);
|
||||
src3 = (v16u8)__msa_insert_d((v2i64)src3, 1, data3);
|
||||
vec0 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
|
||||
vec1 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
|
||||
vec2 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
|
||||
vec3 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
|
||||
reg0 = __msa_hadd_u_h(vec0, vec0);
|
||||
reg1 = __msa_hadd_u_h(vec1, vec1);
|
||||
reg2 = __msa_hadd_u_h(vec2, vec2);
|
||||
reg3 = __msa_hadd_u_h(vec3, vec3);
|
||||
reg4 = (v8u16)__msa_pckev_d((v2i64)reg2, (v2i64)reg0);
|
||||
reg5 = (v8u16)__msa_pckev_d((v2i64)reg3, (v2i64)reg1);
|
||||
reg6 = (v8u16)__msa_pckod_d((v2i64)reg2, (v2i64)reg0);
|
||||
reg7 = (v8u16)__msa_pckod_d((v2i64)reg3, (v2i64)reg1);
|
||||
reg4 += reg6;
|
||||
reg5 += reg7;
|
||||
reg4 = (v8u16)__msa_srari_h((v8i16)reg4, 2);
|
||||
reg5 = (v8u16)__msa_srari_h((v8i16)reg5, 2);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
|
||||
ST_UB(dst0, dst_argb);
|
||||
src_argb += stepx * 4;
|
||||
nxt_argb += stepx * 4;
|
||||
dst_argb += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, dst0, dst1;
|
||||
(void)src_stride;
|
||||
|
||||
for (x = 0; x < dst_width; x += 32) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48);
|
||||
dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||
dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
|
||||
ST_UB2(dst0, dst1, dst, 16);
|
||||
src_ptr += 64;
|
||||
dst += 32;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0, dst1;
|
||||
(void)src_stride;
|
||||
|
||||
for (x = 0; x < dst_width; x += 32) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48);
|
||||
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||
vec2 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||
vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||
vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
|
||||
dst0 = __msa_aver_u_b(vec1, vec0);
|
||||
dst1 = __msa_aver_u_b(vec3, vec2);
|
||||
ST_UB2(dst0, dst1, dst, 16);
|
||||
src_ptr += 64;
|
||||
dst += 32;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
const uint8_t* s = src_ptr;
|
||||
const uint8_t* t = src_ptr + src_stride;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1;
|
||||
v8u16 vec0, vec1, vec2, vec3;
|
||||
|
||||
for (x = 0; x < dst_width; x += 32) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 48);
|
||||
src4 = (v16u8)__msa_ld_b((v16i8*)t, 0);
|
||||
src5 = (v16u8)__msa_ld_b((v16i8*)t, 16);
|
||||
src6 = (v16u8)__msa_ld_b((v16i8*)t, 32);
|
||||
src7 = (v16u8)__msa_ld_b((v16i8*)t, 48);
|
||||
vec0 = __msa_hadd_u_h(src0, src0);
|
||||
vec1 = __msa_hadd_u_h(src1, src1);
|
||||
vec2 = __msa_hadd_u_h(src2, src2);
|
||||
vec3 = __msa_hadd_u_h(src3, src3);
|
||||
vec0 += __msa_hadd_u_h(src4, src4);
|
||||
vec1 += __msa_hadd_u_h(src5, src5);
|
||||
vec2 += __msa_hadd_u_h(src6, src6);
|
||||
vec3 += __msa_hadd_u_h(src7, src7);
|
||||
vec0 = (v8u16)__msa_srari_h((v8i16)vec0, 2);
|
||||
vec1 = (v8u16)__msa_srari_h((v8i16)vec1, 2);
|
||||
vec2 = (v8u16)__msa_srari_h((v8i16)vec2, 2);
|
||||
vec3 = (v8u16)__msa_srari_h((v8i16)vec3, 2);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
||||
dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
|
||||
ST_UB2(dst0, dst1, dst, 16);
|
||||
s += 64;
|
||||
t += 64;
|
||||
dst += 32;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown4_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, vec0, vec1, dst0;
|
||||
(void)src_stride;
|
||||
|
||||
for (x = 0; x < dst_width; x += 16) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48);
|
||||
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||
dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
|
||||
ST_UB(dst0, dst);
|
||||
src_ptr += 64;
|
||||
dst += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown4Box_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
const uint8_t* s = src_ptr;
|
||||
const uint8_t* t0 = s + src_stride;
|
||||
const uint8_t* t1 = s + src_stride * 2;
|
||||
const uint8_t* t2 = s + src_stride * 3;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0;
|
||||
v8u16 vec0, vec1, vec2, vec3;
|
||||
v4u32 reg0, reg1, reg2, reg3;
|
||||
|
||||
for (x = 0; x < dst_width; x += 16) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 48);
|
||||
src4 = (v16u8)__msa_ld_b((v16i8*)t0, 0);
|
||||
src5 = (v16u8)__msa_ld_b((v16i8*)t0, 16);
|
||||
src6 = (v16u8)__msa_ld_b((v16i8*)t0, 32);
|
||||
src7 = (v16u8)__msa_ld_b((v16i8*)t0, 48);
|
||||
vec0 = __msa_hadd_u_h(src0, src0);
|
||||
vec1 = __msa_hadd_u_h(src1, src1);
|
||||
vec2 = __msa_hadd_u_h(src2, src2);
|
||||
vec3 = __msa_hadd_u_h(src3, src3);
|
||||
vec0 += __msa_hadd_u_h(src4, src4);
|
||||
vec1 += __msa_hadd_u_h(src5, src5);
|
||||
vec2 += __msa_hadd_u_h(src6, src6);
|
||||
vec3 += __msa_hadd_u_h(src7, src7);
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)t1, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)t1, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)t1, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)t1, 48);
|
||||
src4 = (v16u8)__msa_ld_b((v16i8*)t2, 0);
|
||||
src5 = (v16u8)__msa_ld_b((v16i8*)t2, 16);
|
||||
src6 = (v16u8)__msa_ld_b((v16i8*)t2, 32);
|
||||
src7 = (v16u8)__msa_ld_b((v16i8*)t2, 48);
|
||||
vec0 += __msa_hadd_u_h(src0, src0);
|
||||
vec1 += __msa_hadd_u_h(src1, src1);
|
||||
vec2 += __msa_hadd_u_h(src2, src2);
|
||||
vec3 += __msa_hadd_u_h(src3, src3);
|
||||
vec0 += __msa_hadd_u_h(src4, src4);
|
||||
vec1 += __msa_hadd_u_h(src5, src5);
|
||||
vec2 += __msa_hadd_u_h(src6, src6);
|
||||
vec3 += __msa_hadd_u_h(src7, src7);
|
||||
reg0 = __msa_hadd_u_w(vec0, vec0);
|
||||
reg1 = __msa_hadd_u_w(vec1, vec1);
|
||||
reg2 = __msa_hadd_u_w(vec2, vec2);
|
||||
reg3 = __msa_hadd_u_w(vec3, vec3);
|
||||
reg0 = (v4u32)__msa_srari_w((v4i32)reg0, 4);
|
||||
reg1 = (v4u32)__msa_srari_w((v4i32)reg1, 4);
|
||||
reg2 = (v4u32)__msa_srari_w((v4i32)reg2, 4);
|
||||
reg3 = (v4u32)__msa_srari_w((v4i32)reg3, 4);
|
||||
vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
|
||||
vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
||||
ST_UB(dst0, dst);
|
||||
s += 64;
|
||||
t0 += 64;
|
||||
t1 += 64;
|
||||
t2 += 64;
|
||||
dst += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown38_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width) {
|
||||
int x, width;
|
||||
uint64_t dst0;
|
||||
uint32_t dst1;
|
||||
v16u8 src0, src1, vec0;
|
||||
v16i8 mask = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0};
|
||||
(void)src_stride;
|
||||
|
||||
assert(dst_width % 3 == 0);
|
||||
width = dst_width / 3;
|
||||
|
||||
for (x = 0; x < width; x += 4) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16);
|
||||
vec0 = (v16u8)__msa_vshf_b(mask, (v16i8)src1, (v16i8)src0);
|
||||
dst0 = __msa_copy_u_d((v2i64)vec0, 0);
|
||||
dst1 = __msa_copy_u_w((v4i32)vec0, 2);
|
||||
SD(dst0, dst);
|
||||
SW(dst1, dst + 8);
|
||||
src_ptr += 32;
|
||||
dst += 12;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst_ptr,
|
||||
int dst_width) {
|
||||
int x, width;
|
||||
const uint8_t* s = src_ptr;
|
||||
const uint8_t* t = src_ptr + src_stride;
|
||||
uint64_t dst0;
|
||||
uint32_t dst1;
|
||||
v16u8 src0, src1, src2, src3, out;
|
||||
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
|
||||
v4u32 tmp0, tmp1, tmp2, tmp3, tmp4;
|
||||
v8i16 zero = {0};
|
||||
v8i16 mask = {0, 1, 2, 8, 3, 4, 5, 9};
|
||||
v16i8 dst_mask = {0, 2, 16, 4, 6, 18, 8, 10, 20, 12, 14, 22, 0, 0, 0, 0};
|
||||
v4u32 const_0x2AAA = (v4u32)__msa_fill_w(0x2AAA);
|
||||
v4u32 const_0x4000 = (v4u32)__msa_fill_w(0x4000);
|
||||
|
||||
assert((dst_width % 3 == 0) && (dst_width > 0));
|
||||
width = dst_width / 3;
|
||||
|
||||
for (x = 0; x < width; x += 4) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)t, 0);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)t, 16);
|
||||
vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
|
||||
vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
|
||||
vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
|
||||
vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
|
||||
vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0);
|
||||
vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1);
|
||||
vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2);
|
||||
vec3 = __msa_hadd_u_h((v16u8)vec3, (v16u8)vec3);
|
||||
vec4 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec0);
|
||||
vec5 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec1);
|
||||
vec6 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec2);
|
||||
vec7 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec3);
|
||||
vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0);
|
||||
vec1 = (v8u16)__msa_pckod_w((v4i32)vec3, (v4i32)vec2);
|
||||
vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0);
|
||||
tmp0 = __msa_hadd_u_w(vec4, vec4);
|
||||
tmp1 = __msa_hadd_u_w(vec5, vec5);
|
||||
tmp2 = __msa_hadd_u_w(vec6, vec6);
|
||||
tmp3 = __msa_hadd_u_w(vec7, vec7);
|
||||
tmp4 = __msa_hadd_u_w(vec0, vec0);
|
||||
vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
|
||||
vec1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2);
|
||||
tmp0 = __msa_hadd_u_w(vec0, vec0);
|
||||
tmp1 = __msa_hadd_u_w(vec1, vec1);
|
||||
tmp0 *= const_0x2AAA;
|
||||
tmp1 *= const_0x2AAA;
|
||||
tmp4 *= const_0x4000;
|
||||
tmp0 = (v4u32)__msa_srai_w((v4i32)tmp0, 16);
|
||||
tmp1 = (v4u32)__msa_srai_w((v4i32)tmp1, 16);
|
||||
tmp4 = (v4u32)__msa_srai_w((v4i32)tmp4, 16);
|
||||
vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
|
||||
vec1 = (v8u16)__msa_pckev_h((v8i16)tmp4, (v8i16)tmp4);
|
||||
out = (v16u8)__msa_vshf_b(dst_mask, (v16i8)vec1, (v16i8)vec0);
|
||||
dst0 = __msa_copy_u_d((v2i64)out, 0);
|
||||
dst1 = __msa_copy_u_w((v4i32)out, 2);
|
||||
SD(dst0, dst_ptr);
|
||||
SW(dst1, dst_ptr + 8);
|
||||
s += 32;
|
||||
t += 32;
|
||||
dst_ptr += 12;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst_ptr,
|
||||
int dst_width) {
|
||||
int x, width;
|
||||
const uint8_t* s = src_ptr;
|
||||
const uint8_t* t0 = s + src_stride;
|
||||
const uint8_t* t1 = s + src_stride * 2;
|
||||
uint64_t dst0;
|
||||
uint32_t dst1;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, out;
|
||||
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
|
||||
v4u32 tmp0, tmp1, tmp2, tmp3, tmp4;
|
||||
v8u16 zero = {0};
|
||||
v8i16 mask = {0, 1, 2, 8, 3, 4, 5, 9};
|
||||
v16i8 dst_mask = {0, 2, 16, 4, 6, 18, 8, 10, 20, 12, 14, 22, 0, 0, 0, 0};
|
||||
v4u32 const_0x1C71 = (v4u32)__msa_fill_w(0x1C71);
|
||||
v4u32 const_0x2AAA = (v4u32)__msa_fill_w(0x2AAA);
|
||||
|
||||
assert((dst_width % 3 == 0) && (dst_width > 0));
|
||||
width = dst_width / 3;
|
||||
|
||||
for (x = 0; x < width; x += 4) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)t0, 0);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)t0, 16);
|
||||
src4 = (v16u8)__msa_ld_b((v16i8*)t1, 0);
|
||||
src5 = (v16u8)__msa_ld_b((v16i8*)t1, 16);
|
||||
vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
|
||||
vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
|
||||
vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
|
||||
vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
|
||||
vec4 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src4);
|
||||
vec5 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src4);
|
||||
vec6 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src5);
|
||||
vec7 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src5);
|
||||
vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0);
|
||||
vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1);
|
||||
vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2);
|
||||
vec3 = __msa_hadd_u_h((v16u8)vec3, (v16u8)vec3);
|
||||
vec0 += __msa_hadd_u_h((v16u8)vec4, (v16u8)vec4);
|
||||
vec1 += __msa_hadd_u_h((v16u8)vec5, (v16u8)vec5);
|
||||
vec2 += __msa_hadd_u_h((v16u8)vec6, (v16u8)vec6);
|
||||
vec3 += __msa_hadd_u_h((v16u8)vec7, (v16u8)vec7);
|
||||
vec4 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec0);
|
||||
vec5 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec1);
|
||||
vec6 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec2);
|
||||
vec7 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec3);
|
||||
vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0);
|
||||
vec1 = (v8u16)__msa_pckod_w((v4i32)vec3, (v4i32)vec2);
|
||||
vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0);
|
||||
tmp0 = __msa_hadd_u_w(vec4, vec4);
|
||||
tmp1 = __msa_hadd_u_w(vec5, vec5);
|
||||
tmp2 = __msa_hadd_u_w(vec6, vec6);
|
||||
tmp3 = __msa_hadd_u_w(vec7, vec7);
|
||||
tmp4 = __msa_hadd_u_w(vec0, vec0);
|
||||
vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
|
||||
vec1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2);
|
||||
tmp0 = __msa_hadd_u_w(vec0, vec0);
|
||||
tmp1 = __msa_hadd_u_w(vec1, vec1);
|
||||
tmp0 *= const_0x1C71;
|
||||
tmp1 *= const_0x1C71;
|
||||
tmp4 *= const_0x2AAA;
|
||||
tmp0 = (v4u32)__msa_srai_w((v4i32)tmp0, 16);
|
||||
tmp1 = (v4u32)__msa_srai_w((v4i32)tmp1, 16);
|
||||
tmp4 = (v4u32)__msa_srai_w((v4i32)tmp4, 16);
|
||||
vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
|
||||
vec1 = (v8u16)__msa_pckev_h((v8i16)tmp4, (v8i16)tmp4);
|
||||
out = (v16u8)__msa_vshf_b(dst_mask, (v16i8)vec1, (v16i8)vec0);
|
||||
dst0 = __msa_copy_u_d((v2i64)out, 0);
|
||||
dst1 = __msa_copy_u_w((v4i32)out, 2);
|
||||
SD(dst0, dst_ptr);
|
||||
SW(dst1, dst_ptr + 8);
|
||||
s += 32;
|
||||
t0 += 32;
|
||||
t1 += 32;
|
||||
dst_ptr += 12;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
|
||||
int x;
|
||||
v16u8 src0;
|
||||
v8u16 dst0, dst1;
|
||||
v16i8 zero = {0};
|
||||
|
||||
assert(src_width > 0);
|
||||
|
||||
for (x = 0; x < src_width; x += 16) {
|
||||
src0 = LD_UB(src_ptr);
|
||||
dst0 = (v8u16)__msa_ld_h((v8i16*)dst_ptr, 0);
|
||||
dst1 = (v8u16)__msa_ld_h((v8i16*)dst_ptr, 16);
|
||||
dst0 += (v8u16)__msa_ilvr_b(zero, (v16i8)src0);
|
||||
dst1 += (v8u16)__msa_ilvl_b(zero, (v16i8)src0);
|
||||
ST_UH2(dst0, dst1, dst_ptr, 8);
|
||||
src_ptr += 16;
|
||||
dst_ptr += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleFilterCols_MSA(uint8_t* dst_ptr,
|
||||
const uint8_t* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int j;
|
||||
v4i32 vec_x = __msa_fill_w(x);
|
||||
v4i32 vec_dx = __msa_fill_w(dx);
|
||||
v4i32 vec_const = {0, 1, 2, 3};
|
||||
v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
v8u16 reg0, reg1;
|
||||
v16u8 dst0;
|
||||
v4i32 const_0xFFFF = __msa_fill_w(0xFFFF);
|
||||
v4i32 const_0x40 = __msa_fill_w(0x40);
|
||||
|
||||
vec0 = vec_dx * vec_const;
|
||||
vec1 = vec_dx * 4;
|
||||
vec_x += vec0;
|
||||
|
||||
for (j = 0; j < dst_width - 1; j += 16) {
|
||||
vec2 = vec_x >> 16;
|
||||
vec6 = vec_x & const_0xFFFF;
|
||||
vec_x += vec1;
|
||||
vec3 = vec_x >> 16;
|
||||
vec7 = vec_x & const_0xFFFF;
|
||||
vec_x += vec1;
|
||||
vec4 = vec_x >> 16;
|
||||
vec8 = vec_x & const_0xFFFF;
|
||||
vec_x += vec1;
|
||||
vec5 = vec_x >> 16;
|
||||
vec9 = vec_x & const_0xFFFF;
|
||||
vec_x += vec1;
|
||||
vec6 >>= 9;
|
||||
vec7 >>= 9;
|
||||
vec8 >>= 9;
|
||||
vec9 >>= 9;
|
||||
LOAD_INDEXED_DATA(src_ptr, vec2, tmp0);
|
||||
LOAD_INDEXED_DATA(src_ptr, vec3, tmp1);
|
||||
LOAD_INDEXED_DATA(src_ptr, vec4, tmp2);
|
||||
LOAD_INDEXED_DATA(src_ptr, vec5, tmp3);
|
||||
vec2 += 1;
|
||||
vec3 += 1;
|
||||
vec4 += 1;
|
||||
vec5 += 1;
|
||||
LOAD_INDEXED_DATA(src_ptr, vec2, tmp4);
|
||||
LOAD_INDEXED_DATA(src_ptr, vec3, tmp5);
|
||||
LOAD_INDEXED_DATA(src_ptr, vec4, tmp6);
|
||||
LOAD_INDEXED_DATA(src_ptr, vec5, tmp7);
|
||||
tmp4 -= tmp0;
|
||||
tmp5 -= tmp1;
|
||||
tmp6 -= tmp2;
|
||||
tmp7 -= tmp3;
|
||||
tmp4 *= vec6;
|
||||
tmp5 *= vec7;
|
||||
tmp6 *= vec8;
|
||||
tmp7 *= vec9;
|
||||
tmp4 += const_0x40;
|
||||
tmp5 += const_0x40;
|
||||
tmp6 += const_0x40;
|
||||
tmp7 += const_0x40;
|
||||
tmp4 >>= 7;
|
||||
tmp5 >>= 7;
|
||||
tmp6 >>= 7;
|
||||
tmp7 >>= 7;
|
||||
tmp0 += tmp4;
|
||||
tmp1 += tmp5;
|
||||
tmp2 += tmp6;
|
||||
tmp3 += tmp7;
|
||||
reg0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
|
||||
reg1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
|
||||
__msa_st_b(dst0, dst_ptr, 0);
|
||||
dst_ptr += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBCols_MSA(uint8_t* dst_argb,
|
||||
const uint8_t* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
const uint32_t* src = (const uint32_t*)(src_argb);
|
||||
uint32_t* dst = (uint32_t*)(dst_argb);
|
||||
int j;
|
||||
v4i32 x_vec = __msa_fill_w(x);
|
||||
v4i32 dx_vec = __msa_fill_w(dx);
|
||||
v4i32 const_vec = {0, 1, 2, 3};
|
||||
v4i32 vec0, vec1, vec2;
|
||||
v4i32 dst0;
|
||||
|
||||
vec0 = dx_vec * const_vec;
|
||||
vec1 = dx_vec * 4;
|
||||
x_vec += vec0;
|
||||
|
||||
for (j = 0; j < dst_width; j += 4) {
|
||||
vec2 = x_vec >> 16;
|
||||
x_vec += vec1;
|
||||
LOAD_INDEXED_DATA(src, vec2, dst0);
|
||||
__msa_st_w(dst0, dst, 0);
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBFilterCols_MSA(uint8_t* dst_argb,
|
||||
const uint8_t* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
const uint32_t* src = (const uint32_t*)(src_argb);
|
||||
int j;
|
||||
v4u32 src0, src1, src2, src3;
|
||||
v4u32 vec0, vec1, vec2, vec3;
|
||||
v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
|
||||
v16u8 mult0, mult1, mult2, mult3;
|
||||
v8u16 tmp0, tmp1, tmp2, tmp3;
|
||||
v16u8 dst0, dst1;
|
||||
v4u32 vec_x = (v4u32)__msa_fill_w(x);
|
||||
v4u32 vec_dx = (v4u32)__msa_fill_w(dx);
|
||||
v4u32 vec_const = {0, 1, 2, 3};
|
||||
v16u8 const_0x7f = (v16u8)__msa_fill_b(0x7f);
|
||||
|
||||
vec0 = vec_dx * vec_const;
|
||||
vec1 = vec_dx * 4;
|
||||
vec_x += vec0;
|
||||
|
||||
for (j = 0; j < dst_width - 1; j += 8) {
|
||||
vec2 = vec_x >> 16;
|
||||
reg0 = (v16u8)(vec_x >> 9);
|
||||
vec_x += vec1;
|
||||
vec3 = vec_x >> 16;
|
||||
reg1 = (v16u8)(vec_x >> 9);
|
||||
vec_x += vec1;
|
||||
reg0 = reg0 & const_0x7f;
|
||||
reg1 = reg1 & const_0x7f;
|
||||
reg0 = (v16u8)__msa_shf_b((v16i8)reg0, 0);
|
||||
reg1 = (v16u8)__msa_shf_b((v16i8)reg1, 0);
|
||||
reg2 = reg0 ^ const_0x7f;
|
||||
reg3 = reg1 ^ const_0x7f;
|
||||
mult0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)reg2);
|
||||
mult1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)reg2);
|
||||
mult2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)reg3);
|
||||
mult3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)reg3);
|
||||
LOAD_INDEXED_DATA(src, vec2, src0);
|
||||
LOAD_INDEXED_DATA(src, vec3, src1);
|
||||
vec2 += 1;
|
||||
vec3 += 1;
|
||||
LOAD_INDEXED_DATA(src, vec2, src2);
|
||||
LOAD_INDEXED_DATA(src, vec3, src3);
|
||||
reg4 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
|
||||
reg5 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
|
||||
reg6 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
|
||||
reg7 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
|
||||
tmp0 = __msa_dotp_u_h(reg4, mult0);
|
||||
tmp1 = __msa_dotp_u_h(reg5, mult1);
|
||||
tmp2 = __msa_dotp_u_h(reg6, mult2);
|
||||
tmp3 = __msa_dotp_u_h(reg7, mult3);
|
||||
tmp0 >>= 7;
|
||||
tmp1 >>= 7;
|
||||
tmp2 >>= 7;
|
||||
tmp3 >>= 7;
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
|
||||
dst1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
|
||||
__msa_st_b(dst0, dst_argb, 0);
|
||||
__msa_st_b(dst1, dst_argb, 16);
|
||||
dst_argb += 32;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown34_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
(void)src_stride;
|
||||
v16u8 src0, src1, src2, src3;
|
||||
v16u8 vec0, vec1, vec2;
|
||||
v16i8 mask0 = {0, 1, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20};
|
||||
v16i8 mask1 = {5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25};
|
||||
v16i8 mask2 = {11, 12, 13, 15, 16, 17, 19, 20,
|
||||
21, 23, 24, 25, 27, 28, 29, 31};
|
||||
|
||||
assert((dst_width % 3 == 0) && (dst_width > 0));
|
||||
|
||||
for (x = 0; x < dst_width; x += 48) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48);
|
||||
vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0);
|
||||
vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src2, (v16i8)src1);
|
||||
vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src2);
|
||||
__msa_st_b((v16i8)vec0, dst, 0);
|
||||
__msa_st_b((v16i8)vec1, dst, 16);
|
||||
__msa_st_b((v16i8)vec2, dst, 32);
|
||||
src_ptr += 64;
|
||||
dst += 48;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* d,
|
||||
int dst_width) {
|
||||
const uint8_t* s = src_ptr;
|
||||
const uint8_t* t = src_ptr + src_stride;
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2;
|
||||
v16u8 vec0, vec1, vec2, vec3, vec4, vec5;
|
||||
v16u8 vec6, vec7, vec8, vec9, vec10, vec11;
|
||||
v8i16 reg0, reg1, reg2, reg3, reg4, reg5;
|
||||
v8i16 reg6, reg7, reg8, reg9, reg10, reg11;
|
||||
v16u8 const0 = {3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1};
|
||||
v16u8 const1 = {1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1};
|
||||
v16u8 const2 = {1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3};
|
||||
v16i8 mask0 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10};
|
||||
v16i8 mask1 = {10, 11, 12, 13, 13, 14, 14, 15,
|
||||
16, 17, 17, 18, 18, 19, 20, 21};
|
||||
v16i8 mask2 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15};
|
||||
v8i16 shft0 = {2, 1, 2, 2, 1, 2, 2, 1};
|
||||
v8i16 shft1 = {2, 2, 1, 2, 2, 1, 2, 2};
|
||||
v8i16 shft2 = {1, 2, 2, 1, 2, 2, 1, 2};
|
||||
|
||||
assert((dst_width % 3 == 0) && (dst_width > 0));
|
||||
|
||||
for (x = 0; x < dst_width; x += 48) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 48);
|
||||
src4 = (v16u8)__msa_ld_b((v16i8*)t, 0);
|
||||
src5 = (v16u8)__msa_ld_b((v16i8*)t, 16);
|
||||
src6 = (v16u8)__msa_ld_b((v16i8*)t, 32);
|
||||
src7 = (v16u8)__msa_ld_b((v16i8*)t, 48);
|
||||
vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src0, (v16i8)src0);
|
||||
vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
|
||||
vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src1, (v16i8)src1);
|
||||
vec3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src2, (v16i8)src2);
|
||||
vec4 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2);
|
||||
vec5 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src3);
|
||||
vec6 = (v16u8)__msa_vshf_b(mask0, (v16i8)src4, (v16i8)src4);
|
||||
vec7 = (v16u8)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4);
|
||||
vec8 = (v16u8)__msa_vshf_b(mask2, (v16i8)src5, (v16i8)src5);
|
||||
vec9 = (v16u8)__msa_vshf_b(mask0, (v16i8)src6, (v16i8)src6);
|
||||
vec10 = (v16u8)__msa_vshf_b(mask1, (v16i8)src7, (v16i8)src6);
|
||||
vec11 = (v16u8)__msa_vshf_b(mask2, (v16i8)src7, (v16i8)src7);
|
||||
reg0 = (v8i16)__msa_dotp_u_h(vec0, const0);
|
||||
reg1 = (v8i16)__msa_dotp_u_h(vec1, const1);
|
||||
reg2 = (v8i16)__msa_dotp_u_h(vec2, const2);
|
||||
reg3 = (v8i16)__msa_dotp_u_h(vec3, const0);
|
||||
reg4 = (v8i16)__msa_dotp_u_h(vec4, const1);
|
||||
reg5 = (v8i16)__msa_dotp_u_h(vec5, const2);
|
||||
reg6 = (v8i16)__msa_dotp_u_h(vec6, const0);
|
||||
reg7 = (v8i16)__msa_dotp_u_h(vec7, const1);
|
||||
reg8 = (v8i16)__msa_dotp_u_h(vec8, const2);
|
||||
reg9 = (v8i16)__msa_dotp_u_h(vec9, const0);
|
||||
reg10 = (v8i16)__msa_dotp_u_h(vec10, const1);
|
||||
reg11 = (v8i16)__msa_dotp_u_h(vec11, const2);
|
||||
reg0 = __msa_srar_h(reg0, shft0);
|
||||
reg1 = __msa_srar_h(reg1, shft1);
|
||||
reg2 = __msa_srar_h(reg2, shft2);
|
||||
reg3 = __msa_srar_h(reg3, shft0);
|
||||
reg4 = __msa_srar_h(reg4, shft1);
|
||||
reg5 = __msa_srar_h(reg5, shft2);
|
||||
reg6 = __msa_srar_h(reg6, shft0);
|
||||
reg7 = __msa_srar_h(reg7, shft1);
|
||||
reg8 = __msa_srar_h(reg8, shft2);
|
||||
reg9 = __msa_srar_h(reg9, shft0);
|
||||
reg10 = __msa_srar_h(reg10, shft1);
|
||||
reg11 = __msa_srar_h(reg11, shft2);
|
||||
reg0 = reg0 * 3 + reg6;
|
||||
reg1 = reg1 * 3 + reg7;
|
||||
reg2 = reg2 * 3 + reg8;
|
||||
reg3 = reg3 * 3 + reg9;
|
||||
reg4 = reg4 * 3 + reg10;
|
||||
reg5 = reg5 * 3 + reg11;
|
||||
reg0 = __msa_srari_h(reg0, 2);
|
||||
reg1 = __msa_srari_h(reg1, 2);
|
||||
reg2 = __msa_srari_h(reg2, 2);
|
||||
reg3 = __msa_srari_h(reg3, 2);
|
||||
reg4 = __msa_srari_h(reg4, 2);
|
||||
reg5 = __msa_srari_h(reg5, 2);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
|
||||
dst1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2);
|
||||
dst2 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
|
||||
__msa_st_b((v16i8)dst0, d, 0);
|
||||
__msa_st_b((v16i8)dst1, d, 16);
|
||||
__msa_st_b((v16i8)dst2, d, 32);
|
||||
s += 64;
|
||||
t += 64;
|
||||
d += 48;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* d,
|
||||
int dst_width) {
|
||||
const uint8_t* s = src_ptr;
|
||||
const uint8_t* t = src_ptr + src_stride;
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2;
|
||||
v16u8 vec0, vec1, vec2, vec3, vec4, vec5;
|
||||
v16u8 vec6, vec7, vec8, vec9, vec10, vec11;
|
||||
v8i16 reg0, reg1, reg2, reg3, reg4, reg5;
|
||||
v8i16 reg6, reg7, reg8, reg9, reg10, reg11;
|
||||
v16u8 const0 = {3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1};
|
||||
v16u8 const1 = {1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1};
|
||||
  // Byte weights for the 1:3 / 3:1 blends used by the third shuffle.
  v16u8 const2 = {1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3};
  // Shuffle masks pairing the adjacent source bytes that feed each
  // output pixel.
  v16i8 mask0 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10};
  v16i8 mask1 = {10, 11, 12, 13, 13, 14, 14, 15,
                 16, 17, 17, 18, 18, 19, 20, 21};
  v16i8 mask2 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15};
  // Per-lane shift amounts: >>2 where the pair weights sum to 4 (3 + 1),
  // >>1 where they sum to 2 (1 + 1).
  v8i16 shft0 = {2, 1, 2, 2, 1, 2, 2, 1};
  v8i16 shft1 = {2, 2, 1, 2, 2, 1, 2, 2};
  v8i16 shft2 = {1, 2, 2, 1, 2, 2, 1, 2};

  assert((dst_width % 3 == 0) && (dst_width > 0));

  for (x = 0; x < dst_width; x += 48) {
    // Load 64 bytes from each of the two source rows s and t.
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
    src2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
    src3 = (v16u8)__msa_ld_b((v16i8*)s, 48);
    src4 = (v16u8)__msa_ld_b((v16i8*)t, 0);
    src5 = (v16u8)__msa_ld_b((v16i8*)t, 16);
    src6 = (v16u8)__msa_ld_b((v16i8*)t, 32);
    src7 = (v16u8)__msa_ld_b((v16i8*)t, 48);
    // Gather the byte pairs contributing to each output pixel.
    vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src0, (v16i8)src0);
    vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
    vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src1, (v16i8)src1);
    vec3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src2, (v16i8)src2);
    vec4 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2);
    vec5 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src3);
    vec6 = (v16u8)__msa_vshf_b(mask0, (v16i8)src4, (v16i8)src4);
    vec7 = (v16u8)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4);
    vec8 = (v16u8)__msa_vshf_b(mask2, (v16i8)src5, (v16i8)src5);
    vec9 = (v16u8)__msa_vshf_b(mask0, (v16i8)src6, (v16i8)src6);
    vec10 = (v16u8)__msa_vshf_b(mask1, (v16i8)src7, (v16i8)src6);
    vec11 = (v16u8)__msa_vshf_b(mask2, (v16i8)src7, (v16i8)src7);
    // Weighted pairwise sums (dot products with the 1/3 byte weights).
    reg0 = (v8i16)__msa_dotp_u_h(vec0, const0);
    reg1 = (v8i16)__msa_dotp_u_h(vec1, const1);
    reg2 = (v8i16)__msa_dotp_u_h(vec2, const2);
    reg3 = (v8i16)__msa_dotp_u_h(vec3, const0);
    reg4 = (v8i16)__msa_dotp_u_h(vec4, const1);
    reg5 = (v8i16)__msa_dotp_u_h(vec5, const2);
    reg6 = (v8i16)__msa_dotp_u_h(vec6, const0);
    reg7 = (v8i16)__msa_dotp_u_h(vec7, const1);
    reg8 = (v8i16)__msa_dotp_u_h(vec8, const2);
    reg9 = (v8i16)__msa_dotp_u_h(vec9, const0);
    reg10 = (v8i16)__msa_dotp_u_h(vec10, const1);
    reg11 = (v8i16)__msa_dotp_u_h(vec11, const2);
    // Shift-round each lane by its weight sum.
    reg0 = __msa_srar_h(reg0, shft0);
    reg1 = __msa_srar_h(reg1, shft1);
    reg2 = __msa_srar_h(reg2, shft2);
    reg3 = __msa_srar_h(reg3, shft0);
    reg4 = __msa_srar_h(reg4, shft1);
    reg5 = __msa_srar_h(reg5, shft2);
    reg6 = __msa_srar_h(reg6, shft0);
    reg7 = __msa_srar_h(reg7, shft1);
    reg8 = __msa_srar_h(reg8, shft2);
    reg9 = __msa_srar_h(reg9, shft0);
    reg10 = __msa_srar_h(reg10, shft1);
    reg11 = __msa_srar_h(reg11, shft2);
    // Sum the filtered results of the two rows...
    reg0 += reg6;
    reg1 += reg7;
    reg2 += reg8;
    reg3 += reg9;
    reg4 += reg10;
    reg5 += reg11;
    // ...and average them with rounding.
    reg0 = __msa_srari_h(reg0, 1);
    reg1 = __msa_srari_h(reg1, 1);
    reg2 = __msa_srari_h(reg2, 1);
    reg3 = __msa_srari_h(reg3, 1);
    reg4 = __msa_srari_h(reg4, 1);
    reg5 = __msa_srari_h(reg5, 1);
    // Pack the 16-bit results back to bytes and store 48 output pixels.
    dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
    dst1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2);
    dst2 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
    __msa_st_b((v16i8)dst0, d, 0);
    __msa_st_b((v16i8)dst1, d, 16);
    __msa_st_b((v16i8)dst2, d, 32);
    s += 64;
    t += 64;
    d += 48;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
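For reference, here is a minimal scalar sketch of the arithmetic the vector loop above performs. The function name and signature are illustrative, not part of the library, and the exact weight order is inferred from the visible shift vectors: each group of four source bytes yields three output bytes, blended with 3:1, 1:1 and 1:3 weights, after which the corresponding results from the two input rows are averaged with rounding.

#include <stdint.h>

/* Hypothetical scalar equivalent of the MSA loop above: each group of 4
   source bytes from rows s and t produces 3 output bytes in d. */
static void ScaleRowDown34_Box_Scalar(const uint8_t* s, const uint8_t* t,
                                      uint8_t* d, int dst_width) {
  int x;
  for (x = 0; x < dst_width; x += 3) {
    /* Blend adjacent pixels with 3:1, 1:1 and 1:3 weights per row. */
    uint8_t a0 = (uint8_t)((s[0] * 3 + s[1] + 2) >> 2);
    uint8_t a1 = (uint8_t)((s[1] + s[2] + 1) >> 1);
    uint8_t a2 = (uint8_t)((s[2] + s[3] * 3 + 2) >> 2);
    uint8_t b0 = (uint8_t)((t[0] * 3 + t[1] + 2) >> 2);
    uint8_t b1 = (uint8_t)((t[1] + t[2] + 1) >> 1);
    uint8_t b2 = (uint8_t)((t[2] + t[3] * 3 + 2) >> 2);
    /* Average the two filtered rows with rounding. */
    d[0] = (uint8_t)((a0 + b0 + 1) >> 1);
    d[1] = (uint8_t)((a1 + b1 + 1) >> 1);
    d[2] = (uint8_t)((a2 + b2 + 1) >> 1);
    s += 4;
    t += 4;
    d += 3;
  }
}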
1494
TMessagesProj/jni/third_party/libyuv/source/scale_neon.cc
vendored
Normal file
File diff suppressed because it is too large
1634
TMessagesProj/jni/third_party/libyuv/source/scale_neon64.cc
vendored
Normal file
File diff suppressed because it is too large
66
TMessagesProj/jni/third_party/libyuv/source/scale_rgb.cc
vendored
Normal file
@@ -0,0 +1,66 @@
/*
 *  Copyright 2022 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h" /* For FilterMode */

#include <assert.h>
#include <stdlib.h> /* For malloc/free */
#include <string.h>

#include "libyuv/convert_argb.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/row.h"
#include "libyuv/scale_argb.h"
#include "libyuv/scale_rgb.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Scale a 24 bit image.
// Converts to ARGB as intermediate step.

LIBYUV_API
int RGBScale(const uint8_t* src_rgb,
             int src_stride_rgb,
             int src_width,
             int src_height,
             uint8_t* dst_rgb,
             int dst_stride_rgb,
             int dst_width,
             int dst_height,
             enum FilterMode filtering) {
  int r;
  // One allocation holds both ARGB intermediates, back to back.
  uint8_t* src_argb =
      (uint8_t*)malloc(src_width * src_height * 4 + dst_width * dst_height * 4);
  uint8_t* dst_argb = src_argb + src_width * src_height * 4;

  if (!src_argb) {
    return 1;
  }

  r = RGB24ToARGB(src_rgb, src_stride_rgb, src_argb, src_width * 4, src_width,
                  src_height);
  if (!r) {
    r = ARGBScale(src_argb, src_width * 4, src_width, src_height, dst_argb,
                  dst_width * 4, dst_width, dst_height, filtering);
    if (!r) {
      r = ARGBToRGB24(dst_argb, dst_width * 4, dst_rgb, dst_stride_rgb,
                      dst_width, dst_height);
    }
  }
  free(src_argb);
  return r;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
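For context, a minimal usage sketch of RGBScale follows. The dimensions, buffers, and filter choice are illustrative; kFilterBilinear is one of the FilterMode values declared in libyuv/scale.h.

#include <stdint.h>
#include <stdlib.h>

#include "libyuv/scale.h"     /* FilterMode */
#include "libyuv/scale_rgb.h" /* RGBScale */

int main(void) {
  /* Hypothetical dimensions; RGB24 rows are width * 3 bytes wide. */
  enum { kSrcW = 640, kSrcH = 480, kDstW = 320, kDstH = 240 };
  uint8_t* src = (uint8_t*)calloc((size_t)kSrcW * 3 * kSrcH, 1);
  uint8_t* dst = (uint8_t*)malloc((size_t)kDstW * 3 * kDstH);
  int r = 1;
  if (src && dst) {
    r = RGBScale(src, kSrcW * 3, kSrcW, kSrcH,
                 dst, kDstW * 3, kDstW, kDstH, kFilterBilinear);
  }
  free(src);
  free(dst);
  return r; /* 0 on success, non-zero on failure. */
}

Note that RGBScale allocates both ARGB intermediates itself, so it can return non-zero on allocation failure even before any conversion runs.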
1161
TMessagesProj/jni/third_party/libyuv/source/scale_uv.cc
vendored
Normal file
File diff suppressed because it is too large
1392
TMessagesProj/jni/third_party/libyuv/source/scale_win.cc
vendored
Normal file
File diff suppressed because it is too large
62
TMessagesProj/jni/third_party/libyuv/source/video_common.cc
vendored
Normal file
@@ -0,0 +1,62 @@
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/video_common.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

struct FourCCAliasEntry {
  uint32_t alias;
  uint32_t canonical;
};

#define NUM_ALIASES 18
static const struct FourCCAliasEntry kFourCCAliases[NUM_ALIASES] = {
    {FOURCC_IYUV, FOURCC_I420},
    {FOURCC_YU12, FOURCC_I420},
    {FOURCC_YU16, FOURCC_I422},
    {FOURCC_YU24, FOURCC_I444},
    {FOURCC_YUYV, FOURCC_YUY2},
    {FOURCC_YUVS, FOURCC_YUY2},  // kCMPixelFormat_422YpCbCr8_yuvs
    {FOURCC_HDYC, FOURCC_UYVY},
    {FOURCC_2VUY, FOURCC_UYVY},  // kCMPixelFormat_422YpCbCr8
    {FOURCC_JPEG, FOURCC_MJPG},  // Note: JPEG has DHT while MJPG does not.
    {FOURCC_DMB1, FOURCC_MJPG},
    {FOURCC_BA81, FOURCC_BGGR},  // deprecated.
    {FOURCC_RGB3, FOURCC_RAW},
    {FOURCC_BGR3, FOURCC_24BG},
    {FOURCC_CM32, FOURCC_BGRA},  // kCMPixelFormat_32ARGB
    {FOURCC_CM24, FOURCC_RAW},   // kCMPixelFormat_24RGB
    {FOURCC_L555, FOURCC_RGBO},  // kCMPixelFormat_16LE555
    {FOURCC_L565, FOURCC_RGBP},  // kCMPixelFormat_16LE565
    {FOURCC_5551, FOURCC_RGBO},  // kCMPixelFormat_16LE5551
};
// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
// {FOURCC_BGRA, FOURCC_ARGB},  // kCMPixelFormat_32BGRA

LIBYUV_API
uint32_t CanonicalFourCC(uint32_t fourcc) {
  int i;
  for (i = 0; i < NUM_ALIASES; ++i) {
    if (kFourCCAliases[i].alias == fourcc) {
      return kFourCCAliases[i].canonical;
    }
  }
  // Not an alias, so return it as-is.
  return fourcc;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
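A brief usage sketch of CanonicalFourCC follows; the FOURCC_* constants come from libyuv/video_common.h, and the program itself is illustrative.

#include <stdint.h>

#include "libyuv/video_common.h"

int main(void) {
  /* An aliased code maps to its canonical equivalent... */
  uint32_t a = CanonicalFourCC(FOURCC_YUYV); /* yields FOURCC_YUY2 */
  /* ...while a non-alias is returned unchanged. */
  uint32_t b = CanonicalFourCC(FOURCC_I420); /* yields FOURCC_I420 */
  return (a == FOURCC_YUY2 && b == FOURCC_I420) ? 0 : 1;
}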