mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-04-29 14:59:26 -04:00
Merge 0ac296d79a
into e6a64aadda
This commit is contained in:
commit
0c15e3e5fc
23 changed files with 765 additions and 442 deletions
7
.github/workflows/build.yml
vendored
7
.github/workflows/build.yml
vendored
|
@ -177,6 +177,9 @@ jobs:
|
|||
|
||||
build-macos:
|
||||
runs-on: macos-14
|
||||
strategy:
|
||||
matrix:
|
||||
arch: [x86_64, arm64]
|
||||
steps:
|
||||
- name: "Checkout repo"
|
||||
uses: actions/checkout@v4
|
||||
|
@ -236,7 +239,7 @@ jobs:
|
|||
cd build
|
||||
cmake .. ${{ env.BUILD_FLAGS }} \
|
||||
-DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} \
|
||||
-DCMAKE_OSX_ARCHITECTURES=x86_64 \
|
||||
-DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} \
|
||||
-DMACOS_BUNDLE=ON \
|
||||
-G Ninja
|
||||
|
||||
|
@ -259,5 +262,5 @@ jobs:
|
|||
- name: Upload artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cemu-bin-macos-x64
|
||||
name: cemu-bin-macos-${{ matrix.arch }}
|
||||
path: ./bin/Cemu.dmg
|
||||
|
|
|
@ -429,8 +429,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
|
|||
rev64 v7.4h, v2.4h
|
||||
ld1 {v3.2s}, [x10]
|
||||
sub x5, x3, #8
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGE
|
||||
ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGEOFF]
|
||||
#else
|
||||
adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1
|
||||
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1]
|
||||
#endif
|
||||
usubl v10.8h, v5.8b, v1.8b
|
||||
ld1 {v8.8b, v9.8b}, [x12] // Load multiplication factors 1 to 8 into D3
|
||||
mov v8.d[1], v9.d[0]
|
||||
|
@ -484,10 +489,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
|
|||
zip1 v1.8h, v0.8h, v2.8h
|
||||
zip2 v2.8h, v0.8h, v2.8h
|
||||
mov v0.16b, v1.16b
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGE
|
||||
ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGEOFF]
|
||||
#else
|
||||
adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2
|
||||
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2]
|
||||
|
||||
#endif
|
||||
ld1 {v8.2s, v9.2s}, [x12]
|
||||
mov v8.d[1], v9.d[0]
|
||||
mov v10.16b, v8.16b
|
||||
|
|
|
@ -431,10 +431,13 @@ ih264_intra_pred_luma_16x16_mode_plane_av8:
|
|||
mov x10, x1 //top_left
|
||||
mov x4, #-1
|
||||
ld1 {v2.2s}, [x1], x8
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
adrp x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGE
|
||||
ldr x7, [x7, #_ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGEOFF]
|
||||
#else
|
||||
adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs
|
||||
ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs]
|
||||
|
||||
#endif
|
||||
ld1 {v0.2s}, [x1]
|
||||
rev64 v2.8b, v2.8b
|
||||
ld1 {v6.2s, v7.2s}, [x7]
|
||||
|
|
|
@ -1030,8 +1030,13 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8:
|
|||
ext v4.16b, v2.16b , v2.16b , #1
|
||||
mov v5.d[0], v4.d[1]
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
adrp x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGE
|
||||
ldr x12, [x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGEOFF]
|
||||
#else
|
||||
adrp x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u
|
||||
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u]
|
||||
#endif
|
||||
uaddl v20.8h, v0.8b, v2.8b
|
||||
uaddl v22.8h, v1.8b, v3.8b
|
||||
uaddl v24.8h, v2.8b, v4.8b
|
||||
|
|
|
@ -142,14 +142,22 @@ ih264_weighted_bi_pred_luma_av8:
|
|||
sxtw x4, w4
|
||||
sxtw x5, w5
|
||||
stp x19, x20, [sp, #-16]!
|
||||
#ifndef __APPLE__
|
||||
ldr w8, [sp, #80] //Load wt2 in w8
|
||||
ldr w9, [sp, #88] //Load ofst1 in w9
|
||||
add w6, w6, #1 //w6 = log_WD + 1
|
||||
neg w10, w6 //w10 = -(log_WD + 1)
|
||||
dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit)
|
||||
ldr w10, [sp, #96] //Load ofst2 in w10
|
||||
ldr w11, [sp, #104] //Load ht in w11
|
||||
ldr w12, [sp, #112] //Load wd in w12
|
||||
#else
|
||||
ldr w8, [sp, #80] //Load wd in w8
|
||||
ldr w9, [sp, #84] //Load ht in w9
|
||||
ldr w10, [sp, #88] //Load offst2 in w10
|
||||
ldr w11, [sp, #92] //Load offst1 in w11
|
||||
ldr w12, [sp, #96] //Load offst1 in w12
|
||||
#endif
|
||||
add w6, w6, #1 //w6 = log_WD + 1
|
||||
neg w10, w6 //w10 = -(log_WD + 1)
|
||||
dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit)
|
||||
add w9, w9, #1 //w9 = ofst1 + 1
|
||||
add w9, w9, w10 //w9 = ofst1 + ofst2 + 1
|
||||
mov v2.s[0], w7
|
||||
|
@ -424,17 +432,24 @@ ih264_weighted_bi_pred_chroma_av8:
|
|||
sxtw x5, w5
|
||||
stp x19, x20, [sp, #-16]!
|
||||
|
||||
|
||||
#ifndef __APPLE__
|
||||
ldr w8, [sp, #80] //Load wt2 in w8
|
||||
ldr w9, [sp, #88] //Load ofst1 in w9
|
||||
ldr w10, [sp, #96] //Load ofst2 in w10
|
||||
ldr w11, [sp, #104] //Load ht in w11
|
||||
ldr w12, [sp, #112] //Load wd in w12
|
||||
#else
|
||||
ldr w8, [sp, #80] //Load wd in w8
|
||||
ldr w9, [sp, #84] //Load ht in w9
|
||||
ldr w10, [sp, #88] //Load offst2 in w10
|
||||
ldr w11, [sp, #92] //Load offst1 in w11
|
||||
ldr w12, [sp, #96] //Load offst1 in w12
|
||||
#endif
|
||||
dup v4.4s, w8 //Q2 = (wt2_u, wt2_v) (32-bit)
|
||||
dup v2.4s, w7 //Q1 = (wt1_u, wt1_v) (32-bit)
|
||||
add w6, w6, #1 //w6 = log_WD + 1
|
||||
ldr w9, [sp, #88] //Load ofst1 in w9
|
||||
ldr w10, [sp, #96] //Load ofst2 in w10
|
||||
neg w20, w6 //w20 = -(log_WD + 1)
|
||||
dup v0.8h, w20 //Q0 = -(log_WD + 1) (16-bit)
|
||||
ldr w11, [sp, #104] //Load ht in x11
|
||||
ldr w12, [sp, #112] //Load wd in x12
|
||||
dup v20.8h, w9 //0ffset1
|
||||
dup v21.8h, w10 //0ffset2
|
||||
srhadd v6.8b, v20.8b, v21.8b
|
||||
|
|
|
@ -41,19 +41,31 @@
|
|||
/* Extern Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
typedef void ih264_deblk_edge_bslt4_ft(UWORD8 *pu1_src,
|
||||
typedef void _ih264_deblk_edge_bslt4_ft(UWORD8 *pu1_src,
|
||||
WORD32 src_strd,
|
||||
WORD32 alpha,
|
||||
WORD32 beta,
|
||||
UWORD32 u4_bs,
|
||||
const UWORD8 *pu1_cliptab );
|
||||
|
||||
typedef void ih264_deblk_edge_bs4_ft(UWORD8 *pu1_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_deblk_edge_bslt4_ft(arg) _ih264_deblk_edge_bslt4_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_deblk_edge_bslt4_ft(arg) _ih264_deblk_edge_bslt4_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_deblk_edge_bs4_ft(UWORD8 *pu1_src,
|
||||
WORD32 src_strd,
|
||||
WORD32 alpha,
|
||||
WORD32 beta );
|
||||
|
||||
typedef void ih264_deblk_chroma_edge_bslt4_ft(UWORD8 *pu1_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_deblk_edge_bs4_ft(arg) _ih264_deblk_edge_bs4_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_deblk_edge_bs4_ft(arg) _ih264_deblk_edge_bs4_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_deblk_chroma_edge_bslt4_ft(UWORD8 *pu1_src,
|
||||
WORD32 src_strd,
|
||||
WORD32 alpha_cb,
|
||||
WORD32 beta_cb,
|
||||
|
@ -63,133 +75,143 @@ typedef void ih264_deblk_chroma_edge_bslt4_ft(UWORD8 *pu1_src,
|
|||
const UWORD8 *pu1_cliptab_cb,
|
||||
const UWORD8 *pu1_cliptab_cr);
|
||||
|
||||
typedef void ih264_deblk_chroma_edge_bs4_ft(UWORD8 *pu1_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_deblk_chroma_edge_bslt4_ft(arg) _ih264_deblk_chroma_edge_bslt4_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_deblk_chroma_edge_bslt4_ft(arg) _ih264_deblk_chroma_edge_bslt4_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_deblk_chroma_edge_bs4_ft(UWORD8 *pu1_src,
|
||||
WORD32 src_strd,
|
||||
WORD32 alpha_cb,
|
||||
WORD32 beta_cb,
|
||||
WORD32 alpha_cr,
|
||||
WORD32 beta_cr);
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_deblk_chroma_edge_bs4_ft(arg) _ih264_deblk_chroma_edge_bs4_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_deblk_chroma_edge_bs4_ft(arg) _ih264_deblk_chroma_edge_bs4_ft arg;
|
||||
#endif
|
||||
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_horz_bs4);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_mbaff);
|
||||
|
||||
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff;
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_bp);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_bp);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_bp);
|
||||
|
||||
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp;
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_horz_bslt4);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_mbaff);
|
||||
|
||||
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff;
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_bp);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_bp);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_bp);
|
||||
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_mbaff);
|
||||
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp;
|
||||
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff;
|
||||
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff;
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_mbaff);
|
||||
|
||||
|
||||
/*A9*/
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_a9;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_a9;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_a9;
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_horz_bs4_a9);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_a9);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_mbaff_a9);
|
||||
|
||||
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_a9;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_a9;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_a9;
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_bp_a9);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_bp_a9);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_bp_a9);
|
||||
|
||||
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_a9;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_a9;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_a9;
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_horz_bslt4_a9);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_a9);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_mbaff_a9);
|
||||
|
||||
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_a9;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_a9;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9;
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_bp_a9);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_bp_a9);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9);
|
||||
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_a9;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_a9;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_a9;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_a9;
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_a9);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_a9);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_a9);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_mbaff_a9);
|
||||
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_a9;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_a9;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_a9;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_a9;
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_a9);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_a9);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_a9);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_mbaff_a9);
|
||||
|
||||
/*AV8*/
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_av8;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_av8;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_av8;
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_horz_bs4_av8);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_av8);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_mbaff_av8);
|
||||
|
||||
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_av8;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_av8;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_av8;
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_bp_av8);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_bp_av8);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_bp_av8);
|
||||
|
||||
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_av8;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_av8;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_av8;
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_horz_bslt4_av8);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_av8);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_mbaff_av8);
|
||||
|
||||
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_av8;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_av8;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_av8;
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_bp_av8);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_bp_av8);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_bp_av8);
|
||||
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_av8;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_av8;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_av8;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_av8;
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_av8);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_av8);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_av8);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_mbaff_av8);
|
||||
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_av8;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_av8;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_av8;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_av8;
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_av8);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_av8);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_av8);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_mbaff_av8);
|
||||
|
||||
/*SSE3*/
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_ssse3;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_ssse3;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_ssse3;
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_horz_bs4_ssse3);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_ssse3);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_mbaff_ssse3);
|
||||
|
||||
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_ssse3;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_ssse3;
|
||||
ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_ssse3;
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_bp_ssse3);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_bp_ssse3);
|
||||
ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_bp_ssse3);
|
||||
|
||||
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_ssse3;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_ssse3;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_ssse3;
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_horz_bslt4_ssse3);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_ssse3);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_mbaff_ssse3);
|
||||
|
||||
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_ssse3;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_ssse3;
|
||||
ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_ssse3;
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_bp_ssse3);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_bp_ssse3);
|
||||
ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_bp_ssse3);
|
||||
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_ssse3;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_ssse3;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_ssse3;
|
||||
ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_ssse3;
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_ssse3);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_ssse3);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_ssse3);
|
||||
ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_mbaff_ssse3);
|
||||
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_ssse3;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_ssse3;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_ssse3;
|
||||
ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_ssse3;
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_ssse3);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_ssse3);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_ssse3);
|
||||
ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_mbaff_ssse3);
|
||||
|
||||
#endif /* IH264_DEBLK_H_ */
|
||||
|
|
|
@ -100,7 +100,7 @@ extern const WORD32 ih264_g_six_tap[3];/* coefficients for 6 tap filtering*/
|
|||
/* Extern Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
typedef void ih264_inter_pred_luma_ft(UWORD8 *pu1_src,
|
||||
typedef void _ih264_inter_pred_luma_ft(UWORD8 *pu1_src,
|
||||
UWORD8 *pu1_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 dst_strd,
|
||||
|
@ -109,14 +109,26 @@ typedef void ih264_inter_pred_luma_ft(UWORD8 *pu1_src,
|
|||
UWORD8* pu1_tmp,
|
||||
WORD32 dydx);
|
||||
|
||||
typedef void ih264_interleave_copy_ft(UWORD8 *pu1_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_inter_pred_luma_ft(arg) _ih264_inter_pred_luma_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_inter_pred_luma_ft(arg) _ih264_inter_pred_luma_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_interleave_copy_ft(UWORD8 *pu1_src,
|
||||
UWORD8 *pu1_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 dst_strd,
|
||||
WORD32 ht,
|
||||
WORD32 wd);
|
||||
|
||||
typedef void ih264_inter_pred_luma_bilinear_ft(UWORD8 *pu1_src1,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_interleave_copy_ft(arg) _ih264_interleave_copy_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_interleave_copy_ft(arg) _ih264_interleave_copy_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_inter_pred_luma_bilinear_ft(UWORD8 *pu1_src1,
|
||||
UWORD8 *pu1_src2,
|
||||
UWORD8 *pu1_dst,
|
||||
WORD32 src_strd1,
|
||||
|
@ -125,7 +137,13 @@ typedef void ih264_inter_pred_luma_bilinear_ft(UWORD8 *pu1_src1,
|
|||
WORD32 height,
|
||||
WORD32 width);
|
||||
|
||||
typedef void ih264_inter_pred_chroma_ft(UWORD8 *pu1_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_inter_pred_luma_bilinear_ft(arg) _ih264_inter_pred_luma_bilinear_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_inter_pred_luma_bilinear_ft(arg) _ih264_inter_pred_luma_bilinear_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_inter_pred_chroma_ft(UWORD8 *pu1_src,
|
||||
UWORD8 *pu1_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 dst_strd,
|
||||
|
@ -134,107 +152,113 @@ typedef void ih264_inter_pred_chroma_ft(UWORD8 *pu1_src,
|
|||
WORD32 ht,
|
||||
WORD32 wd);
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_inter_pred_chroma_ft(arg) _ih264_inter_pred_chroma_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_inter_pred_chroma_ft(arg) _ih264_inter_pred_chroma_ft arg;
|
||||
#endif
|
||||
|
||||
/* No NEON Declarations */
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_copy);
|
||||
|
||||
ih264_interleave_copy_ft ih264_interleave_copy;
|
||||
ih264_interleave_copy_ft(ih264_interleave_copy);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_hpel);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_qpel);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_qpel);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_hpel);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_qpel);
|
||||
|
||||
ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear;
|
||||
ih264_inter_pred_luma_bilinear_ft(ih264_inter_pred_luma_bilinear);
|
||||
|
||||
ih264_inter_pred_chroma_ft ih264_inter_pred_chroma;
|
||||
ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma);
|
||||
|
||||
/* A9 NEON Declarations */
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_a9q;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_copy_a9q);
|
||||
|
||||
ih264_interleave_copy_ft ih264_interleave_copy_a9;
|
||||
ih264_interleave_copy_ft(ih264_interleave_copy_a9);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_a9q;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_a9q);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_a9q;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_a9q);
|
||||
|
||||
ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear_a9q;
|
||||
ih264_inter_pred_luma_bilinear_ft(ih264_inter_pred_luma_bilinear_a9q);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_a9q;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_a9q);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_a9q;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_qpel_a9q);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q);
|
||||
|
||||
ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_a9q;
|
||||
ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma_a9q);
|
||||
|
||||
/* AV8 NEON Declarations */
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_av8;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_copy_av8);
|
||||
|
||||
ih264_interleave_copy_ft ih264_interleave_copy_av8;
|
||||
ih264_interleave_copy_ft(ih264_interleave_copy_av8);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_av8;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_av8);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_av8;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_av8);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_av8;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_hpel_av8);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_av8;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_av8);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_av8;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_qpel_av8);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_av8;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_qpel_av8);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_av8;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_hpel_av8);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_av8;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_qpel_av8);
|
||||
|
||||
ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_av8;
|
||||
ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma_av8);
|
||||
|
||||
ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_dx_zero_av8;
|
||||
ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma_dx_zero_av8);
|
||||
|
||||
ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_dy_zero_av8;
|
||||
ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma_dy_zero_av8);
|
||||
|
||||
|
||||
/* SSSE3 Intrinsic Declarations */
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_ssse3;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_copy_ssse3);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_ssse3;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_ssse3);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_ssse3;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_ssse3);
|
||||
|
||||
ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear_ssse3;
|
||||
ih264_inter_pred_luma_bilinear_ft(ih264_inter_pred_luma_bilinear_ssse3);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_ssse3;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_ssse3);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_ssse3;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_qpel_ssse3);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3);
|
||||
|
||||
ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3;
|
||||
ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3);
|
||||
|
||||
ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_ssse3;
|
||||
ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma_ssse3);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -61,271 +61,284 @@ extern const WORD8 ih264_gai1_intrapred_luma_8x8_horz_u[];
|
|||
/*****************************************************************************/
|
||||
|
||||
|
||||
typedef void ih264_intra_pred_ref_filtering_ft(UWORD8 *pu1_left,
|
||||
typedef void _ih264_intra_pred_ref_filtering_ft(UWORD8 *pu1_left,
|
||||
UWORD8 *pu1_topleft,
|
||||
UWORD8 *pu1_top,
|
||||
UWORD8 *pu1_dst,
|
||||
WORD32 left_strd,
|
||||
WORD32 ngbr_avail);
|
||||
|
||||
typedef void ih264_intra_pred_luma_ft(UWORD8 *pu1_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_intra_pred_ref_filtering_ft(arg) _ih264_intra_pred_ref_filtering_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_intra_pred_ref_filtering_ft(arg) _ih264_intra_pred_ref_filtering_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_intra_pred_luma_ft(UWORD8 *pu1_src,
|
||||
UWORD8 *pu1_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 dst_strd,
|
||||
WORD32 ngbr_avail);
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_intra_pred_luma_ft(arg) _ih264_intra_pred_luma_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_intra_pred_luma_ft(arg) _ih264_intra_pred_luma_ft arg;
|
||||
#endif
|
||||
|
||||
/* No Neon Definitions */
|
||||
|
||||
/* Luma 4x4 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_dc);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dl);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dr);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_r);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_d);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_l);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_u);
|
||||
|
||||
/* Luma 8x8 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_dc);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dl);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dr);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_r);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_d);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_l);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_u);
|
||||
|
||||
/* Luma 16x16 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_vert);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_horz);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_dc);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_plane);
|
||||
|
||||
/* Chroma 8x8 Intra pred filters */
|
||||
|
||||
typedef ih264_intra_pred_luma_ft ih264_intra_pred_chroma_ft;
|
||||
typedef _ih264_intra_pred_luma_ft _ih264_intra_pred_chroma_ft;
|
||||
#define ih264_intra_pred_chroma_ft(arg) ih264_intra_pred_luma_ft(arg);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_dc);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_horz);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_vert);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_plane);
|
||||
|
||||
|
||||
ih264_intra_pred_ref_filtering_ft ih264_intra_pred_luma_8x8_mode_ref_filtering;
|
||||
ih264_intra_pred_ref_filtering_ft(ih264_intra_pred_luma_8x8_mode_ref_filtering);
|
||||
|
||||
/* A9 Definition */
|
||||
|
||||
/* Luma 4x4 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_dc_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dl_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dr_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_r_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_d_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_l_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_u_a9q);
|
||||
|
||||
/* Luma 8x8 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_ref_filtering_ft ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q;
|
||||
ih264_intra_pred_ref_filtering_ft(ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_dc_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dl_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dr_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_r_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_d_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_l_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_u_a9q);
|
||||
|
||||
/* Luma 16x16 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_vert_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_horz_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_dc_a9q);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_a9q;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_plane_a9q);
|
||||
|
||||
/* Chroma 8x8 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_a9q;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_dc_a9q);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_a9q;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_horz_a9q);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_a9q;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_vert_a9q);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_a9q;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_plane_a9q);
|
||||
|
||||
/* X86 Intrinsic Definitions */
|
||||
|
||||
/* Luma 4x4 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_dc_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_r_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_d_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_l_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_u_ssse3);
|
||||
|
||||
/* Luma 8x8 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_dc_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_r_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_d_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_l_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_u_ssse3);
|
||||
|
||||
/* Luma 16x16 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_vert_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_horz_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_dc_ssse3);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_ssse3;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_plane_ssse3);
|
||||
|
||||
/* Chroma 8x8 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_ssse3;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_dc_ssse3);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_ssse3;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_horz_ssse3);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_ssse3;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_vert_ssse3);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_ssse3;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_plane_ssse3);
|
||||
|
||||
/* AV8 Definition */
|
||||
|
||||
/* Luma 4x4 Intra pred filters */
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_dc_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dl_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dr_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_r_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_d_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_l_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_u_av8);
|
||||
|
||||
/* Luma 8x8 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_dc_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dl_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dr_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_r_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_d_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_l_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_u_av8);
|
||||
|
||||
/* Luma 16x16 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_vert_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_horz_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_dc_av8);
|
||||
|
||||
ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_av8;
|
||||
ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_plane_av8);
|
||||
|
||||
/* Chroma 8x8 Intra pred filters */
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_av8;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_dc_av8);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_av8;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_horz_av8);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_av8;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_vert_av8);
|
||||
|
||||
ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_av8;
|
||||
ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_plane_av8);
|
||||
|
||||
#endif /* IH264_INTRA_PRED_FILTERS_H_ */
|
||||
|
|
49
dependencies/ih264d/common/ih264_padding.h
vendored
49
dependencies/ih264d/common/ih264_padding.h
vendored
|
@ -2,7 +2,7 @@
|
|||
*
|
||||
* Copyright (C) 2015 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"));
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
|
@ -41,34 +41,39 @@
|
|||
/* Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
typedef void ih264_pad(UWORD8 *, WORD32, WORD32, WORD32);
|
||||
typedef void _ih264_pad(UWORD8 *, WORD32, WORD32, WORD32);
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_pad(arg) _ih264_pad arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_pad(arg) _ih264_pad arg;
|
||||
#endif
|
||||
|
||||
/* C function declarations */
|
||||
ih264_pad ih264_pad_top;
|
||||
ih264_pad ih264_pad_bottom;
|
||||
ih264_pad ih264_pad_left_luma;
|
||||
ih264_pad ih264_pad_left_chroma;
|
||||
ih264_pad ih264_pad_right_luma;
|
||||
ih264_pad ih264_pad_right_chroma;
|
||||
ih264_pad(ih264_pad_top);
|
||||
ih264_pad(ih264_pad_bottom);
|
||||
ih264_pad(ih264_pad_left_luma);
|
||||
ih264_pad(ih264_pad_left_chroma);
|
||||
ih264_pad(ih264_pad_right_luma);
|
||||
ih264_pad(ih264_pad_right_chroma);
|
||||
|
||||
/* A9 Q function declarations */
|
||||
ih264_pad ih264_pad_top_a9q;
|
||||
ih264_pad ih264_pad_left_luma_a9q;
|
||||
ih264_pad ih264_pad_left_chroma_a9q;
|
||||
ih264_pad ih264_pad_right_luma_a9q;
|
||||
ih264_pad ih264_pad_right_chroma_a9q;
|
||||
ih264_pad(ih264_pad_top_a9q);
|
||||
ih264_pad(ih264_pad_left_luma_a9q);
|
||||
ih264_pad(ih264_pad_left_chroma_a9q);
|
||||
ih264_pad(ih264_pad_right_luma_a9q);
|
||||
ih264_pad(ih264_pad_right_chroma_a9q);
|
||||
|
||||
/* AV8 function declarations */
|
||||
ih264_pad ih264_pad_top_av8;
|
||||
ih264_pad ih264_pad_left_luma_av8;
|
||||
ih264_pad ih264_pad_left_chroma_av8;
|
||||
ih264_pad ih264_pad_right_luma_av8;
|
||||
ih264_pad ih264_pad_right_chroma_av8;
|
||||
ih264_pad(ih264_pad_top_av8);
|
||||
ih264_pad(ih264_pad_left_luma_av8);
|
||||
ih264_pad(ih264_pad_left_chroma_av8);
|
||||
ih264_pad(ih264_pad_right_luma_av8);
|
||||
ih264_pad(ih264_pad_right_chroma_av8);
|
||||
|
||||
|
||||
ih264_pad ih264_pad_left_luma_ssse3;
|
||||
ih264_pad ih264_pad_left_chroma_ssse3;
|
||||
ih264_pad ih264_pad_right_luma_ssse3;
|
||||
ih264_pad ih264_pad_right_chroma_ssse3;
|
||||
ih264_pad(ih264_pad_left_luma_ssse3);
|
||||
ih264_pad(ih264_pad_left_chroma_ssse3);
|
||||
ih264_pad(ih264_pad_right_luma_ssse3);
|
||||
ih264_pad(ih264_pad_right_chroma_ssse3);
|
||||
|
||||
#endif /*_IH264_PADDING_H_*/
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
/*****************************************************************************/
|
||||
|
||||
|
||||
typedef void ih264_resi_trans_dctrans_quant_ft(UWORD8*pu1_src,
|
||||
typedef void _ih264_resi_trans_dctrans_quant_ft(UWORD8*pu1_src,
|
||||
UWORD8 *pu1_pred,
|
||||
WORD16 *pi2_out,
|
||||
WORD32 src_strd,
|
||||
|
@ -53,7 +53,13 @@ typedef void ih264_resi_trans_dctrans_quant_ft(UWORD8*pu1_src,
|
|||
UWORD32 u4_round_fact,
|
||||
UWORD8 *pu1_nnz);
|
||||
|
||||
typedef void ih264_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_resi_trans_dctrans_quant_ft(arg) _ih264_resi_trans_dctrans_quant_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_resi_trans_dctrans_quant_ft(arg) _ih264_resi_trans_dctrans_quant_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
||||
UWORD8 *pu1_pred,
|
||||
UWORD8 *pu1_out,
|
||||
WORD32 src_strd,
|
||||
|
@ -65,9 +71,15 @@ typedef void ih264_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
|||
UWORD32 pi4_cntrl,
|
||||
WORD32 *pi4_tmp);
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_idctrans_iquant_itrans_recon_ft(arg) _ih264_pad arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_idctrans_iquant_itrans_recon_ft(arg) _ih264_pad arg;
|
||||
#endif
|
||||
|
||||
|
||||
/*Function prototype declarations*/
|
||||
typedef void ih264_resi_trans_quant_ft(UWORD8*pu1_src,
|
||||
typedef void _ih264_resi_trans_quant_ft(UWORD8*pu1_src,
|
||||
UWORD8 *pu1_pred,
|
||||
WORD16 *pi2_out,
|
||||
WORD32 src_strd,
|
||||
|
@ -79,7 +91,13 @@ typedef void ih264_resi_trans_quant_ft(UWORD8*pu1_src,
|
|||
UWORD8 *pu1_nnz,
|
||||
WORD16 *pi2_alt_dc_addr);
|
||||
|
||||
typedef void ih264_luma_16x16_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_resi_trans_quant_ft(arg) _ih264_resi_trans_quant_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_resi_trans_quant_ft(arg) _ih264_resi_trans_quant_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_luma_16x16_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src,
|
||||
UWORD8 *pu1_pred,
|
||||
WORD16 *pi2_out,
|
||||
WORD32 src_strd,
|
||||
|
@ -92,7 +110,13 @@ typedef void ih264_luma_16x16_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src,
|
|||
UWORD8 *pu1_nnz,
|
||||
UWORD32 u4_dc_flag);
|
||||
|
||||
typedef void ih264_chroma_8x8_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_luma_16x16_resi_trans_dctrans_quant_ft(arg) _ih264_luma_16x16_resi_trans_dctrans_quant_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_luma_16x16_resi_trans_dctrans_quant_ft(arg) _ih264_luma_16x16_resi_trans_dctrans_quant_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_chroma_8x8_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src,
|
||||
UWORD8 *pu1_pred,
|
||||
WORD16 *pi2_out,
|
||||
WORD32 src_strd,
|
||||
|
@ -104,7 +128,13 @@ typedef void ih264_chroma_8x8_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src,
|
|||
UWORD32 u4_round_factor,
|
||||
UWORD8 *pu1_nnz);
|
||||
|
||||
typedef void ih264_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_chroma_8x8_resi_trans_dctrans_quant_ft(arg) _ih264_chroma_8x8_resi_trans_dctrans_quant_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_chroma_8x8_resi_trans_dctrans_quant_ft(arg) _ih264_chroma_8x8_resi_trans_dctrans_quant_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
||||
UWORD8 *pu1_pred,
|
||||
UWORD8 *pu1_out,
|
||||
WORD32 pred_strd,
|
||||
|
@ -116,8 +146,14 @@ typedef void ih264_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
|||
WORD32 iq_start_idx,
|
||||
WORD16 *pi2_dc_ld_addr);
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_iquant_itrans_recon_ft(arg) _ih264_iquant_itrans_recon_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_iquant_itrans_recon_ft(arg) _ih264_iquant_itrans_recon_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void ih264_iquant_itrans_recon_chroma_ft(WORD16 *pi2_src,
|
||||
|
||||
typedef void _ih264_iquant_itrans_recon_chroma_ft(WORD16 *pi2_src,
|
||||
UWORD8 *pu1_pred,
|
||||
UWORD8 *pu1_out,
|
||||
WORD32 pred_strd,
|
||||
|
@ -128,8 +164,14 @@ typedef void ih264_iquant_itrans_recon_chroma_ft(WORD16 *pi2_src,
|
|||
WORD16 *pi2_tmp,
|
||||
WORD16 *pi2_dc_src);
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_iquant_itrans_recon_chroma_ft(arg) _ih264_iquant_itrans_recon_chroma_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_iquant_itrans_recon_chroma_ft(arg) _ih264_iquant_itrans_recon_chroma_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
||||
|
||||
typedef void _ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
||||
UWORD8 *pu1_pred,
|
||||
UWORD8 *pu1_out,
|
||||
WORD32 src_strd,
|
||||
|
@ -142,7 +184,13 @@ typedef void ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
|||
UWORD32 u4_dc_trans_flag,
|
||||
WORD32 *pi4_tmp);
|
||||
|
||||
typedef void ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(arg) _ih264_luma_16x16_idctrans_iquant_itrans_recon_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(arg) _ih264_luma_16x16_idctrans_iquant_itrans_recon_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
||||
UWORD8 *pu1_pred,
|
||||
UWORD8 *pu1_out,
|
||||
WORD32 src_strd,
|
||||
|
@ -154,79 +202,97 @@ typedef void ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
|
|||
UWORD32 pi4_cntrl,
|
||||
WORD32 *pi4_tmp);
|
||||
|
||||
typedef void ih264_ihadamard_scaling_ft(WORD16* pi2_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(arg) _ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(arg) _ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_ihadamard_scaling_ft(WORD16* pi2_src,
|
||||
WORD16* pi2_out,
|
||||
const UWORD16 *pu2_iscal_mat,
|
||||
const UWORD16 *pu2_weigh_mat,
|
||||
UWORD32 u4_qp_div_6,
|
||||
WORD32* pi4_tmp);
|
||||
|
||||
typedef void ih264_hadamard_quant_ft(WORD16 *pi2_src, WORD16 *pi2_dst,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_ihadamard_scaling_ft(arg) _ih264_ihadamard_scaling_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_ihadamard_scaling_ft(arg) _ih264_ihadamard_scaling_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_hadamard_quant_ft(WORD16 *pi2_src, WORD16 *pi2_dst,
|
||||
const UWORD16 *pu2_scale_matrix,
|
||||
const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
|
||||
UWORD32 u4_round_factor,UWORD8 *pu1_nnz);
|
||||
|
||||
ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4;
|
||||
ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4;
|
||||
ih264_resi_trans_quant_ft ih264_resi_trans_quant_8x8;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc;
|
||||
ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4;
|
||||
ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc;
|
||||
ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4;
|
||||
ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv;
|
||||
ih264_hadamard_quant_ft ih264_hadamard_quant_4x4;
|
||||
ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv;
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_hadamard_quant_ft(arg) _ih264_hadamard_quant_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_hadamard_quant_ft(arg) _ih264_hadamard_quant_ft arg;
|
||||
#endif
|
||||
|
||||
ih264_resi_trans_quant_ft(ih264_resi_trans_quant_4x4);
|
||||
ih264_resi_trans_quant_ft(ih264_resi_trans_quant_chroma_4x4);
|
||||
ih264_resi_trans_quant_ft(ih264_resi_trans_quant_8x8);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_dc);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_dc);
|
||||
ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4);
|
||||
ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_dc);
|
||||
ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_4x4);
|
||||
ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_2x2_uv);
|
||||
ih264_hadamard_quant_ft(ih264_hadamard_quant_4x4);
|
||||
ih264_hadamard_quant_ft(ih264_hadamard_quant_2x2_uv);
|
||||
|
||||
/*A9 Declarations*/
|
||||
ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_a9;
|
||||
ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_a9;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_a9;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_a9;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_a9;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_a9;
|
||||
ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_a9;
|
||||
ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_a9;
|
||||
ih264_luma_16x16_resi_trans_dctrans_quant_ft ih264_luma_16x16_resi_trans_dctrans_quant_a9;
|
||||
ih264_chroma_8x8_resi_trans_dctrans_quant_ft ih264_chroma_8x8_resi_trans_dctrans_quant_a9;
|
||||
ih264_luma_16x16_idctrans_iquant_itrans_recon_ft ih264_luma_16x16_idctrans_iquant_itrans_recon_a9;
|
||||
ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft ih264_chroma_8x8_idctrans_iquant_itrans_recon_a9;
|
||||
ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_a9;
|
||||
ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_a9;
|
||||
ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_a9;
|
||||
ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_a9;
|
||||
ih264_resi_trans_quant_ft(ih264_resi_trans_quant_4x4_a9);
|
||||
ih264_resi_trans_quant_ft(ih264_resi_trans_quant_chroma_4x4_a9);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_a9);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_a9);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_dc_a9);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_dc_a9);
|
||||
ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_a9);
|
||||
ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_dc_a9);
|
||||
ih264_luma_16x16_resi_trans_dctrans_quant_ft(ih264_luma_16x16_resi_trans_dctrans_quant_a9);
|
||||
ih264_chroma_8x8_resi_trans_dctrans_quant_ft(ih264_chroma_8x8_resi_trans_dctrans_quant_a9);
|
||||
ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(ih264_luma_16x16_idctrans_iquant_itrans_recon_a9);
|
||||
ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(ih264_chroma_8x8_idctrans_iquant_itrans_recon_a9);
|
||||
ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_4x4_a9);
|
||||
ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_2x2_uv_a9);
|
||||
ih264_hadamard_quant_ft(ih264_hadamard_quant_4x4_a9);
|
||||
ih264_hadamard_quant_ft(ih264_hadamard_quant_2x2_uv_a9);
|
||||
|
||||
/*Av8 Declarations*/
|
||||
ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_av8;
|
||||
ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_av8;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_av8;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_av8;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_av8;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_av8;
|
||||
ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_av8;
|
||||
ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_av8;
|
||||
ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_av8;
|
||||
ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_av8;
|
||||
ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_av8;
|
||||
ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_av8;
|
||||
ih264_resi_trans_quant_ft(ih264_resi_trans_quant_4x4_av8);
|
||||
ih264_resi_trans_quant_ft(ih264_resi_trans_quant_chroma_4x4_av8);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_av8);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_av8);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_dc_av8);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_dc_av8);
|
||||
ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_av8);
|
||||
ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_dc_av8);
|
||||
ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_4x4_av8);
|
||||
ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_2x2_uv_av8);
|
||||
ih264_hadamard_quant_ft(ih264_hadamard_quant_4x4_av8);
|
||||
ih264_hadamard_quant_ft(ih264_hadamard_quant_2x2_uv_av8);
|
||||
|
||||
/*SSSE3 Declarations*/
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_ssse3;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_ssse3;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_ssse3;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_ssse3;
|
||||
ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3;
|
||||
ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_ssse3;
|
||||
ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_ssse3;
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_ssse3);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_ssse3);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_dc_ssse3);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_dc_ssse3);
|
||||
ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3);
|
||||
ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_4x4_ssse3);
|
||||
ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_2x2_uv_ssse3);
|
||||
/*SSSE42 Declarations*/
|
||||
ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_sse42;
|
||||
ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_sse42;
|
||||
ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_sse42;
|
||||
ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_sse42;
|
||||
ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_sse42;
|
||||
ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_sse42;
|
||||
ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_sse42;
|
||||
ih264_resi_trans_quant_ft(ih264_resi_trans_quant_4x4_sse42);
|
||||
ih264_resi_trans_quant_ft(ih264_resi_trans_quant_chroma_4x4_sse42);
|
||||
ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_sse42);
|
||||
ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_sse42);
|
||||
ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_4x4_sse42);
|
||||
ih264_hadamard_quant_ft(ih264_hadamard_quant_4x4_sse42);
|
||||
ih264_hadamard_quant_ft(ih264_hadamard_quant_2x2_uv_sse42);
|
||||
|
||||
#endif /* IH264_TRANS_QUANT_H_ */
|
||||
|
|
72
dependencies/ih264d/common/ih264_weighted_pred.h
vendored
72
dependencies/ih264d/common/ih264_weighted_pred.h
vendored
|
@ -68,7 +68,7 @@
|
|||
/*****************************************************************************/
|
||||
/* Extern Function Declarations */
|
||||
/*****************************************************************************/
|
||||
typedef void ih264_default_weighted_pred_ft(UWORD8 *puc_src1,
|
||||
typedef void _ih264_default_weighted_pred_ft(UWORD8 *puc_src1,
|
||||
UWORD8 *puc_src2,
|
||||
UWORD8 *puc_dst,
|
||||
WORD32 src_strd1,
|
||||
|
@ -77,7 +77,13 @@ typedef void ih264_default_weighted_pred_ft(UWORD8 *puc_src1,
|
|||
WORD32 ht,
|
||||
WORD32 wd);
|
||||
|
||||
typedef void ih264_weighted_pred_ft(UWORD8 *puc_src,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_default_weighted_pred_ft(arg) _ih264_default_weighted_pred_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_default_weighted_pred_ft(arg) _ih264_default_weighted_pred_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_weighted_pred_ft(UWORD8 *puc_src,
|
||||
UWORD8 *puc_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 dst_strd,
|
||||
|
@ -87,7 +93,13 @@ typedef void ih264_weighted_pred_ft(UWORD8 *puc_src,
|
|||
WORD32 ht,
|
||||
WORD32 wd);
|
||||
|
||||
typedef void ih264_weighted_bi_pred_ft(UWORD8 *puc_src1,
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_weighted_pred_ft(arg) _ih264_weighted_pred_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_weighted_pred_ft(arg) _ih264_weighted_pred_ft arg;
|
||||
#endif
|
||||
|
||||
typedef void _ih264_weighted_bi_pred_ft(UWORD8 *puc_src1,
|
||||
UWORD8 *puc_src2,
|
||||
UWORD8 *puc_dst,
|
||||
WORD32 src_strd1,
|
||||
|
@ -101,63 +113,69 @@ typedef void ih264_weighted_bi_pred_ft(UWORD8 *puc_src1,
|
|||
WORD32 ht,
|
||||
WORD32 wd);
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#define ih264_weighted_bi_pred_ft(arg) _ih264_weighted_bi_pred_ft arg __asm__(#arg);
|
||||
#else
|
||||
#define ih264_weighted_bi_pred_ft(arg) _ih264_weighted_bi_pred_ft arg;
|
||||
#endif
|
||||
|
||||
/* No NEON Declarations */
|
||||
|
||||
ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma;
|
||||
ih264_default_weighted_pred_ft(ih264_default_weighted_pred_luma);
|
||||
|
||||
ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma;
|
||||
ih264_default_weighted_pred_ft(ih264_default_weighted_pred_chroma);
|
||||
|
||||
ih264_weighted_pred_ft ih264_weighted_pred_luma;
|
||||
ih264_weighted_pred_ft(ih264_weighted_pred_luma);
|
||||
|
||||
ih264_weighted_pred_ft ih264_weighted_pred_chroma;
|
||||
ih264_weighted_pred_ft(ih264_weighted_pred_chroma);
|
||||
|
||||
ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma;
|
||||
ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_luma);
|
||||
|
||||
ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma;
|
||||
ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_chroma);
|
||||
|
||||
/* A9 NEON Declarations */
|
||||
|
||||
ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_a9q;
|
||||
ih264_default_weighted_pred_ft(ih264_default_weighted_pred_luma_a9q);
|
||||
|
||||
ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_a9q;
|
||||
ih264_default_weighted_pred_ft(ih264_default_weighted_pred_chroma_a9q);
|
||||
|
||||
ih264_weighted_pred_ft ih264_weighted_pred_luma_a9q;
|
||||
ih264_weighted_pred_ft(ih264_weighted_pred_luma_a9q);
|
||||
|
||||
ih264_weighted_pred_ft ih264_weighted_pred_chroma_a9q;
|
||||
ih264_weighted_pred_ft(ih264_weighted_pred_chroma_a9q);
|
||||
|
||||
ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_a9q;
|
||||
ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_luma_a9q);
|
||||
|
||||
ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_a9q;
|
||||
ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_chroma_a9q);
|
||||
|
||||
|
||||
/* AV8 NEON Declarations */
|
||||
|
||||
ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_av8;
|
||||
ih264_default_weighted_pred_ft(ih264_default_weighted_pred_luma_av8);
|
||||
|
||||
ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_av8;
|
||||
ih264_default_weighted_pred_ft(ih264_default_weighted_pred_chroma_av8);
|
||||
|
||||
ih264_weighted_pred_ft ih264_weighted_pred_luma_av8;
|
||||
ih264_weighted_pred_ft(ih264_weighted_pred_luma_av8);
|
||||
|
||||
ih264_weighted_pred_ft ih264_weighted_pred_chroma_av8;
|
||||
ih264_weighted_pred_ft(ih264_weighted_pred_chroma_av8);
|
||||
|
||||
ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_av8;
|
||||
ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_luma_av8);
|
||||
|
||||
ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_av8;
|
||||
ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_chroma_av8);
|
||||
|
||||
|
||||
/* SSE42 Intrinsic Declarations */
|
||||
|
||||
ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_sse42;
|
||||
ih264_default_weighted_pred_ft(ih264_default_weighted_pred_luma_sse42);
|
||||
|
||||
ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_sse42;
|
||||
ih264_default_weighted_pred_ft(ih264_default_weighted_pred_chroma_sse42);
|
||||
|
||||
ih264_weighted_pred_ft ih264_weighted_pred_luma_sse42;
|
||||
ih264_weighted_pred_ft(ih264_weighted_pred_luma_sse42);
|
||||
|
||||
ih264_weighted_pred_ft ih264_weighted_pred_chroma_sse42;
|
||||
ih264_weighted_pred_ft(ih264_weighted_pred_chroma_sse42);
|
||||
|
||||
ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_sse42;
|
||||
ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_luma_sse42);
|
||||
|
||||
ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_sse42;
|
||||
ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_chroma_sse42);
|
||||
|
||||
#endif /* IH264_WEIGHTED_PRED_H_ */
|
||||
|
||||
|
|
76
dependencies/ih264d/decoder/ih264d_structs.h
vendored
76
dependencies/ih264d/decoder/ih264d_structs.h
vendored
|
@ -1365,112 +1365,112 @@ typedef struct _DecStruct
|
|||
UWORD8 *pu1_mv_bank_buf_base;
|
||||
UWORD8 *pu1_init_dpb_base;
|
||||
|
||||
ih264_default_weighted_pred_ft *pf_default_weighted_pred_luma;
|
||||
_ih264_default_weighted_pred_ft *pf_default_weighted_pred_luma;
|
||||
|
||||
ih264_default_weighted_pred_ft *pf_default_weighted_pred_chroma;
|
||||
_ih264_default_weighted_pred_ft *pf_default_weighted_pred_chroma;
|
||||
|
||||
ih264_weighted_pred_ft *pf_weighted_pred_luma;
|
||||
_ih264_weighted_pred_ft *pf_weighted_pred_luma;
|
||||
|
||||
ih264_weighted_pred_ft *pf_weighted_pred_chroma;
|
||||
_ih264_weighted_pred_ft *pf_weighted_pred_chroma;
|
||||
|
||||
ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_luma;
|
||||
_ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_luma;
|
||||
|
||||
ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_chroma;
|
||||
_ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_chroma;
|
||||
|
||||
ih264_pad *pf_pad_top;
|
||||
ih264_pad *pf_pad_bottom;
|
||||
ih264_pad *pf_pad_left_luma;
|
||||
ih264_pad *pf_pad_left_chroma;
|
||||
ih264_pad *pf_pad_right_luma;
|
||||
ih264_pad *pf_pad_right_chroma;
|
||||
_ih264_pad *pf_pad_top;
|
||||
_ih264_pad *pf_pad_bottom;
|
||||
_ih264_pad *pf_pad_left_luma;
|
||||
_ih264_pad *pf_pad_left_chroma;
|
||||
_ih264_pad *pf_pad_right_luma;
|
||||
_ih264_pad *pf_pad_right_chroma;
|
||||
|
||||
ih264_inter_pred_chroma_ft *pf_inter_pred_chroma;
|
||||
_ih264_inter_pred_chroma_ft *pf_inter_pred_chroma;
|
||||
|
||||
ih264_inter_pred_luma_ft *apf_inter_pred_luma[16];
|
||||
_ih264_inter_pred_luma_ft *apf_inter_pred_luma[16];
|
||||
|
||||
ih264_intra_pred_luma_ft *apf_intra_pred_luma_16x16[4];
|
||||
_ih264_intra_pred_luma_ft *apf_intra_pred_luma_16x16[4];
|
||||
|
||||
ih264_intra_pred_luma_ft *apf_intra_pred_luma_8x8[9];
|
||||
_ih264_intra_pred_luma_ft *apf_intra_pred_luma_8x8[9];
|
||||
|
||||
ih264_intra_pred_luma_ft *apf_intra_pred_luma_4x4[9];
|
||||
_ih264_intra_pred_luma_ft *apf_intra_pred_luma_4x4[9];
|
||||
|
||||
ih264_intra_pred_ref_filtering_ft *pf_intra_pred_ref_filtering;
|
||||
_ih264_intra_pred_ref_filtering_ft *pf_intra_pred_ref_filtering;
|
||||
|
||||
ih264_intra_pred_chroma_ft *apf_intra_pred_chroma[4];
|
||||
_ih264_intra_pred_chroma_ft *apf_intra_pred_chroma[4];
|
||||
|
||||
ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4;
|
||||
_ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4;
|
||||
|
||||
ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4_dc;
|
||||
_ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4_dc;
|
||||
|
||||
ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8;
|
||||
_ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8;
|
||||
|
||||
ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8_dc;
|
||||
_ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8_dc;
|
||||
|
||||
ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4;
|
||||
_ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4;
|
||||
|
||||
ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4_dc;
|
||||
_ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4_dc;
|
||||
|
||||
ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_4x4;
|
||||
_ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_4x4;
|
||||
|
||||
/**
|
||||
* deblock vertical luma edge with blocking strength 4
|
||||
*/
|
||||
ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4;
|
||||
_ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4;
|
||||
|
||||
/**
|
||||
* deblock vertical luma edge with blocking strength less than 4
|
||||
*/
|
||||
ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4;
|
||||
_ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4;
|
||||
|
||||
/**
|
||||
* deblock vertical luma edge with blocking strength 4 for mbaff
|
||||
*/
|
||||
ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4_mbaff;
|
||||
_ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4_mbaff;
|
||||
|
||||
/**
|
||||
* deblock vertical luma edge with blocking strength less than 4 for mbaff
|
||||
*/
|
||||
ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4_mbaff;
|
||||
_ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4_mbaff;
|
||||
|
||||
/**
|
||||
* deblock vertical chroma edge with blocking strength 4
|
||||
*/
|
||||
ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4;
|
||||
_ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4;
|
||||
|
||||
/**
|
||||
* deblock vertical chroma edge with blocking strength less than 4
|
||||
*/
|
||||
ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4;
|
||||
_ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4;
|
||||
|
||||
/**
|
||||
* deblock vertical chroma edge with blocking strength 4 for mbaff
|
||||
*/
|
||||
ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4_mbaff;
|
||||
_ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4_mbaff;
|
||||
|
||||
/**
|
||||
* deblock vertical chroma edge with blocking strength less than 4 for mbaff
|
||||
*/
|
||||
ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4_mbaff;
|
||||
_ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4_mbaff;
|
||||
|
||||
/**
|
||||
* deblock horizontal luma edge with blocking strength 4
|
||||
*/
|
||||
ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4;
|
||||
_ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4;
|
||||
|
||||
/**
|
||||
* deblock horizontal luma edge with blocking strength less than 4
|
||||
*/
|
||||
ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4;
|
||||
_ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4;
|
||||
|
||||
/**
|
||||
* deblock horizontal chroma edge with blocking strength 4
|
||||
*/
|
||||
ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4;
|
||||
_ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4;
|
||||
|
||||
/**
|
||||
* deblock horizontal chroma edge with blocking strength less than 4
|
||||
*/
|
||||
ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4;
|
||||
_ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4;
|
||||
|
||||
|
||||
} dec_struct_t;
|
||||
|
|
|
@ -102,9 +102,9 @@ if (MACOS_BUNDLE)
|
|||
endforeach(folder)
|
||||
|
||||
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/debug/lib/libusb-1.0.0.dylib")
|
||||
set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/debug/lib/libusb-1.0.0.dylib")
|
||||
else()
|
||||
set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/lib/libusb-1.0.0.dylib")
|
||||
set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib/libusb-1.0.0.dylib")
|
||||
endif()
|
||||
|
||||
add_custom_command (TARGET CemuBin POST_BUILD
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#if defined(ARCH_X86_64) && defined(__GNUC__)
|
||||
#include <immintrin.h>
|
||||
#elif defined(__aarch64__)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
struct
|
||||
|
@ -502,6 +504,114 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat
|
|||
indexMax = std::max(indexMax, _maxIndex);
|
||||
indexMin = std::min(indexMin, _minIndex);
|
||||
}
|
||||
#elif defined(__aarch64__)
|
||||
|
||||
void LatteIndices_fastConvertU16_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
||||
{
|
||||
const uint16* indicesU16BE = (const uint16*)indexDataInput;
|
||||
uint16* indexOutput = (uint16*)indexDataOutput;
|
||||
sint32 count8 = count >> 3;
|
||||
sint32 countRemaining = count & 7;
|
||||
|
||||
if (count8)
|
||||
{
|
||||
uint16x8_t mMin = vdupq_n_u16(0xFFFF);
|
||||
uint16x8_t mMax = vdupq_n_u16(0x0000);
|
||||
uint16x8_t mTemp;
|
||||
uint16x8_t* mRawIndices = (uint16x8_t*) indicesU16BE;
|
||||
indicesU16BE += count8 * 8;
|
||||
uint16x8_t* mOutputIndices = (uint16x8_t*) indexOutput;
|
||||
indexOutput += count8 * 8;
|
||||
|
||||
while (count8--)
|
||||
{
|
||||
mTemp = vld1q_u16((uint16*)mRawIndices);
|
||||
mRawIndices++;
|
||||
mTemp = vrev16q_u8(mTemp);
|
||||
mMin = vminq_u16(mMin, mTemp);
|
||||
mMax = vmaxq_u16(mMax, mTemp);
|
||||
vst1q_u16((uint16*)mOutputIndices, mTemp);
|
||||
mOutputIndices++;
|
||||
}
|
||||
|
||||
uint16* mMaxU16 = (uint16*)&mMax;
|
||||
uint16* mMinU16 = (uint16*)&mMin;
|
||||
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
indexMax = std::max(indexMax, (uint32)mMaxU16[i]);
|
||||
indexMin = std::min(indexMin, (uint32)mMinU16[i]);
|
||||
}
|
||||
}
|
||||
// process remaining indices
|
||||
uint32 _minIndex = 0xFFFFFFFF;
|
||||
uint32 _maxIndex = 0;
|
||||
for (sint32 i = countRemaining; (--i) >= 0;)
|
||||
{
|
||||
uint16 idx = _swapEndianU16(*indicesU16BE);
|
||||
*indexOutput = idx;
|
||||
indexOutput++;
|
||||
indicesU16BE++;
|
||||
_maxIndex = std::max(_maxIndex, (uint32)idx);
|
||||
_minIndex = std::min(_minIndex, (uint32)idx);
|
||||
}
|
||||
// update min/max
|
||||
indexMax = std::max(indexMax, _maxIndex);
|
||||
indexMin = std::min(indexMin, _minIndex);
|
||||
}
|
||||
|
||||
void LatteIndices_fastConvertU32_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
||||
{
|
||||
const uint32* indicesU32BE = (const uint32*)indexDataInput;
|
||||
uint32* indexOutput = (uint32*)indexDataOutput;
|
||||
sint32 count8 = count >> 2;
|
||||
sint32 countRemaining = count & 3;
|
||||
|
||||
if (count8)
|
||||
{
|
||||
uint32x4_t mMin = vdupq_n_u32(0xFFFFFFFF);
|
||||
uint32x4_t mMax = vdupq_n_u32(0x00000000);
|
||||
uint32x4_t mTemp;
|
||||
uint32x4_t* mRawIndices = (uint32x4_t*) indicesU32BE;
|
||||
indicesU32BE += count8 * 4;
|
||||
uint32x4_t* mOutputIndices = (uint32x4_t*) indexOutput;
|
||||
indexOutput += count8 * 4;
|
||||
|
||||
while (count8--)
|
||||
{
|
||||
mTemp = vld1q_u32((uint32*)mRawIndices);
|
||||
mRawIndices++;
|
||||
mTemp = vrev32q_u8(mTemp);
|
||||
mMin = vminq_u32(mMin, mTemp);
|
||||
mMax = vmaxq_u32(mMax, mTemp);
|
||||
vst1q_u32((uint32*)mOutputIndices, mTemp);
|
||||
mOutputIndices++;
|
||||
}
|
||||
|
||||
uint32* mMaxU32 = (uint32*)&mMax;
|
||||
uint32* mMinU32 = (uint32*)&mMin;
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
indexMax = std::max(indexMax, mMaxU32[i]);
|
||||
indexMin = std::min(indexMin, mMinU32[i]);
|
||||
}
|
||||
}
|
||||
// process remaining indices
|
||||
uint32 _minIndex = 0xFFFFFFFF;
|
||||
uint32 _maxIndex = 0;
|
||||
for (sint32 i = countRemaining; (--i) >= 0;)
|
||||
{
|
||||
uint32 idx = _swapEndianU32(*indicesU32BE);
|
||||
*indexOutput = idx;
|
||||
indexOutput++;
|
||||
indicesU32BE++;
|
||||
_maxIndex = std::max(_maxIndex, idx);
|
||||
_minIndex = std::min(_minIndex, idx);
|
||||
}
|
||||
// update min/max
|
||||
indexMax = std::max(indexMax, _maxIndex);
|
||||
indexMin = std::min(indexMin, _minIndex);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
|
@ -688,27 +798,31 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
|
|||
{
|
||||
if (indexType == LatteIndexType::U16_BE)
|
||||
{
|
||||
#if defined(ARCH_X86_64)
|
||||
#if defined(ARCH_X86_64)
|
||||
if (g_CPUFeatures.x86.avx2)
|
||||
LatteIndices_fastConvertU16_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else if (g_CPUFeatures.x86.sse4_1 && g_CPUFeatures.x86.ssse3)
|
||||
LatteIndices_fastConvertU16_SSE41(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else
|
||||
LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#else
|
||||
#elif defined(__aarch64__)
|
||||
LatteIndices_fastConvertU16_NEON(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#else
|
||||
LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
else if (indexType == LatteIndexType::U32_BE)
|
||||
{
|
||||
#if defined(ARCH_X86_64)
|
||||
#if defined(ARCH_X86_64)
|
||||
if (g_CPUFeatures.x86.avx2)
|
||||
LatteIndices_fastConvertU32_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else
|
||||
LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#else
|
||||
#elif defined(__aarch64__)
|
||||
LatteIndices_fastConvertU32_NEON(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#else
|
||||
LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
else if (indexType == LatteIndexType::U16_LE)
|
||||
{
|
||||
|
|
|
@ -25,7 +25,11 @@ void nnNfp_update();
|
|||
|
||||
namespace coreinit
|
||||
{
|
||||
#ifdef __arm64__
|
||||
void __OSFiberThreadEntry(uint32, uint32);
|
||||
#else
|
||||
void __OSFiberThreadEntry(void* thread);
|
||||
#endif
|
||||
void __OSAddReadyThreadToRunQueue(OSThread_t* thread);
|
||||
void __OSRemoveThreadFromRunQueues(OSThread_t* thread);
|
||||
};
|
||||
|
@ -49,7 +53,7 @@ namespace coreinit
|
|||
|
||||
struct OSHostThread
|
||||
{
|
||||
OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber(__OSFiberThreadEntry, this, this)
|
||||
OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber((void(*)(void*))__OSFiberThreadEntry, this, this)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -1304,8 +1308,14 @@ namespace coreinit
|
|||
__OSThreadStartTimeslice(hostThread->m_thread, &hostThread->ppcInstance);
|
||||
}
|
||||
|
||||
#ifdef __arm64__
|
||||
void __OSFiberThreadEntry(uint32 _high, uint32 _low)
|
||||
{
|
||||
uint64 _thread = (uint64) _high << 32 | _low;
|
||||
#else
|
||||
void __OSFiberThreadEntry(void* _thread)
|
||||
{
|
||||
#endif
|
||||
OSHostThread* hostThread = (OSHostThread*)_thread;
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
|
|
|
@ -17,28 +17,28 @@ GX2WriteGatherPipeState gx2WriteGatherPipe = { 0 };
|
|||
void gx2WriteGather_submitU32AsBE(uint32 v)
|
||||
{
|
||||
uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance());
|
||||
if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL)
|
||||
if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL)
|
||||
return;
|
||||
*(uint32*)(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) = _swapEndianU32(v);
|
||||
(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) += 4;
|
||||
*(uint32*)(gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load()) = _swapEndianU32(v);
|
||||
*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] += 4;
|
||||
}
|
||||
|
||||
void gx2WriteGather_submitU32AsLE(uint32 v)
|
||||
{
|
||||
uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance());
|
||||
if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL)
|
||||
if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL)
|
||||
return;
|
||||
*(uint32*)(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) = v;
|
||||
(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) += 4;
|
||||
*(uint32*)(gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load()) = v;
|
||||
*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] += 4;
|
||||
}
|
||||
|
||||
void gx2WriteGather_submitU32AsLEArray(uint32* v, uint32 numValues)
|
||||
{
|
||||
uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance());
|
||||
if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL)
|
||||
if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL)
|
||||
return;
|
||||
memcpy_dwords((*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]), v, numValues);
|
||||
(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) += 4 * numValues;
|
||||
memcpy_dwords(gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load(), v, numValues);
|
||||
*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] += 4 * numValues;
|
||||
}
|
||||
|
||||
namespace GX2
|
||||
|
@ -121,7 +121,7 @@ namespace GX2
|
|||
if (sGX2MainCoreIndex == coreIndex)
|
||||
gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] = &gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex];
|
||||
else
|
||||
gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] = NULL;
|
||||
*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] = NULL;
|
||||
// return size of (written) display list
|
||||
return currentWriteSize;
|
||||
}
|
||||
|
@ -217,7 +217,7 @@ namespace GX2
|
|||
cemu_assert_debug(coreIndex == sGX2MainCoreIndex);
|
||||
coreIndex = sGX2MainCoreIndex; // always submit to main queue which is owned by GX2 main core (TCLSubmitToRing does not need this workaround)
|
||||
|
||||
uint32be* cmdStream = (uint32be*)(gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex]);
|
||||
uint32be* cmdStream = (uint32be*)(gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex].load());
|
||||
cmdStream[0] = pm4HeaderType3(IT_INDIRECT_BUFFER_PRIV, 3);
|
||||
cmdStream[1] = memory_virtualToPhysical(MEMPTR<void>(addr).GetMPTR());
|
||||
cmdStream[2] = 0;
|
||||
|
|
|
@ -6,9 +6,9 @@ struct GX2WriteGatherPipeState
|
|||
{
|
||||
uint8* gxRingBuffer;
|
||||
// each core has it's own write gatherer and display list state (writing)
|
||||
uint8* writeGatherPtrGxBuffer[Espresso::CORE_COUNT];
|
||||
uint8** writeGatherPtrWrite[Espresso::CORE_COUNT];
|
||||
uint8* writeGatherPtrDisplayList[Espresso::CORE_COUNT];
|
||||
std::atomic<uint8*> writeGatherPtrGxBuffer[Espresso::CORE_COUNT];
|
||||
std::atomic<uint8*>* writeGatherPtrWrite[Espresso::CORE_COUNT];
|
||||
std::atomic<uint8*> writeGatherPtrDisplayList[Espresso::CORE_COUNT];
|
||||
MPTR displayListStart[Espresso::CORE_COUNT];
|
||||
uint32 displayListMaxSize[Espresso::CORE_COUNT];
|
||||
};
|
||||
|
@ -75,10 +75,10 @@ template <typename ...Targs>
|
|||
inline void gx2WriteGather_submit(Targs... args)
|
||||
{
|
||||
uint32 coreIndex = PPCInterpreter_getCurrentCoreIndex();
|
||||
if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == nullptr)
|
||||
if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == nullptr)
|
||||
return;
|
||||
|
||||
uint32be* writePtr = (uint32be*)(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]);
|
||||
uint32be* writePtr = (uint32be*)gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load();
|
||||
gx2WriteGather_submit_(coreIndex, writePtr, std::forward<Targs>(args)...);
|
||||
}
|
||||
|
||||
|
|
|
@ -310,7 +310,8 @@ inline uint64 __rdtsc()
|
|||
|
||||
inline void _mm_mfence()
|
||||
{
|
||||
|
||||
asm volatile("" ::: "memory");
|
||||
std::atomic_thread_fence(std::memory_order_seq_cst);
|
||||
}
|
||||
|
||||
inline unsigned char _addcarry_u64(unsigned char carry, unsigned long long a, unsigned long long b, unsigned long long *result)
|
||||
|
|
|
@ -74,10 +74,14 @@ CPUMode ActiveSettings::GetCPUMode()
|
|||
|
||||
if (mode == CPUMode::Auto)
|
||||
{
|
||||
#ifdef __aarch64__
|
||||
mode = CPUMode::SinglecoreInterpreter;
|
||||
#else
|
||||
if (GetPhysicalCoreCount() >= 4)
|
||||
mode = CPUMode::MulticoreRecompiler;
|
||||
else
|
||||
mode = CPUMode::SinglecoreRecompiler;
|
||||
#endif
|
||||
}
|
||||
else if (mode == CPUMode::DualcoreRecompiler) // dualcore is disabled now
|
||||
mode = CPUMode::MulticoreRecompiler;
|
||||
|
|
|
@ -139,6 +139,7 @@ enum
|
|||
MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS,
|
||||
|
||||
// debug->logging
|
||||
MAINFRAME_MENU_ID_DEBUG_LOGGING_MESSAGE = 21499,
|
||||
MAINFRAME_MENU_ID_DEBUG_LOGGING0 = 21500,
|
||||
MAINFRAME_MENU_ID_DEBUG_ADVANCED_PPC_INFO = 21599,
|
||||
// debug->dump
|
||||
|
@ -2205,7 +2206,7 @@ void MainWindow::RecreateMenu()
|
|||
debugLoggingMenu->AppendSeparator();
|
||||
|
||||
wxMenu* logCosModulesMenu = new wxMenu();
|
||||
logCosModulesMenu->AppendCheckItem(0, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false);
|
||||
logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING_MESSAGE, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false);
|
||||
logCosModulesMenu->AppendSeparator();
|
||||
logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitFile), _("coreinit File-Access API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitFile));
|
||||
logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitThreadSync), _("coreinit Thread-Synchronization API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitThreadSync));
|
||||
|
|
|
@ -15,7 +15,12 @@ Fiber::Fiber(void(*FiberEntryPoint)(void* userParam), void* userParam, void* pri
|
|||
ctx->uc_stack.ss_sp = m_stackPtr;
|
||||
ctx->uc_stack.ss_size = stackSize;
|
||||
ctx->uc_link = &ctx[0];
|
||||
#ifdef __arm64__
|
||||
// https://www.man7.org/linux/man-pages/man3/makecontext.3.html#NOTES
|
||||
makecontext(ctx, (void(*)())FiberEntryPoint, 2, (uint64) userParam >> 32, userParam);
|
||||
#else
|
||||
makecontext(ctx, (void(*)())FiberEntryPoint, 1, userParam);
|
||||
#endif
|
||||
this->m_implData = (void*)ctx;
|
||||
}
|
||||
|
||||
|
|
|
@ -45,7 +45,11 @@ namespace MemMapper
|
|||
void* r;
|
||||
if(fromReservation)
|
||||
{
|
||||
if( mprotect(baseAddr, size, GetProt(permissionFlags)) == 0 )
|
||||
uint64 page_size = sysconf(_SC_PAGESIZE);
|
||||
void* page = baseAddr;
|
||||
if ( (uint64) baseAddr % page_size != 0 )
|
||||
page = (void*) ((uint64)baseAddr & ~(page_size - 1));
|
||||
if( mprotect(page, size, GetProt(permissionFlags)) == 0 )
|
||||
r = baseAddr;
|
||||
else
|
||||
r = nullptr;
|
||||
|
|
|
@ -27,6 +27,8 @@ uint64 HighResolutionTimer::m_freq = []() -> uint64 {
|
|||
LARGE_INTEGER freq;
|
||||
QueryPerformanceFrequency(&freq);
|
||||
return (uint64)(freq.QuadPart);
|
||||
#elif BOOST_OS_MACOS
|
||||
return 1000000000;
|
||||
#else
|
||||
timespec pc;
|
||||
clock_getres(CLOCK_MONOTONIC_RAW, &pc);
|
||||
|
|
Loading…
Add table
Reference in a new issue