diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6ae4b892..4001ef21 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -177,6 +177,9 @@ jobs: build-macos: runs-on: macos-14 + strategy: + matrix: + arch: [x86_64, arm64] steps: - name: "Checkout repo" uses: actions/checkout@v4 @@ -236,7 +239,7 @@ jobs: cd build cmake .. ${{ env.BUILD_FLAGS }} \ -DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} \ - -DCMAKE_OSX_ARCHITECTURES=x86_64 \ + -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} \ -DMACOS_BUNDLE=ON \ -G Ninja @@ -259,5 +262,5 @@ jobs: - name: Upload artifact uses: actions/upload-artifact@v4 with: - name: cemu-bin-macos-x64 + name: cemu-bin-macos-${{ matrix.arch }} path: ./bin/Cemu.dmg diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s index 39c02560..cee1b7e3 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s @@ -429,8 +429,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8: rev64 v7.4h, v2.4h ld1 {v3.2s}, [x10] sub x5, x3, #8 +#if defined(__APPLE__) && defined(__aarch64__) + adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGE + ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGEOFF] +#else adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1 ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1] +#endif usubl v10.8h, v5.8b, v1.8b ld1 {v8.8b, v9.8b}, [x12] // Load multiplication factors 1 to 8 into D3 mov v8.d[1], v9.d[0] @@ -484,10 +489,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8: zip1 v1.8h, v0.8h, v2.8h zip2 v2.8h, v0.8h, v2.8h mov v0.16b, v1.16b - +#if defined(__APPLE__) && defined(__aarch64__) + adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGE + ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGEOFF] +#else adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2 ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2] - +#endif ld1 {v8.2s, v9.2s}, [x12] mov v8.d[1], v9.d[0] mov v10.16b, v8.16b diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s index fa19c121..99a95a13 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s @@ -431,10 +431,13 @@ ih264_intra_pred_luma_16x16_mode_plane_av8: mov x10, x1 //top_left mov x4, #-1 ld1 {v2.2s}, [x1], x8 - +#if defined(__APPLE__) && defined(__aarch64__) + adrp x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGE + ldr x7, [x7, #_ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGEOFF] +#else adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs] - +#endif ld1 {v0.2s}, [x1] rev64 v2.8b, v2.8b ld1 {v6.2s, v7.2s}, [x7] diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s index 273aa81b..d320ccb0 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s @@ -1030,8 +1030,13 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8: ext v4.16b, v2.16b , v2.16b , #1 mov v5.d[0], v4.d[1] +#if defined(__APPLE__) && defined(__aarch64__) + adrp x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGE + ldr x12, [x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGEOFF] +#else adrp x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u] +#endif uaddl v20.8h, v0.8b, v2.8b uaddl v22.8h, v1.8b, v3.8b uaddl v24.8h, v2.8b, v4.8b diff --git a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s index 475f690e..b8f53454 100644 --- a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s @@ -142,14 +142,22 @@ ih264_weighted_bi_pred_luma_av8: sxtw x4, w4 sxtw x5, w5 stp x19, x20, [sp, #-16]! +#ifndef __APPLE__ ldr w8, [sp, #80] //Load wt2 in w8 ldr w9, [sp, #88] //Load ofst1 in w9 - add w6, w6, #1 //w6 = log_WD + 1 - neg w10, w6 //w10 = -(log_WD + 1) - dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit) ldr w10, [sp, #96] //Load ofst2 in w10 ldr w11, [sp, #104] //Load ht in w11 ldr w12, [sp, #112] //Load wd in w12 +#else + ldr w8, [sp, #80] //Load wd in w8 + ldr w9, [sp, #84] //Load ht in w9 + ldr w10, [sp, #88] //Load offst2 in w10 + ldr w11, [sp, #92] //Load offst1 in w11 + ldr w12, [sp, #96] //Load offst1 in w12 +#endif + add w6, w6, #1 //w6 = log_WD + 1 + neg w10, w6 //w10 = -(log_WD + 1) + dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit) add w9, w9, #1 //w9 = ofst1 + 1 add w9, w9, w10 //w9 = ofst1 + ofst2 + 1 mov v2.s[0], w7 @@ -424,17 +432,24 @@ ih264_weighted_bi_pred_chroma_av8: sxtw x5, w5 stp x19, x20, [sp, #-16]! - +#ifndef __APPLE__ ldr w8, [sp, #80] //Load wt2 in w8 + ldr w9, [sp, #88] //Load ofst1 in w9 + ldr w10, [sp, #96] //Load ofst2 in w10 + ldr w11, [sp, #104] //Load ht in w11 + ldr w12, [sp, #112] //Load wd in w12 +#else + ldr w8, [sp, #80] //Load wd in w8 + ldr w9, [sp, #84] //Load ht in w9 + ldr w10, [sp, #88] //Load offst2 in w10 + ldr w11, [sp, #92] //Load offst1 in w11 + ldr w12, [sp, #96] //Load offst1 in w12 +#endif dup v4.4s, w8 //Q2 = (wt2_u, wt2_v) (32-bit) dup v2.4s, w7 //Q1 = (wt1_u, wt1_v) (32-bit) add w6, w6, #1 //w6 = log_WD + 1 - ldr w9, [sp, #88] //Load ofst1 in w9 - ldr w10, [sp, #96] //Load ofst2 in w10 neg w20, w6 //w20 = -(log_WD + 1) dup v0.8h, w20 //Q0 = -(log_WD + 1) (16-bit) - ldr w11, [sp, #104] //Load ht in x11 - ldr w12, [sp, #112] //Load wd in x12 dup v20.8h, w9 //0ffset1 dup v21.8h, w10 //0ffset2 srhadd v6.8b, v20.8b, v21.8b diff --git a/dependencies/ih264d/common/ih264_deblk_edge_filters.h b/dependencies/ih264d/common/ih264_deblk_edge_filters.h index 4079dd2c..17ccee3d 100644 --- a/dependencies/ih264d/common/ih264_deblk_edge_filters.h +++ b/dependencies/ih264d/common/ih264_deblk_edge_filters.h @@ -41,19 +41,31 @@ /* Extern Function Declarations */ /*****************************************************************************/ -typedef void ih264_deblk_edge_bslt4_ft(UWORD8 *pu1_src, +typedef void _ih264_deblk_edge_bslt4_ft(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, WORD32 beta, UWORD32 u4_bs, const UWORD8 *pu1_cliptab ); -typedef void ih264_deblk_edge_bs4_ft(UWORD8 *pu1_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_deblk_edge_bslt4_ft(arg) _ih264_deblk_edge_bslt4_ft arg __asm__(#arg); +#else +#define ih264_deblk_edge_bslt4_ft(arg) _ih264_deblk_edge_bslt4_ft arg; +#endif + +typedef void _ih264_deblk_edge_bs4_ft(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, WORD32 beta ); -typedef void ih264_deblk_chroma_edge_bslt4_ft(UWORD8 *pu1_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_deblk_edge_bs4_ft(arg) _ih264_deblk_edge_bs4_ft arg __asm__(#arg); +#else +#define ih264_deblk_edge_bs4_ft(arg) _ih264_deblk_edge_bs4_ft arg; +#endif + +typedef void _ih264_deblk_chroma_edge_bslt4_ft(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, WORD32 beta_cb, @@ -63,133 +75,143 @@ typedef void ih264_deblk_chroma_edge_bslt4_ft(UWORD8 *pu1_src, const UWORD8 *pu1_cliptab_cb, const UWORD8 *pu1_cliptab_cr); -typedef void ih264_deblk_chroma_edge_bs4_ft(UWORD8 *pu1_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_deblk_chroma_edge_bslt4_ft(arg) _ih264_deblk_chroma_edge_bslt4_ft arg __asm__(#arg); +#else +#define ih264_deblk_chroma_edge_bslt4_ft(arg) _ih264_deblk_chroma_edge_bslt4_ft arg; +#endif + +typedef void _ih264_deblk_chroma_edge_bs4_ft(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, WORD32 beta_cb, WORD32 alpha_cr, WORD32 beta_cr); +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_deblk_chroma_edge_bs4_ft(arg) _ih264_deblk_chroma_edge_bs4_ft arg __asm__(#arg); +#else +#define ih264_deblk_chroma_edge_bs4_ft(arg) _ih264_deblk_chroma_edge_bs4_ft arg; +#endif + +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_horz_bs4); +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4); +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_mbaff); -ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4; -ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4; -ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff; +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_bp); +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_bp); +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_bp); -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp; -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp; -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp; +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_horz_bslt4); +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4); +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_mbaff); -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4; -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4; -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff; +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_bp); +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_bp); +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_bp); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_mbaff); -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp; -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp; -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp; - -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff; - -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff; +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_mbaff); /*A9*/ -ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_a9; -ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_a9; -ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_a9; +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_horz_bs4_a9); +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_a9); +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_mbaff_a9); -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_a9; -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_a9; -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_a9; +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_bp_a9); +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_bp_a9); +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_bp_a9); -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_a9; -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_a9; -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_a9; +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_horz_bslt4_a9); +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_a9); +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_mbaff_a9); -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_a9; -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_a9; -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9; +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_bp_a9); +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_bp_a9); +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9); -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_a9; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_a9; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_a9; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_a9; +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_a9); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_a9); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_a9); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_mbaff_a9); -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_a9; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_a9; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_a9; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_a9; +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_a9); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_a9); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_a9); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_mbaff_a9); /*AV8*/ -ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_av8; -ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_av8; -ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_av8; +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_horz_bs4_av8); +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_av8); +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_mbaff_av8); -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_av8; -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_av8; -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_av8; +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_bp_av8); +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_bp_av8); +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_bp_av8); -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_av8; -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_av8; -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_av8; +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_horz_bslt4_av8); +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_av8); +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_mbaff_av8); -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_av8; -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_av8; -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_av8; +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_bp_av8); +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_bp_av8); +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_bp_av8); -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_av8; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_av8; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_av8; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_av8; +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_av8); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_av8); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_av8); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_mbaff_av8); -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_av8; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_av8; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_av8; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_av8; +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_av8); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_av8); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_av8); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_mbaff_av8); /*SSE3*/ -ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_ssse3; -ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_ssse3; -ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_ssse3; +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_horz_bs4_ssse3); +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_ssse3); +ih264_deblk_edge_bs4_ft(ih264_deblk_luma_vert_bs4_mbaff_ssse3); -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_ssse3; -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_ssse3; -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_ssse3; +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_bp_ssse3); +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_bp_ssse3); +ih264_deblk_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_bp_ssse3); -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_ssse3; -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_ssse3; -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_ssse3; +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_horz_bslt4_ssse3); +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_ssse3); +ih264_deblk_edge_bslt4_ft(ih264_deblk_luma_vert_bslt4_mbaff_ssse3); -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_ssse3; -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_ssse3; -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_ssse3; +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_bp_ssse3); +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_bp_ssse3); +ih264_deblk_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_bp_ssse3); -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_ssse3; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_ssse3; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_ssse3; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_ssse3; +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_ssse3); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_ssse3); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_vert_bs4_mbaff_ssse3); +ih264_deblk_chroma_edge_bs4_ft(ih264_deblk_chroma_horz_bs4_mbaff_ssse3); -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_ssse3; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_ssse3; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_ssse3; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_ssse3; +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_ssse3); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_ssse3); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_vert_bslt4_mbaff_ssse3); +ih264_deblk_chroma_edge_bslt4_ft(ih264_deblk_chroma_horz_bslt4_mbaff_ssse3); #endif /* IH264_DEBLK_H_ */ diff --git a/dependencies/ih264d/common/ih264_inter_pred_filters.h b/dependencies/ih264d/common/ih264_inter_pred_filters.h index c439ab84..a727345b 100644 --- a/dependencies/ih264d/common/ih264_inter_pred_filters.h +++ b/dependencies/ih264d/common/ih264_inter_pred_filters.h @@ -100,7 +100,7 @@ extern const WORD32 ih264_g_six_tap[3];/* coefficients for 6 tap filtering*/ /* Extern Function Declarations */ /*****************************************************************************/ -typedef void ih264_inter_pred_luma_ft(UWORD8 *pu1_src, +typedef void _ih264_inter_pred_luma_ft(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd, @@ -109,14 +109,26 @@ typedef void ih264_inter_pred_luma_ft(UWORD8 *pu1_src, UWORD8* pu1_tmp, WORD32 dydx); -typedef void ih264_interleave_copy_ft(UWORD8 *pu1_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_inter_pred_luma_ft(arg) _ih264_inter_pred_luma_ft arg __asm__(#arg); +#else +#define ih264_inter_pred_luma_ft(arg) _ih264_inter_pred_luma_ft arg; +#endif + +typedef void _ih264_interleave_copy_ft(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd, WORD32 ht, WORD32 wd); -typedef void ih264_inter_pred_luma_bilinear_ft(UWORD8 *pu1_src1, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_interleave_copy_ft(arg) _ih264_interleave_copy_ft arg __asm__(#arg); +#else +#define ih264_interleave_copy_ft(arg) _ih264_interleave_copy_ft arg; +#endif + +typedef void _ih264_inter_pred_luma_bilinear_ft(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, WORD32 src_strd1, @@ -125,7 +137,13 @@ typedef void ih264_inter_pred_luma_bilinear_ft(UWORD8 *pu1_src1, WORD32 height, WORD32 width); -typedef void ih264_inter_pred_chroma_ft(UWORD8 *pu1_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_inter_pred_luma_bilinear_ft(arg) _ih264_inter_pred_luma_bilinear_ft arg __asm__(#arg); +#else +#define ih264_inter_pred_luma_bilinear_ft(arg) _ih264_inter_pred_luma_bilinear_ft arg; +#endif + +typedef void _ih264_inter_pred_chroma_ft(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd, @@ -134,107 +152,113 @@ typedef void ih264_inter_pred_chroma_ft(UWORD8 *pu1_src, WORD32 ht, WORD32 wd); +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_inter_pred_chroma_ft(arg) _ih264_inter_pred_chroma_ft arg __asm__(#arg); +#else +#define ih264_inter_pred_chroma_ft(arg) _ih264_inter_pred_chroma_ft arg; +#endif + /* No NEON Declarations */ -ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_copy); -ih264_interleave_copy_ft ih264_interleave_copy; +ih264_interleave_copy_ft(ih264_interleave_copy); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_hpel); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_qpel); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_qpel); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_hpel); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_qpel); -ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear; +ih264_inter_pred_luma_bilinear_ft(ih264_inter_pred_luma_bilinear); -ih264_inter_pred_chroma_ft ih264_inter_pred_chroma; +ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma); /* A9 NEON Declarations */ -ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_a9q; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_copy_a9q); -ih264_interleave_copy_ft ih264_interleave_copy_a9; +ih264_interleave_copy_ft(ih264_interleave_copy_a9); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_a9q; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_a9q); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_a9q; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_a9q); -ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear_a9q; +ih264_inter_pred_luma_bilinear_ft(ih264_inter_pred_luma_bilinear_a9q); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_a9q; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_a9q); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_a9q; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_qpel_a9q); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q); -ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_a9q; +ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma_a9q); /* AV8 NEON Declarations */ -ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_av8; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_copy_av8); -ih264_interleave_copy_ft ih264_interleave_copy_av8; +ih264_interleave_copy_ft(ih264_interleave_copy_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_av8; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_av8; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_av8; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_hpel_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_av8; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_av8; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_qpel_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_qpel_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_av8; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_hpel_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_av8; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_qpel_av8); -ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_av8; +ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma_av8); -ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_dx_zero_av8; +ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma_dx_zero_av8); -ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_dy_zero_av8; +ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma_dy_zero_av8); /* SSSE3 Intrinsic Declarations */ -ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_ssse3; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_copy_ssse3); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_ssse3; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_ssse3); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_ssse3; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_ssse3); -ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear_ssse3; +ih264_inter_pred_luma_bilinear_ft(ih264_inter_pred_luma_bilinear_ssse3); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_ssse3; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_ssse3); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_ssse3; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_vert_qpel_ssse3); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3; +ih264_inter_pred_luma_ft(ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3); -ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_ssse3; +ih264_inter_pred_chroma_ft(ih264_inter_pred_chroma_ssse3); #endif diff --git a/dependencies/ih264d/common/ih264_intra_pred_filters.h b/dependencies/ih264d/common/ih264_intra_pred_filters.h index caf6b335..526df6ab 100644 --- a/dependencies/ih264d/common/ih264_intra_pred_filters.h +++ b/dependencies/ih264d/common/ih264_intra_pred_filters.h @@ -61,271 +61,284 @@ extern const WORD8 ih264_gai1_intrapred_luma_8x8_horz_u[]; /*****************************************************************************/ -typedef void ih264_intra_pred_ref_filtering_ft(UWORD8 *pu1_left, +typedef void _ih264_intra_pred_ref_filtering_ft(UWORD8 *pu1_left, UWORD8 *pu1_topleft, UWORD8 *pu1_top, UWORD8 *pu1_dst, WORD32 left_strd, WORD32 ngbr_avail); -typedef void ih264_intra_pred_luma_ft(UWORD8 *pu1_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_intra_pred_ref_filtering_ft(arg) _ih264_intra_pred_ref_filtering_ft arg __asm__(#arg); +#else +#define ih264_intra_pred_ref_filtering_ft(arg) _ih264_intra_pred_ref_filtering_ft arg; +#endif + +typedef void _ih264_intra_pred_luma_ft(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd, WORD32 ngbr_avail); +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_intra_pred_luma_ft(arg) _ih264_intra_pred_luma_ft arg __asm__(#arg); +#else +#define ih264_intra_pred_luma_ft(arg) _ih264_intra_pred_luma_ft arg; +#endif + /* No Neon Definitions */ /* Luma 4x4 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_dc); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dl); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dr); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_r); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_d); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_l); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_u); /* Luma 8x8 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_dc); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dl); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dr); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_r); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_d); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_l); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_u); /* Luma 16x16 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_vert); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_horz); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_dc); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_plane); /* Chroma 8x8 Intra pred filters */ -typedef ih264_intra_pred_luma_ft ih264_intra_pred_chroma_ft; +typedef _ih264_intra_pred_luma_ft _ih264_intra_pred_chroma_ft; +#define ih264_intra_pred_chroma_ft(arg) ih264_intra_pred_luma_ft(arg); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_dc); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_horz); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_vert); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_plane); -ih264_intra_pred_ref_filtering_ft ih264_intra_pred_luma_8x8_mode_ref_filtering; +ih264_intra_pred_ref_filtering_ft(ih264_intra_pred_luma_8x8_mode_ref_filtering); /* A9 Definition */ /* Luma 4x4 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_dc_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dl_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dr_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_r_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_d_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_l_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_u_a9q); /* Luma 8x8 Intra pred filters */ -ih264_intra_pred_ref_filtering_ft ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q; +ih264_intra_pred_ref_filtering_ft(ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_dc_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dl_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dr_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_r_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_d_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_l_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_u_a9q); /* Luma 16x16 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_vert_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_horz_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_dc_a9q); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_a9q; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_plane_a9q); /* Chroma 8x8 Intra pred filters */ -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_a9q; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_dc_a9q); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_a9q; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_horz_a9q); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_a9q; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_vert_a9q); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_a9q; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_plane_a9q); /* X86 Intrinsic Definitions */ /* Luma 4x4 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_dc_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_r_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_d_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_l_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_u_ssse3); /* Luma 8x8 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_dc_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_r_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_d_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_l_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_u_ssse3); /* Luma 16x16 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_vert_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_horz_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_dc_ssse3); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_ssse3; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_plane_ssse3); /* Chroma 8x8 Intra pred filters */ -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_ssse3; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_dc_ssse3); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_ssse3; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_horz_ssse3); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_ssse3; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_vert_ssse3); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_ssse3; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_plane_ssse3); /* AV8 Definition */ /* Luma 4x4 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_dc_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dl_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_diag_dr_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_r_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_d_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_vert_l_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_4x4_mode_horz_u_av8); /* Luma 8x8 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_dc_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dl_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_diag_dr_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_r_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_d_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_vert_l_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_8x8_mode_horz_u_av8); /* Luma 16x16 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_vert_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_horz_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_dc_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_av8; +ih264_intra_pred_luma_ft(ih264_intra_pred_luma_16x16_mode_plane_av8); /* Chroma 8x8 Intra pred filters */ -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_av8; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_dc_av8); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_av8; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_horz_av8); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_av8; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_vert_av8); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_av8; +ih264_intra_pred_chroma_ft(ih264_intra_pred_chroma_8x8_mode_plane_av8); #endif /* IH264_INTRA_PRED_FILTERS_H_ */ diff --git a/dependencies/ih264d/common/ih264_padding.h b/dependencies/ih264d/common/ih264_padding.h index e4e18fbe..4151279b 100644 --- a/dependencies/ih264d/common/ih264_padding.h +++ b/dependencies/ih264d/common/ih264_padding.h @@ -2,7 +2,7 @@ * * Copyright (C) 2015 The Android Open Source Project * - * Licensed under the Apache License, Version 2.0 (the "License"); + * Licensed under the Apache License, Version 2.0 (the "License")); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * @@ -41,34 +41,39 @@ /* Function Declarations */ /*****************************************************************************/ -typedef void ih264_pad(UWORD8 *, WORD32, WORD32, WORD32); +typedef void _ih264_pad(UWORD8 *, WORD32, WORD32, WORD32); +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_pad(arg) _ih264_pad arg __asm__(#arg); +#else +#define ih264_pad(arg) _ih264_pad arg; +#endif /* C function declarations */ -ih264_pad ih264_pad_top; -ih264_pad ih264_pad_bottom; -ih264_pad ih264_pad_left_luma; -ih264_pad ih264_pad_left_chroma; -ih264_pad ih264_pad_right_luma; -ih264_pad ih264_pad_right_chroma; +ih264_pad(ih264_pad_top); +ih264_pad(ih264_pad_bottom); +ih264_pad(ih264_pad_left_luma); +ih264_pad(ih264_pad_left_chroma); +ih264_pad(ih264_pad_right_luma); +ih264_pad(ih264_pad_right_chroma); /* A9 Q function declarations */ -ih264_pad ih264_pad_top_a9q; -ih264_pad ih264_pad_left_luma_a9q; -ih264_pad ih264_pad_left_chroma_a9q; -ih264_pad ih264_pad_right_luma_a9q; -ih264_pad ih264_pad_right_chroma_a9q; +ih264_pad(ih264_pad_top_a9q); +ih264_pad(ih264_pad_left_luma_a9q); +ih264_pad(ih264_pad_left_chroma_a9q); +ih264_pad(ih264_pad_right_luma_a9q); +ih264_pad(ih264_pad_right_chroma_a9q); /* AV8 function declarations */ -ih264_pad ih264_pad_top_av8; -ih264_pad ih264_pad_left_luma_av8; -ih264_pad ih264_pad_left_chroma_av8; -ih264_pad ih264_pad_right_luma_av8; -ih264_pad ih264_pad_right_chroma_av8; +ih264_pad(ih264_pad_top_av8); +ih264_pad(ih264_pad_left_luma_av8); +ih264_pad(ih264_pad_left_chroma_av8); +ih264_pad(ih264_pad_right_luma_av8); +ih264_pad(ih264_pad_right_chroma_av8); -ih264_pad ih264_pad_left_luma_ssse3; -ih264_pad ih264_pad_left_chroma_ssse3; -ih264_pad ih264_pad_right_luma_ssse3; -ih264_pad ih264_pad_right_chroma_ssse3; +ih264_pad(ih264_pad_left_luma_ssse3); +ih264_pad(ih264_pad_left_chroma_ssse3); +ih264_pad(ih264_pad_right_luma_ssse3); +ih264_pad(ih264_pad_right_chroma_ssse3); #endif /*_IH264_PADDING_H_*/ diff --git a/dependencies/ih264d/common/ih264_trans_quant_itrans_iquant.h b/dependencies/ih264d/common/ih264_trans_quant_itrans_iquant.h index 83551aad..e5004a64 100644 --- a/dependencies/ih264d/common/ih264_trans_quant_itrans_iquant.h +++ b/dependencies/ih264d/common/ih264_trans_quant_itrans_iquant.h @@ -41,7 +41,7 @@ /*****************************************************************************/ -typedef void ih264_resi_trans_dctrans_quant_ft(UWORD8*pu1_src, +typedef void _ih264_resi_trans_dctrans_quant_ft(UWORD8*pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, @@ -53,7 +53,13 @@ typedef void ih264_resi_trans_dctrans_quant_ft(UWORD8*pu1_src, UWORD32 u4_round_fact, UWORD8 *pu1_nnz); -typedef void ih264_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_resi_trans_dctrans_quant_ft(arg) _ih264_resi_trans_dctrans_quant_ft arg __asm__(#arg); +#else +#define ih264_resi_trans_dctrans_quant_ft(arg) _ih264_resi_trans_dctrans_quant_ft arg; +#endif + +typedef void _ih264_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, @@ -65,9 +71,15 @@ typedef void ih264_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src, UWORD32 pi4_cntrl, WORD32 *pi4_tmp); +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_idctrans_iquant_itrans_recon_ft(arg) _ih264_pad arg __asm__(#arg); +#else +#define ih264_idctrans_iquant_itrans_recon_ft(arg) _ih264_pad arg; +#endif + /*Function prototype declarations*/ -typedef void ih264_resi_trans_quant_ft(UWORD8*pu1_src, +typedef void _ih264_resi_trans_quant_ft(UWORD8*pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, @@ -79,7 +91,13 @@ typedef void ih264_resi_trans_quant_ft(UWORD8*pu1_src, UWORD8 *pu1_nnz, WORD16 *pi2_alt_dc_addr); -typedef void ih264_luma_16x16_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_resi_trans_quant_ft(arg) _ih264_resi_trans_quant_ft arg __asm__(#arg); +#else +#define ih264_resi_trans_quant_ft(arg) _ih264_resi_trans_quant_ft arg; +#endif + +typedef void _ih264_luma_16x16_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, @@ -92,7 +110,13 @@ typedef void ih264_luma_16x16_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag); -typedef void ih264_chroma_8x8_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_luma_16x16_resi_trans_dctrans_quant_ft(arg) _ih264_luma_16x16_resi_trans_dctrans_quant_ft arg __asm__(#arg); +#else +#define ih264_luma_16x16_resi_trans_dctrans_quant_ft(arg) _ih264_luma_16x16_resi_trans_dctrans_quant_ft arg; +#endif + +typedef void _ih264_chroma_8x8_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, @@ -104,7 +128,13 @@ typedef void ih264_chroma_8x8_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src, UWORD32 u4_round_factor, UWORD8 *pu1_nnz); -typedef void ih264_iquant_itrans_recon_ft(WORD16 *pi2_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_chroma_8x8_resi_trans_dctrans_quant_ft(arg) _ih264_chroma_8x8_resi_trans_dctrans_quant_ft arg __asm__(#arg); +#else +#define ih264_chroma_8x8_resi_trans_dctrans_quant_ft(arg) _ih264_chroma_8x8_resi_trans_dctrans_quant_ft arg; +#endif + +typedef void _ih264_iquant_itrans_recon_ft(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 pred_strd, @@ -116,8 +146,14 @@ typedef void ih264_iquant_itrans_recon_ft(WORD16 *pi2_src, WORD32 iq_start_idx, WORD16 *pi2_dc_ld_addr); +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_iquant_itrans_recon_ft(arg) _ih264_iquant_itrans_recon_ft arg __asm__(#arg); +#else +#define ih264_iquant_itrans_recon_ft(arg) _ih264_iquant_itrans_recon_ft arg; +#endif -typedef void ih264_iquant_itrans_recon_chroma_ft(WORD16 *pi2_src, + +typedef void _ih264_iquant_itrans_recon_chroma_ft(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 pred_strd, @@ -128,8 +164,14 @@ typedef void ih264_iquant_itrans_recon_chroma_ft(WORD16 *pi2_src, WORD16 *pi2_tmp, WORD16 *pi2_dc_src); +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_iquant_itrans_recon_chroma_ft(arg) _ih264_iquant_itrans_recon_chroma_ft arg __asm__(#arg); +#else +#define ih264_iquant_itrans_recon_chroma_ft(arg) _ih264_iquant_itrans_recon_chroma_ft arg; +#endif -typedef void ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src, + +typedef void _ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, @@ -142,7 +184,13 @@ typedef void ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src, UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp); -typedef void ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(arg) _ih264_luma_16x16_idctrans_iquant_itrans_recon_ft arg __asm__(#arg); +#else +#define ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(arg) _ih264_luma_16x16_idctrans_iquant_itrans_recon_ft arg; +#endif + +typedef void _ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, @@ -154,79 +202,97 @@ typedef void ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src, UWORD32 pi4_cntrl, WORD32 *pi4_tmp); -typedef void ih264_ihadamard_scaling_ft(WORD16* pi2_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(arg) _ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft arg __asm__(#arg); +#else +#define ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(arg) _ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft arg; +#endif + +typedef void _ih264_ihadamard_scaling_ft(WORD16* pi2_src, WORD16* pi2_out, const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6, WORD32* pi4_tmp); -typedef void ih264_hadamard_quant_ft(WORD16 *pi2_src, WORD16 *pi2_dst, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_ihadamard_scaling_ft(arg) _ih264_ihadamard_scaling_ft arg __asm__(#arg); +#else +#define ih264_ihadamard_scaling_ft(arg) _ih264_ihadamard_scaling_ft arg; +#endif + +typedef void _ih264_hadamard_quant_ft(WORD16 *pi2_src, WORD16 *pi2_dst, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, UWORD32 u4_round_factor,UWORD8 *pu1_nnz); -ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4; -ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4; -ih264_resi_trans_quant_ft ih264_resi_trans_quant_8x8; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc; -ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4; -ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv; -ih264_hadamard_quant_ft ih264_hadamard_quant_4x4; -ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv; +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_hadamard_quant_ft(arg) _ih264_hadamard_quant_ft arg __asm__(#arg); +#else +#define ih264_hadamard_quant_ft(arg) _ih264_hadamard_quant_ft arg; +#endif + +ih264_resi_trans_quant_ft(ih264_resi_trans_quant_4x4); +ih264_resi_trans_quant_ft(ih264_resi_trans_quant_chroma_4x4); +ih264_resi_trans_quant_ft(ih264_resi_trans_quant_8x8); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_dc); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_dc); +ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4); +ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_dc); +ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_4x4); +ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_2x2_uv); +ih264_hadamard_quant_ft(ih264_hadamard_quant_4x4); +ih264_hadamard_quant_ft(ih264_hadamard_quant_2x2_uv); /*A9 Declarations*/ -ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_a9; -ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_a9; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_a9; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_a9; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_a9; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_a9; -ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_a9; -ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_a9; -ih264_luma_16x16_resi_trans_dctrans_quant_ft ih264_luma_16x16_resi_trans_dctrans_quant_a9; -ih264_chroma_8x8_resi_trans_dctrans_quant_ft ih264_chroma_8x8_resi_trans_dctrans_quant_a9; -ih264_luma_16x16_idctrans_iquant_itrans_recon_ft ih264_luma_16x16_idctrans_iquant_itrans_recon_a9; -ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft ih264_chroma_8x8_idctrans_iquant_itrans_recon_a9; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_a9; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_a9; -ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_a9; -ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_a9; +ih264_resi_trans_quant_ft(ih264_resi_trans_quant_4x4_a9); +ih264_resi_trans_quant_ft(ih264_resi_trans_quant_chroma_4x4_a9); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_a9); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_a9); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_dc_a9); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_dc_a9); +ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_a9); +ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_dc_a9); +ih264_luma_16x16_resi_trans_dctrans_quant_ft(ih264_luma_16x16_resi_trans_dctrans_quant_a9); +ih264_chroma_8x8_resi_trans_dctrans_quant_ft(ih264_chroma_8x8_resi_trans_dctrans_quant_a9); +ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(ih264_luma_16x16_idctrans_iquant_itrans_recon_a9); +ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(ih264_chroma_8x8_idctrans_iquant_itrans_recon_a9); +ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_4x4_a9); +ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_2x2_uv_a9); +ih264_hadamard_quant_ft(ih264_hadamard_quant_4x4_a9); +ih264_hadamard_quant_ft(ih264_hadamard_quant_2x2_uv_a9); /*Av8 Declarations*/ -ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_av8; -ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_av8; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_av8; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_av8; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_av8; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_av8; -ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_av8; -ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_av8; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_av8; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_av8; -ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_av8; -ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_av8; +ih264_resi_trans_quant_ft(ih264_resi_trans_quant_4x4_av8); +ih264_resi_trans_quant_ft(ih264_resi_trans_quant_chroma_4x4_av8); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_av8); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_av8); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_dc_av8); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_dc_av8); +ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_av8); +ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_dc_av8); +ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_4x4_av8); +ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_2x2_uv_av8); +ih264_hadamard_quant_ft(ih264_hadamard_quant_4x4_av8); +ih264_hadamard_quant_ft(ih264_hadamard_quant_2x2_uv_av8); /*SSSE3 Declarations*/ -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_ssse3; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_ssse3; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_ssse3; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_ssse3; -ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_ssse3; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_ssse3; +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_ssse3); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_ssse3); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_dc_ssse3); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_8x8_dc_ssse3); +ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3); +ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_4x4_ssse3); +ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_2x2_uv_ssse3); /*SSSE42 Declarations*/ -ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_sse42; -ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_sse42; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_sse42; -ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_sse42; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_sse42; -ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_sse42; -ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_sse42; +ih264_resi_trans_quant_ft(ih264_resi_trans_quant_4x4_sse42); +ih264_resi_trans_quant_ft(ih264_resi_trans_quant_chroma_4x4_sse42); +ih264_iquant_itrans_recon_ft(ih264_iquant_itrans_recon_4x4_sse42); +ih264_iquant_itrans_recon_chroma_ft(ih264_iquant_itrans_recon_chroma_4x4_sse42); +ih264_ihadamard_scaling_ft(ih264_ihadamard_scaling_4x4_sse42); +ih264_hadamard_quant_ft(ih264_hadamard_quant_4x4_sse42); +ih264_hadamard_quant_ft(ih264_hadamard_quant_2x2_uv_sse42); #endif /* IH264_TRANS_QUANT_H_ */ diff --git a/dependencies/ih264d/common/ih264_weighted_pred.h b/dependencies/ih264d/common/ih264_weighted_pred.h index f9b93b0f..785744f8 100644 --- a/dependencies/ih264d/common/ih264_weighted_pred.h +++ b/dependencies/ih264d/common/ih264_weighted_pred.h @@ -68,7 +68,7 @@ /*****************************************************************************/ /* Extern Function Declarations */ /*****************************************************************************/ -typedef void ih264_default_weighted_pred_ft(UWORD8 *puc_src1, +typedef void _ih264_default_weighted_pred_ft(UWORD8 *puc_src1, UWORD8 *puc_src2, UWORD8 *puc_dst, WORD32 src_strd1, @@ -77,7 +77,13 @@ typedef void ih264_default_weighted_pred_ft(UWORD8 *puc_src1, WORD32 ht, WORD32 wd); -typedef void ih264_weighted_pred_ft(UWORD8 *puc_src, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_default_weighted_pred_ft(arg) _ih264_default_weighted_pred_ft arg __asm__(#arg); +#else +#define ih264_default_weighted_pred_ft(arg) _ih264_default_weighted_pred_ft arg; +#endif + +typedef void _ih264_weighted_pred_ft(UWORD8 *puc_src, UWORD8 *puc_dst, WORD32 src_strd, WORD32 dst_strd, @@ -87,7 +93,13 @@ typedef void ih264_weighted_pred_ft(UWORD8 *puc_src, WORD32 ht, WORD32 wd); -typedef void ih264_weighted_bi_pred_ft(UWORD8 *puc_src1, +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_weighted_pred_ft(arg) _ih264_weighted_pred_ft arg __asm__(#arg); +#else +#define ih264_weighted_pred_ft(arg) _ih264_weighted_pred_ft arg; +#endif + +typedef void _ih264_weighted_bi_pred_ft(UWORD8 *puc_src1, UWORD8 *puc_src2, UWORD8 *puc_dst, WORD32 src_strd1, @@ -101,63 +113,69 @@ typedef void ih264_weighted_bi_pred_ft(UWORD8 *puc_src1, WORD32 ht, WORD32 wd); +#if defined(__APPLE__) && defined(__aarch64__) +#define ih264_weighted_bi_pred_ft(arg) _ih264_weighted_bi_pred_ft arg __asm__(#arg); +#else +#define ih264_weighted_bi_pred_ft(arg) _ih264_weighted_bi_pred_ft arg; +#endif + /* No NEON Declarations */ -ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma; +ih264_default_weighted_pred_ft(ih264_default_weighted_pred_luma); -ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma; +ih264_default_weighted_pred_ft(ih264_default_weighted_pred_chroma); -ih264_weighted_pred_ft ih264_weighted_pred_luma; +ih264_weighted_pred_ft(ih264_weighted_pred_luma); -ih264_weighted_pred_ft ih264_weighted_pred_chroma; +ih264_weighted_pred_ft(ih264_weighted_pred_chroma); -ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma; +ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_luma); -ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma; +ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_chroma); /* A9 NEON Declarations */ -ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_a9q; +ih264_default_weighted_pred_ft(ih264_default_weighted_pred_luma_a9q); -ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_a9q; +ih264_default_weighted_pred_ft(ih264_default_weighted_pred_chroma_a9q); -ih264_weighted_pred_ft ih264_weighted_pred_luma_a9q; +ih264_weighted_pred_ft(ih264_weighted_pred_luma_a9q); -ih264_weighted_pred_ft ih264_weighted_pred_chroma_a9q; +ih264_weighted_pred_ft(ih264_weighted_pred_chroma_a9q); -ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_a9q; +ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_luma_a9q); -ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_a9q; +ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_chroma_a9q); /* AV8 NEON Declarations */ -ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_av8; +ih264_default_weighted_pred_ft(ih264_default_weighted_pred_luma_av8); -ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_av8; +ih264_default_weighted_pred_ft(ih264_default_weighted_pred_chroma_av8); -ih264_weighted_pred_ft ih264_weighted_pred_luma_av8; +ih264_weighted_pred_ft(ih264_weighted_pred_luma_av8); -ih264_weighted_pred_ft ih264_weighted_pred_chroma_av8; +ih264_weighted_pred_ft(ih264_weighted_pred_chroma_av8); -ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_av8; +ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_luma_av8); -ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_av8; +ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_chroma_av8); /* SSE42 Intrinsic Declarations */ -ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_sse42; +ih264_default_weighted_pred_ft(ih264_default_weighted_pred_luma_sse42); -ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_sse42; +ih264_default_weighted_pred_ft(ih264_default_weighted_pred_chroma_sse42); -ih264_weighted_pred_ft ih264_weighted_pred_luma_sse42; +ih264_weighted_pred_ft(ih264_weighted_pred_luma_sse42); -ih264_weighted_pred_ft ih264_weighted_pred_chroma_sse42; +ih264_weighted_pred_ft(ih264_weighted_pred_chroma_sse42); -ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_sse42; +ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_luma_sse42); -ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_sse42; +ih264_weighted_bi_pred_ft(ih264_weighted_bi_pred_chroma_sse42); #endif /* IH264_WEIGHTED_PRED_H_ */ diff --git a/dependencies/ih264d/decoder/ih264d_structs.h b/dependencies/ih264d/decoder/ih264d_structs.h index 989bd33c..e5309602 100644 --- a/dependencies/ih264d/decoder/ih264d_structs.h +++ b/dependencies/ih264d/decoder/ih264d_structs.h @@ -1365,112 +1365,112 @@ typedef struct _DecStruct UWORD8 *pu1_mv_bank_buf_base; UWORD8 *pu1_init_dpb_base; - ih264_default_weighted_pred_ft *pf_default_weighted_pred_luma; + _ih264_default_weighted_pred_ft *pf_default_weighted_pred_luma; - ih264_default_weighted_pred_ft *pf_default_weighted_pred_chroma; + _ih264_default_weighted_pred_ft *pf_default_weighted_pred_chroma; - ih264_weighted_pred_ft *pf_weighted_pred_luma; + _ih264_weighted_pred_ft *pf_weighted_pred_luma; - ih264_weighted_pred_ft *pf_weighted_pred_chroma; + _ih264_weighted_pred_ft *pf_weighted_pred_chroma; - ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_luma; + _ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_luma; - ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_chroma; + _ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_chroma; - ih264_pad *pf_pad_top; - ih264_pad *pf_pad_bottom; - ih264_pad *pf_pad_left_luma; - ih264_pad *pf_pad_left_chroma; - ih264_pad *pf_pad_right_luma; - ih264_pad *pf_pad_right_chroma; + _ih264_pad *pf_pad_top; + _ih264_pad *pf_pad_bottom; + _ih264_pad *pf_pad_left_luma; + _ih264_pad *pf_pad_left_chroma; + _ih264_pad *pf_pad_right_luma; + _ih264_pad *pf_pad_right_chroma; - ih264_inter_pred_chroma_ft *pf_inter_pred_chroma; + _ih264_inter_pred_chroma_ft *pf_inter_pred_chroma; - ih264_inter_pred_luma_ft *apf_inter_pred_luma[16]; + _ih264_inter_pred_luma_ft *apf_inter_pred_luma[16]; - ih264_intra_pred_luma_ft *apf_intra_pred_luma_16x16[4]; + _ih264_intra_pred_luma_ft *apf_intra_pred_luma_16x16[4]; - ih264_intra_pred_luma_ft *apf_intra_pred_luma_8x8[9]; + _ih264_intra_pred_luma_ft *apf_intra_pred_luma_8x8[9]; - ih264_intra_pred_luma_ft *apf_intra_pred_luma_4x4[9]; + _ih264_intra_pred_luma_ft *apf_intra_pred_luma_4x4[9]; - ih264_intra_pred_ref_filtering_ft *pf_intra_pred_ref_filtering; + _ih264_intra_pred_ref_filtering_ft *pf_intra_pred_ref_filtering; - ih264_intra_pred_chroma_ft *apf_intra_pred_chroma[4]; + _ih264_intra_pred_chroma_ft *apf_intra_pred_chroma[4]; - ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4; + _ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4; - ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4_dc; + _ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4_dc; - ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8; + _ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8; - ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8_dc; + _ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8_dc; - ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4; + _ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4; - ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4_dc; + _ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4_dc; - ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_4x4; + _ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_4x4; /** * deblock vertical luma edge with blocking strength 4 */ - ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4; + _ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4; /** * deblock vertical luma edge with blocking strength less than 4 */ - ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4; + _ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4; /** * deblock vertical luma edge with blocking strength 4 for mbaff */ - ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4_mbaff; + _ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4_mbaff; /** * deblock vertical luma edge with blocking strength less than 4 for mbaff */ - ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4_mbaff; + _ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4_mbaff; /** * deblock vertical chroma edge with blocking strength 4 */ - ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4; + _ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4; /** * deblock vertical chroma edge with blocking strength less than 4 */ - ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4; + _ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4; /** * deblock vertical chroma edge with blocking strength 4 for mbaff */ - ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4_mbaff; + _ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4_mbaff; /** * deblock vertical chroma edge with blocking strength less than 4 for mbaff */ - ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4_mbaff; + _ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4_mbaff; /** * deblock horizontal luma edge with blocking strength 4 */ - ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4; + _ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4; /** * deblock horizontal luma edge with blocking strength less than 4 */ - ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4; + _ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4; /** * deblock horizontal chroma edge with blocking strength 4 */ - ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4; + _ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4; /** * deblock horizontal chroma edge with blocking strength less than 4 */ - ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4; + _ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4; } dec_struct_t; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 79471321..2bfe109d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -102,9 +102,9 @@ if (MACOS_BUNDLE) endforeach(folder) if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/debug/lib/libusb-1.0.0.dylib") + set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/debug/lib/libusb-1.0.0.dylib") else() - set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/lib/libusb-1.0.0.dylib") + set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib/libusb-1.0.0.dylib") endif() add_custom_command (TARGET CemuBin POST_BUILD diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.cpp b/src/Cafe/HW/Latte/Core/LatteIndices.cpp index aec51725..2bbb617d 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.cpp +++ b/src/Cafe/HW/Latte/Core/LatteIndices.cpp @@ -6,6 +6,8 @@ #if defined(ARCH_X86_64) && defined(__GNUC__) #include +#elif defined(__aarch64__) +#include #endif struct @@ -502,6 +504,114 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat indexMax = std::max(indexMax, _maxIndex); indexMin = std::min(indexMin, _minIndex); } +#elif defined(__aarch64__) + +void LatteIndices_fastConvertU16_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) +{ + const uint16* indicesU16BE = (const uint16*)indexDataInput; + uint16* indexOutput = (uint16*)indexDataOutput; + sint32 count8 = count >> 3; + sint32 countRemaining = count & 7; + + if (count8) + { + uint16x8_t mMin = vdupq_n_u16(0xFFFF); + uint16x8_t mMax = vdupq_n_u16(0x0000); + uint16x8_t mTemp; + uint16x8_t* mRawIndices = (uint16x8_t*) indicesU16BE; + indicesU16BE += count8 * 8; + uint16x8_t* mOutputIndices = (uint16x8_t*) indexOutput; + indexOutput += count8 * 8; + + while (count8--) + { + mTemp = vld1q_u16((uint16*)mRawIndices); + mRawIndices++; + mTemp = vrev16q_u8(mTemp); + mMin = vminq_u16(mMin, mTemp); + mMax = vmaxq_u16(mMax, mTemp); + vst1q_u16((uint16*)mOutputIndices, mTemp); + mOutputIndices++; + } + + uint16* mMaxU16 = (uint16*)&mMax; + uint16* mMinU16 = (uint16*)&mMin; + + for (int i = 0; i < 8; ++i) { + indexMax = std::max(indexMax, (uint32)mMaxU16[i]); + indexMin = std::min(indexMin, (uint32)mMinU16[i]); + } + } + // process remaining indices + uint32 _minIndex = 0xFFFFFFFF; + uint32 _maxIndex = 0; + for (sint32 i = countRemaining; (--i) >= 0;) + { + uint16 idx = _swapEndianU16(*indicesU16BE); + *indexOutput = idx; + indexOutput++; + indicesU16BE++; + _maxIndex = std::max(_maxIndex, (uint32)idx); + _minIndex = std::min(_minIndex, (uint32)idx); + } + // update min/max + indexMax = std::max(indexMax, _maxIndex); + indexMin = std::min(indexMin, _minIndex); +} + +void LatteIndices_fastConvertU32_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) +{ + const uint32* indicesU32BE = (const uint32*)indexDataInput; + uint32* indexOutput = (uint32*)indexDataOutput; + sint32 count8 = count >> 2; + sint32 countRemaining = count & 3; + + if (count8) + { + uint32x4_t mMin = vdupq_n_u32(0xFFFFFFFF); + uint32x4_t mMax = vdupq_n_u32(0x00000000); + uint32x4_t mTemp; + uint32x4_t* mRawIndices = (uint32x4_t*) indicesU32BE; + indicesU32BE += count8 * 4; + uint32x4_t* mOutputIndices = (uint32x4_t*) indexOutput; + indexOutput += count8 * 4; + + while (count8--) + { + mTemp = vld1q_u32((uint32*)mRawIndices); + mRawIndices++; + mTemp = vrev32q_u8(mTemp); + mMin = vminq_u32(mMin, mTemp); + mMax = vmaxq_u32(mMax, mTemp); + vst1q_u32((uint32*)mOutputIndices, mTemp); + mOutputIndices++; + } + + uint32* mMaxU32 = (uint32*)&mMax; + uint32* mMinU32 = (uint32*)&mMin; + + for (int i = 0; i < 4; ++i) { + indexMax = std::max(indexMax, mMaxU32[i]); + indexMin = std::min(indexMin, mMinU32[i]); + } + } + // process remaining indices + uint32 _minIndex = 0xFFFFFFFF; + uint32 _maxIndex = 0; + for (sint32 i = countRemaining; (--i) >= 0;) + { + uint32 idx = _swapEndianU32(*indicesU32BE); + *indexOutput = idx; + indexOutput++; + indicesU32BE++; + _maxIndex = std::max(_maxIndex, idx); + _minIndex = std::min(_minIndex, idx); + } + // update min/max + indexMax = std::max(indexMax, _maxIndex); + indexMin = std::min(indexMin, _minIndex); +} + #endif template @@ -688,27 +798,31 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 { if (indexType == LatteIndexType::U16_BE) { - #if defined(ARCH_X86_64) +#if defined(ARCH_X86_64) if (g_CPUFeatures.x86.avx2) LatteIndices_fastConvertU16_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax); else if (g_CPUFeatures.x86.sse4_1 && g_CPUFeatures.x86.ssse3) LatteIndices_fastConvertU16_SSE41(indexData, indexOutputPtr, count, indexMin, indexMax); else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); - #else +#elif defined(__aarch64__) + LatteIndices_fastConvertU16_NEON(indexData, indexOutputPtr, count, indexMin, indexMax); +#else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); - #endif +#endif } else if (indexType == LatteIndexType::U32_BE) { - #if defined(ARCH_X86_64) +#if defined(ARCH_X86_64) if (g_CPUFeatures.x86.avx2) LatteIndices_fastConvertU32_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax); else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); - #else +#elif defined(__aarch64__) + LatteIndices_fastConvertU32_NEON(indexData, indexOutputPtr, count, indexMin, indexMax); +#else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); - #endif +#endif } else if (indexType == LatteIndexType::U16_LE) { diff --git a/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp b/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp index 870d1850..2eef929d 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp @@ -25,7 +25,11 @@ void nnNfp_update(); namespace coreinit { +#ifdef __arm64__ + void __OSFiberThreadEntry(uint32, uint32); +#else void __OSFiberThreadEntry(void* thread); +#endif void __OSAddReadyThreadToRunQueue(OSThread_t* thread); void __OSRemoveThreadFromRunQueues(OSThread_t* thread); }; @@ -49,7 +53,7 @@ namespace coreinit struct OSHostThread { - OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber(__OSFiberThreadEntry, this, this) + OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber((void(*)(void*))__OSFiberThreadEntry, this, this) { } @@ -1304,8 +1308,14 @@ namespace coreinit __OSThreadStartTimeslice(hostThread->m_thread, &hostThread->ppcInstance); } +#ifdef __arm64__ + void __OSFiberThreadEntry(uint32 _high, uint32 _low) + { + uint64 _thread = (uint64) _high << 32 | _low; +#else void __OSFiberThreadEntry(void* _thread) { +#endif OSHostThread* hostThread = (OSHostThread*)_thread; #if defined(ARCH_X86_64) diff --git a/src/Cafe/OS/libs/gx2/GX2_Command.cpp b/src/Cafe/OS/libs/gx2/GX2_Command.cpp index ec96a4ff..0779cbb1 100644 --- a/src/Cafe/OS/libs/gx2/GX2_Command.cpp +++ b/src/Cafe/OS/libs/gx2/GX2_Command.cpp @@ -17,28 +17,28 @@ GX2WriteGatherPipeState gx2WriteGatherPipe = { 0 }; void gx2WriteGather_submitU32AsBE(uint32 v) { uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance()); - if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) + if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) return; - *(uint32*)(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) = _swapEndianU32(v); - (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) += 4; + *(uint32*)(gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load()) = _swapEndianU32(v); + *gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] += 4; } void gx2WriteGather_submitU32AsLE(uint32 v) { uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance()); - if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) + if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) return; - *(uint32*)(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) = v; - (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) += 4; + *(uint32*)(gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load()) = v; + *gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] += 4; } void gx2WriteGather_submitU32AsLEArray(uint32* v, uint32 numValues) { uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance()); - if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) + if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) return; - memcpy_dwords((*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]), v, numValues); - (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) += 4 * numValues; + memcpy_dwords(gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load(), v, numValues); + *gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] += 4 * numValues; } namespace GX2 @@ -121,7 +121,7 @@ namespace GX2 if (sGX2MainCoreIndex == coreIndex) gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] = &gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex]; else - gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] = NULL; + *gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] = NULL; // return size of (written) display list return currentWriteSize; } @@ -217,7 +217,7 @@ namespace GX2 cemu_assert_debug(coreIndex == sGX2MainCoreIndex); coreIndex = sGX2MainCoreIndex; // always submit to main queue which is owned by GX2 main core (TCLSubmitToRing does not need this workaround) - uint32be* cmdStream = (uint32be*)(gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex]); + uint32be* cmdStream = (uint32be*)(gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex].load()); cmdStream[0] = pm4HeaderType3(IT_INDIRECT_BUFFER_PRIV, 3); cmdStream[1] = memory_virtualToPhysical(MEMPTR(addr).GetMPTR()); cmdStream[2] = 0; diff --git a/src/Cafe/OS/libs/gx2/GX2_Command.h b/src/Cafe/OS/libs/gx2/GX2_Command.h index 51c04928..fb9bb65e 100644 --- a/src/Cafe/OS/libs/gx2/GX2_Command.h +++ b/src/Cafe/OS/libs/gx2/GX2_Command.h @@ -6,9 +6,9 @@ struct GX2WriteGatherPipeState { uint8* gxRingBuffer; // each core has it's own write gatherer and display list state (writing) - uint8* writeGatherPtrGxBuffer[Espresso::CORE_COUNT]; - uint8** writeGatherPtrWrite[Espresso::CORE_COUNT]; - uint8* writeGatherPtrDisplayList[Espresso::CORE_COUNT]; + std::atomic writeGatherPtrGxBuffer[Espresso::CORE_COUNT]; + std::atomic* writeGatherPtrWrite[Espresso::CORE_COUNT]; + std::atomic writeGatherPtrDisplayList[Espresso::CORE_COUNT]; MPTR displayListStart[Espresso::CORE_COUNT]; uint32 displayListMaxSize[Espresso::CORE_COUNT]; }; @@ -75,10 +75,10 @@ template inline void gx2WriteGather_submit(Targs... args) { uint32 coreIndex = PPCInterpreter_getCurrentCoreIndex(); - if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == nullptr) + if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == nullptr) return; - uint32be* writePtr = (uint32be*)(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]); + uint32be* writePtr = (uint32be*)gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load(); gx2WriteGather_submit_(coreIndex, writePtr, std::forward(args)...); } diff --git a/src/Common/precompiled.h b/src/Common/precompiled.h index bda75cef..dccd1e36 100644 --- a/src/Common/precompiled.h +++ b/src/Common/precompiled.h @@ -310,7 +310,8 @@ inline uint64 __rdtsc() inline void _mm_mfence() { - + asm volatile("" ::: "memory"); + std::atomic_thread_fence(std::memory_order_seq_cst); } inline unsigned char _addcarry_u64(unsigned char carry, unsigned long long a, unsigned long long b, unsigned long long *result) diff --git a/src/config/ActiveSettings.cpp b/src/config/ActiveSettings.cpp index e552d164..dc7788d2 100644 --- a/src/config/ActiveSettings.cpp +++ b/src/config/ActiveSettings.cpp @@ -74,10 +74,14 @@ CPUMode ActiveSettings::GetCPUMode() if (mode == CPUMode::Auto) { +#ifdef __aarch64__ + mode = CPUMode::SinglecoreInterpreter; +#else if (GetPhysicalCoreCount() >= 4) mode = CPUMode::MulticoreRecompiler; else mode = CPUMode::SinglecoreRecompiler; +#endif } else if (mode == CPUMode::DualcoreRecompiler) // dualcore is disabled now mode = CPUMode::MulticoreRecompiler; diff --git a/src/gui/MainWindow.cpp b/src/gui/MainWindow.cpp index f2a5a2fa..9ddc6c93 100644 --- a/src/gui/MainWindow.cpp +++ b/src/gui/MainWindow.cpp @@ -139,6 +139,7 @@ enum MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS, // debug->logging + MAINFRAME_MENU_ID_DEBUG_LOGGING_MESSAGE = 21499, MAINFRAME_MENU_ID_DEBUG_LOGGING0 = 21500, MAINFRAME_MENU_ID_DEBUG_ADVANCED_PPC_INFO = 21599, // debug->dump @@ -2205,7 +2206,7 @@ void MainWindow::RecreateMenu() debugLoggingMenu->AppendSeparator(); wxMenu* logCosModulesMenu = new wxMenu(); - logCosModulesMenu->AppendCheckItem(0, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false); + logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING_MESSAGE, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false); logCosModulesMenu->AppendSeparator(); logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitFile), _("coreinit File-Access API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitFile)); logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitThreadSync), _("coreinit Thread-Synchronization API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitThreadSync)); diff --git a/src/util/Fiber/FiberUnix.cpp b/src/util/Fiber/FiberUnix.cpp index 0d527069..36430449 100644 --- a/src/util/Fiber/FiberUnix.cpp +++ b/src/util/Fiber/FiberUnix.cpp @@ -15,7 +15,12 @@ Fiber::Fiber(void(*FiberEntryPoint)(void* userParam), void* userParam, void* pri ctx->uc_stack.ss_sp = m_stackPtr; ctx->uc_stack.ss_size = stackSize; ctx->uc_link = &ctx[0]; +#ifdef __arm64__ + // https://www.man7.org/linux/man-pages/man3/makecontext.3.html#NOTES + makecontext(ctx, (void(*)())FiberEntryPoint, 2, (uint64) userParam >> 32, userParam); +#else makecontext(ctx, (void(*)())FiberEntryPoint, 1, userParam); +#endif this->m_implData = (void*)ctx; } diff --git a/src/util/MemMapper/MemMapperUnix.cpp b/src/util/MemMapper/MemMapperUnix.cpp index 0ade291d..8e800e53 100644 --- a/src/util/MemMapper/MemMapperUnix.cpp +++ b/src/util/MemMapper/MemMapperUnix.cpp @@ -45,7 +45,11 @@ namespace MemMapper void* r; if(fromReservation) { - if( mprotect(baseAddr, size, GetProt(permissionFlags)) == 0 ) + uint64 page_size = sysconf(_SC_PAGESIZE); + void* page = baseAddr; + if ( (uint64) baseAddr % page_size != 0 ) + page = (void*) ((uint64)baseAddr & ~(page_size - 1)); + if( mprotect(page, size, GetProt(permissionFlags)) == 0 ) r = baseAddr; else r = nullptr; diff --git a/src/util/highresolutiontimer/HighResolutionTimer.cpp b/src/util/highresolutiontimer/HighResolutionTimer.cpp index 67ffa349..bb4a40ab 100644 --- a/src/util/highresolutiontimer/HighResolutionTimer.cpp +++ b/src/util/highresolutiontimer/HighResolutionTimer.cpp @@ -27,6 +27,8 @@ uint64 HighResolutionTimer::m_freq = []() -> uint64 { LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); return (uint64)(freq.QuadPart); +#elif BOOST_OS_MACOS + return 1000000000; #else timespec pc; clock_getres(CLOCK_MONOTONIC_RAW, &pc);