From 738449abe016bef13347d8a41abf3b0cb54d4f51 Mon Sep 17 00:00:00 2001 From: Alexander Frick Date: Sat, 19 Oct 2024 13:58:22 -0500 Subject: [PATCH] fix arm builds --- arm/third_party/libaom/BUILD.gn | 8 +- arm/third_party/libaom/cmake_update.sh | 6 +- arm/third_party/libvpx/BUILD.gn | 104 ++++- arm/third_party/xnnpack/BUILD.gn | 397 +++++++++++++++++-- arm/third_party/xnnpack/generate_build_gn.py | 66 ++- other/thorium-2024-ui.patch | 2 +- 6 files changed, 517 insertions(+), 66 deletions(-) diff --git a/arm/third_party/libaom/BUILD.gn b/arm/third_party/libaom/BUILD.gn index 2b892033..81f94073 100644 --- a/arm/third_party/libaom/BUILD.gn +++ b/arm/third_party/libaom/BUILD.gn @@ -61,8 +61,8 @@ if (is_nacl) { libaom_include_dirs = [ "source/config", - platform_include_dir, "source/libaom", + platform_include_dir, ] # Private configuration used in building libaom. @@ -73,6 +73,10 @@ config("libaom_config") { # This config is applied to targets that depend on libaom. config("libaom_public_config") { include_dirs = [ + # libaom_public_config does not have "source/config" (which gives access to + # the private header config/aom_version.h) because code outside the libaom + # library should use the aom_codec_version*() functions and macros declared + # in the public header aom_codec.h. "source/libaom", platform_include_dir, ] @@ -384,7 +388,7 @@ test("test_libaom") { deps = [ ":libaom", - "//third_party/googletest:gtest", + "//testing/gtest", ] defines = [ "CHROMIUM" ] diff --git a/arm/third_party/libaom/cmake_update.sh b/arm/third_party/libaom/cmake_update.sh index ee0cb2e2..8de5b822 100755 --- a/arm/third_party/libaom/cmake_update.sh +++ b/arm/third_party/libaom/cmake_update.sh @@ -36,6 +36,7 @@ export LC_ALL=C BASE=$(pwd) SRC="${BASE}/source/libaom" CFG="${BASE}/source/config" +TMP="$(mktemp -d "${BASE}/build.XXXX")" function cleanup() { rm -rf "${TMP}" @@ -114,7 +115,6 @@ git -C "${SRC}" fetch --tags # Scope 'trap' error reporting to configuration generation. ( -TMP=$(mktemp -d "${BASE}/build.XXXX") cd "${TMP}" trap '{ @@ -186,12 +186,12 @@ gen_config_files linux/arm-neon \ reset_dirs linux/arm-neon-cpu-detect gen_config_files linux/arm-neon-cpu-detect \ - "${toolchain}/armv7-linux-gcc.cmake -DCONFIG_RUNTIME_CPU_DETECT=1 -DENABLE_ARM_CRC32=0 -DENABLE_NEON_DOTPROD=0 -DENABLE_NEON_I8MM=0 \ + "${toolchain}/armv7-linux-gcc.cmake -DENABLE_ARM_CRC32=0 -DENABLE_NEON_DOTPROD=0 -DENABLE_NEON_I8MM=0 \ ${all_platforms}" reset_dirs linux/arm64-cpu-detect gen_config_files linux/arm64-cpu-detect \ - "${toolchain}/arm64-linux-gcc.cmake -DCONFIG_RUNTIME_CPU_DETECT=1 -DENABLE_ARM_CRC32=0 -DENABLE_NEON_DOTPROD=0 -DENABLE_NEON_I8MM=0 \ + "${toolchain}/arm64-linux-gcc.cmake -DENABLE_ARM_CRC32=0 -DENABLE_NEON_DOTPROD=0 -DENABLE_NEON_I8MM=0 \ ${all_platforms}" # Copy linux configurations and modify for Windows. diff --git a/arm/third_party/libvpx/BUILD.gn b/arm/third_party/libvpx/BUILD.gn index 5968f8bc..c1db398a 100644 --- a/arm/third_party/libvpx/BUILD.gn +++ b/arm/third_party/libvpx/BUILD.gn @@ -7,6 +7,7 @@ import("//build/config/arm.gni") import("//build/config/chromeos/ui_mode.gni") import("//testing/test.gni") import("//third_party/libvpx/libvpx_srcs.gni") +import("//third_party/libvpx/libvpx_test_srcs.gni") import("//third_party/nasm/nasm_assemble.gni") # Sets the architecture name for building libvpx. @@ -66,13 +67,20 @@ if (is_nacl) { libvpx_include_dirs = [ "source/config", - platform_include_dir, "source/libvpx", + platform_include_dir, ] # Private configuration used in building libvpx. config("libvpx_config") { include_dirs = libvpx_include_dirs + defines = [ + "CHROMIUM", + + # Maximum allowed for a direct mapping, + # see partition_alloc::internal::MaxDirectMapped() + "VPX_MAX_ALLOCABLE_MEMORY=((1ULL << 31) - (1 << 21))", + ] # gn orders flags on a target before flags from configs. The default config # adds -Wall, and these flags have to be after -Wall -- so they need to come @@ -99,19 +107,48 @@ config("libvpx_config") { # This config is applied to targets that depend on libvpx. config("libvpx_public_config") { include_dirs = [ + # libvpx_public_config does not have "source/config" (which gives access to + # the private header vpx_version.h) because code outside the libvpx library + # should use the vpx_codec_version*() functions and macros declared in the + # public header vpx_codec.h. "source/libvpx", platform_include_dir, ] } +source_set("libvpx_test_generic_headers") { + deps = [ ":libvpx" ] + sources = libvpx_test_srcs_generic_headers +} + +config("gtest_config") { + include_dirs = [ + "source/libvpx/third_party/googletest/src/include/", + "source/libvpx/third_party/googletest/src/", + ] +} + +source_set("gtest") { + sources = [ "source/libvpx/third_party/googletest/src/src/gtest-all.cc" ] + + configs -= [ "//build/config/compiler:chromium_code" ] + configs += [ "//build/config/compiler:no_chromium_code" ] + + public_configs = [ ":gtest_config" ] + + # gtest-death-test dependency on fdio for fuchsia builds + if (is_fuchsia) { + deps = [ + "//third_party/fuchsia-sdk/sdk/pkg/fdio", + "//third_party/fuchsia-sdk/sdk/pkg/zx", + ] + } +} + executable("decode_encode_profile_test") { configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] - include_dirs = libvpx_include_dirs + [ - "source/libvpx/third_party/libwebm/", - "source/libvpx/third_party/googletest/src/include/", - "source/libvpx/third_party/googletest/src/", - ] + include_dirs = [ "source/libvpx/third_party/libwebm/" ] testonly = true sources = [ @@ -121,7 +158,6 @@ executable("decode_encode_profile_test") { "source/libvpx/test/test_libvpx.cc", "source/libvpx/test/test_vectors.cc", "source/libvpx/test/test_vectors.h", - "source/libvpx/third_party/googletest/src/src/gtest-all.cc", "source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc", "source/libvpx/third_party/libwebm/mkvparser/mkvreader.cc", "source/libvpx/tools_common.h", @@ -129,15 +165,11 @@ executable("decode_encode_profile_test") { "source/libvpx/y4minput.c", "tests/pgo/decode_encode_profile_test.cc", ] - deps = [ ":libvpx" ] - - # gtest-death-test dependency on fdio for fuchsia builds - if (is_fuchsia) { - deps += [ - "//third_party/fuchsia-sdk/sdk/pkg/fdio", - "//third_party/fuchsia-sdk/sdk/pkg/zx", - ] - } + deps = [ + ":gtest", + ":libvpx", + ":libvpx_test_generic_headers", + ] } if (current_cpu == "x86" || (current_cpu == "x64" && !is_msan)) { @@ -461,10 +493,6 @@ source_set("libvpx_x86_64_headers") { sources = libvpx_srcs_x86_64_headers } -source_set("libvpx_loongarch_headers") { - sources = libvpx_srcs_loongarch_headers -} - source_set("libvpx_arm_headers") { sources = libvpx_srcs_arm_headers } @@ -493,6 +521,14 @@ source_set("libvpx_mips_headers") { sources = libvpx_srcs_mips_headers } +source_set("libvpx_loongarch_headers") { + sources = libvpx_srcs_loongarch_headers +} + +source_set("libvpx_ppc64_headers") { + sources = libvpx_srcs_ppc64_headers +} + source_set("libvpx_nacl_headers") { sources = libvpx_srcs_nacl_headers } @@ -548,6 +584,7 @@ static_library("libvpx") { } } else if (current_cpu == "ppc64") { sources = libvpx_srcs_ppc64 + public_deps = [ ":libvpx_ppc64_headers" ] } else if (current_cpu == "riscv64") { sources = libvpx_srcs_generic public_deps = [ ":libvpx_generic_headers" ] @@ -648,3 +685,30 @@ test("vp9_encoder_fuzz_test") { "//third_party/fuzztest:fuzztest_gtest_main", ] } + +test("test_libvpx") { + sources = libvpx_test_srcs_generic + sources += libvpx_test_srcs_generic_cc + + include_dirs = [ "source/libvpx/third_party/libwebm/" ] + + deps = [ + ":gtest", + ":libvpx", + ":libvpx_test_generic_headers", + ] + + if (is_android) { + use_raw_android_executable = true + defines = [ "LIBVPX_TEST_DATA_PATH=/data/local/tmp/chromium_tests_root/third_party/libvpx/testdata" ] + data = [ "testdata/" ] + } else { + defines = [ string_join("", + [ + "LIBVPX_TEST_DATA_PATH=", + rebase_path("testdata", root_build_dir), + ]) ] + } + + configs -= [ "//build/config/compiler:chromium_code" ] + configs += [ "//build/config/compiler:no_chromium_code" ] diff --git a/arm/third_party/xnnpack/BUILD.gn b/arm/third_party/xnnpack/BUILD.gn index 9bf3ce70..d2f505af 100644 --- a/arm/third_party/xnnpack/BUILD.gn +++ b/arm/third_party/xnnpack/BUILD.gn @@ -52,6 +52,10 @@ config("xnnpack_config") { "XNN_ENABLE_ARM_I8MM=1", ] } + + if (current_cpu == "x86" || current_cpu == "x64") { + defines += [ "XNN_ENABLE_AVXVNNI=1" ] + } } if (current_cpu == "x64" || current_cpu == "x86") { @@ -65,6 +69,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni", ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni", ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni-amx-tile-amx-int8", + ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni-avx512fp16", ":amalgam_f16c-fma-no-avx2", ":amalgam_f16c-no-avx2-no-fma", ":amalgam_sse2-no-sse3", @@ -85,6 +90,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { ":amalgam_f16c-fma-avx2_standalone", ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vbmi_standalone", ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni-amx-tile-amx-int8_standalone", + ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni-avx512fp16_standalone", ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni_standalone", ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni_standalone", ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl_standalone", @@ -173,6 +179,8 @@ source_set("xnnpack") { configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] sources = [ + "build_identifier.c", + "src/include/xnnpack.h", "src/src/allocator.c", "src/src/cache.c", "src/src/indirection.c", @@ -213,6 +221,8 @@ source_set("xnnpack_standalone") { configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] sources = [ + "build_identifier.c", + "src/include/xnnpack.h", "src/src/allocator.c", "src/src/cache.c", "src/src/indirection.c", @@ -257,7 +267,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mno-fma", ] - sources = [ "src/src/amalgam/gen/avx.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -282,7 +295,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mno-fma", ] - sources = [ "src/src/amalgam/gen/avx.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -310,7 +326,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mfma", ] - sources = [ "src/src/amalgam/gen/avxvnni.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avxvnni.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -335,7 +354,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mfma", ] - sources = [ "src/src/amalgam/gen/avxvnni.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avxvnni.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -358,7 +380,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { source_set("amalgam_avx512f") { cflags = [ "-mavx512f" ] - sources = [ "src/src/amalgam/gen/avx512f.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx512f.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -378,7 +403,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { source_set("amalgam_avx512f_standalone") { cflags = [ "-mavx512f" ] - sources = [ "src/src/amalgam/gen/avx512f.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx512f.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -405,7 +433,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mfma", ] - sources = [ "src/src/amalgam/gen/avx2.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx2.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -429,7 +460,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mfma", ] - sources = [ "src/src/amalgam/gen/avx2.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx2.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -460,7 +494,11 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mfma", ] - sources = [ "src/src/amalgam/gen/avx512skx.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx256skx.c", + "src/src/amalgam/gen/avx512skx.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -489,7 +527,11 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mfma", ] - sources = [ "src/src/amalgam/gen/avx512skx.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx256skx.c", + "src/src/amalgam/gen/avx512skx.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -522,7 +564,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mfma", ] - sources = [ "src/src/amalgam/gen/avx512vbmi.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx512vbmi.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -552,7 +597,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mfma", ] - sources = [ "src/src/amalgam/gen/avx512vbmi.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx512vbmi.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -585,7 +633,11 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mfma", ] - sources = [ "src/src/amalgam/gen/avx512vnni.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx256vnni.c", + "src/src/amalgam/gen/avx512vnni.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -615,7 +667,11 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mfma", ] - sources = [ "src/src/amalgam/gen/avx512vnni.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx256vnni.c", + "src/src/amalgam/gen/avx512vnni.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -649,7 +705,11 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mgfni", ] - sources = [ "src/src/amalgam/gen/avx512vnnigfni.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx256vnnigfni.c", + "src/src/amalgam/gen/avx512vnnigfni.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -680,7 +740,11 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mgfni", ] - sources = [ "src/src/amalgam/gen/avx512vnnigfni.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx256vnnigfni.c", + "src/src/amalgam/gen/avx512vnnigfni.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -716,7 +780,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mgfni", ] - sources = [ "src/src/amalgam/gen/avx512amx.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx512amx.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -749,7 +816,83 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mgfni", ] - sources = [ "src/src/amalgam/gen/avx512amx.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx512amx.c", + ] + + configs -= [ "//build/config/compiler:chromium_code" ] + configs += [ "//build/config/compiler:no_chromium_code" ] + configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] + + deps = [ + "//third_party/cpuinfo", + "//third_party/fp16", + "//third_party/fxdiv", + "//third_party/pthreadpool:pthreadpool_standalone", + ] + + public_configs = [ ":xnnpack_config" ] + + if (!(is_android && use_order_profiling)) { + assert_no_deps = [ "//base" ] + } + } + + source_set( + "amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni-avx512fp16") { + cflags = [ + "-mavx512bw", + "-mavx512cd", + "-mavx512dq", + "-mavx512f", + "-mavx512fp16", + "-mavx512vl", + "-mavx512vnni", + "-mf16c", + "-mfma", + "-mgfni", + ] + + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx512fp16.c", + ] + + configs -= [ "//build/config/compiler:chromium_code" ] + configs += [ "//build/config/compiler:no_chromium_code" ] + configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] + + deps = [ + "//third_party/cpuinfo", + "//third_party/fp16", + "//third_party/fxdiv", + "//third_party/pthreadpool", + ] + + public_configs = [ ":xnnpack_config" ] + } + + # This is a target that cannot depend on //base. + source_set( + "amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni-avx512fp16_standalone") { + cflags = [ + "-mavx512bw", + "-mavx512cd", + "-mavx512dq", + "-mavx512f", + "-mavx512fp16", + "-mavx512vl", + "-mavx512vnni", + "-mf16c", + "-mfma", + "-mgfni", + ] + + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/avx512fp16.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -776,7 +919,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mno-avx2", ] - sources = [ "src/src/amalgam/gen/fma3.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/fma3.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -800,7 +946,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mno-avx2", ] - sources = [ "src/src/amalgam/gen/fma3.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/fma3.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -827,7 +976,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mno-fma", ] - sources = [ "src/src/amalgam/gen/f16c.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/f16c.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -851,7 +1003,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mno-fma", ] - sources = [ "src/src/amalgam/gen/f16c.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/f16c.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -878,6 +1033,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { ] sources = [ + "src/include/xnnpack.h", "src/src/amalgam/gen/sse.c", "src/src/amalgam/gen/sse2.c", ] @@ -904,6 +1060,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { ] sources = [ + "src/include/xnnpack.h", "src/src/amalgam/gen/sse.c", "src/src/amalgam/gen/sse2.c", ] @@ -932,7 +1089,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-msse4.1", ] - sources = [ "src/src/amalgam/gen/sse41.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/sse41.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -955,7 +1115,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-msse4.1", ] - sources = [ "src/src/amalgam/gen/sse41.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/sse41.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -981,7 +1144,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mssse3", ] - sources = [ "src/src/amalgam/gen/ssse3.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/ssse3.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -1004,7 +1170,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { "-mssse3", ] - sources = [ "src/src/amalgam/gen/ssse3.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/ssse3.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -1027,7 +1196,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { source_set("amalgam_x64") { cflags = [] - sources = [ "src/src/amalgam/gen/scalar.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/scalar.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -1047,7 +1219,10 @@ if (current_cpu == "x64" || current_cpu == "x86") { source_set("amalgam_x64_standalone") { cflags = [] - sources = [ "src/src/amalgam/gen/scalar.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/scalar.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -1071,6 +1246,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/configs/argmaxpool-config.c", "src/src/configs/avgpool-config.c", "src/src/configs/binary-elementwise-config.c", @@ -1122,6 +1298,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/configs/argmaxpool-config.c", "src/src/configs/avgpool-config.c", "src/src/configs/binary-elementwise-config.c", @@ -1176,6 +1353,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/enums/allocation-type.c", "src/src/enums/datatype-strings.c", "src/src/enums/microkernel-type.c", @@ -1202,6 +1380,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/enums/allocation-type.c", "src/src/enums/datatype-strings.c", "src/src/enums/microkernel-type.c", @@ -1231,6 +1410,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/operators/argmax-pooling-nhwc.c", "src/src/operators/average-pooling-nhwc.c", "src/src/operators/batch-matrix-multiply-nc.c", @@ -1279,6 +1459,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/operators/argmax-pooling-nhwc.c", "src/src/operators/average-pooling-nhwc.c", "src/src/operators/batch-matrix-multiply-nc.c", @@ -1330,6 +1511,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/subgraph/abs.c", "src/src/subgraph/add2.c", "src/src/subgraph/argmax-pooling-2d.c", @@ -1342,19 +1524,23 @@ if (current_cpu == "x64" || current_cpu == "x86") { "src/src/subgraph/convert.c", "src/src/subgraph/convolution-2d.c", "src/src/subgraph/copy.c", + "src/src/subgraph/copysign.c", "src/src/subgraph/deconvolution-2d.c", "src/src/subgraph/depth-to-space-2d.c", "src/src/subgraph/depthwise-convolution-2d.c", "src/src/subgraph/divide.c", "src/src/subgraph/elu.c", "src/src/subgraph/even-split.c", + "src/src/subgraph/exp.c", "src/src/subgraph/floor.c", "src/src/subgraph/fully-connected-sparse.c", "src/src/subgraph/fully-connected.c", + "src/src/subgraph/gelu.c", "src/src/subgraph/global-average-pooling.c", "src/src/subgraph/global-sum-pooling.c", "src/src/subgraph/hardswish.c", "src/src/subgraph/leaky-relu.c", + "src/src/subgraph/log.c", "src/src/subgraph/max-pooling-2d.c", "src/src/subgraph/maximum2.c", "src/src/subgraph/minimum2.c", @@ -1403,6 +1589,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/subgraph/abs.c", "src/src/subgraph/add2.c", "src/src/subgraph/argmax-pooling-2d.c", @@ -1415,19 +1602,23 @@ if (current_cpu == "x64" || current_cpu == "x86") { "src/src/subgraph/convert.c", "src/src/subgraph/convolution-2d.c", "src/src/subgraph/copy.c", + "src/src/subgraph/copysign.c", "src/src/subgraph/deconvolution-2d.c", "src/src/subgraph/depth-to-space-2d.c", "src/src/subgraph/depthwise-convolution-2d.c", "src/src/subgraph/divide.c", "src/src/subgraph/elu.c", "src/src/subgraph/even-split.c", + "src/src/subgraph/exp.c", "src/src/subgraph/floor.c", "src/src/subgraph/fully-connected-sparse.c", "src/src/subgraph/fully-connected.c", + "src/src/subgraph/gelu.c", "src/src/subgraph/global-average-pooling.c", "src/src/subgraph/global-sum-pooling.c", "src/src/subgraph/hardswish.c", "src/src/subgraph/leaky-relu.c", + "src/src/subgraph/log.c", "src/src/subgraph/max-pooling-2d.c", "src/src/subgraph/maximum2.c", "src/src/subgraph/minimum2.c", @@ -1479,6 +1670,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/tables/exp2-k-over-2048.c", "src/src/tables/exp2-k-over-64.c", "src/src/tables/exp2minus-k-over-16.c", @@ -1509,6 +1701,7 @@ if (current_cpu == "x64" || current_cpu == "x86") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/tables/exp2-k-over-2048.c", "src/src/tables/exp2-k-over-64.c", "src/src/tables/exp2minus-k-over-16.c", @@ -1544,6 +1737,7 @@ if (current_cpu == "arm64") { configs += [ "//build/config/compiler:march_dotprod" ] sources = [ + "src/include/xnnpack.h", "src/src/amalgam/gen/neondot-aarch64.c", "src/src/amalgam/gen/neondot.c", ] @@ -1567,6 +1761,7 @@ if (current_cpu == "arm64") { configs += [ "//build/config/compiler:march_dotprod" ] sources = [ + "src/include/xnnpack.h", "src/src/amalgam/gen/neondot-aarch64.c", "src/src/amalgam/gen/neondot.c", ] @@ -1592,7 +1787,10 @@ if (current_cpu == "arm64") { source_set("amalgam_arch=armv8.2-a+dotprod+fp16") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] - sources = [ "src/src/amalgam/gen/neondotfp16arith.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/neondotfp16arith.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -1612,7 +1810,10 @@ if (current_cpu == "arm64") { source_set("amalgam_arch=armv8.2-a+dotprod+fp16_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] - sources = [ "src/src/amalgam/gen/neondotfp16arith.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/neondotfp16arith.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -1636,6 +1837,7 @@ if (current_cpu == "arm64") { configs += [ "//build/config/compiler:march_fp16" ] sources = [ + "src/include/xnnpack.h", "src/src/amalgam/gen/neonfp16arith-aarch64.c", "src/src/amalgam/gen/neonfp16arith.c", ] @@ -1659,6 +1861,7 @@ if (current_cpu == "arm64") { configs += [ "//build/config/compiler:march_fp16" ] sources = [ + "src/include/xnnpack.h", "src/src/amalgam/gen/neonfp16arith-aarch64.c", "src/src/amalgam/gen/neonfp16arith.c", ] @@ -1684,7 +1887,11 @@ if (current_cpu == "arm64") { source_set("amalgam_arch=armv8.2-a+i8mm+fp16") { configs += [ "//build/config/compiler:march_i8mm_f16" ] - sources = [ "src/src/amalgam/gen/neoni8mm.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/neoni8mm-aarch64.c", + "src/src/amalgam/gen/neoni8mm.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -1704,7 +1911,11 @@ if (current_cpu == "arm64") { source_set("amalgam_arch=armv8.2-a+i8mm+fp16_standalone") { configs += [ "//build/config/compiler:march_i8mm_f16" ] - sources = [ "src/src/amalgam/gen/neoni8mm.c" ] + sources = [ + "src/include/xnnpack.h", + "src/src/amalgam/gen/neoni8mm-aarch64.c", + "src/src/amalgam/gen/neoni8mm.c", + ] configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] @@ -1728,6 +1939,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/amalgam/gen/neon-aarch64.c", "src/src/amalgam/gen/neon.c", "src/src/amalgam/gen/neonfma-aarch64.c", @@ -1756,6 +1968,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/amalgam/gen/neon-aarch64.c", "src/src/amalgam/gen/neon.c", "src/src/amalgam/gen/neonfma-aarch64.c", @@ -1787,6 +2000,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/configs/argmaxpool-config.c", "src/src/configs/avgpool-config.c", "src/src/configs/binary-elementwise-config.c", @@ -1838,6 +2052,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/configs/argmaxpool-config.c", "src/src/configs/avgpool-config.c", "src/src/configs/binary-elementwise-config.c", @@ -1892,6 +2107,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/enums/allocation-type.c", "src/src/enums/datatype-strings.c", "src/src/enums/microkernel-type.c", @@ -1918,6 +2134,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/enums/allocation-type.c", "src/src/enums/datatype-strings.c", "src/src/enums/microkernel-type.c", @@ -1946,7 +2163,10 @@ if (current_cpu == "arm64") { source_set("f16-gemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f16-gemm/gen/f16-gemm-1x16-minmax-asm-aarch64-neonfp16arith-ld32.S", "src/src/f16-gemm/gen/f16-gemm-1x16-minmax-asm-aarch64-neonfp16arith-ld64.S", "src/src/f16-gemm/gen/f16-gemm-1x8-minmax-asm-aarch64-neonfp16arith-ld64.S", @@ -1989,7 +2209,10 @@ if (current_cpu == "arm64") { source_set("f16-gemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f16-gemm/gen/f16-gemm-1x16-minmax-asm-aarch64-neonfp16arith-ld32.S", "src/src/f16-gemm/gen/f16-gemm-1x16-minmax-asm-aarch64-neonfp16arith-ld64.S", "src/src/f16-gemm/gen/f16-gemm-1x8-minmax-asm-aarch64-neonfp16arith-ld64.S", @@ -2035,7 +2258,10 @@ if (current_cpu == "arm64") { source_set("f16-igemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f16-igemm/f16-igemm-1x16-minmax-asm-aarch64-neonfp16arith-ld32.S", "src/src/f16-igemm/f16-igemm-1x16-minmax-asm-aarch64-neonfp16arith-ld64.S", "src/src/f16-igemm/f16-igemm-4x16-minmax-asm-aarch64-neonfp16arith-ld32.S", @@ -2065,7 +2291,10 @@ if (current_cpu == "arm64") { source_set("f16-igemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f16-igemm/f16-igemm-1x16-minmax-asm-aarch64-neonfp16arith-ld32.S", "src/src/f16-igemm/f16-igemm-1x16-minmax-asm-aarch64-neonfp16arith-ld64.S", "src/src/f16-igemm/f16-igemm-4x16-minmax-asm-aarch64-neonfp16arith-ld32.S", @@ -2098,7 +2327,10 @@ if (current_cpu == "arm64") { source_set("f32-dwconv_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-dwconv/f32-dwconv-9p4c-minmax-asm-aarch64-neonfma-cortex-a55.S", "src/src/f32-dwconv/f32-dwconv-9p4c-minmax-asm-aarch64-neonfma.S", ] @@ -2121,7 +2353,10 @@ if (current_cpu == "arm64") { source_set("f32-dwconv_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-dwconv/f32-dwconv-9p4c-minmax-asm-aarch64-neonfma-cortex-a55.S", "src/src/f32-dwconv/f32-dwconv-9p4c-minmax-asm-aarch64-neonfma.S", ] @@ -2147,7 +2382,10 @@ if (current_cpu == "arm64") { source_set("f32-gemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-gemm/gen/f32-gemm-1x12-minmax-asm-aarch64-neonfma-cortex-a53.S", "src/src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2-prfm.S", "src/src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2.S", @@ -2214,7 +2452,10 @@ if (current_cpu == "arm64") { source_set("f32-gemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-gemm/gen/f32-gemm-1x12-minmax-asm-aarch64-neonfma-cortex-a53.S", "src/src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2-prfm.S", "src/src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2.S", @@ -2284,7 +2525,10 @@ if (current_cpu == "arm64") { source_set("f32-gemminc_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-gemminc/gen/f32-gemminc-1x12-minmax-asm-aarch64-neonfma-cortex-a53.S", "src/src/f32-gemminc/gen/f32-gemminc-1x8-minmax-asm-aarch64-neonfma-cortex-a53.S", "src/src/f32-gemminc/gen/f32-gemminc-1x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S", @@ -2326,7 +2570,10 @@ if (current_cpu == "arm64") { source_set("f32-gemminc_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-gemminc/gen/f32-gemminc-1x12-minmax-asm-aarch64-neonfma-cortex-a53.S", "src/src/f32-gemminc/gen/f32-gemminc-1x8-minmax-asm-aarch64-neonfma-cortex-a53.S", "src/src/f32-gemminc/gen/f32-gemminc-1x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S", @@ -2371,7 +2618,10 @@ if (current_cpu == "arm64") { source_set("f32-igemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-igemm/f32-igemm-1x12-minmax-asm-aarch64-neonfma-cortex-a53.S", "src/src/f32-igemm/f32-igemm-4x12-minmax-asm-aarch64-neonfma-cortex-a53.S", "src/src/f32-igemm/f32-igemm-4x8-minmax-asm-aarch64-neonfma-cortex-a55.S", @@ -2420,7 +2670,10 @@ if (current_cpu == "arm64") { source_set("f32-igemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-igemm/f32-igemm-1x12-minmax-asm-aarch64-neonfma-cortex-a53.S", "src/src/f32-igemm/f32-igemm-4x12-minmax-asm-aarch64-neonfma-cortex-a53.S", "src/src/f32-igemm/f32-igemm-4x8-minmax-asm-aarch64-neonfma-cortex-a55.S", @@ -2472,7 +2725,10 @@ if (current_cpu == "arm64") { source_set("f32-ppmm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-ppmm/gen/f32-ppmm-4x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S", "src/src/f32-ppmm/gen/f32-ppmm-4x8-minmax-asm-aarch64-neonfma-cortex-a75.S", "src/src/f32-ppmm/gen/f32-ppmm-4x8-minmax-asm-aarch64-neonfma-ld128-prfm.S", @@ -2501,7 +2757,10 @@ if (current_cpu == "arm64") { source_set("f32-ppmm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-ppmm/gen/f32-ppmm-4x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S", "src/src/f32-ppmm/gen/f32-ppmm-4x8-minmax-asm-aarch64-neonfma-cortex-a75.S", "src/src/f32-ppmm/gen/f32-ppmm-4x8-minmax-asm-aarch64-neonfma-ld128-prfm.S", @@ -2533,7 +2792,10 @@ if (current_cpu == "arm64") { source_set("f32-qc4w-gemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2-prfm.S", "src/src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2.S", "src/src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2-prfm.S", @@ -2576,7 +2838,10 @@ if (current_cpu == "arm64") { source_set("f32-qc4w-gemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2-prfm.S", "src/src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2.S", "src/src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2-prfm.S", @@ -2622,7 +2887,10 @@ if (current_cpu == "arm64") { source_set("f32-qc8w-gemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-qc8w-gemm/gen/f32-qc8w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2-prfm.S", "src/src/f32-qc8w-gemm/gen/f32-qc8w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2.S", "src/src/f32-qc8w-gemm/gen/f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2-prfm.S", @@ -2665,7 +2933,10 @@ if (current_cpu == "arm64") { source_set("f32-qc8w-gemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/f32-qc8w-gemm/gen/f32-qc8w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2-prfm.S", "src/src/f32-qc8w-gemm/gen/f32-qc8w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2.S", "src/src/f32-qc8w-gemm/gen/f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2-prfm.S", @@ -2712,6 +2983,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/operators/argmax-pooling-nhwc.c", "src/src/operators/average-pooling-nhwc.c", "src/src/operators/batch-matrix-multiply-nc.c", @@ -2760,6 +3032,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/operators/argmax-pooling-nhwc.c", "src/src/operators/average-pooling-nhwc.c", "src/src/operators/batch-matrix-multiply-nc.c", @@ -2810,7 +3083,10 @@ if (current_cpu == "arm64") { source_set("qd8-f16-qc8w-gemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qd8-f16-qc8w-gemm/gen/qd8-f16-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S", "src/src/qd8-f16-qc8w-gemm/gen/qd8-f16-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondotfp16arith-cortex-a55.S", ] @@ -2833,7 +3109,10 @@ if (current_cpu == "arm64") { source_set("qd8-f16-qc8w-gemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qd8-f16-qc8w-gemm/gen/qd8-f16-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S", "src/src/qd8-f16-qc8w-gemm/gen/qd8-f16-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondotfp16arith-cortex-a55.S", ] @@ -2859,7 +3138,10 @@ if (current_cpu == "arm64") { source_set("qd8-f16-qc8w-igemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qd8-f16-qc8w-igemm/gen/qd8-f16-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-cortex-a55.S", "src/src/qd8-f16-qc8w-igemm/gen/qd8-f16-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S", ] @@ -2882,7 +3164,10 @@ if (current_cpu == "arm64") { source_set("qd8-f16-qc8w-igemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qd8-f16-qc8w-igemm/gen/qd8-f16-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-cortex-a55.S", "src/src/qd8-f16-qc8w-igemm/gen/qd8-f16-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S", ] @@ -2908,7 +3193,10 @@ if (current_cpu == "arm64") { source_set("qd8-f32-qc8w-gemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-cortex-a55.S", "src/src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S", "src/src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-ld64.S", @@ -2932,7 +3220,10 @@ if (current_cpu == "arm64") { source_set("qd8-f32-qc8w-gemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-cortex-a55.S", "src/src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S", "src/src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-ld64.S", @@ -2959,7 +3250,10 @@ if (current_cpu == "arm64") { source_set("qd8-f32-qc8w-igemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-cortex-a55.S", "src/src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S", ] @@ -2982,7 +3276,10 @@ if (current_cpu == "arm64") { source_set("qd8-f32-qc8w-igemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-cortex-a55.S", "src/src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S", ] @@ -3008,7 +3305,10 @@ if (current_cpu == "arm64") { source_set("qs8-qc8w-gemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x16c4-minmax-fp32-asm-aarch64-neondot-ld32.S", "src/src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S", "src/src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53-prfm.S", @@ -3049,7 +3349,10 @@ if (current_cpu == "arm64") { source_set("qs8-qc8w-gemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x16c4-minmax-fp32-asm-aarch64-neondot-ld32.S", "src/src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S", "src/src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53-prfm.S", @@ -3093,7 +3396,10 @@ if (current_cpu == "arm64") { source_set("qs8-qc8w-igemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53-prfm.S", "src/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53.S", "src/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-prfm.S", @@ -3130,7 +3436,10 @@ if (current_cpu == "arm64") { source_set("qs8-qc8w-igemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53-prfm.S", "src/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53.S", "src/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-prfm.S", @@ -3170,7 +3479,10 @@ if (current_cpu == "arm64") { source_set("qu8-gemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S", "src/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S", "src/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S", @@ -3197,7 +3509,10 @@ if (current_cpu == "arm64") { source_set("qu8-gemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S", "src/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S", "src/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S", @@ -3227,7 +3542,10 @@ if (current_cpu == "arm64") { source_set("qu8-igemm_arch=armv8.2-a+fp16+dotprod") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S", "src/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S", "src/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S", @@ -3254,7 +3572,10 @@ if (current_cpu == "arm64") { source_set("qu8-igemm_arch=armv8.2-a+fp16+dotprod_standalone") { configs += [ "//build/config/compiler:march_dotprod_fp16" ] + asmflags = cflags + sources = [ + "src/include/xnnpack.h", "src/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S", "src/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S", "src/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S", @@ -3285,6 +3606,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/subgraph/abs.c", "src/src/subgraph/add2.c", "src/src/subgraph/argmax-pooling-2d.c", @@ -3297,19 +3619,23 @@ if (current_cpu == "arm64") { "src/src/subgraph/convert.c", "src/src/subgraph/convolution-2d.c", "src/src/subgraph/copy.c", + "src/src/subgraph/copysign.c", "src/src/subgraph/deconvolution-2d.c", "src/src/subgraph/depth-to-space-2d.c", "src/src/subgraph/depthwise-convolution-2d.c", "src/src/subgraph/divide.c", "src/src/subgraph/elu.c", "src/src/subgraph/even-split.c", + "src/src/subgraph/exp.c", "src/src/subgraph/floor.c", "src/src/subgraph/fully-connected-sparse.c", "src/src/subgraph/fully-connected.c", + "src/src/subgraph/gelu.c", "src/src/subgraph/global-average-pooling.c", "src/src/subgraph/global-sum-pooling.c", "src/src/subgraph/hardswish.c", "src/src/subgraph/leaky-relu.c", + "src/src/subgraph/log.c", "src/src/subgraph/max-pooling-2d.c", "src/src/subgraph/maximum2.c", "src/src/subgraph/minimum2.c", @@ -3358,6 +3684,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/subgraph/abs.c", "src/src/subgraph/add2.c", "src/src/subgraph/argmax-pooling-2d.c", @@ -3370,19 +3697,23 @@ if (current_cpu == "arm64") { "src/src/subgraph/convert.c", "src/src/subgraph/convolution-2d.c", "src/src/subgraph/copy.c", + "src/src/subgraph/copysign.c", "src/src/subgraph/deconvolution-2d.c", "src/src/subgraph/depth-to-space-2d.c", "src/src/subgraph/depthwise-convolution-2d.c", "src/src/subgraph/divide.c", "src/src/subgraph/elu.c", "src/src/subgraph/even-split.c", + "src/src/subgraph/exp.c", "src/src/subgraph/floor.c", "src/src/subgraph/fully-connected-sparse.c", "src/src/subgraph/fully-connected.c", + "src/src/subgraph/gelu.c", "src/src/subgraph/global-average-pooling.c", "src/src/subgraph/global-sum-pooling.c", "src/src/subgraph/hardswish.c", "src/src/subgraph/leaky-relu.c", + "src/src/subgraph/log.c", "src/src/subgraph/max-pooling-2d.c", "src/src/subgraph/maximum2.c", "src/src/subgraph/minimum2.c", @@ -3434,6 +3765,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/tables/exp2-k-over-2048.c", "src/src/tables/exp2-k-over-64.c", "src/src/tables/exp2minus-k-over-16.c", @@ -3464,6 +3796,7 @@ if (current_cpu == "arm64") { cflags = [] sources = [ + "src/include/xnnpack.h", "src/src/tables/exp2-k-over-2048.c", "src/src/tables/exp2-k-over-64.c", "src/src/tables/exp2minus-k-over-16.c", diff --git a/arm/third_party/xnnpack/generate_build_gn.py b/arm/third_party/xnnpack/generate_build_gn.py index d454c1ec..a4796468 100755 --- a/arm/third_party/xnnpack/generate_build_gn.py +++ b/arm/third_party/xnnpack/generate_build_gn.py @@ -41,14 +41,15 @@ import json import logging import os import platform +import tempfile import shutil import subprocess import sys import urllib.request -import zipfile from dataclasses import dataclass, field + _HEADER = ''' # Copyright 2022 The Chromium Authors # Use of this source code is governed by a BSD-style license that can be @@ -105,6 +106,12 @@ config("xnnpack_config") { "XNN_ENABLE_ARM_I8MM=1", ] } + + if (current_cpu == "x86" || current_cpu == "x64") { + defines += [ + "XNN_ENABLE_AVXVNNI=1", + ] + } } '''.strip() @@ -117,6 +124,8 @@ source_set("xnnpack") { configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] sources = [ + "src/include/xnnpack.h", + "build_identifier.c", %SRCS% ] @@ -139,6 +148,8 @@ source_set("xnnpack_standalone") { configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] sources = [ + "src/include/xnnpack.h", + "build_identifier.c", %SRCS% ] @@ -164,6 +175,7 @@ source_set("%TARGET_NAME%") { ] %ASMFLAGS% sources = [ + "src/include/xnnpack.h", %SRCS% ] @@ -188,6 +200,7 @@ source_set("%TARGET_NAME%_standalone") { ] %ASMFLAGS% sources = [ + "src/include/xnnpack.h", %SRCS% ] @@ -481,7 +494,9 @@ def _run_bazel_cmd(args): Runs a bazel command in the form of bazel . Returns the stdout, raising an Exception if the command failed. """ - exec_path = shutil.which("bazel") + + # Use standard Bazel install instead of the one included with depot_tools. + exec_path = "/usr/bin/bazel" if not exec_path: raise Exception( "bazel is not installed. Please run `sudo apt-get install " + @@ -519,6 +534,10 @@ def GenerateObjectBuilds(cpu): cpu: aarch64 or k8 """ logging.info(f'Querying xnnpack compile commands for {cpu} with bazel...') + # Make sure we have a clean start, this is important if the Android NDK + # version changed. + _run_bazel_cmd(['clean']) + basename = os.path.basename(_TOOLCHAIN_DIR) crosstool_top = f'//{basename}:cc_suite' logs = _run_bazel_cmd([ @@ -631,17 +650,36 @@ def MakeXNNPACKDepsList(target_sss): return deps_list + def EnsureAndroidNDK(): """ Ensures that the Android NDK is available and bazel can find it later. + + This must use command line utilities instead of native Python as a workaround + for https://github.com/python/cpython/issues/59999. """ - if 'ANDROID_NDK_HOME' in os.environ: - return - logging.info('Downloading a copy of the Android NDK for bazel') - resp = urllib.request.urlopen('https://dl.google.com/android/repository/android-ndk-r19c-linux-x86_64.zip') + tempdir = tempfile.mkdtemp() + zipdownload = os.path.join(tempdir, 'android-ndk-r25b-linux.zip') + extractdir = os.path.join(tempdir, 'android-ndk-r25b') + logging.info('Downloading a copy of the Android NDK') + subprocess.check_call( + [ + 'curl', + 'https://dl.google.com/android/repository/android-ndk-r25b-linux.zip', + '-o', + zipdownload, + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) logging.info('Unpacking the Android NDK') - zipfile.ZipFile(io.BytesIO(resp.read())).extractall(path='/tmp/') - os.environ['ANDROID_NDK_HOME'] = '/tmp/android-ndk-r19c' + subprocess.check_call( + ['unzip', zipdownload, '-d', extractdir], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + os.environ['ANDROID_NDK_HOME'] = os.path.join(extractdir, 'android-ndk-r25b') + def MakeXNNPACKSourceSet(ss): """ @@ -654,6 +692,16 @@ def MakeXNNPACKSourceSet(ss): return target +# Generates the `build_identifier.c` using bazel and copies to the correct directory. +def GenerateBuildIdentifier(): + _run_bazel_cmd(['build', 'generate_build_identifier']) + bazel_bin_dir =_run_bazel_cmd(['info', 'bazel-bin']).strip() + build_identifier_src = os.path.join(bazel_bin_dir, 'src', 'build_identifier.c') + assert os.path.exists(build_identifier_src) + build_identifier_dst = os.path.join(_xnnpack_dir(), 'build_identifier.c') + logging.info(f'Copying {build_identifier_src} to {build_identifier_dst}') + shutil.copyfile(build_identifier_src, build_identifier_dst) + def main(): logging.basicConfig(level=logging.INFO) @@ -717,6 +765,8 @@ def main(): f.write('\n\n') f.write('}\n') + GenerateBuildIdentifier() + logging.info('Done! Please run `git cl format`') diff --git a/other/thorium-2024-ui.patch b/other/thorium-2024-ui.patch index 81efda0e..a83ba2b0 100644 --- a/other/thorium-2024-ui.patch +++ b/other/thorium-2024-ui.patch @@ -1329,7 +1329,7 @@ index 65676013fece9..8041d18f35745 100644 } } -+ if (features::IsThorium2024()) { ++ if (features::IsThorium2024() && !is_win) { + constexpr int Th24StrokeOffset = 1; + top -= Th24StrokeOffset; + //bottom -= Th24StrokeOffset;