| From 576b64d6a4253f900d4846503df55dba051063ab Mon Sep 17 00:00:00 2001 |
| From: gxw <guxiwei-hf@loongson.cn> |
| Date: Tue, 22 Oct 2019 19:20:19 +0800 |
| Subject: [PATCH] Adjust the mmi/msa detection mode for mips platform. |
| |
| Use mips-simd-check.sh to test, before building, whether the current |
| compiler supports mmi/msa. If it does, enable mmi/msa. |
| For runtime detection, use the model name in /proc/cpuinfo to decide |
| whether the current cpu supports mmi/msa. |
| The following make invocations can now be used on the mips platform: |
| 1. make (automatic detection mmi/msa) |
| 2. make ENABLE_MMI=No (disable mmi) |
| 3. make ENABLE_MSA=No (disable msa) |
| 4. make ENABLE_MMI=No ENABLE_MSA=No (disable mmi and msa) |
| |
| Change-Id: Ibd348ebc11912d7fca1b548c76838675d69b7c40 |
| |
| Downloaded from upstream PR: |
| https://github.com/cisco/openh264/pull/3175 |
| |
| Signed-off-by: gxw <guxiwei-hf@loongson.cn> |
| [Bernd: rebased on top of patch 0001] |
| Signed-off-by: Bernd Kuhls <bernd.kuhls@t-online.de> |
| --- |
| Makefile | 2 ++ |
| build/arch.mk | 18 ++++++++++++---- |
| build/mips-simd-check.sh | 32 +++++++++++++++++++++++++++ |
| build/mktargets.py | 43 ++++++++++++++++++++++++++----------- |
| codec/common/inc/cpu_core.h | 1 + |
| codec/common/src/cpu.cpp | 37 +++++++++++++++++++++++++------ |
| codec/common/targets.mk | 18 ++++++++++++---- |
| codec/decoder/targets.mk | 18 ++++++++++++---- |
| codec/encoder/targets.mk | 18 ++++++++++++---- |
| codec/processing/targets.mk | 18 ++++++++++++---- |
| 10 files changed, 166 insertions(+), 39 deletions(-) |
| create mode 100755 build/mips-simd-check.sh |
| |
| diff --git a/Makefile b/Makefile |
| index 74ff029d9..65d13630b 100644 |
| --- a/Makefile |
| +++ b/Makefile |
| @@ -35,6 +35,8 @@ GTEST_VER=release-1.8.1 |
| STATIC_LDFLAGS=-lstdc++ |
| STRIP ?= strip |
| USE_STACK_PROTECTOR = Yes |
| +ENABLE_MMI=Yes |
| +ENABLE_MSA=Yes |
| |
| SHAREDLIB_MAJORVERSION=5 |
| FULL_VERSION := 2.0.0 |
| diff --git a/build/arch.mk b/build/arch.mk |
| index 8ac3e70a5..555e4afec 100644 |
| --- a/build/arch.mk |
| +++ b/build/arch.mk |
| @@ -30,14 +30,24 @@ CFLAGS += -DHAVE_NEON_AARCH64 |
| endif |
| endif |
| |
| -#for loongson |
| +#for mips |
| ifneq ($(filter mips mips64, $(ARCH)),) |
| ifeq ($(USE_ASM), Yes) |
| ASM_ARCH = mips |
| ASMFLAGS += -I$(SRC_PATH)codec/common/mips/ |
| -LOONGSON3A = $(shell g++ -dM -E - < /dev/null | grep '_MIPS_TUNE ' | cut -f 3 -d " ") |
| -ifeq ($(LOONGSON3A), "loongson3a") |
| -CFLAGS += -DHAVE_MMI |
| +#mmi: probe the compiler once (:= stores the result; CC is quoted so a multi-word CC stays one argument) |
| +ifeq ($(ENABLE_MMI), Yes) |
| +ENABLE_MMI := $(shell $(SRC_PATH)build/mips-simd-check.sh "$(CC)" mmi) |
| +ifeq ($(ENABLE_MMI), Yes) |
| +CFLAGS += -march=loongson3a -DHAVE_MMI |
| +endif |
| +endif |
| +#msa: probe the compiler once (:= stores the result; CC is quoted so a multi-word CC stays one argument) |
| +ifeq ($(ENABLE_MSA), Yes) |
| +ENABLE_MSA := $(shell $(SRC_PATH)build/mips-simd-check.sh "$(CC)" msa) |
| +ifeq ($(ENABLE_MSA), Yes) |
| +CFLAGS += -mmsa -DHAVE_MSA |
| +endif |
| endif |
| endif |
| endif |
| diff --git a/build/mips-simd-check.sh b/build/mips-simd-check.sh |
| new file mode 100755 |
| index 000000000..cbc29e3d3 |
| --- /dev/null |
| +++ b/build/mips-simd-check.sh |
| @@ -0,0 +1,32 @@ |
| +#!/bin/bash |
| +#********************************************************************************** |
| +# This script is used by build/arch.mk on mips to detect whether the compiler |
| +# supports the simd instructions: mmi, msa (maybe more in the future). |
| +# It prints "Yes" when the probe program builds, and prints nothing otherwise. |
| +# |
| +# --usage: |
| +# ./mips-simd-check.sh "$(CC)" mmi |
| +# or ./mips-simd-check.sh "$(CC)" msa |
| +# |
| +# date: 10/17/2019 Created |
| +#********************************************************************************** |
| + |
| +# Probe files live in a private temp dir that is removed on any exit. |
| +TMPD=$(mktemp -d) || exit 1 |
| +trap 'rm -rf "$TMPD"' EXIT |
| +TMPC="$TMPD/test.c" |
| +TMPO="$TMPD/test.o" |
| +# Pick the probe instruction and the compiler flag that enables it. |
| +case "$2" in |
| +  mmi) ASM='punpcklhw $f0, $f0, $f0'; FLAG=-march=loongson3a ;; |
| +  msa) ASM='addvi.b $w0, $w1, 1'; FLAG=-mmsa ;; |
| +  *) exit 0 ;; |
| +esac |
| +# Build a one-instruction program; a non-empty output file means it is supported. |
| +echo "int main(void){ __asm__ volatile(\"$ASM\"); return 0; }" > "$TMPC" |
| +# $1 is deliberately unquoted: CC may carry extra words (e.g. "ccache gcc"). |
| +$1 $FLAG "$TMPC" -o "$TMPO" &> /dev/null |
| +if test -s "$TMPO" |
| +then |
| +  echo "Yes" |
| +fi |
| diff --git a/build/mktargets.py b/build/mktargets.py |
| index 593280c09..518909d3d 100755 |
| --- a/build/mktargets.py |
| +++ b/build/mktargets.py |
| @@ -119,9 +119,9 @@ def find_sources(): |
| armfiles.append(file) |
| mipsfiles = [] |
| for file in cfiles: |
| - c = file.split('/') |
| - if 'mips' in c: |
| - mipsfiles.append(file) |
| + c = file.split('/') |
| + if 'mips' in c: |
| + mipsfiles.append(file) |
| cfiles = [x for x in cfiles if x not in mipsfiles] |
| |
| |
| @@ -181,15 +181,34 @@ def find_sources(): |
| f.write("OBJS += $(%s_OBJSARM64)\n\n"%(PREFIX)) |
| |
| if len(mipsfiles) > 0: |
| - f.write("%s_ASM_MIPS_SRCS=\\\n"%(PREFIX)) |
| - for c in mipsfiles: |
| - f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c)) |
| - f.write("\n") |
| - f.write("%s_OBJSMIPS += $(%s_ASM_MIPS_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX)) |
| - f.write("ifeq ($(ASM_ARCH), mips)\n") |
| - f.write("%s_OBJS += $(%s_OBJSMIPS)\n"%(PREFIX,PREFIX)) |
| - f.write("endif\n") |
| - f.write("OBJS += $(%s_OBJSMIPS)\n\n"%(PREFIX)) |
| + mmifiles = [] |
| + for file in mipsfiles: |
| + if '_mmi' in file: |
| + mmifiles.append(file) |
| + f.write("%s_ASM_MIPS_MMI_SRCS=\\\n"%(PREFIX)) |
| + for c in mmifiles: |
| + f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c)) |
| + f.write("\n") |
| + f.write("%s_OBJSMIPS_MMI += $(%s_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))\n\n"%(PREFIX, PREFIX)) |
| + msafiles = [] |
| + for file in mipsfiles: |
| + if '_msa' in file: |
| + msafiles.append(file) |
| + f.write("%s_ASM_MIPS_MSA_SRCS=\\\n"%(PREFIX)) |
| + for c in msafiles: |
| + f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c)) |
| + f.write("\n") |
| + f.write("%s_OBJSMIPS_MSA += $(%s_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX)) |
| + f.write("ifeq ($(ASM_ARCH), mips)\n") |
| + f.write("ifeq ($(ENABLE_MMI), Yes)\n") |
| + f.write("%s_OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX,PREFIX)) |
| + f.write("endif\n") |
| + f.write("ifeq ($(ENABLE_MSA), Yes)\n") |
| + f.write("%s_OBJS += $(%s_OBJSMIPS_MSA)\n"%(PREFIX,PREFIX)) |
| + f.write("endif\n") |
| + f.write("endif\n") |
| + f.write("OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX)) |
| + f.write("OBJS += $(%s_OBJSMIPS_MSA)\n\n"%(PREFIX)) |
| |
| f.write("OBJS += $(%s_OBJS)\n\n"%(PREFIX)) |
| write_cpp_rule_pattern(f) |
| diff --git a/codec/common/inc/cpu_core.h b/codec/common/inc/cpu_core.h |
| index e5906c62b..f25787b04 100644 |
| --- a/codec/common/inc/cpu_core.h |
| +++ b/codec/common/inc/cpu_core.h |
| @@ -86,6 +86,7 @@ |
| |
| /* For loongson */ |
| #define WELS_CPU_MMI 0x00000001 /* mmi */ |
| +#define WELS_CPU_MSA 0x00000002 /* msa */ |
| |
| /* |
| * Interfaces for CPU core feature detection as below |
| diff --git a/codec/common/src/cpu.cpp b/codec/common/src/cpu.cpp |
| index a39fd0645..94bb2d5d3 100644 |
| --- a/codec/common/src/cpu.cpp |
| +++ b/codec/common/src/cpu.cpp |
| @@ -309,12 +309,37 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
| |
| #elif defined(mips) |
| /* for loongson */ |
| +static uint32_t get_cpu_flags_from_cpuinfo(void) |
| +{ |
| + uint32_t flags = 0; |
| + |
| +# ifdef __linux__ |
| + FILE* fp = fopen("/proc/cpuinfo", "r"); |
| + if (!fp) |
| + return flags; |
| + |
| + char buf[200]; |
| + memset(buf, 0, sizeof(buf)); |
| + while (fgets(buf, sizeof(buf), fp)) { |
| + if (!strncmp(buf, "model name", strlen("model name"))) { |
| + if (strstr(buf, "3A4000")) { |
| + flags |= WELS_CPU_MSA | WELS_CPU_MMI; |
| + } else if (strstr(buf, "2K1000")) { |
| + flags |= WELS_CPU_MSA | WELS_CPU_MMI; |
| + } else if (strstr(buf, "Loongson-3A") || strstr(buf, "Loongson-3B")) { |
| + flags |= WELS_CPU_MMI; |
| + } |
| + break; |
| + } |
| + } |
| + fclose(fp); |
| +# endif |
| + |
| + return flags; |
| +} |
| + |
| uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
| -#if defined(HAVE_MMI) |
| - return WELS_CPU_MMI; |
| -#else |
| - return 0; |
| -#endif |
| + return get_cpu_flags_from_cpuinfo(); |
| } |
| |
| #else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 nor mips */ |
| @@ -324,5 +349,3 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
| } |
| |
| #endif |
| - |
| - |
| diff --git a/codec/common/targets.mk b/codec/common/targets.mk |
| index 96843cd9d..f2cd192fd 100644 |
| --- a/codec/common/targets.mk |
| +++ b/codec/common/targets.mk |
| @@ -66,18 +66,28 @@ COMMON_OBJS += $(COMMON_OBJSARM64) |
| endif |
| OBJS += $(COMMON_OBJSARM64) |
| |
| -COMMON_ASM_MIPS_SRCS=\ |
| +COMMON_ASM_MIPS_MMI_SRCS=\ |
| $(COMMON_SRCDIR)/mips/copy_mb_mmi.c\ |
| $(COMMON_SRCDIR)/mips/deblock_mmi.c\ |
| $(COMMON_SRCDIR)/mips/expand_picture_mmi.c\ |
| $(COMMON_SRCDIR)/mips/intra_pred_com_mmi.c\ |
| $(COMMON_SRCDIR)/mips/satd_sad_mmi.c\ |
| |
| -COMMON_OBJSMIPS += $(COMMON_ASM_MIPS_SRCS:.c=.$(OBJ)) |
| +COMMON_OBJSMIPS_MMI += $(COMMON_ASM_MIPS_MMI_SRCS:.c=.$(OBJ)) |
| + |
| +COMMON_ASM_MIPS_MSA_SRCS=\ |
| + |
| +COMMON_OBJSMIPS_MSA += $(COMMON_ASM_MIPS_MSA_SRCS:.c=.$(OBJ)) |
| ifeq ($(ASM_ARCH), mips) |
| -COMMON_OBJS += $(COMMON_OBJSMIPS) |
| +ifeq ($(ENABLE_MMI), Yes) |
| +COMMON_OBJS += $(COMMON_OBJSMIPS_MMI) |
| +endif |
| +ifeq ($(ENABLE_MSA), Yes) |
| +COMMON_OBJS += $(COMMON_OBJSMIPS_MSA) |
| +endif |
| endif |
| -OBJS += $(COMMON_OBJSMIPS) |
| +OBJS += $(COMMON_OBJSMIPS_MMI) |
| +OBJS += $(COMMON_OBJSMIPS_MSA) |
| |
| OBJS += $(COMMON_OBJS) |
| |
| diff --git a/codec/decoder/targets.mk b/codec/decoder/targets.mk |
| index c01618411..88dc5afb1 100644 |
| --- a/codec/decoder/targets.mk |
| +++ b/codec/decoder/targets.mk |
| @@ -57,14 +57,24 @@ DECODER_OBJS += $(DECODER_OBJSARM64) |
| endif |
| OBJS += $(DECODER_OBJSARM64) |
| |
| -DECODER_ASM_MIPS_SRCS=\ |
| +DECODER_ASM_MIPS_MMI_SRCS=\ |
| $(DECODER_SRCDIR)/core/mips/dct_mmi.c\ |
| |
| -DECODER_OBJSMIPS += $(DECODER_ASM_MIPS_SRCS:.c=.$(OBJ)) |
| +DECODER_OBJSMIPS_MMI += $(DECODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ)) |
| + |
| +DECODER_ASM_MIPS_MSA_SRCS=\ |
| + |
| +DECODER_OBJSMIPS_MSA += $(DECODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ)) |
| ifeq ($(ASM_ARCH), mips) |
| -DECODER_OBJS += $(DECODER_OBJSMIPS) |
| +ifeq ($(ENABLE_MMI), Yes) |
| +DECODER_OBJS += $(DECODER_OBJSMIPS_MMI) |
| +endif |
| +ifeq ($(ENABLE_MSA), Yes) |
| +DECODER_OBJS += $(DECODER_OBJSMIPS_MSA) |
| +endif |
| endif |
| -OBJS += $(DECODER_OBJSMIPS) |
| +OBJS += $(DECODER_OBJSMIPS_MMI) |
| +OBJS += $(DECODER_OBJSMIPS_MSA) |
| |
| OBJS += $(DECODER_OBJS) |
| |
| diff --git a/codec/encoder/targets.mk b/codec/encoder/targets.mk |
| index 1f053280e..4fb2e690e 100644 |
| --- a/codec/encoder/targets.mk |
| +++ b/codec/encoder/targets.mk |
| @@ -82,16 +82,26 @@ ENCODER_OBJS += $(ENCODER_OBJSARM64) |
| endif |
| OBJS += $(ENCODER_OBJSARM64) |
| |
| -ENCODER_ASM_MIPS_SRCS=\ |
| +ENCODER_ASM_MIPS_MMI_SRCS=\ |
| $(ENCODER_SRCDIR)/core/mips/dct_mmi.c\ |
| $(ENCODER_SRCDIR)/core/mips/quant_mmi.c\ |
| $(ENCODER_SRCDIR)/core/mips/score_mmi.c\ |
| |
| -ENCODER_OBJSMIPS += $(ENCODER_ASM_MIPS_SRCS:.c=.$(OBJ)) |
| +ENCODER_OBJSMIPS_MMI += $(ENCODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ)) |
| + |
| +ENCODER_ASM_MIPS_MSA_SRCS=\ |
| + |
| +ENCODER_OBJSMIPS_MSA += $(ENCODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ)) |
| ifeq ($(ASM_ARCH), mips) |
| -ENCODER_OBJS += $(ENCODER_OBJSMIPS) |
| +ifeq ($(ENABLE_MMI), Yes) |
| +ENCODER_OBJS += $(ENCODER_OBJSMIPS_MMI) |
| +endif |
| +ifeq ($(ENABLE_MSA), Yes) |
| +ENCODER_OBJS += $(ENCODER_OBJSMIPS_MSA) |
| +endif |
| endif |
| -OBJS += $(ENCODER_OBJSMIPS) |
| +OBJS += $(ENCODER_OBJSMIPS_MMI) |
| +OBJS += $(ENCODER_OBJSMIPS_MSA) |
| |
| OBJS += $(ENCODER_OBJS) |
| |
| diff --git a/codec/processing/targets.mk b/codec/processing/targets.mk |
| index 300de2d80..0f8873335 100644 |
| --- a/codec/processing/targets.mk |
| +++ b/codec/processing/targets.mk |
| @@ -58,14 +58,24 @@ PROCESSING_OBJS += $(PROCESSING_OBJSARM64) |
| endif |
| OBJS += $(PROCESSING_OBJSARM64) |
| |
| -PROCESSING_ASM_MIPS_SRCS=\ |
| +PROCESSING_ASM_MIPS_MMI_SRCS=\ |
| $(PROCESSING_SRCDIR)/src/mips/vaa_mmi.c\ |
| |
| -PROCESSING_OBJSMIPS += $(PROCESSING_ASM_MIPS_SRCS:.c=.$(OBJ)) |
| +PROCESSING_OBJSMIPS_MMI += $(PROCESSING_ASM_MIPS_MMI_SRCS:.c=.$(OBJ)) |
| + |
| +PROCESSING_ASM_MIPS_MSA_SRCS=\ |
| + |
| +PROCESSING_OBJSMIPS_MSA += $(PROCESSING_ASM_MIPS_MSA_SRCS:.c=.$(OBJ)) |
| ifeq ($(ASM_ARCH), mips) |
| -PROCESSING_OBJS += $(PROCESSING_OBJSMIPS) |
| +ifeq ($(ENABLE_MMI), Yes) |
| +PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MMI) |
| +endif |
| +ifeq ($(ENABLE_MSA), Yes) |
| +PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MSA) |
| +endif |
| endif |
| -OBJS += $(PROCESSING_OBJSMIPS) |
| +OBJS += $(PROCESSING_OBJSMIPS_MMI) |
| +OBJS += $(PROCESSING_OBJSMIPS_MSA) |
| |
| OBJS += $(PROCESSING_OBJS) |
| |