1. Introduction to Valgrind
Valgrind is a software development tool for memory debugging, memory-leak detection and profiling. It was originally designed by Julian Seward; during development, before its first release, it was named "Heimdall", but that name clashed with an existing security package and was changed. The final name refers to the main entrance of Valhalla in Norse mythology. In 2006 Julian Seward received the second Google-O'Reilly Open Source Award for his work on Valgrind, and many other people have since made important contributions to the project.
2. Why port to ARMv5
Our main working platform is an ARM926-based CPU, which implements the ARMv5 architecture. Valgrind's ARM port, however, targets ARMv7 and does not support ARMv5: even if it compiles, the resulting binary will not run on an ARMv5 machine, because the generated code relies on ARMv7-only instructions such as movw/movt. The sources therefore have to be modified to support the older architecture.
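Before changing anything it is worth confirming what the cross-toolchain actually targets. This is only an illustrative check, not part of the original procedure: it asks GCC to dump its predefined architecture macros.

arm-linux-gnueabi-gcc -march=armv5te -mcpu=arm926ej-s -dM -E - < /dev/null | grep __ARM_ARCH
# With the ARMv5TE flags this should list __ARM_ARCH_5TE__ (and, on newer GCC, __ARM_ARCH 5);
# a toolchain defaulting to ARMv7 would instead show __ARM_ARCH_7A__, which is exactly the
# guard macro the patch below keys on.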
3. Preparation
Source packages for every Valgrind release are available at https://sourceware.org/pub/valgrind. We need the 3.14.0 tarball, because the ARMv5 patch was made against that version.
# https://sourceware.org/pub/valgrind
wget https://sourceware.org/pub/valgrind/valgrind-3.14.0.tar.bz2
tar -xvjf valgrind-3.14.0.tar.bz2
cd valgrind-3.14.0
# https://bugs.kde.org/buglist.cgi?quicksearch=Valgrind%20ARMv5
# https://bugs.kde.org/show_bug.cgi?id=248998
wget https://bugsfiles.kde.org/attachment.cgi?id=117327 -O valgrind-armv5.patch
patch -p1 < valgrind-armv5.patch
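Since the patch modifies configure.ac and Makefile.all.am rather than the generated configure script, the build system most likely needs to be regenerated before configuring. The steps below are an assumption based on the files the patch touches, not part of the original write-up:

# regenerate configure / Makefile.in so the new FLAG_MCPU_CORTEX_A8 substitution takes effect
./autogen.sh
# sanity-check that the patch applied: the ARMv5 fallbacks are guarded by __ARM_ARCH_7A__
grep -c __ARM_ARCH_7A__ coregrind/m_dispatch/dispatch-arm-linux.S
grep -n FLAG_MCPU_CORTEX_A8 configure.ac Makefile.all.am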
4. Cross-compiling and installation
./configure --host=arm-linux-gnueabi --prefix=/usr --enable-only32bit \
    CC="arm-linux-gnueabi-gcc -march=armv5te -mcpu=arm926ej-s -mfloat-abi=soft" \
    CXX="arm-linux-gnueabi-g++ -march=armv5te -mcpu=arm926ej-s -mfloat-abi=soft"
make -j2
DESTDIR=$PWD/install make install
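The staged tree under ./install mirrors the --prefix layout and still has to be copied onto the target's root filesystem. The host name and copy method below are assumptions; adapt them to your board:

# copy the staged files onto the target (illustrative; NFS or a prebuilt rootfs works just as well)
scp -r install/usr/* root@target-board:/usr/
# on the target, point valgrind at its tool libraries and make sure it starts
export VALGRIND_LIB=/usr/lib/valgrind
valgrind --version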
5. Usage
# export VALGRIND_LIB=/usr/lib/valgrind
valgrind --tool=memcheck --leak-check=full --show-reachable=yes ./test
valgrind --leak-check=full --show-reachable=yes ./test
valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./test
# for more options, see: valgrind --help
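To verify the port end to end, a tiny program with a deliberate leak can be built with the same cross flags and run under memcheck. The file name test.c and the snippet itself are only an example, not part of the original instructions:

cat > test.c << 'EOF'
#include <stdlib.h>
#include <string.h>

int main(void)
{
    char *p = malloc(32);   /* never freed: memcheck should report it as definitely lost */
    strcpy(p, "leaked");
    return 0;
}
EOF
arm-linux-gnueabi-gcc -march=armv5te -mcpu=arm926ej-s -mfloat-abi=soft -g -o test test.c
# copy ./test to the target and run it with one of the valgrind command lines above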
6. Patch contents
The full text of valgrind-armv5.patch follows.
diff -Naur valgrind-3.14.0/configure.ac valgrind-3.14.0-armv5/configure.ac
--- valgrind-3.14.0/configure.ac 2018-10-09 08:26:33.000000000 +0100
+++ valgrind-3.14.0-armv5/configure.ac 2019-01-04 17:58:37.415112119 +0000
@@ -255,8 +255,15 @@
armv7*)
AC_MSG_RESULT([ok (${host_cpu})])
ARCH_MAX="arm"
+ FLAG_MCPU_CORTEX_A8="-mcpu=cortex-a8"
;;
+ arm*)
+ AC_MSG_RESULT([ok (${host_cpu})])
+ ARCH_MAX="arm"
+ FLAG_MCPU_CORTEX_A8=""
+ ;;
+
aarch64*)
AC_MSG_RESULT([ok (${host_cpu})])
ARCH_MAX="arm64"
@@ -293,6 +300,8 @@
;;
esac
+AC_SUBST(FLAG_MCPU_CORTEX_A8)
+
#----------------------------------------------------------------------------
# Sometimes it's convenient to subvert the bi-arch build system and
diff -Naur valgrind-3.14.0/coregrind/m_dispatch/dispatch-arm-linux.S valgrind-3.14.0-armv5/coregrind/m_dispatch/dispatch-arm-linux.S
--- valgrind-3.14.0/coregrind/m_dispatch/dispatch-arm-linux.S 2018-05-05 08:42:22.000000000 +0100
+++ valgrind-3.14.0-armv5/coregrind/m_dispatch/dispatch-arm-linux.S 2019-01-04 17:58:37.416112114 +0000
@@ -67,10 +67,16 @@
order to keep the stack 8-aligned. */
push {r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
+ ldr r4, =VG_(machine_arm_has_fpscr)
+ ldr r4, [r4]
+ cmp r4, #0
+ beq no_fpscr_setup
+
/* set FPSCR to vex-required default value */
mov r4, #0
fmxr fpscr, r4
+no_fpscr_setup:
/* Set up the guest state pointer */
mov r8, r1
@@ -87,6 +93,11 @@
/*----------------------------------------------------*/
postamble:
+ ldr r4, =VG_(machine_arm_has_fpscr)
+ ldr r4, [r4]
+ cmp r4, #0
+ beq remove_frame
+
/* At this point, r1 and r2 contain two
words to be returned to the caller. r1
holds a TRC value, and r2 optionally may
@@ -101,8 +112,8 @@
cmp r4, #0
beq remove_frame /* we're OK */
/* otherwise we have an invariant violation */
- movw r1, #VG_TRC_INVARIANT_FAILED
- movw r2, #0
+ mov r1, #VG_TRC_INVARIANT_FAILED
+ mov r2, #0
/* fall through */
remove_frame:
@@ -126,10 +137,19 @@
handing the caller the pair (Chain_me_S, RA) */
mov r1, #VG_TRC_CHAIN_ME_TO_SLOW_EP
mov r2, lr
+#ifdef __ARM_ARCH_7A__
/* 4 = movw r12, lo16(disp_cp_chain_me_to_slowEP)
4 = movt r12, hi16(disp_cp_chain_me_to_slowEP)
4 = blx r12 */
sub r2, r2, #4+4+4
+#else
+ /* 4 = mov r12, #lo8(disp_cp_chain_me_to_slowEP)
+ 4 = orr r12, r12, #lh8(disp_cp_chain_me_to_slowEP)
+ 4 = orr r12, r12, #hl8(disp_cp_chain_me_to_slowEP)
+ 4 = orr r12, r12, #hi8(disp_cp_chain_me_to_slowEP)
+ 4 = blx r12 */
+ sub r2, r2, #4+4+4+4+4
+#endif
b postamble
/* ------ Chain me to fast entry point ------ */
@@ -141,10 +161,19 @@
handing the caller the pair (Chain_me_F, RA) */
mov r1, #VG_TRC_CHAIN_ME_TO_FAST_EP
mov r2, lr
+#ifdef __ARM_ARCH_7A__
/* 4 = movw r12, lo16(disp_cp_chain_me_to_fastEP)
4 = movt r12, hi16(disp_cp_chain_me_to_fastEP)
4 = blx r12 */
sub r2, r2, #4+4+4
+#else
+ /* 4 = mov r12, #lo8(disp_cp_chain_me_to_fastEP)
+ 4 = orr r12, r12, #lh8(disp_cp_chain_me_to_fastEP)
+ 4 = orr r12, r12, #hl8(disp_cp_chain_me_to_fastEP)
+ 4 = orr r12, r12, #hi8(disp_cp_chain_me_to_fastEP)
+ 4 = blx r12 */
+ sub r2, r2, #4+4+4+4+4
+#endif
b postamble
/* ------ Indirect but boring jump ------ */
@@ -154,23 +183,40 @@
ldr r0, [r8, #OFFSET_arm_R15T]
/* stats only */
+#ifdef __ARM_ARCH_7A__
movw r1, #:lower16:vgPlain_stats__n_xindirs_32
movt r1, #:upper16:vgPlain_stats__n_xindirs_32
+#else
+ ldr r1, =vgPlain_stats__n_xindirs_32
+#endif
ldr r2, [r1, #0]
add r2, r2, #1
str r2, [r1, #0]
/* try a fast lookup in the translation cache */
// r0 = next guest, r1,r2,r3,r4 scratch
+#ifdef __ARM_ARCH_7A__
movw r1, #VG_TT_FAST_MASK // r1 = VG_TT_FAST_MASK
movw r4, #:lower16:VG_(tt_fast)
+#else
+ ldr r1, =VG_TT_FAST_MASK
+#endif
and r2, r1, r0, LSR #1 // r2 = entry #
+#ifdef __ARM_ARCH_7A__
movt r4, #:upper16:VG_(tt_fast) // r4 = &VG_(tt_fast)
+#else
+ ldr r4, =VG_(tt_fast) // r4 = &VG_(tt_fast)
+#endif
add r1, r4, r2, LSL #3 // r1 = &tt_fast[entry#]
+#ifdef __ARM_ARCH_7A__
ldrd r4, r5, [r1, #0] // r4 = .guest, r5 = .host
+#else
+ ldr r4, [r1, #0] // r4 = .guest
+ ldr r5, [r1, #4] // r5 = .host
+#endif
cmp r4, r0
@@ -179,8 +225,12 @@
/* otherwise the fast lookup failed */
/* RM ME -- stats only */
+#ifdef __ARM_ARCH_7A__
movw r1, #:lower16:vgPlain_stats__n_xindir_misses_32
movt r1, #:upper16:vgPlain_stats__n_xindir_misses_32
+#else
+ ldr r1, =vgPlain_stats__n_xindir_misses_32
+#endif
ldr r2, [r1, #0]
add r2, r2, #1
str r2, [r1, #0]
diff -Naur valgrind-3.14.0/coregrind/m_machine.c valgrind-3.14.0-armv5/coregrind/m_machine.c
--- valgrind-3.14.0/coregrind/m_machine.c 2018-07-24 09:23:41.000000000 +0100
+++ valgrind-3.14.0-armv5/coregrind/m_machine.c 2019-01-04 17:58:37.416112114 +0000
@@ -456,6 +456,7 @@
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
+UInt VG_(machine_arm_has_fpscr) = 0;
#endif
@@ -1540,7 +1541,7 @@
vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
- volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
+ volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_FPSCR, have_V8;
volatile Int archlevel;
Int r;
@@ -1580,6 +1581,15 @@
tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
+#if 1
+ have_VFP = False;
+ have_VFP2 = False;
+ have_VFP2 = False;
+ have_NEON = False;
+ have_FPSCR = False;
+ have_V8 = False;
+ archlevel = 5;
+#else
/* VFP insns */
have_VFP = True;
if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
@@ -1600,6 +1610,14 @@
__asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
}
+ /* VFP2, VFP3, or SIMD Extension FPSCR register */
+ have_FPSCR = True;
+ if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
+ have_FPSCR = False;
+ } else {
+ __asm__ __volatile__(".word 0xEEE12C10"); /* VMSR FPSCR, r2 */
+ }
+
/* ARM architecture level */
archlevel = 5; /* v5 will be base level */
if (archlevel < 7) {
@@ -1631,18 +1649,20 @@
archlevel = 8;
}
}
-
+#endif
VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
- VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
+ VG_(debugLog)(1, "machine",
+ "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d FPSCR %d\n",
archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
- (Int)have_NEON);
+ (Int)have_NEON, (Int)have_FPSCR);
VG_(machine_arm_archlevel) = archlevel;
+ VG_(machine_arm_has_fpscr) = have_FPSCR;
va = VexArchARM;
vai.endness = VexEndnessLE;
@@ -1652,6 +1672,7 @@
if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
if (have_VFP) vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
+ if (have_FPSCR) vai.hwcaps |= VEX_HWCAPS_ARM_FPSCR;
VG_(machine_get_cache_info)(&vai);
diff -Naur valgrind-3.14.0/coregrind/m_scheduler/scheduler.c valgrind-3.14.0-armv5/coregrind/m_scheduler/scheduler.c
--- valgrind-3.14.0/coregrind/m_scheduler/scheduler.c 2018-09-30 05:41:00.000000000 +0100
+++ valgrind-3.14.0-armv5/coregrind/m_scheduler/scheduler.c 2019-01-04 17:58:37.417112109 +0000
@@ -1459,6 +1459,22 @@
&& trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
}
+# if defined(VGP_arm_linux)
+ /* This is a dirty, dirty hack. When a program jumps to the
+ location 0xFFFF0FE0 on linux, it's trying to read the
+ TLS. The reason we're able to put the check here is because
+ the fast cache will always miss when jumping to 0xFFFF0FE0
+ because it gets trapped here. The check could go in the
+ dispatcher, but ideally we can keep the check out of the fast
+ path.
+ */
+ if (VG_(get_IP)(tid) == 0xFFFF0FE0) {
+ trc[0] = VG_TRC_BORING;
+ VG_(set_IP)(tid, VG_(threads)[tid].arch.vex.guest_R14);
+ VG_(threads)[tid].arch.vex.guest_R0 = VG_(threads)[tid].arch.vex.guest_TPIDRURO;
+ }
+# endif
+
switch (trc[0]) {
case VEX_TRC_JMP_BORING:
/* assisted dispatch, no event. Used by no-redir
diff -Naur valgrind-3.14.0/Makefile.all.am valgrind-3.14.0-armv5/Makefile.all.am
--- valgrind-3.14.0/Makefile.all.am 2018-07-14 05:58:40.000000000 +0100
+++ valgrind-3.14.0-armv5/Makefile.all.am 2019-01-04 17:58:37.414112124 +0000
@@ -206,11 +206,11 @@
AM_FLAG_M3264_ARM_LINUX = @FLAG_M32@
AM_CFLAGS_ARM_LINUX = @FLAG_M32@ \
- $(AM_CFLAGS_BASE) -marm -mcpu=cortex-a8
+ $(AM_CFLAGS_BASE) -marm @FLAG_MCPU_CORTEX_A8@
AM_CFLAGS_PSO_ARM_LINUX = @FLAG_M32@ $(AM_CFLAGS_BASE) \
- -marm -mcpu=cortex-a8 $(AM_CFLAGS_PSO_BASE)
+ -marm @FLAG_MCPU_CORTEX_A8@ $(AM_CFLAGS_PSO_BASE)
AM_CCASFLAGS_ARM_LINUX = @FLAG_M32@ \
- -marm -mcpu=cortex-a8 -g
+ -marm @FLAG_MCPU_CORTEX_A8@ -g
AM_FLAG_M3264_ARM64_LINUX = @FLAG_M64@
AM_CFLAGS_ARM64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE)
diff -Naur valgrind-3.14.0/memcheck/mc_main_asm.c valgrind-3.14.0-armv5/memcheck/mc_main_asm.c
--- valgrind-3.14.0/memcheck/mc_main_asm.c 2018-05-05 08:42:22.000000000 +0100
+++ valgrind-3.14.0-armv5/memcheck/mc_main_asm.c 2019-01-04 18:08:21.387216129 +0000
@@ -53,13 +53,29 @@
".type vgMemCheck_helperc_LOADV64le, %function \n"
"vgMemCheck_helperc_LOADV64le: \n"
" tst r0, #7 \n"
+#ifdef __ARM_ARCH_7A__
" movw r3, #:lower16:primary_map \n"
+#else
+" ldr r3, .LLV64LEpm \n"
+#endif
" bne .LLV64LEc4 \n" // if misaligned
" lsr r2, r0, #16 \n"
+#ifdef __ARM_ARCH_7A__
" movt r3, #:upper16:primary_map \n"
+#endif
" ldr r2, [r3, r2, lsl #2] \n"
+#ifdef __ARM_ARCH_7A__
" uxth r1, r0 \n" // r1 is 0-(16)-0 X-(13)-X 000
+#else
+" lsl r1, r0, #16 \n"
+" lsr r1, r1, #16 \n"
+#endif
+#ifdef __ARM_ARCH_7A__
" movw r3, #0xAAAA \n"
+#else
+" mov r3, #0xAA00 \n"
+" orr r3, r3, #0xAA \n"
+#endif
" lsr r1, r1, #2 \n" // r1 is 0-(16)-0 00 X-(13)-X 0
" ldrh r1, [r2, r1] \n"
" cmp r1, r3 \n" // 0xAAAA == VA_BITS16_DEFINED
@@ -68,7 +84,12 @@
" mov r0, #0x0 \n" // 0x0 == V_BITS32_DEFINED
" bx lr \n"
".LLV64LEc0: \n"
+#ifdef __ARM_ARCH_7A__
" movw r3, #0x5555 \n"
+#else
+" mov r3, #0x5500 \n"
+" orr r3, r3, #0x55 \n"
+#endif
" cmp r1, r3 \n" // 0x5555 == VA_BITS16_UNDEFINED
" bne .LLV64LEc4 \n" // if !all_undefined
" mov r1, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
@@ -80,6 +101,10 @@
" mov r1, #64 \n"
" bl mc_LOADVn_slow \n"
" pop {r4, pc} \n"
+#ifndef __ARM_ARCH_7A__
+".LLV64LEpm: \n"
+" .word primary_map \n"
+#endif
".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
".previous\n"
);
@@ -135,12 +160,23 @@
".type vgMemCheck_helperc_LOADV32le, %function \n"
"vgMemCheck_helperc_LOADV32le: \n"
" tst r0, #3 \n" // 1
+#ifdef __ARM_ARCH_7A__
" movw r3, #:lower16:primary_map \n" // 1
+#else
+" ldr r3, .LLV32LEpm \n"
+#endif
" bne .LLV32LEc4 \n" // 2 if misaligned
" lsr r2, r0, #16 \n" // 3
+#ifdef __ARM_ARCH_7A__
" movt r3, #:upper16:primary_map \n" // 3
+#endif
" ldr r2, [r3, r2, lsl #2] \n" // 4
+#ifdef __ARM_ARCH_7A__
" uxth r1, r0 \n" // 4
+#else
+" lsl r1, r0, #16 \n"
+" lsr r1, r1, #16 \n"
+#endif
" ldrb r1, [r2, r1, lsr #2] \n" // 5
" cmp r1, #0xAA \n" // 6 0xAA == VA_BITS8_DEFINED
" bne .LLV32LEc0 \n" // 7 if !all_defined
@@ -157,6 +193,10 @@
" mov r1, #32 \n"
" bl mc_LOADVn_slow \n"
" pop {r4, pc} \n"
+#ifndef __ARM_ARCH_7A__
+".LLV32LEpm: \n"
+" .word primary_map \n"
+#endif
".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
".previous\n"
);
diff -Naur valgrind-3.14.0/memcheck/mc_main.c valgrind-3.14.0-armv5/memcheck/mc_main.c
--- valgrind-3.14.0/memcheck/mc_main.c 2018-09-30 05:41:00.000000000 +0100
+++ valgrind-3.14.0-armv5/memcheck/mc_main.c 2019-01-04 17:58:37.419112099 +0000
@@ -5193,9 +5193,20 @@
" tst r0, #1 \n" //
" bne .LLV16LEc12 \n" // if misaligned
" lsr r2, r0, #16 \n" // r2 = pri-map-ix
+#ifdef __ARM_ARCH_7A__
" movw r3, #:lower16:primary_map \n" //
+#else
+" ldr r3, .LLV16LEpm \n"
+#endif
+#ifdef __ARM_ARCH_7A__
" uxth r1, r0 \n" // r1 = sec-map-offB
+#else
+" lsl r1, r0, #16 \n"
+" lsr r1, r1, #16 \n"
+#endif
+#ifdef __ARM_ARCH_7A__
" movt r3, #:upper16:primary_map \n" //
+#endif
" ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
" ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
" cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
@@ -5230,6 +5241,10 @@
" mov r1, #16 \n" //
" bl mc_LOADVn_slow \n" //
" pop {r4, pc} \n" //
+#ifndef __ARM_ARCH_7A__
+".LLV16LEpm: \n"
+" .word primary_map \n"
+#endif
".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
".previous\n"
);
@@ -5388,9 +5403,20 @@
".type vgMemCheck_helperc_LOADV8, %function \n"
"vgMemCheck_helperc_LOADV8: \n" //
" lsr r2, r0, #16 \n" // r2 = pri-map-ix
+#ifdef __ARM_ARCH_7A__
" movw r3, #:lower16:primary_map \n" //
+#else
+" ldr r3, .LLV8LEpm \n"
+#endif
+#ifdef __ARM_ARCH_7A__
" uxth r1, r0 \n" // r1 = sec-map-offB
+#else
+" lsl r1, r0, #16 \n"
+" lsr r1, r1, #16 \n"
+#endif
+#ifdef __ARM_ARCH_7A__
" movt r3, #:upper16:primary_map \n" //
+#endif
" ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
" ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
" cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
@@ -5423,6 +5449,10 @@
" mov r1, #8 \n" //
" bl mc_LOADVn_slow \n" //
" pop {r4, pc} \n" //
+#ifndef __ARM_ARCH_7A__
+".LLV8LEpm: \n"
+" .word primary_map \n"
+#endif
".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
".previous\n"
);
diff -Naur valgrind-3.14.0/VEX/priv/guest_arm_toIR.c valgrind-3.14.0-armv5/VEX/priv/guest_arm_toIR.c
--- valgrind-3.14.0/VEX/priv/guest_arm_toIR.c 2018-05-05 08:47:45.000000000 +0100
+++ valgrind-3.14.0-armv5/VEX/priv/guest_arm_toIR.c 2019-01-04 17:58:37.422112084 +0000
@@ -17548,27 +17548,39 @@
/* Ok, now we're unconditional. Generate a LL-SC loop. */
assign(tRn, getIRegA(rN));
assign(tNew, getIRegA(rM));
- if (isB) {
- /* swpb */
- tOld = newTemp(Ity_I8);
- stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
- NULL/*=>isLL*/) );
- stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
- unop(Iop_32to8, mkexpr(tNew))) );
+ if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) > 5) {
+ if (isB) {
+ /* swpb */
+ tOld = newTemp(Ity_I8);
+ stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
+ NULL/*=>isLL*/) );
+ stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
+ unop(Iop_32to8, mkexpr(tNew))) );
+ } else {
+ /* swp */
+ tOld = newTemp(Ity_I32);
+ stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
+ NULL/*=>isLL*/) );
+ stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
+ mkexpr(tNew)) );
+ }
+ stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
+ /*Ijk_NoRedir*/Ijk_Boring,
+ IRConst_U32(guest_R15_curr_instr_notENC),
+ OFFB_R15T ));
+ putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
+ IRTemp_INVALID, Ijk_Boring);
} else {
- /* swp */
- tOld = newTemp(Ity_I32);
- stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
- NULL/*=>isLL*/) );
- stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
- mkexpr(tNew)) );
+ if (isB) {
+ putIRegA(rD, unop(Iop_8Uto32, loadLE(Ity_I8, mkexpr(tRn))),
+ IRTemp_INVALID, Ijk_Boring);
+ storeLE(mkexpr(tRn), unop(Iop_32to8, mkexpr(tNew)));
+ } else {
+ putIRegA(rD, loadLE(Ity_I32, mkexpr(tRn)),
+ IRTemp_INVALID, Ijk_Boring);
+ storeLE(mkexpr(tRn), mkexpr(tNew));
+ }
}
- stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
- /*Ijk_NoRedir*/Ijk_Boring,
- IRConst_U32(guest_R15_curr_instr_notENC),
- OFFB_R15T ));
- putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
- IRTemp_INVALID, Ijk_Boring);
DIP("swp%s%s r%u, r%u, [r%u]\n",
isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
goto decode_success;
diff -Naur valgrind-3.14.0/VEX/priv/host_arm_defs.c valgrind-3.14.0-armv5/VEX/priv/host_arm_defs.c
--- valgrind-3.14.0/VEX/priv/host_arm_defs.c 2018-05-05 08:42:22.000000000 +0100
+++ valgrind-3.14.0-armv5/VEX/priv/host_arm_defs.c 2019-01-04 17:58:37.423112079 +0000
@@ -41,6 +41,8 @@
#include "host_generic_regs.h"
#include "host_arm_defs.h"
+#include <stdio.h>
+
UInt arm_hwcaps = 0;
@@ -2991,10 +2993,10 @@
}
/* Get an immediate into a register, using only that register, and
- generating exactly 2 instructions, regardless of the value of the
+ generating exactly 2 or 4 instructions, regardless of the value of the
immediate. This is used when generating sections of code that need
to be patched later, so as to guarantee a specific size. */
-static UInt* imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
+static UInt* imm32_to_iregNo_patchable ( UInt* p, Int rD, UInt imm32 )
{
if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
/* Generate movw rD, #low16 ; movt rD, #high16. */
@@ -3010,14 +3012,33 @@
hi16 & 0xF);
*p++ = instr;
} else {
- vassert(0); /* lose */
+ /* Generate mov rD,#X; orr rD,rD,#X; orr rD,rD,#X; orr rD,rD,#X */
+ UInt a = imm32 & 0xFF;
+ UInt b = (imm32 >> 8) & 0xFF;
+ UInt c = (imm32 >> 16) & 0xFF;
+ UInt d = (imm32 >> 24) & 0xFF;
+ UInt instr;
+ instr = XXXXXXXX(0xE, 0x3, 0xA, 0x0, rD, 0x0, (a >> 4) & 0xF, a & 0xF);
+//vex_printf("imm32_to_iregNo_patchable(%p, %d, 0x%x):", p, rD, imm32);
+//vex_printf(" 0x%08x", instr);
+ *p++ = instr;
+ instr = XXXXXXXX(0xE, 0x3, 0x8, rD, rD, 0xC, (b >> 4) & 0xF, b & 0xF);
+//vex_printf(" 0x%08x", instr);
+ *p++ = instr;
+ instr = XXXXXXXX(0xE, 0x3, 0x8, rD, rD, 0x8, (c >> 4) & 0xF, c & 0xF);
+//vex_printf(" 0x%08x", instr);
+ *p++ = instr;
+ instr = XXXXXXXX(0xE, 0x3, 0x8, rD, rD, 0x4, (d >> 4) & 0xF, d & 0xF);
+//vex_printf(" 0x%08x\n", instr);
+ *p++ = instr;
+ return p;
}
return p;
}
-/* Check whether p points at a 2-insn sequence cooked up by
- imm32_to_ireg_EXACTLY2(). */
-static Bool is_imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
+/* Check whether p points at a sequence cooked up by
+ imm32_to_iregNo_patchable(). */
+static Bool is_imm32_to_iregNo_patchable ( UInt* p, Int rD, UInt imm32 )
{
if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
/* Generate movw rD, #low16 ; movt rD, #high16. */
@@ -3032,10 +3053,34 @@
hi16 & 0xF);
return p[0] == i0 && p[1] == i1;
} else {
- vassert(0); /* lose */
+ /* Generate mov rD,#X; orr rD,rD,#X; orr rD,rD,#X; orr rD,rD,#X */
+ UInt a = imm32 & 0xFF;
+ UInt b = (imm32 >> 8) & 0xFF;
+ UInt c = (imm32 >> 16) & 0xFF;
+ UInt d = (imm32 >> 24) & 0xFF;
+ UInt i0, i1, i2, i3;
+ i0 = XXXXXXXX(0xE, 0x3, 0xA, 0x0, rD, 0x0, (a >> 4) & 0xF, a & 0xF);
+ i1 = XXXXXXXX(0xE, 0x3, 0x8, rD, rD, 0xC, (b >> 4) & 0xF, b & 0xF);
+ i2 = XXXXXXXX(0xE, 0x3, 0x8, rD, rD, 0x8, (c >> 4) & 0xF, c & 0xF);
+ i3 = XXXXXXXX(0xE, 0x3, 0x8, rD, rD, 0x4, (d >> 4) & 0xF, d & 0xF);
+//vex_printf("is_imm32_to_iregNo_patchable(%p, %d, 0x%x):", p, rD, imm32);
+//vex_printf(" 0x%08x/0x%08x", p[0], i0);
+//vex_printf(" 0x%08x/0x%08x", p[1], i1);
+//vex_printf(" 0x%08x/0x%08x", p[2], i2);
+//vex_printf(" 0x%08x/0x%08x\n", p[3], i3);
+ return p[0] == i0 && p[1] == i1 && p[2] == i2 && p[3] == i3;
}
}
+/* Return the length of sequence cooked up by imm32_to_iregNo_patchable(). */
+static UInt imm32_to_iregNo_patchable_length ( void )
+{
+ if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
+ return 2;
+ } else {
+ return 4;
+ }
+}
static UInt* do_load_or_store32 ( UInt* p,
Bool isLoad, UInt rD, ARMAMode1* am )
@@ -3078,7 +3123,7 @@
const void* disp_cp_xassisted )
{
UInt* p = (UInt*)buf;
- vassert(nbuf >= 32);
+ vassert(nbuf >= 44);
vassert(mode64 == False);
vassert(0 == (((HWord)buf) & 3));
@@ -3330,18 +3375,16 @@
p = do_load_or_store32(p, False/*!isLoad*/,
/*r*/12, i->ARMin.XDirect.amR15T);
+//vex_printf("ARMin_XDirect\n");
+
/* --- FIRST PATCHABLE BYTE follows --- */
/* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
calling to) backs up the return address, so as to find the
- address of the first patchable byte. So: don't change the
- number of instructions (3) below. */
- /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
- /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
- /* blx r12 (A1) */
+ address of the first patchable byte. */
const void* disp_cp_chain_me
= i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
: disp_cp_chain_me_to_slowEP;
- p = imm32_to_ireg_EXACTLY2(p, /*r*/12,
+ p = imm32_to_iregNo_patchable(p, /*r*/12,
(UInt)(Addr)disp_cp_chain_me);
*p++ = 0xE12FFF3C;
/* --- END of PATCHABLE BYTES --- */
@@ -3349,7 +3392,7 @@
/* Fix up the conditional jump, if there was one. */
if (i->ARMin.XDirect.cond != ARMcc_AL) {
Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
- vassert(delta > 0 && delta < 40);
+ vassert(delta > 0 && delta < 48);
vassert((delta & 3) == 0);
UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
vassert(notCond <= 13); /* Neither AL nor NV */
@@ -3552,11 +3595,14 @@
/* Do the 'else' actions */
switch (i->ARMin.Call.rloc.pri) {
case RLPri_Int:
- p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
+//vex_printf("ARMin_Call/RLPri_Int\n");
+ p = imm32_to_iregNo_patchable(p, /*r*/0, 0x55555555);
break;
case RLPri_2Int:
+//vex_printf("ARMin_Call/RLPri_Int2\n");
+ p = imm32_to_iregNo_patchable(p, /*r*/0, 0x55555555);
vassert(0); //ATC
- p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
+ p = imm32_to_iregNo_patchable(p, /*r*/0, 0x55555555);
/* mov r1, r0 */
*p++ = 0xE1A01000;
break;
@@ -4856,7 +4902,8 @@
adc r11, r11, #0
str r11, [r12+4]
*/
- p = imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555);
+//vex_printf("ARMin_ProfInc\n");
+ p = imm32_to_iregNo_patchable(p, /*r*/12, 0x65556555);
*p++ = 0xE59CB000;
*p++ = 0xE29BB001;
*p++ = 0xE58CB000;
@@ -4880,7 +4927,8 @@
/*NOTREACHED*/
done:
- vassert(((UChar*)p) - &buf[0] <= 32);
+//vex_printf("tag %d len %d\n", i->tag, ((UChar*)p) - &buf[0]);
+ vassert(((UChar*)p) - &buf[0] <= 44);
return ((UChar*)p) - &buf[0];
}
@@ -4903,34 +4951,50 @@
{
vassert(endness_host == VexEndnessLE);
- /* What we're expecting to see is:
+ /* What we're expecting to see is (>=ARMv7):
movw r12, lo16(disp_cp_chain_me_to_EXPECTED)
movt r12, hi16(disp_cp_chain_me_to_EXPECTED)
blx r12
+ or (<=ARMv6):
+ mov r12, #...
+ orr r12, r12, #...
+ orr r12, r12, #...
+ orr r12, r12, #...
+ blx r12
viz
- <8 bytes generated by imm32_to_ireg_EXACTLY2>
+ <code generated by imm32_to_iregNo_patchable>
E1 2F FF 3C
*/
UInt* p = (UInt*)place_to_chain;
+ UInt insn_len = imm32_to_iregNo_patchable_length();
vassert(0 == (3 & (HWord)p));
- vassert(is_imm32_to_ireg_EXACTLY2(
+ vassert(is_imm32_to_iregNo_patchable(
p, /*r*/12, (UInt)(Addr)disp_cp_chain_me_EXPECTED));
- vassert(p[2] == 0xE12FFF3C);
+ vassert(p[insn_len] == 0xE12FFF3C);
/* And what we want to change it to is either:
- (general case)
+ (general case, >=ARMv7)
movw r12, lo16(place_to_jump_to)
movt r12, hi16(place_to_jump_to)
bx r12
+ or (<=ARMv6)
+ mov r12, #...
+ orr r12, r12, #...
+ orr r12, r12, #...
+ orr r12, r12, #...
viz
- <8 bytes generated by imm32_to_ireg_EXACTLY2>
+ <code generated by imm32_to_iregNo_patchable>
E1 2F FF 1C
---OR---
in the case where the displacement falls within 26 bits
- b disp24; undef; undef
+ b disp24; undef; undef (>=ARMv7)
+ or
+ b disp24; undef; undef; undef; undef (<=ARMv6)
viz
EA <3 bytes == disp24>
FF 00 00 00
FF 00 00 00
+ (FF 00 00 00)
+ (FF 00 00 00)
In both cases the replacement has the same length as the original.
To remain sane & verifiable,
@@ -4945,6 +5009,8 @@
/* This is the delta we need to put into a B insn. It's relative
to the start of the next-but-one insn, hence the -8. */
Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 8;
+//vex_printf("chainXDirect_ARM\n");
+//vex_printf("place_to_chain=%p, place_to_jump_to=%p, delta=%d, insn_len=%d\n", p, place_to_jump_to, delta, insn_len);
Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
vassert(0 == (delta & (Long)3));
@@ -4969,13 +5035,17 @@
p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
p[1] = 0xFF000000;
p[2] = 0xFF000000;
+ if (insn_len == 4) {
+ p[3] = 0xFF000000;
+ p[4] = 0xFF000000;
+ }
} else {
- (void)imm32_to_ireg_EXACTLY2(
+ (void)imm32_to_iregNo_patchable(
p, /*r*/12, (UInt)(Addr)place_to_jump_to);
- p[2] = 0xE12FFF1C;
+ p[insn_len] = 0xE12FFF1C;
}
- VexInvalRange vir = {(HWord)p, 12};
+ VexInvalRange vir = {(HWord)p, (insn_len+1) * 4};
return vir;
}
@@ -4990,33 +5060,47 @@
vassert(endness_host == VexEndnessLE);
/* What we're expecting to see is:
- (general case)
+ (general case, >=ARMv7)
movw r12, lo16(place_to_jump_to_EXPECTED)
movt r12, lo16(place_to_jump_to_EXPECTED)
bx r12
+ or (<=ARMv6):
+ mov r12, #...
+ orr r12, r12, #...
+ orr r12, r12, #...
+ orr r12, r12, #...
+ bx r12
viz
- <8 bytes generated by imm32_to_ireg_EXACTLY2>
+ <code bytes generated by imm32_to_iregNo_patchable>
E1 2F FF 1C
---OR---
in the case where the displacement falls within 26 bits
- b disp24; undef; undef
+ b disp24; undef; undef (>=ARMv7)
+ or
+ b disp24; undef; undef; undef; undef (<=ARMv6)
viz
EA <3 bytes == disp24>
FF 00 00 00
FF 00 00 00
+ (FF 00 00 00)
+ (FF 00 00 00)
*/
UInt* p = (UInt*)place_to_unchain;
+ UInt insn_len = imm32_to_iregNo_patchable_length();
vassert(0 == (3 & (HWord)p));
+//vex_printf("unchainXDirect_ARM\n");
+//vex_printf("place_to_unchain=%p place_to_jump_to_EXPECTED=%p\n", place_to_unchain, place_to_jump_to_EXPECTED);
Bool valid = False;
- if (is_imm32_to_ireg_EXACTLY2(
+ if (is_imm32_to_iregNo_patchable(
p, /*r*/12, (UInt)(Addr)place_to_jump_to_EXPECTED)
- && p[2] == 0xE12FFF1C) {
+ && p[insn_len] == 0xE12FFF1C) {
valid = True; /* it's the long form */
if (0)
vex_printf("QQQ unchainXDirect_ARM: found long form\n");
} else
- if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
+ if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000 &&
+ (insn_len == 2 || (p[3] == 0xFF000000 && p[4] == 0xFF000000))) {
/* It's the short form. Check the displacement is right. */
Int simm24 = p[0] & 0x00FFFFFF;
simm24 <<= 8; simm24 >>= 8;
@@ -5031,15 +5115,21 @@
/* And what we want to change it to is:
movw r12, lo16(disp_cp_chain_me)
movt r12, hi16(disp_cp_chain_me)
- blx r12
+ bx r12
+ or
+ mov r12, #...
+ orr r12, r12, #...
+ orr r12, r12, #...
+ orr r12, r12, #...
+ bx r12
viz
- <8 bytes generated by imm32_to_ireg_EXACTLY2>
+ <code generated by imm32_to_iregNo_patchable>
E1 2F FF 3C
*/
- (void)imm32_to_ireg_EXACTLY2(
+ (void)imm32_to_iregNo_patchable(
p, /*r*/12, (UInt)(Addr)disp_cp_chain_me);
- p[2] = 0xE12FFF3C;
- VexInvalRange vir = {(HWord)p, 12};
+ p[insn_len] = 0xE12FFF3C;
+ VexInvalRange vir = {(HWord)p, (insn_len+1) * 4};
return vir;
}
@@ -5053,16 +5143,17 @@
vassert(endness_host == VexEndnessLE);
vassert(sizeof(ULong*) == 4);
UInt* p = (UInt*)place_to_patch;
+ UInt insn_len = imm32_to_iregNo_patchable_length();
vassert(0 == (3 & (HWord)p));
- vassert(is_imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555));
- vassert(p[2] == 0xE59CB000);
- vassert(p[3] == 0xE29BB001);
- vassert(p[4] == 0xE58CB000);
- vassert(p[5] == 0xE59CB004);
- vassert(p[6] == 0xE2ABB000);
- vassert(p[7] == 0xE58CB004);
- imm32_to_ireg_EXACTLY2(p, /*r*/12, (UInt)(Addr)location_of_counter);
- VexInvalRange vir = {(HWord)p, 8};
+ vassert(is_imm32_to_iregNo_patchable(p, /*r*/12, 0x65556555));
+ vassert(p[insn_len] == 0xE59CB000);
+ vassert(p[insn_len+1] == 0xE29BB001);
+ vassert(p[insn_len+2] == 0xE58CB000);
+ vassert(p[insn_len+3] == 0xE59CB004);
+ vassert(p[insn_len+4] == 0xE2ABB000);
+ vassert(p[insn_len+5] == 0xE58CB004);
+ imm32_to_iregNo_patchable(p, /*r*/12, (UInt)(Addr)location_of_counter);
+ VexInvalRange vir = {(HWord)p, insn_len * 4};
return vir;
}
diff -Naur valgrind-3.14.0/VEX/pub/libvex.h valgrind-3.14.0-armv5/VEX/pub/libvex.h
--- valgrind-3.14.0/VEX/pub/libvex.h 2018-07-24 09:23:41.000000000 +0100
+++ valgrind-3.14.0-armv5/VEX/pub/libvex.h 2019-01-04 17:58:37.424112074 +0000
@@ -186,6 +186,7 @@
#define VEX_HWCAPS_ARM_VFP (1<<6) /* VFP extension */
#define VEX_HWCAPS_ARM_VFP2 (1<<7) /* VFPv2 */
#define VEX_HWCAPS_ARM_VFP3 (1<<8) /* VFPv3 */
+#define VEX_HWCAPS_ARM_FPSCR (1<<9) /* FPSCR register (VFPv2,v3 or NEON) */
/* Bits 15:10 reserved for (possible) future VFP revisions */
#define VEX_HWCAPS_ARM_NEON (1<<16) /* Advanced SIMD also known as NEON */