aports/main/llvm16/9011-AArch64CompressJumpTables-prevent-over-compression.patch

From 957e85ce449ab281fbbdc99c5546b928d5b2fb7b Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Tue, 6 Jun 2023 18:21:16 +0100
Subject: [PATCH] [AArch64CompressJumpTables] Prevent over-compression caused
by invalid alignment.

AArch64CompressJumpTables assumes it can calculate exact block
offsets. This assumption is bogus because getInstSizeInBytes()
only returns an upper bound rather than an exact size. The
assumption is also invalid when a block alignment is bigger than
the function's alignment.

To mitigate both scenarios this patch changes the algorithm to
compute the maximum upper bound for all block offsets. This is
pessimistic but safe because all offsets are treated as unsigned.

Differential Revision: https://reviews.llvm.org/D150009

(cherry picked from commit 31c485c99024edf267b505070f87473fd397eb6a)
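
For context outside the patch itself, here is a minimal standalone C++ sketch of
the revised bookkeeping. The names (Block, kInstrSize, computeOffsetUpperBounds)
are illustrative only and not LLVM APIs; the point is that every offset is
treated as an upper bound, with alignment padding modelled by its worst case of
Alignment - 4 bytes, since AArch64 instructions are 4 bytes wide.

    #include <vector>

    constexpr unsigned kInstrSize = 4; // AArch64 instructions are 4 bytes.

    struct Block {
      unsigned Alignment;      // required alignment in bytes (power of two)
      unsigned SizeUpperBound; // upper bound on the block's size in bytes
    };

    // Returns an upper bound on the start offset of every block.
    std::vector<unsigned>
    computeOffsetUpperBounds(const std::vector<Block> &Blocks) {
      std::vector<unsigned> Offsets;
      unsigned Offset = 0;
      for (const Block &B : Blocks) {
        unsigned OffsetAfterAlignment = Offset;
        // The exact value of Offset is unknown, so instead of
        // alignTo(Offset, Alignment) assume worst-case padding.
        if (B.Alignment > kInstrSize)
          OffsetAfterAlignment += B.Alignment - kInstrSize;
        Offsets.push_back(OffsetAfterAlignment);
        Offset = OffsetAfterAlignment + B.SizeUpperBound;
      }
      return Offsets;
    }

Because offsets only ever grow under this scheme, over-estimating padding can at
worst leave a jump table uncompressed; it can never yield an out-of-range
compressed entry, which is the safety property the commit relies on.
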
---
.../AArch64/AArch64CompressJumpTables.cpp | 17 ++--
.../CodeGen/AArch64/jump-table-compress.mir | 85 +++++++++++++++++++
2 files changed, 94 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
index 75abe9c53e3b..7d14d2d20bad 100644
--- a/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -37,7 +37,7 @@ class AArch64CompressJumpTables : public MachineFunctionPass {
MachineFunction *MF;
SmallVector<int, 8> BlockInfo;
- /// Returns the size in instructions of the block \p MBB, or std::nullopt if
+ /// Returns the size of instructions in the block \p MBB, or std::nullopt if
/// we couldn't get a safe upper bound.
std::optional<int> computeBlockSize(MachineBasicBlock &MBB);
@@ -88,19 +88,20 @@ bool AArch64CompressJumpTables::scanFunction() {
BlockInfo.clear();
BlockInfo.resize(MF->getNumBlockIDs());
+ // NOTE: BlockSize, Offset, OffsetAfterAlignment are all upper bounds.
+
unsigned Offset = 0;
for (MachineBasicBlock &MBB : *MF) {
const Align Alignment = MBB.getAlignment();
- unsigned AlignedOffset;
- if (Alignment == Align(1))
- AlignedOffset = Offset;
- else
- AlignedOffset = alignTo(Offset, Alignment);
- BlockInfo[MBB.getNumber()] = AlignedOffset;
+ unsigned OffsetAfterAlignment = Offset;
+ // We don't know the exact size of MBB so assume worst case padding.
+ if (Alignment > Align(4))
+ OffsetAfterAlignment += Alignment.value() - 4;
+ BlockInfo[MBB.getNumber()] = OffsetAfterAlignment;
auto BlockSize = computeBlockSize(MBB);
if (!BlockSize)
return false;
- Offset = AlignedOffset + *BlockSize;
+ Offset = OffsetAfterAlignment + *BlockSize;
}
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/jump-table-compress.mir b/llvm/test/CodeGen/AArch64/jump-table-compress.mir
index a46b7c6ac9c0..375357f4fd5c 100644
--- a/llvm/test/CodeGen/AArch64/jump-table-compress.mir
+++ b/llvm/test/CodeGen/AArch64/jump-table-compress.mir
@@ -5,6 +5,7 @@
}
define void @test_inline_asm_no_compress() { ret void }
+ define void @test_bb_alignment_not_byte_compressable() { ret void }
...
---
@@ -197,3 +198,87 @@ body: |
RET undef $lr, implicit $w0
...
+---
+name: test_bb_alignment_not_byte_compressable
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$w1' }
+ - { reg: '$w2' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo:
+ hasRedZone: false
+jumpTable:
+ kind: label-difference32
+ entries:
+ - id: 0
+ blocks: [ '%bb.2', '%bb.4', '%bb.5', '%bb.6', '%bb.7', '%bb.8' ]
+body: |
+ bb.0:
+ successors: %bb.3(0x12492492), %bb.1(0x6db6db6e)
+ liveins: $w0, $w1, $w2
+
+ dead $wzr = SUBSWri renamable $w0, 5, 0, implicit-def $nzcv
+ Bcc 8, %bb.3, implicit $nzcv
+
+ bb.1:
+ successors: %bb.2, %bb.4, %bb.5, %bb.6, %bb.7, %bb.8
+ liveins: $w0, $w1, $w2
+ ; Ensure there's no jump table compression when block alignments are bigger
+ ; than the function alignment because we don't know the padding length at
+ ; the point where compression is done.
+ ; CHECK-LABEL: test_bb_alignment_not_byte_compressable
+ ; CHECK-LABEL: bb.1
+ ; CHECK: JumpTableDest16
+ renamable $w8 = ORRWrs $wzr, killed renamable $w0, 0, implicit-def $x8
+ $x9 = ADRP target-flags(aarch64-page) %jump-table.0
+ renamable $x9 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0, 0
+ early-clobber renamable $x10, dead early-clobber renamable $x11 = JumpTableDest32 killed renamable $x9, killed renamable $x8, %jump-table.0
+ BR killed renamable $x10
+
+ bb.2:
+ liveins: $w1, $w2
+ $w0 = ADDWrs killed renamable $w2, killed renamable $w1, 0
+ RET undef $lr, implicit $w0
+
+ bb.3:
+ $w0 = MOVZWi 0, 0
+ RET undef $lr, implicit $w0
+
+ bb.4:
+ liveins: $w1, $w2
+
+ renamable $w0 = nsw MADDWrrr killed renamable $w2, killed renamable $w1, $wzr
+ RET undef $lr, implicit $w0
+
+ ; bb.5 is aligned to make it more than 256 instructions away from bb.1, which
+ ; means we can no longer assume the jump table will be byte indexable.
+ bb.5 (align 1024):
+ liveins: $w1, $w2
+
+ $w0 = SUBWrs killed renamable $w1, killed renamable $w2, 0
+ RET undef $lr, implicit $w0
+
+ bb.6:
+ liveins: $w1, $w2
+
+ $w0 = SUBWrs killed renamable $w2, killed renamable $w1, 0
+ RET undef $lr, implicit $w0
+
+ bb.7:
+ liveins: $w1, $w2
+
+ renamable $w0 = MADDWrrr killed renamable $w1, renamable $w1, killed renamable $w2
+ RET undef $lr, implicit $w0
+
+ bb.8:
+ liveins: $w1, $w2
+
+ renamable $w8 = nsw MADDWrrr renamable $w2, renamable $w2, $wzr
+ renamable $w0 = MADDWrrr killed renamable $w8, killed renamable $w2, killed renamable $w1
+ RET undef $lr, implicit $w0
+
+...