[4/7] ira: Support subreg copy

Message ID 20231108034740.834590-5-lehua.ding@rivai.ai
State Unresolved
Headers
Series ira/lra: Support subreg coalesce |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Lehua Ding Nov. 8, 2023, 3:47 a.m. UTC
  This patch change the copy between allocno and allocno to the copy between
object and object, that is, allow partial copy between pseudo registers.

gcc/ChangeLog:

	* ira-build.cc (find_allocno_copy): Removed.
	(ira_create_object): Adjust.
	(find_object): New.
	(ira_create_copy): Adjust.
	(add_allocno_copy_to_list): Adjust.
	(swap_allocno_copy_ends_if_necessary): Adjust.
	(ira_add_allocno_copy): Adjust.
	(print_copy): Adjust.
	(print_allocno_copies): Adjust.
	(ira_flattening): Adjust.
	* ira-color.cc (INCLUDE_VECTOR): use std::vector
	(struct allocno_color_data): New fields.
	(struct allocno_hard_regs_subnode): More comments.
	(form_allocno_hard_regs_nodes_forest): More comments.
	(update_left_conflict_sizes_p): More comments.
	(struct update_cost_queue_elem): New field.
	(queue_update_cost): Adjust.
	(get_next_update_cost): Adjust.
	(update_costs_from_allocno): Adjust.
	(update_conflict_hard_regno_costs): Adjust.
	(assign_hard_reg): Adjust.
	(objects_conflict_by_live_ranges_p): New.
	(allocno_thread_conflict_p): Removed.
	(object_thread_conflict_p): New.
	(merge_threads): Adjust.
	(form_threads_from_copies): Adjust.
	(form_threads_from_bucket): Adjust.
	(form_threads_from_colorable_allocno): Adjust.
	(init_allocno_threads): Adjust.
	(add_allocno_to_bucket): Adjust.
	(delete_allocno_from_bucket): Adjust.
	(allocno_copy_cost_saving): Adjust.
	(color_allocnos): Adjust.
	(color_pass): Adjust.
	(update_curr_costs): Adjust.
	(coalesce_allocnos): Adjust.
	(ira_reuse_stack_slot): Adjust.
	(ira_initiate_assign): Adjust.
	(ira_finish_assign): Adjust.
	* ira-conflicts.cc (allocnos_conflict_for_copy_p): Removed.
	(REG_SUBREG_P): Adjust.
	(subreg_move_p): New.
	(regs_non_conflict_for_copy_p): New.
	(subreg_reg_align_and_times_p): New.
	(process_regs_for_copy): Adjust.
	(add_insn_allocno_copies): Adjust.
	(propagate_copies): Adjust.
	* ira-emit.cc (add_range_and_copies_from_move_list): Adjust.
	* ira-int.h (struct ira_object): New field.
	(OBJECT_INDEX): New macro.
	(struct ira_allocno_copy): Adjust fields.
	(ira_add_allocno_copy): Exported.
	(find_object): Exported.
	(subreg_move_p): Exported.
	* ira.cc (print_redundant_copies): Adjust.

---
 gcc/ira-build.cc     | 150 +++++++-----
 gcc/ira-color.cc     | 541 +++++++++++++++++++++++++++++++------------
 gcc/ira-conflicts.cc | 173 +++++++++++---
 gcc/ira-emit.cc      |  10 +-
 gcc/ira-int.h        |  13 +-
 gcc/ira.cc           |   5 +-
 6 files changed, 645 insertions(+), 247 deletions(-)
  

Patch

diff --git a/gcc/ira-build.cc b/gcc/ira-build.cc
index 5fb7a9f800f..1c47f81ce9d 100644
--- a/gcc/ira-build.cc
+++ b/gcc/ira-build.cc
@@ -36,9 +36,6 @@  along with GCC; see the file COPYING3.  If not see
 #include "cfgloop.h"
 #include "subreg-live-range.h"
 
-static ira_copy_t find_allocno_copy (ira_allocno_t, ira_allocno_t, rtx_insn *,
-				     ira_loop_tree_node_t);
-
 /* The root of the loop tree corresponding to the all function.  */
 ira_loop_tree_node_t ira_loop_tree_root;
 
@@ -463,6 +460,7 @@  ira_create_object (ira_allocno_t a, int start, int nregs)
   OBJECT_LIVE_RANGES (obj) = NULL;
   OBJECT_START (obj) = start;
   OBJECT_NREGS (obj) = nregs;
+  OBJECT_INDEX (obj) = ALLOCNO_NUM_OBJECTS (a);
 
   ira_object_id_map_vec.safe_push (obj);
   ira_object_id_map
@@ -519,6 +517,16 @@  find_object (ira_allocno_t a, poly_int64 offset, poly_int64 size)
   return find_object (a, subreg_start, subreg_nregs);
 }
 
+/* Return object in allocno A for REG.  */
+ira_object_t
+find_object (ira_allocno_t a, rtx reg)
+{
+  if (has_subreg_object_p (a) && read_modify_subreg_p (reg))
+    return find_object (a, SUBREG_BYTE (reg), GET_MODE_SIZE (GET_MODE (reg)));
+  else
+    return find_object (a, 0, ALLOCNO_NREGS (a));
+}
+
 /* Return the object in allocno A which match START & NREGS.  Create when not
    found.  */
 ira_object_t
@@ -1502,27 +1510,36 @@  initiate_copies (void)
 /* Return copy connecting A1 and A2 and originated from INSN of
    LOOP_TREE_NODE if any.  */
 static ira_copy_t
-find_allocno_copy (ira_allocno_t a1, ira_allocno_t a2, rtx_insn *insn,
+find_allocno_copy (ira_object_t obj1, ira_object_t obj2, rtx_insn *insn,
 		   ira_loop_tree_node_t loop_tree_node)
 {
   ira_copy_t cp, next_cp;
-  ira_allocno_t another_a;
+  ira_object_t another_obj;
 
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
   for (cp = ALLOCNO_COPIES (a1); cp != NULL; cp = next_cp)
     {
-      if (cp->first == a1)
+      ira_allocno_t first_a = OBJECT_ALLOCNO (cp->first);
+      ira_allocno_t second_a = OBJECT_ALLOCNO (cp->second);
+      if (first_a == a1)
 	{
 	  next_cp = cp->next_first_allocno_copy;
-	  another_a = cp->second;
+	  if (cp->first == obj1)
+	    another_obj = cp->second;
+	  else
+	    continue;
 	}
-      else if (cp->second == a1)
+      else if (second_a == a1)
 	{
 	  next_cp = cp->next_second_allocno_copy;
-	  another_a = cp->first;
+	  if (cp->second == obj1)
+	    another_obj = cp->first;
+	  else
+	    continue;
 	}
       else
 	gcc_unreachable ();
-      if (another_a == a2 && cp->insn == insn
+      if (another_obj == obj2 && cp->insn == insn
 	  && cp->loop_tree_node == loop_tree_node)
 	return cp;
     }
@@ -1532,7 +1549,7 @@  find_allocno_copy (ira_allocno_t a1, ira_allocno_t a2, rtx_insn *insn,
 /* Create and return copy with given attributes LOOP_TREE_NODE, FIRST,
    SECOND, FREQ, CONSTRAINT_P, and INSN.  */
 ira_copy_t
-ira_create_copy (ira_allocno_t first, ira_allocno_t second, int freq,
+ira_create_copy (ira_object_t first, ira_object_t second, int freq,
 		 bool constraint_p, rtx_insn *insn,
 		 ira_loop_tree_node_t loop_tree_node)
 {
@@ -1556,28 +1573,29 @@  ira_create_copy (ira_allocno_t first, ira_allocno_t second, int freq,
 static void
 add_allocno_copy_to_list (ira_copy_t cp)
 {
-  ira_allocno_t first = cp->first, second = cp->second;
+  ira_object_t first = cp->first, second = cp->second;
+  ira_allocno_t a1 = OBJECT_ALLOCNO (first), a2 = OBJECT_ALLOCNO (second);
 
   cp->prev_first_allocno_copy = NULL;
   cp->prev_second_allocno_copy = NULL;
-  cp->next_first_allocno_copy = ALLOCNO_COPIES (first);
+  cp->next_first_allocno_copy = ALLOCNO_COPIES (a1);
   if (cp->next_first_allocno_copy != NULL)
     {
-      if (cp->next_first_allocno_copy->first == first)
+      if (OBJECT_ALLOCNO (cp->next_first_allocno_copy->first) == a1)
 	cp->next_first_allocno_copy->prev_first_allocno_copy = cp;
       else
 	cp->next_first_allocno_copy->prev_second_allocno_copy = cp;
     }
-  cp->next_second_allocno_copy = ALLOCNO_COPIES (second);
+  cp->next_second_allocno_copy = ALLOCNO_COPIES (a2);
   if (cp->next_second_allocno_copy != NULL)
     {
-      if (cp->next_second_allocno_copy->second == second)
+      if (OBJECT_ALLOCNO (cp->next_second_allocno_copy->second) == a2)
 	cp->next_second_allocno_copy->prev_second_allocno_copy = cp;
       else
 	cp->next_second_allocno_copy->prev_first_allocno_copy = cp;
     }
-  ALLOCNO_COPIES (first) = cp;
-  ALLOCNO_COPIES (second) = cp;
+  ALLOCNO_COPIES (a1) = cp;
+  ALLOCNO_COPIES (a2) = cp;
 }
 
 /* Make a copy CP a canonical copy where number of the
@@ -1585,7 +1603,8 @@  add_allocno_copy_to_list (ira_copy_t cp)
 static void
 swap_allocno_copy_ends_if_necessary (ira_copy_t cp)
 {
-  if (ALLOCNO_NUM (cp->first) <= ALLOCNO_NUM (cp->second))
+  if (ALLOCNO_NUM (OBJECT_ALLOCNO (cp->first))
+      <= ALLOCNO_NUM (OBJECT_ALLOCNO (cp->second)))
     return;
 
   std::swap (cp->first, cp->second);
@@ -1594,11 +1613,10 @@  swap_allocno_copy_ends_if_necessary (ira_copy_t cp)
 }
 
 /* Create (or update frequency if the copy already exists) and return
-   the copy of allocnos FIRST and SECOND with frequency FREQ
-   corresponding to move insn INSN (if any) and originated from
-   LOOP_TREE_NODE.  */
+   the copy of objects FIRST and SECOND with frequency FREQ corresponding to
+   move insn INSN (if any) and originated from LOOP_TREE_NODE.  */
 ira_copy_t
-ira_add_allocno_copy (ira_allocno_t first, ira_allocno_t second, int freq,
+ira_add_allocno_copy (ira_object_t first, ira_object_t second, int freq,
 		      bool constraint_p, rtx_insn *insn,
 		      ira_loop_tree_node_t loop_tree_node)
 {
@@ -1617,15 +1635,33 @@  ira_add_allocno_copy (ira_allocno_t first, ira_allocno_t second, int freq,
   return cp;
 }
 
+/* Create (or update frequency if the copy already exists) and return
+   the copy of allocnos FIRST and SECOND with frequency FREQ
+   corresponding to move insn INSN (if any) and originated from
+   LOOP_TREE_NODE.  */
+ira_copy_t
+ira_add_allocno_copy (ira_allocno_t first, ira_allocno_t second, int freq,
+		      bool constraint_p, rtx_insn *insn,
+		      ira_loop_tree_node_t loop_tree_node)
+{
+  ira_object_t obj1 = get_full_object (first);
+  ira_object_t obj2 = get_full_object (second);
+  gcc_assert (obj1 != NULL && obj2 != NULL);
+  return ira_add_allocno_copy (obj1, obj2, freq, constraint_p, insn,
+			       loop_tree_node);
+}
+
 /* Print info about copy CP into file F.  */
 static void
 print_copy (FILE *f, ira_copy_t cp)
 {
-  fprintf (f, "  cp%d:a%d(r%d)<->a%d(r%d)@%d:%s\n", cp->num,
-	   ALLOCNO_NUM (cp->first), ALLOCNO_REGNO (cp->first),
-	   ALLOCNO_NUM (cp->second), ALLOCNO_REGNO (cp->second), cp->freq,
-	   cp->insn != NULL
-	   ? "move" : cp->constraint_p ? "constraint" : "shuffle");
+  ira_allocno_t a1 = OBJECT_ALLOCNO (cp->first);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (cp->second);
+  fprintf (f, "  cp%d:a%d(r%d)<->a%d(r%d)@%d:%s\n", cp->num, ALLOCNO_NUM (a1),
+	   ALLOCNO_REGNO (a1), ALLOCNO_NUM (a2), ALLOCNO_REGNO (a2), cp->freq,
+	   cp->insn != NULL   ? "move"
+	   : cp->constraint_p ? "constraint"
+			      : "shuffle");
 }
 
 DEBUG_FUNCTION void
@@ -1672,24 +1708,25 @@  ira_debug_copies (void)
 static void
 print_allocno_copies (FILE *f, ira_allocno_t a)
 {
-  ira_allocno_t another_a;
+  ira_object_t another_obj;
   ira_copy_t cp, next_cp;
 
   fprintf (f, " a%d(r%d):", ALLOCNO_NUM (a), ALLOCNO_REGNO (a));
   for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
     {
-      if (cp->first == a)
+      if (OBJECT_ALLOCNO (cp->first) == a)
 	{
 	  next_cp = cp->next_first_allocno_copy;
-	  another_a = cp->second;
+	  another_obj = cp->second;
 	}
-      else if (cp->second == a)
+      else if (OBJECT_ALLOCNO (cp->second) == a)
 	{
 	  next_cp = cp->next_second_allocno_copy;
-	  another_a = cp->first;
+	  another_obj = cp->first;
 	}
       else
 	gcc_unreachable ();
+      ira_allocno_t another_a = OBJECT_ALLOCNO (another_obj);
       fprintf (f, " cp%d:a%d(r%d)@%d", cp->num,
 	       ALLOCNO_NUM (another_a), ALLOCNO_REGNO (another_a), cp->freq);
     }
@@ -3479,25 +3516,21 @@  ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit)
      copies.  */
   FOR_EACH_COPY (cp, ci)
     {
-      if (ALLOCNO_CAP_MEMBER (cp->first) != NULL
-	  || ALLOCNO_CAP_MEMBER (cp->second) != NULL)
+      ira_allocno_t a1 = OBJECT_ALLOCNO (cp->first);
+      ira_allocno_t a2 = OBJECT_ALLOCNO (cp->second);
+      if (ALLOCNO_CAP_MEMBER (a1) != NULL || ALLOCNO_CAP_MEMBER (a2) != NULL)
 	{
 	  if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL)
-	    fprintf
-	      (ira_dump_file, "      Remove cp%d:%c%dr%d-%c%dr%d\n",
-	       cp->num, ALLOCNO_CAP_MEMBER (cp->first) != NULL ? 'c' : 'a',
-	       ALLOCNO_NUM (cp->first),
-	       REGNO (allocno_emit_reg (cp->first)),
-	       ALLOCNO_CAP_MEMBER (cp->second) != NULL ? 'c' : 'a',
-	       ALLOCNO_NUM (cp->second),
-	       REGNO (allocno_emit_reg (cp->second)));
+	    fprintf (ira_dump_file, "      Remove cp%d:%c%dr%d-%c%dr%d\n",
+		     cp->num, ALLOCNO_CAP_MEMBER (a1) != NULL ? 'c' : 'a',
+		     ALLOCNO_NUM (a1), REGNO (allocno_emit_reg (a1)),
+		     ALLOCNO_CAP_MEMBER (a2) != NULL ? 'c' : 'a',
+		     ALLOCNO_NUM (a2), REGNO (allocno_emit_reg (a2)));
 	  cp->loop_tree_node = NULL;
 	  continue;
 	}
-      first
-	= regno_top_level_allocno_map[REGNO (allocno_emit_reg (cp->first))];
-      second
-	= regno_top_level_allocno_map[REGNO (allocno_emit_reg (cp->second))];
+      first = regno_top_level_allocno_map[REGNO (allocno_emit_reg (a1))];
+      second = regno_top_level_allocno_map[REGNO (allocno_emit_reg (a2))];
       node = cp->loop_tree_node;
       if (node == NULL)
 	keep_p = true; /* It copy generated in ira-emit.cc.  */
@@ -3505,8 +3538,8 @@  ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit)
 	{
 	  /* Check that the copy was not propagated from level on
 	     which we will have different pseudos.  */
-	  node_first = node->regno_allocno_map[ALLOCNO_REGNO (cp->first)];
-	  node_second = node->regno_allocno_map[ALLOCNO_REGNO (cp->second)];
+	  node_first = node->regno_allocno_map[ALLOCNO_REGNO (a1)];
+	  node_second = node->regno_allocno_map[ALLOCNO_REGNO (a2)];
 	  keep_p = ((REGNO (allocno_emit_reg (first))
 		     == REGNO (allocno_emit_reg (node_first)))
 		     && (REGNO (allocno_emit_reg (second))
@@ -3515,18 +3548,18 @@  ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit)
       if (keep_p)
 	{
 	  cp->loop_tree_node = ira_loop_tree_root;
-	  cp->first = first;
-	  cp->second = second;
+	  cp->first = find_object_anyway (first, OBJECT_START (cp->first),
+					  OBJECT_NREGS (cp->first));
+	  cp->second = find_object_anyway (second, OBJECT_START (cp->second),
+					   OBJECT_NREGS (cp->second));
 	}
       else
 	{
 	  cp->loop_tree_node = NULL;
 	  if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL)
 	    fprintf (ira_dump_file, "      Remove cp%d:a%dr%d-a%dr%d\n",
-		     cp->num, ALLOCNO_NUM (cp->first),
-		     REGNO (allocno_emit_reg (cp->first)),
-		     ALLOCNO_NUM (cp->second),
-		     REGNO (allocno_emit_reg (cp->second)));
+		     cp->num, ALLOCNO_NUM (a1), REGNO (allocno_emit_reg (a1)),
+		     ALLOCNO_NUM (a2), REGNO (allocno_emit_reg (a2)));
 	}
     }
   /* Remove unnecessary allocnos on lower levels of the loop tree.  */
@@ -3562,9 +3595,10 @@  ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit)
 	  finish_copy (cp);
 	  continue;
 	}
-      ira_assert
-	(ALLOCNO_LOOP_TREE_NODE (cp->first) == ira_loop_tree_root
-	 && ALLOCNO_LOOP_TREE_NODE (cp->second) == ira_loop_tree_root);
+      ira_assert (ALLOCNO_LOOP_TREE_NODE (OBJECT_ALLOCNO (cp->first))
+		    == ira_loop_tree_root
+		  && ALLOCNO_LOOP_TREE_NODE (OBJECT_ALLOCNO (cp->second))
+		       == ira_loop_tree_root);
       add_allocno_copy_to_list (cp);
       swap_allocno_copy_ends_if_necessary (cp);
     }
diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc
index 8aed25144b9..099312bcdb3 100644
--- a/gcc/ira-color.cc
+++ b/gcc/ira-color.cc
@@ -20,6 +20,7 @@  along with GCC; see the file COPYING3.  If not see
 
 #include "config.h"
 #define INCLUDE_MAP
+#define INCLUDE_VECTOR
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
@@ -150,11 +151,18 @@  struct allocno_color_data
   struct update_cost_record *update_cost_records;
   /* Threads.  We collect allocnos connected by copies into threads
      and try to assign hard regs to allocnos by threads.  */
-  /* Allocno representing all thread.  */
-  ira_allocno_t first_thread_allocno;
+  /* The head objects for all thread.  */
+  ira_object_t *first_thread_objects;
   /* Allocnos in thread forms a cycle list through the following
      member.  */
-  ira_allocno_t next_thread_allocno;
+  ira_object_t *next_thread_objects;
+  /* The allocno all thread shared.  */
+  ira_allocno_t first_thread_allocno;
+  /* The offset start relative to the first_thread_allocno.  */
+  int first_thread_offset;
+  /* All allocnos belong to the thread.  */
+  bitmap thread_allocnos;
+  /* The freq sum of all thread allocno.  */
   /* All thread frequency.  Defined only for first thread allocno.  */
   int thread_freq;
   /* Sum of frequencies of hard register preferences of the allocno.  */
@@ -188,6 +196,9 @@  static bitmap coloring_allocno_bitmap;
    allocnos.  */
 static bitmap consideration_allocno_bitmap;
 
+/* Bitmap of allocnos which is not trivially colorable.  */
+static bitmap uncolorable_allocno_set;
+
 /* All allocnos sorted according their priorities.  */
 static ira_allocno_t *sorted_allocnos;
 
@@ -647,9 +658,13 @@  struct allocno_hard_regs_subnode
      Overall conflict size is
      left_conflict_subnodes_size
        + MIN (max_node_impact - left_conflict_subnodes_size,
-              left_conflict_size)
+	      left_conflict_size)
+     Use MIN here to ensure that the total conflict does not exceed
+     max_node_impact.
   */
+  /* The total conflict size of subnodes.  */
   short left_conflict_subnodes_size;
+  /* The maximum number of registers that the current node can use.  */
   short max_node_impact;
 };
 
@@ -758,6 +773,8 @@  form_allocno_hard_regs_nodes_forest (void)
       collect_allocno_hard_regs_cover (hard_regs_roots,
 				       allocno_data->profitable_hard_regs);
       allocno_hard_regs_node = NULL;
+      /* Find the ancestor node in forest which cover all nodes. The ancestor is
+	 a smallest superset of profitable_hard_regs.  */
       for (j = 0; hard_regs_node_vec.iterate (j, &node); j++)
 	allocno_hard_regs_node
 	  = (j == 0
@@ -990,6 +1007,8 @@  update_left_conflict_sizes_p (ira_allocno_t a,
 					removed_node->hard_regs->set));
   start = node_preorder_num * allocno_hard_regs_nodes_num;
   i = allocno_hard_regs_subnode_index[start + removed_node->preorder_num];
+  /* i < 0 means removed_node is parent of node instead of node is the parent of
+     removed_node.  */
   if (i < 0)
     i = 0;
   subnodes = allocno_hard_regs_subnodes + data->hard_regs_subnodes_start;
@@ -999,6 +1018,7 @@  update_left_conflict_sizes_p (ira_allocno_t a,
 	      - subnodes[i].left_conflict_subnodes_size,
 	      subnodes[i].left_conflict_size));
   subnodes[i].left_conflict_size -= size;
+  /* Update all ancestors for subnode i.  */
   for (;;)
     {
       conflict_size
@@ -1242,6 +1262,9 @@  struct update_cost_queue_elem
      connecting this allocno to the one being allocated.  */
   int divisor;
 
+  /* Hard register regno assigned to current ALLOCNO.  */
+  int hard_regno;
+
   /* Allocno from which we started chaining costs of connected
      allocnos. */
   ira_allocno_t start;
@@ -1308,7 +1331,7 @@  start_update_cost (void)
 /* Add (ALLOCNO, START, FROM, DIVISOR) to the end of update_cost_queue, unless
    ALLOCNO is already in the queue, or has NO_REGS class.  */
 static inline void
-queue_update_cost (ira_allocno_t allocno, ira_allocno_t start,
+queue_update_cost (ira_allocno_t allocno, int hard_regno, ira_allocno_t start,
 		   ira_allocno_t from, int divisor)
 {
   struct update_cost_queue_elem *elem;
@@ -1317,6 +1340,7 @@  queue_update_cost (ira_allocno_t allocno, ira_allocno_t start,
   if (elem->check != update_cost_check
       && ALLOCNO_CLASS (allocno) != NO_REGS)
     {
+      elem->hard_regno = hard_regno;
       elem->check = update_cost_check;
       elem->start = start;
       elem->from = from;
@@ -1334,8 +1358,8 @@  queue_update_cost (ira_allocno_t allocno, ira_allocno_t start,
    false if the queue was empty, otherwise make (*ALLOCNO, *START,
    *FROM, *DIVISOR) describe the removed element.  */
 static inline bool
-get_next_update_cost (ira_allocno_t *allocno, ira_allocno_t *start,
-		      ira_allocno_t *from, int *divisor)
+get_next_update_cost (ira_allocno_t *allocno, int *hard_regno,
+		      ira_allocno_t *start, ira_allocno_t *from, int *divisor)
 {
   struct update_cost_queue_elem *elem;
 
@@ -1348,6 +1372,8 @@  get_next_update_cost (ira_allocno_t *allocno, ira_allocno_t *start,
   *from = elem->from;
   *divisor = elem->divisor;
   update_cost_queue = elem->next;
+  if (hard_regno != NULL)
+    *hard_regno = elem->hard_regno;
   return true;
 }
 
@@ -1449,31 +1475,41 @@  update_costs_from_allocno (ira_allocno_t allocno, int hard_regno,
   enum reg_class rclass, aclass;
   ira_allocno_t another_allocno, start = allocno, from = NULL;
   ira_copy_t cp, next_cp;
+  ira_object_t another_obj;
+  unsigned int obj_index1, obj_index2;
 
   rclass = REGNO_REG_CLASS (hard_regno);
   do
     {
+      gcc_assert (hard_regno >= 0);
       mode = ALLOCNO_MODE (allocno);
       ira_init_register_move_cost_if_necessary (mode);
       for (cp = ALLOCNO_COPIES (allocno); cp != NULL; cp = next_cp)
 	{
-	  if (cp->first == allocno)
+	  if (OBJECT_ALLOCNO (cp->first) == allocno)
 	    {
+	      obj_index1 = OBJECT_INDEX (cp->first);
+	      obj_index2 = OBJECT_INDEX (cp->second);
 	      next_cp = cp->next_first_allocno_copy;
-	      another_allocno = cp->second;
+	      another_obj = cp->second;
 	    }
-	  else if (cp->second == allocno)
+	  else if (OBJECT_ALLOCNO (cp->second) == allocno)
 	    {
+	      obj_index1 = OBJECT_INDEX (cp->second);
+	      obj_index2 = OBJECT_INDEX (cp->first);
 	      next_cp = cp->next_second_allocno_copy;
-	      another_allocno = cp->first;
+	      another_obj = cp->first;
 	    }
 	  else
 	    gcc_unreachable ();
 
+	  another_allocno = OBJECT_ALLOCNO (another_obj);
 	  if (another_allocno == from
 	      || (ALLOCNO_COLOR_DATA (another_allocno) != NULL
-		  && (ALLOCNO_COLOR_DATA (allocno)->first_thread_allocno
-		      != ALLOCNO_COLOR_DATA (another_allocno)->first_thread_allocno)))
+		  && (ALLOCNO_COLOR_DATA (allocno)
+			->first_thread_objects[obj_index1]
+		      != ALLOCNO_COLOR_DATA (another_allocno)
+			   ->first_thread_objects[obj_index2])))
 	    continue;
 
 	  aclass = ALLOCNO_CLASS (another_allocno);
@@ -1482,6 +1518,8 @@  update_costs_from_allocno (ira_allocno_t allocno, int hard_regno,
 	      || ALLOCNO_ASSIGNED_P (another_allocno))
 	    continue;
 
+	  ira_allocno_t first_allocno = OBJECT_ALLOCNO (cp->first);
+	  ira_allocno_t second_allocno = OBJECT_ALLOCNO (cp->second);
 	  /* If we have different modes use the smallest one.  It is
 	     a sub-register move.  It is hard to predict what LRA
 	     will reload (the pseudo or its sub-register) but LRA
@@ -1489,14 +1527,21 @@  update_costs_from_allocno (ira_allocno_t allocno, int hard_regno,
 	     register classes bigger modes might be invalid,
 	     e.g. DImode for AREG on x86.  For such cases the
 	     register move cost will be maximal.  */
-	  mode = narrower_subreg_mode (ALLOCNO_MODE (cp->first),
-				       ALLOCNO_MODE (cp->second));
+	  mode = narrower_subreg_mode (ALLOCNO_MODE (first_allocno),
+				       ALLOCNO_MODE (second_allocno));
 
 	  ira_init_register_move_cost_if_necessary (mode);
 
-	  cost = (cp->second == allocno
-		  ? ira_register_move_cost[mode][rclass][aclass]
-		  : ira_register_move_cost[mode][aclass][rclass]);
+	  cost = (second_allocno == allocno
+		    ? ira_register_move_cost[mode][rclass][aclass]
+		    : ira_register_move_cost[mode][aclass][rclass]);
+	  /* Adjust the hard regno for another_allocno for subreg copy.  */
+	  int start_regno = hard_regno;
+	  if (cp->insn && subreg_move_p (cp->first, cp->second))
+	    {
+	      int diff = OBJECT_START (cp->first) - OBJECT_START (cp->second);
+	      start_regno += (first_allocno == allocno ? diff : -diff);
+	    }
 	  if (decr_p)
 	    cost = -cost;
 
@@ -1505,25 +1550,30 @@  update_costs_from_allocno (ira_allocno_t allocno, int hard_regno,
 
 	  if (internal_flag_ira_verbose > 5 && ira_dump_file != NULL)
 	    fprintf (ira_dump_file,
-		     "          a%dr%d (hr%d): update cost by %d, conflict cost by %d\n",
-		     ALLOCNO_NUM (another_allocno), ALLOCNO_REGNO (another_allocno),
-		     hard_regno, update_cost, update_conflict_cost);
+		     "          a%dr%d (hr%d): update cost by %d, conflict "
+		     "cost by %d\n",
+		     ALLOCNO_NUM (another_allocno),
+		     ALLOCNO_REGNO (another_allocno), start_regno, update_cost,
+		     update_conflict_cost);
 	  if (update_cost == 0)
 	    continue;
 
-	  if (! update_allocno_cost (another_allocno, hard_regno,
-				     update_cost, update_conflict_cost))
+	  if (start_regno < 0
+	      || (start_regno + ALLOCNO_NREGS (another_allocno))
+		   > FIRST_PSEUDO_REGISTER
+	      || !update_allocno_cost (another_allocno, start_regno,
+				       update_cost, update_conflict_cost))
 	    continue;
-	  queue_update_cost (another_allocno, start, allocno,
+	  queue_update_cost (another_allocno, start_regno, start, allocno,
 			     divisor * COST_HOP_DIVISOR);
 	  if (record_p && ALLOCNO_COLOR_DATA (another_allocno) != NULL)
 	    ALLOCNO_COLOR_DATA (another_allocno)->update_cost_records
-	      = get_update_cost_record (hard_regno, divisor,
-					ALLOCNO_COLOR_DATA (another_allocno)
-					->update_cost_records);
+	      = get_update_cost_record (
+		start_regno, divisor,
+		ALLOCNO_COLOR_DATA (another_allocno)->update_cost_records);
 	}
-    }
-  while (get_next_update_cost (&allocno, &start, &from, &divisor));
+  } while (
+    get_next_update_cost (&allocno, &hard_regno, &start, &from, &divisor));
 }
 
 /* Decrease preferred ALLOCNO hard register costs and costs of
@@ -1632,23 +1682,25 @@  update_conflict_hard_regno_costs (int *costs, enum reg_class aclass,
   enum reg_class another_aclass;
   ira_allocno_t allocno, another_allocno, start, from;
   ira_copy_t cp, next_cp;
+  ira_object_t another_obj;
 
-  while (get_next_update_cost (&allocno, &start, &from, &divisor))
+  while (get_next_update_cost (&allocno, NULL, &start, &from, &divisor))
     for (cp = ALLOCNO_COPIES (allocno); cp != NULL; cp = next_cp)
       {
-	if (cp->first == allocno)
+	if (OBJECT_ALLOCNO (cp->first) == allocno)
 	  {
 	    next_cp = cp->next_first_allocno_copy;
-	    another_allocno = cp->second;
+	    another_obj = cp->second;
 	  }
-	else if (cp->second == allocno)
+	else if (OBJECT_ALLOCNO (cp->second) == allocno)
 	  {
 	    next_cp = cp->next_second_allocno_copy;
-	    another_allocno = cp->first;
+	    another_obj = cp->first;
 	  }
 	else
 	  gcc_unreachable ();
 
+	another_allocno = OBJECT_ALLOCNO (another_obj);
 	another_aclass = ALLOCNO_CLASS (another_allocno);
 	if (another_allocno == from
 	    || ALLOCNO_ASSIGNED_P (another_allocno)
@@ -1696,7 +1748,8 @@  update_conflict_hard_regno_costs (int *costs, enum reg_class aclass,
 			   * COST_HOP_DIVISOR
 			   * COST_HOP_DIVISOR
 			   * COST_HOP_DIVISOR))
-	  queue_update_cost (another_allocno, start, from, divisor * COST_HOP_DIVISOR);
+	  queue_update_cost (another_allocno, -1, start, from,
+			     divisor * COST_HOP_DIVISOR);
       }
 }
 
@@ -2034,6 +2087,11 @@  assign_hard_reg (ira_allocno_t a, bool retry_p)
       FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci)
         {
 	  ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj);
+
+	  if (ALLOCNO_COLOR_DATA (a)->first_thread_allocno
+	      == ALLOCNO_COLOR_DATA (conflict_a)->first_thread_allocno)
+	    continue;
+
 	  enum reg_class conflict_aclass;
 	  allocno_color_data_t data = ALLOCNO_COLOR_DATA (conflict_a);
 
@@ -2225,7 +2283,8 @@  assign_hard_reg (ira_allocno_t a, bool retry_p)
 		      continue;
 		    full_costs[j] -= conflict_costs[k];
 		  }
-	      queue_update_cost (conflict_a, conflict_a, NULL, COST_HOP_DIVISOR);
+	      queue_update_cost (conflict_a, -1, conflict_a, NULL,
+				 COST_HOP_DIVISOR);
 	    }
 	}
     }
@@ -2239,7 +2298,7 @@  assign_hard_reg (ira_allocno_t a, bool retry_p)
   if (! retry_p)
     {
       start_update_cost ();
-      queue_update_cost (a, a, NULL, COST_HOP_DIVISOR);
+      queue_update_cost (a, -1, a, NULL, COST_HOP_DIVISOR);
       update_conflict_hard_regno_costs (full_costs, aclass, false);
     }
   min_cost = min_full_cost = INT_MAX;
@@ -2264,17 +2323,17 @@  assign_hard_reg (ira_allocno_t a, bool retry_p)
       if (!HONOR_REG_ALLOC_ORDER)
 	{
 	  if ((saved_nregs = calculate_saved_nregs (hard_regno, mode)) != 0)
-	  /* We need to save/restore the hard register in
-	     epilogue/prologue.  Therefore we increase the cost.  */
-	  {
-	    rclass = REGNO_REG_CLASS (hard_regno);
-	    add_cost = ((ira_memory_move_cost[mode][rclass][0]
-		         + ira_memory_move_cost[mode][rclass][1])
+	    /* We need to save/restore the hard register in
+	       epilogue/prologue.  Therefore we increase the cost.  */
+	    {
+	      rclass = REGNO_REG_CLASS (hard_regno);
+	      add_cost = ((ira_memory_move_cost[mode][rclass][0]
+			   + ira_memory_move_cost[mode][rclass][1])
 		        * saved_nregs / hard_regno_nregs (hard_regno,
 							  mode) - 1);
-	    cost += add_cost;
-	    full_cost += add_cost;
-	  }
+	      cost += add_cost;
+	      full_cost += add_cost;
+	    }
 	}
       if (min_cost > cost)
 	min_cost = cost;
@@ -2393,54 +2452,173 @@  copy_freq_compare_func (const void *v1p, const void *v2p)
   return cp1->num - cp2->num;
 }
 
-
+/* Return true if object OBJ1 conflict with OBJ2.  */
+static bool
+objects_conflict_by_live_ranges_p (ira_object_t obj1, ira_object_t obj2)
+{
+  rtx reg1, reg2;
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (obj2);
+  if (a1 == a2)
+    return false;
+  reg1 = regno_reg_rtx[ALLOCNO_REGNO (a1)];
+  reg2 = regno_reg_rtx[ALLOCNO_REGNO (a2)];
+  if (reg1 != NULL && reg2 != NULL
+      && ORIGINAL_REGNO (reg1) == ORIGINAL_REGNO (reg2))
+    return false;
+
+  /* We don't keep live ranges for caps because they can be quite big.
+     Use ranges of non-cap allocno from which caps are created.  */
+  a1 = get_cap_member (a1);
+  a2 = get_cap_member (a2);
+
+  obj1 = find_object (a1, OBJECT_START (obj1), OBJECT_NREGS (obj1));
+  obj2 = find_object (a2, OBJECT_START (obj2), OBJECT_NREGS (obj2));
+  return ira_live_ranges_intersect_p (OBJECT_LIVE_RANGES (obj1),
+				      OBJECT_LIVE_RANGES (obj2));
+}
 
-/* Return true if any allocno from thread of A1 conflicts with any
-   allocno from thread A2.  */
+/* Return true if any object from thread of OBJ1 conflicts with any
+   object from thread OBJ2.  */
 static bool
-allocno_thread_conflict_p (ira_allocno_t a1, ira_allocno_t a2)
+object_thread_conflict_p (ira_object_t obj1, ira_object_t obj2)
 {
-  ira_allocno_t a, conflict_a;
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (obj2);
+
+  gcc_assert (
+    obj1 != obj2
+    && ALLOCNO_COLOR_DATA (a1)->first_thread_objects[OBJECT_INDEX (obj1)]
+	 == obj1
+    && ALLOCNO_COLOR_DATA (a2)->first_thread_objects[OBJECT_INDEX (obj2)]
+	 == obj2);
+
+  ira_allocno_t first_thread_allocno1
+    = ALLOCNO_COLOR_DATA (a1)->first_thread_allocno;
+  ira_allocno_t first_thread_allocno2
+    = ALLOCNO_COLOR_DATA (a2)->first_thread_allocno;
+
+  int offset
+    = (ALLOCNO_COLOR_DATA (a1)->first_thread_offset + OBJECT_START (obj1))
+      - (ALLOCNO_COLOR_DATA (a2)->first_thread_offset + OBJECT_START (obj2));
+
+  /* Update first_thread_allocno and thread_allocnos info.  */
+  bitmap thread_allocnos1
+    = ALLOCNO_COLOR_DATA (first_thread_allocno1)->thread_allocnos;
+  bitmap thread_allocnos2
+    = ALLOCNO_COLOR_DATA (first_thread_allocno2)->thread_allocnos;
+  gcc_assert (!bitmap_empty_p (thread_allocnos1)
+	      && !bitmap_empty_p (thread_allocnos2));
+  std::vector<ira_object_t> thread_objects_2;
 
-  for (a = ALLOCNO_COLOR_DATA (a2)->next_thread_allocno;;
-       a = ALLOCNO_COLOR_DATA (a)->next_thread_allocno)
+  unsigned int i;
+  bitmap_iterator bi;
+  EXECUTE_IF_SET_IN_BITMAP (thread_allocnos2, 0, i, bi)
     {
-      for (conflict_a = ALLOCNO_COLOR_DATA (a1)->next_thread_allocno;;
-	   conflict_a = ALLOCNO_COLOR_DATA (conflict_a)->next_thread_allocno)
-	{
-	  if (allocnos_conflict_by_live_ranges_p (a, conflict_a))
-	    return true;
-	  if (conflict_a == a1)
-	    break;
-	}
-      if (a == a2)
-	break;
+      ira_allocno_object_iterator oi;
+      ira_object_t obj;
+      FOR_EACH_ALLOCNO_OBJECT (ira_allocnos[i], obj, oi)
+	thread_objects_2.push_back (obj);
+    }
+
+  EXECUTE_IF_SET_IN_BITMAP (thread_allocnos1, 0, i, bi)
+    {
+      ira_allocno_object_iterator oi;
+      ira_object_t obj;
+      ira_allocno_t a = ira_allocnos[i];
+      FOR_EACH_ALLOCNO_OBJECT (ira_allocnos[i], obj, oi)
+	for (ira_object_t other_obj : thread_objects_2)
+	  {
+	    int thread_start1 = ALLOCNO_COLOR_DATA (a)->first_thread_offset
+				+ OBJECT_START (obj);
+	    int thread_start2 = ALLOCNO_COLOR_DATA (OBJECT_ALLOCNO (other_obj))
+				  ->first_thread_offset
+				+ offset + OBJECT_START (other_obj);
+	    if (!(thread_start1 + OBJECT_NREGS (obj) <= thread_start2
+		  || thread_start2 + OBJECT_NREGS (other_obj) <= thread_start1)
+		&& objects_conflict_by_live_ranges_p (obj, other_obj))
+	      return true;
+	  }
     }
+
   return false;
 }
 
-/* Merge two threads given correspondingly by their first allocnos T1
-   and T2 (more accurately merging T2 into T1).  */
+/* Merge two threads given correspondingly by their first objects OBJ1
+   and OBJ2 (more accurately merging OBJ2 into OBJ1).  */
 static void
-merge_threads (ira_allocno_t t1, ira_allocno_t t2)
+merge_threads (ira_object_t obj1, ira_object_t obj2)
 {
-  ira_allocno_t a, next, last;
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (obj2);
+
+  gcc_assert (
+    obj1 != obj2
+    && ALLOCNO_COLOR_DATA (a1)->first_thread_objects[OBJECT_INDEX (obj1)]
+	 == obj1
+    && ALLOCNO_COLOR_DATA (a2)->first_thread_objects[OBJECT_INDEX (obj2)]
+	 == obj2);
+
+  ira_allocno_t first_thread_allocno1
+    = ALLOCNO_COLOR_DATA (a1)->first_thread_allocno;
+  ira_allocno_t first_thread_allocno2
+    = ALLOCNO_COLOR_DATA (a2)->first_thread_allocno;
+
+  gcc_assert (first_thread_allocno1 != first_thread_allocno2);
 
-  gcc_assert (t1 != t2
-	      && ALLOCNO_COLOR_DATA (t1)->first_thread_allocno == t1
-	      && ALLOCNO_COLOR_DATA (t2)->first_thread_allocno == t2);
-  for (last = t2, a = ALLOCNO_COLOR_DATA (t2)->next_thread_allocno;;
-       a = ALLOCNO_COLOR_DATA (a)->next_thread_allocno)
+  int offset
+    = (ALLOCNO_COLOR_DATA (a1)->first_thread_offset + OBJECT_START (obj1))
+      - (ALLOCNO_COLOR_DATA (a2)->first_thread_offset + OBJECT_START (obj2));
+
+  /* Update first_thread_allocno and thread_allocnos info.  */
+  unsigned int i;
+  bitmap_iterator bi;
+  bitmap thread_allocnos2
+    = ALLOCNO_COLOR_DATA (first_thread_allocno2)->thread_allocnos;
+  bitmap thread_allocnos1
+    = ALLOCNO_COLOR_DATA (first_thread_allocno1)->thread_allocnos;
+  gcc_assert (!bitmap_empty_p (thread_allocnos1)
+	      && !bitmap_empty_p (thread_allocnos2));
+  EXECUTE_IF_SET_IN_BITMAP (thread_allocnos2, 0, i, bi)
+    {
+      ira_allocno_t a = ira_allocnos[i];
+      gcc_assert (ALLOCNO_COLOR_DATA (a)->first_thread_allocno
+		  == first_thread_allocno2);
+      /* Update first_thread_allocno and first_thread_offset filed.  */
+      ALLOCNO_COLOR_DATA (a)->first_thread_allocno = first_thread_allocno1;
+      ALLOCNO_COLOR_DATA (a)->first_thread_offset += offset;
+      bitmap_set_bit (thread_allocnos1, i);
+    }
+  bitmap_clear (thread_allocnos2);
+  ira_free_bitmap (thread_allocnos2);
+  ALLOCNO_COLOR_DATA (first_thread_allocno2)->thread_allocnos = NULL;
+
+  ira_object_t last_obj = obj2;
+  for (ira_object_t next_obj
+       = ALLOCNO_COLOR_DATA (a2)->next_thread_objects[OBJECT_INDEX (obj2)];
+       ; next_obj = ALLOCNO_COLOR_DATA (OBJECT_ALLOCNO (next_obj))
+		      ->next_thread_objects[OBJECT_INDEX (next_obj)])
     {
-      ALLOCNO_COLOR_DATA (a)->first_thread_allocno = t1;
-      if (a == t2)
+      ira_allocno_t next_a = OBJECT_ALLOCNO (next_obj);
+      ALLOCNO_COLOR_DATA (next_a)->first_thread_objects[OBJECT_INDEX (next_obj)]
+	= obj1;
+      gcc_assert (ALLOCNO_COLOR_DATA (next_a)->first_thread_allocno
+		  == first_thread_allocno1);
+      gcc_assert (bitmap_bit_p (thread_allocnos1, ALLOCNO_NUM (next_a)));
+      if (next_obj == obj2)
 	break;
-      last = a;
+      last_obj = next_obj;
     }
-  next = ALLOCNO_COLOR_DATA (t1)->next_thread_allocno;
-  ALLOCNO_COLOR_DATA (t1)->next_thread_allocno = t2;
-  ALLOCNO_COLOR_DATA (last)->next_thread_allocno = next;
-  ALLOCNO_COLOR_DATA (t1)->thread_freq += ALLOCNO_COLOR_DATA (t2)->thread_freq;
+  /* Add OBJ2's threads chain to OBJ1.  */
+  ira_object_t temp_obj
+    = ALLOCNO_COLOR_DATA (a1)->next_thread_objects[OBJECT_INDEX (obj1)];
+  ALLOCNO_COLOR_DATA (a1)->next_thread_objects[OBJECT_INDEX (obj1)] = obj2;
+  ALLOCNO_COLOR_DATA (OBJECT_ALLOCNO (last_obj))
+    ->next_thread_objects[OBJECT_INDEX (last_obj)]
+    = temp_obj;
+
+  ALLOCNO_COLOR_DATA (first_thread_allocno1)->thread_freq
+    += ALLOCNO_COLOR_DATA (first_thread_allocno2)->thread_freq;
 }
 
 /* Create threads by processing CP_NUM copies from sorted copies.  We
@@ -2448,7 +2626,6 @@  merge_threads (ira_allocno_t t1, ira_allocno_t t2)
 static void
 form_threads_from_copies (int cp_num)
 {
-  ira_allocno_t a, thread1, thread2;
   ira_copy_t cp;
 
   qsort (sorted_copies, cp_num, sizeof (ira_copy_t), copy_freq_compare_func);
@@ -2457,33 +2634,43 @@  form_threads_from_copies (int cp_num)
   for (int i = 0; i < cp_num; i++)
     {
       cp = sorted_copies[i];
-      thread1 = ALLOCNO_COLOR_DATA (cp->first)->first_thread_allocno;
-      thread2 = ALLOCNO_COLOR_DATA (cp->second)->first_thread_allocno;
-      if (thread1 == thread2)
+      ira_allocno_t first_a = OBJECT_ALLOCNO (cp->first);
+      ira_allocno_t second_a = OBJECT_ALLOCNO (cp->second);
+      ira_object_t thread1 = ALLOCNO_COLOR_DATA (first_a)
+			       ->first_thread_objects[OBJECT_INDEX (cp->first)];
+      ira_object_t thread2
+	= ALLOCNO_COLOR_DATA (second_a)
+	    ->first_thread_objects[OBJECT_INDEX (cp->second)];
+      if (thread1 == thread2
+	  || ALLOCNO_COLOR_DATA (first_a)->first_thread_allocno
+	       == ALLOCNO_COLOR_DATA (second_a)->first_thread_allocno)
 	continue;
-      if (! allocno_thread_conflict_p (thread1, thread2))
+      if (!object_thread_conflict_p (thread1, thread2))
 	{
 	  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
-	    fprintf
-		(ira_dump_file,
-		 "        Forming thread by copy %d:a%dr%d-a%dr%d (freq=%d):\n",
-		 cp->num, ALLOCNO_NUM (cp->first), ALLOCNO_REGNO (cp->first),
-		 ALLOCNO_NUM (cp->second), ALLOCNO_REGNO (cp->second),
-		 cp->freq);
+	    fprintf (
+	      ira_dump_file,
+	      "        Forming thread by copy %d:a%dr%d-a%dr%d (freq=%d):\n",
+	      cp->num, ALLOCNO_NUM (first_a), ALLOCNO_REGNO (first_a),
+	      ALLOCNO_NUM (second_a), ALLOCNO_REGNO (second_a), cp->freq);
 	  merge_threads (thread1, thread2);
 	  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
 	    {
-	      thread1 = ALLOCNO_COLOR_DATA (thread1)->first_thread_allocno;
-	      fprintf (ira_dump_file, "          Result (freq=%d): a%dr%d(%d)",
-		       ALLOCNO_COLOR_DATA (thread1)->thread_freq,
-		       ALLOCNO_NUM (thread1), ALLOCNO_REGNO (thread1),
-		       ALLOCNO_FREQ (thread1));
-	      for (a = ALLOCNO_COLOR_DATA (thread1)->next_thread_allocno;
-		   a != thread1;
-		   a = ALLOCNO_COLOR_DATA (a)->next_thread_allocno)
-		fprintf (ira_dump_file, " a%dr%d(%d)",
-			 ALLOCNO_NUM (a), ALLOCNO_REGNO (a),
-			 ALLOCNO_FREQ (a));
+	      ira_allocno_t a1 = OBJECT_ALLOCNO (thread1);
+	      ira_allocno_t first_thread_allocno
+		= ALLOCNO_COLOR_DATA (a1)->first_thread_allocno;
+	      fprintf (ira_dump_file, "          Result (freq=%d):",
+		       ALLOCNO_COLOR_DATA (first_thread_allocno)->thread_freq);
+	      unsigned int i;
+	      bitmap_iterator bi;
+	      EXECUTE_IF_SET_IN_BITMAP (
+		ALLOCNO_COLOR_DATA (first_thread_allocno)->thread_allocnos, 0,
+		i, bi)
+		{
+		  ira_allocno_t a = ira_allocnos[i];
+		  fprintf (ira_dump_file, " a%dr%d(%d)", ALLOCNO_NUM (a),
+			   ALLOCNO_REGNO (a), ALLOCNO_FREQ (a));
+		}
 	      fprintf (ira_dump_file, "\n");
 	    }
 	}
@@ -2503,13 +2690,27 @@  form_threads_from_bucket (ira_allocno_t bucket)
     {
       for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
 	{
-	  if (cp->first == a)
+	  bool intersect_p = hard_reg_set_intersect_p (
+	    ALLOCNO_COLOR_DATA (OBJECT_ALLOCNO (cp->first))
+	      ->profitable_hard_regs,
+	    ALLOCNO_COLOR_DATA (OBJECT_ALLOCNO (cp->second))
+	      ->profitable_hard_regs);
+	  if (OBJECT_ALLOCNO (cp->first) == a)
 	    {
 	      next_cp = cp->next_first_allocno_copy;
+	      if (!intersect_p)
+		continue;
+	      sorted_copies[cp_num++] = cp;
+	    }
+	  else if (OBJECT_ALLOCNO (cp->second) == a)
+	    {
+	      next_cp = cp->next_second_allocno_copy;
+	      if (!intersect_p
+		  || !bitmap_bit_p (uncolorable_allocno_set,
+				    ALLOCNO_NUM (OBJECT_ALLOCNO (cp->first))))
+		continue;
 	      sorted_copies[cp_num++] = cp;
 	    }
-	  else if (cp->second == a)
-	    next_cp = cp->next_second_allocno_copy;
 	  else
 	    gcc_unreachable ();
 	}
@@ -2531,15 +2732,15 @@  form_threads_from_colorable_allocno (ira_allocno_t a)
 	     ALLOCNO_NUM (a), ALLOCNO_REGNO (a));
   for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
     {
-      if (cp->first == a)
+      if (OBJECT_ALLOCNO (cp->first) == a)
 	{
 	  next_cp = cp->next_first_allocno_copy;
-	  another_a = cp->second;
+	  another_a = OBJECT_ALLOCNO (cp->second);
 	}
-      else if (cp->second == a)
+      else if (OBJECT_ALLOCNO (cp->second) == a)
 	{
 	  next_cp = cp->next_second_allocno_copy;
-	  another_a = cp->first;
+	  another_a = OBJECT_ALLOCNO (cp->first);
 	}
       else
 	gcc_unreachable ();
@@ -2564,8 +2765,16 @@  init_allocno_threads (void)
     {
       a = ira_allocnos[j];
       /* Set up initial thread data: */
-      ALLOCNO_COLOR_DATA (a)->first_thread_allocno
-	= ALLOCNO_COLOR_DATA (a)->next_thread_allocno = a;
+      for (int i = 0; i < ALLOCNO_NUM_OBJECTS (a); i += 1)
+	{
+	  ira_object_t obj = ALLOCNO_OBJECT (a, i);
+	  ALLOCNO_COLOR_DATA (a)->first_thread_objects[i]
+	    = ALLOCNO_COLOR_DATA (a)->next_thread_objects[i] = obj;
+	}
+      ALLOCNO_COLOR_DATA (a)->first_thread_allocno = a;
+      ALLOCNO_COLOR_DATA (a)->first_thread_offset = 0;
+      ALLOCNO_COLOR_DATA (a)->thread_allocnos = ira_allocate_bitmap ();
+      bitmap_set_bit (ALLOCNO_COLOR_DATA (a)->thread_allocnos, ALLOCNO_NUM (a));
       ALLOCNO_COLOR_DATA (a)->thread_freq = ALLOCNO_FREQ (a);
       ALLOCNO_COLOR_DATA (a)->hard_reg_prefs = 0;
       for (pref = ALLOCNO_PREFS (a); pref != NULL; pref = pref->next_pref)
@@ -2608,6 +2817,9 @@  add_allocno_to_bucket (ira_allocno_t a, ira_allocno_t *bucket_ptr)
   ira_allocno_t first_a;
   allocno_color_data_t data;
 
+  if (bucket_ptr == &uncolorable_allocno_bucket)
+    bitmap_set_bit (uncolorable_allocno_set, ALLOCNO_NUM (a));
+
   if (bucket_ptr == &uncolorable_allocno_bucket
       && ALLOCNO_CLASS (a) != NO_REGS)
     {
@@ -2734,6 +2946,9 @@  delete_allocno_from_bucket (ira_allocno_t allocno, ira_allocno_t *bucket_ptr)
 {
   ira_allocno_t prev_allocno, next_allocno;
 
+  if (bucket_ptr == &uncolorable_allocno_bucket)
+    bitmap_clear_bit (uncolorable_allocno_set, ALLOCNO_NUM (allocno));
+
   if (bucket_ptr == &uncolorable_allocno_bucket
       && ALLOCNO_CLASS (allocno) != NO_REGS)
     {
@@ -3227,16 +3442,23 @@  allocno_copy_cost_saving (ira_allocno_t allocno, int hard_regno)
     rclass = ALLOCNO_CLASS (allocno);
   for (cp = ALLOCNO_COPIES (allocno); cp != NULL; cp = next_cp)
     {
-      if (cp->first == allocno)
+      if (OBJECT_ALLOCNO (cp->first) == allocno)
 	{
 	  next_cp = cp->next_first_allocno_copy;
-	  if (ALLOCNO_HARD_REGNO (cp->second) != hard_regno)
+	  ira_allocno_t another_a = OBJECT_ALLOCNO (cp->second);
+	  if (ALLOCNO_HARD_REGNO (another_a) > -1
+	      && hard_regno + OBJECT_START (cp->first)
+		   != ALLOCNO_HARD_REGNO (another_a)
+			+ OBJECT_START (cp->second))
 	    continue;
 	}
-      else if (cp->second == allocno)
+      else if (OBJECT_ALLOCNO (cp->second) == allocno)
 	{
 	  next_cp = cp->next_second_allocno_copy;
-	  if (ALLOCNO_HARD_REGNO (cp->first) != hard_regno)
+	  ira_allocno_t another_a = OBJECT_ALLOCNO (cp->first);
+	  if (ALLOCNO_HARD_REGNO (another_a) > -1
+	      && hard_regno + OBJECT_START (cp->second)
+		   != ALLOCNO_HARD_REGNO (another_a) + OBJECT_START (cp->first))
 	    continue;
 	}
       else
@@ -3643,6 +3865,7 @@  color_allocnos (void)
       /* Put the allocnos into the corresponding buckets.  */
       colorable_allocno_bucket = NULL;
       uncolorable_allocno_bucket = NULL;
+      bitmap_clear (uncolorable_allocno_set);
       EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, i, bi)
 	{
 	  a = ira_allocnos[i];
@@ -3740,10 +3963,12 @@  color_pass (ira_loop_tree_node_t loop_tree_node)
   bitmap_copy (coloring_allocno_bitmap, loop_tree_node->all_allocnos);
   bitmap_copy (consideration_allocno_bitmap, coloring_allocno_bitmap);
   n = 0;
+  size_t obj_n = 0;
   EXECUTE_IF_SET_IN_BITMAP (consideration_allocno_bitmap, 0, j, bi)
     {
       a = ira_allocnos[j];
       n++;
+      obj_n += ALLOCNO_NUM_OBJECTS (a);
       if (! ALLOCNO_ASSIGNED_P (a))
 	continue;
       bitmap_clear_bit (coloring_allocno_bitmap, ALLOCNO_NUM (a));
@@ -3752,20 +3977,29 @@  color_pass (ira_loop_tree_node_t loop_tree_node)
     = (allocno_color_data_t) ira_allocate (sizeof (struct allocno_color_data)
 					   * n);
   memset (allocno_color_data, 0, sizeof (struct allocno_color_data) * n);
+  ira_object_t *thread_objects
+    = (ira_object_t *) ira_allocate (sizeof (ira_object_t *) * obj_n * 2);
+  memset (thread_objects, 0, sizeof (ira_object_t *) * obj_n * 2);
   curr_allocno_process = 0;
   n = 0;
+  size_t obj_offset = 0;
   EXECUTE_IF_SET_IN_BITMAP (consideration_allocno_bitmap, 0, j, bi)
     {
       a = ira_allocnos[j];
       ALLOCNO_ADD_DATA (a) = allocno_color_data + n;
+      ALLOCNO_COLOR_DATA (a)->first_thread_objects
+	= thread_objects + obj_offset;
+      obj_offset += ALLOCNO_NUM_OBJECTS (a);
+      ALLOCNO_COLOR_DATA (a)->next_thread_objects = thread_objects + obj_offset;
+      obj_offset += ALLOCNO_NUM_OBJECTS (a);
       n++;
     }
+  gcc_assert (obj_n * 2 == obj_offset);
   init_allocno_threads ();
   /* Color all mentioned allocnos including transparent ones.  */
   color_allocnos ();
   /* Process caps.  They are processed just once.  */
-  if (flag_ira_region == IRA_REGION_MIXED
-      || flag_ira_region == IRA_REGION_ALL)
+  if (flag_ira_region == IRA_REGION_MIXED || flag_ira_region == IRA_REGION_ALL)
     EXECUTE_IF_SET_IN_BITMAP (loop_tree_node->all_allocnos, 0, j, bi)
       {
 	a = ira_allocnos[j];
@@ -3881,12 +4115,22 @@  color_pass (ira_loop_tree_node_t loop_tree_node)
 	    }
 	}
     }
-  ira_free (allocno_color_data);
   EXECUTE_IF_SET_IN_BITMAP (consideration_allocno_bitmap, 0, j, bi)
     {
       a = ira_allocnos[j];
+      gcc_assert (a != NULL);
+      ALLOCNO_COLOR_DATA (a)->first_thread_objects = NULL;
+      ALLOCNO_COLOR_DATA (a)->next_thread_objects = NULL;
+      if (ALLOCNO_COLOR_DATA (a)->thread_allocnos != NULL)
+	{
+	  bitmap_clear (ALLOCNO_COLOR_DATA (a)->thread_allocnos);
+	  ira_free_bitmap (ALLOCNO_COLOR_DATA (a)->thread_allocnos);
+	  ALLOCNO_COLOR_DATA (a)->thread_allocnos = NULL;
+	}
       ALLOCNO_ADD_DATA (a) = NULL;
     }
+  ira_free (allocno_color_data);
+  ira_free (thread_objects);
 }
 
 /* Initialize the common data for coloring and calls functions to do
@@ -4080,15 +4324,17 @@  update_curr_costs (ira_allocno_t a)
   ira_init_register_move_cost_if_necessary (mode);
   for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
     {
-      if (cp->first == a)
+      ira_allocno_t first_a = OBJECT_ALLOCNO (cp->first);
+      ira_allocno_t second_a = OBJECT_ALLOCNO (cp->second);
+      if (first_a == a)
 	{
 	  next_cp = cp->next_first_allocno_copy;
-	  another_a = cp->second;
+	  another_a = second_a;
 	}
-      else if (cp->second == a)
+      else if (second_a == a)
 	{
 	  next_cp = cp->next_second_allocno_copy;
-	  another_a = cp->first;
+	  another_a = first_a;
 	}
       else
 	gcc_unreachable ();
@@ -4100,9 +4346,8 @@  update_curr_costs (ira_allocno_t a)
       i = ira_class_hard_reg_index[aclass][hard_regno];
       if (i < 0)
 	continue;
-      cost = (cp->first == a
-	      ? ira_register_move_cost[mode][rclass][aclass]
-	      : ira_register_move_cost[mode][aclass][rclass]);
+      cost = (first_a == a ? ira_register_move_cost[mode][rclass][aclass]
+			   : ira_register_move_cost[mode][aclass][rclass]);
       ira_allocate_and_set_or_copy_costs
 	(&ALLOCNO_UPDATED_HARD_REG_COSTS (a), aclass, ALLOCNO_CLASS_COST (a),
 	 ALLOCNO_HARD_REG_COSTS (a));
@@ -4349,21 +4594,23 @@  coalesce_allocnos (void)
 	continue;
       for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
 	{
-	  if (cp->first == a)
+	  ira_allocno_t first_a = OBJECT_ALLOCNO (cp->first);
+	  ira_allocno_t second_a = OBJECT_ALLOCNO (cp->second);
+	  if (first_a == a)
 	    {
 	      next_cp = cp->next_first_allocno_copy;
-	      regno = ALLOCNO_REGNO (cp->second);
+	      regno = ALLOCNO_REGNO (second_a);
 	      /* For priority coloring we coalesce allocnos only with
 		 the same allocno class not with intersected allocno
 		 classes as it were possible.  It is done for
 		 simplicity.  */
 	      if ((cp->insn != NULL || cp->constraint_p)
-		  && ALLOCNO_ASSIGNED_P (cp->second)
-		  && ALLOCNO_HARD_REGNO (cp->second) < 0
-		  && ! ira_equiv_no_lvalue_p (regno))
+		  && ALLOCNO_ASSIGNED_P (second_a)
+		  && ALLOCNO_HARD_REGNO (second_a) < 0
+		  && !ira_equiv_no_lvalue_p (regno))
 		sorted_copies[cp_num++] = cp;
 	    }
-	  else if (cp->second == a)
+	  else if (second_a == a)
 	    next_cp = cp->next_second_allocno_copy;
 	  else
 	    gcc_unreachable ();
@@ -4376,17 +4623,18 @@  coalesce_allocnos (void)
       for (i = 0; i < cp_num; i++)
 	{
 	  cp = sorted_copies[i];
-	  if (! coalesced_allocno_conflict_p (cp->first, cp->second))
+	  ira_allocno_t first_a = OBJECT_ALLOCNO (cp->first);
+	  ira_allocno_t second_a = OBJECT_ALLOCNO (cp->second);
+	  if (!coalesced_allocno_conflict_p (first_a, second_a))
 	    {
 	      allocno_coalesced_p = true;
 	      if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
-		fprintf
-		  (ira_dump_file,
-		   "      Coalescing copy %d:a%dr%d-a%dr%d (freq=%d)\n",
-		   cp->num, ALLOCNO_NUM (cp->first), ALLOCNO_REGNO (cp->first),
-		   ALLOCNO_NUM (cp->second), ALLOCNO_REGNO (cp->second),
-		   cp->freq);
-	      merge_allocnos (cp->first, cp->second);
+		fprintf (ira_dump_file,
+			 "      Coalescing copy %d:a%dr%d-a%dr%d (freq=%d)\n",
+			 cp->num, ALLOCNO_NUM (first_a),
+			 ALLOCNO_REGNO (first_a), ALLOCNO_NUM (second_a),
+			 ALLOCNO_REGNO (second_a), cp->freq);
+	      merge_allocnos (first_a, second_a);
 	      i++;
 	      break;
 	    }
@@ -4395,8 +4643,11 @@  coalesce_allocnos (void)
       for (n = 0; i < cp_num; i++)
 	{
 	  cp = sorted_copies[i];
-	  if (allocno_coalesce_data[ALLOCNO_NUM (cp->first)].first
-	      != allocno_coalesce_data[ALLOCNO_NUM (cp->second)].first)
+	  if (allocno_coalesce_data[ALLOCNO_NUM (OBJECT_ALLOCNO (cp->first))]
+		.first
+	      != allocno_coalesce_data[ALLOCNO_NUM (
+					 OBJECT_ALLOCNO (cp->second))]
+		   .first)
 	    sorted_copies[n++] = cp;
 	}
       cp_num = n;
@@ -5070,15 +5321,15 @@  ira_reuse_stack_slot (int regno, poly_uint64 inherent_size,
 	       cp != NULL;
 	       cp = next_cp)
 	    {
-	      if (cp->first == allocno)
+	      if (OBJECT_ALLOCNO (cp->first) == allocno)
 		{
 		  next_cp = cp->next_first_allocno_copy;
-		  another_allocno = cp->second;
+		  another_allocno = OBJECT_ALLOCNO (cp->second);
 		}
-	      else if (cp->second == allocno)
+	      else if (OBJECT_ALLOCNO (cp->second) == allocno)
 		{
 		  next_cp = cp->next_second_allocno_copy;
-		  another_allocno = cp->first;
+		  another_allocno = OBJECT_ALLOCNO (cp->first);
 		}
 	      else
 		gcc_unreachable ();
@@ -5274,6 +5525,7 @@  ira_initiate_assign (void)
     = (ira_allocno_t *) ira_allocate (sizeof (ira_allocno_t)
 				      * ira_allocnos_num);
   consideration_allocno_bitmap = ira_allocate_bitmap ();
+  uncolorable_allocno_set = ira_allocate_bitmap ();
   initiate_cost_update ();
   allocno_priorities = (int *) ira_allocate (sizeof (int) * ira_allocnos_num);
   sorted_copies = (ira_copy_t *) ira_allocate (ira_copies_num
@@ -5286,6 +5538,7 @@  ira_finish_assign (void)
 {
   ira_free (sorted_allocnos);
   ira_free_bitmap (consideration_allocno_bitmap);
+  ira_free_bitmap (uncolorable_allocno_set);
   finish_cost_update ();
   ira_free (allocno_priorities);
   ira_free (sorted_copies);
diff --git a/gcc/ira-conflicts.cc b/gcc/ira-conflicts.cc
index 0585ad10043..7aeed7202ce 100644
--- a/gcc/ira-conflicts.cc
+++ b/gcc/ira-conflicts.cc
@@ -173,25 +173,115 @@  build_conflict_bit_table (void)
   sparseset_free (objects_live);
   return true;
 }
-
-/* Return true iff allocnos A1 and A2 cannot be allocated to the same
-   register due to conflicts.  */
 
-static bool
-allocnos_conflict_for_copy_p (ira_allocno_t a1, ira_allocno_t a2)
+/* Check that X is REG or SUBREG of REG.  */
+#define REG_SUBREG_P(x)                                                        \
+  (REG_P (x) || (GET_CODE (x) == SUBREG && REG_P (SUBREG_REG (x))))
+
+/* Return true if OBJ1 and OBJ2 can be a move INSN.  */
+bool
+subreg_move_p (ira_object_t obj1, ira_object_t obj2)
 {
-  /* Due to the fact that we canonicalize conflicts (see
-     record_object_conflict), we only need to test for conflicts of
-     the lowest order words.  */
-  ira_object_t obj1 = ALLOCNO_OBJECT (a1, 0);
-  ira_object_t obj2 = ALLOCNO_OBJECT (a2, 0);
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (obj2);
+  return ALLOCNO_CLASS (a1) != NO_REGS && ALLOCNO_CLASS (a2) != NO_REGS
+	 && (ALLOCNO_TRACK_SUBREG_P (a1) || ALLOCNO_TRACK_SUBREG_P (a2))
+	 && OBJECT_NREGS (obj1) == OBJECT_NREGS (obj2)
+	 && (OBJECT_NREGS (obj1) != ALLOCNO_NREGS (a1)
+	     || OBJECT_NREGS (obj2) != ALLOCNO_NREGS (a2));
+}
 
-  return OBJECTS_CONFLICT_P (obj1, obj2);
+/* Return true if ORIG_DEST_REG and ORIG_SRC_REG can be a move INSN.  */
+bool
+subreg_move_p (rtx orig_dest_reg, rtx orig_src_reg)
+{
+  gcc_assert (REG_SUBREG_P (orig_dest_reg) && REG_SUBREG_P (orig_src_reg));
+  rtx reg1
+    = SUBREG_P (orig_dest_reg) ? SUBREG_REG (orig_dest_reg) : orig_dest_reg;
+  rtx reg2 = SUBREG_P (orig_src_reg) ? SUBREG_REG (orig_src_reg) : orig_src_reg;
+  if (HARD_REGISTER_P (reg1) || HARD_REGISTER_P (reg2))
+    return false;
+  ira_allocno_t a1 = ira_curr_regno_allocno_map[REGNO (reg1)];
+  ira_allocno_t a2 = ira_curr_regno_allocno_map[REGNO (reg2)];
+  ira_object_t obj1 = find_object (a1, orig_dest_reg);
+  ira_object_t obj2 = find_object (a2, orig_src_reg);
+  return subreg_move_p (obj1, obj2);
 }
 
-/* Check that X is REG or SUBREG of REG.  */
-#define REG_SUBREG_P(x)							\
-   (REG_P (x) || (GET_CODE (x) == SUBREG && REG_P (SUBREG_REG (x))))
+/* Return true if OBJ1 and OBJ2 can allocate to the same register.  */
+static bool
+regs_non_conflict_for_copy_p (ira_object_t obj1, ira_object_t obj2,
+			      bool is_move, bool offset_equal)
+{
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (obj2);
+  if (is_move && subreg_move_p (obj1, obj2))
+    {
+      if (OBJECTS_CONFLICT_P (obj1, obj2))
+	return false;
+      /* Assume a1 allocate to `OBJECT_START (obj2)` and a2 allocate to
+	 `OBJECT_START (obj1)` hard register, so both objects can use the same
+	 hard register `OBJECT_START (obj1) + OBJECT_START (obj2)`.  */
+      int start_regno1 = OBJECT_START (obj2);
+      int start_regno2 = OBJECT_START (obj1);
+
+      ira_object_t obj_a, obj_b;
+      ira_allocno_object_iterator oi_a, oi_b;
+      FOR_EACH_ALLOCNO_OBJECT (a1, obj_a, oi_a)
+	FOR_EACH_ALLOCNO_OBJECT (a2, obj_b, oi_b)
+	  /* If there have a conflict between a1 and a2 and prevent the
+	     allocation before, then obj1 and obj2 cannot be a copy.  */
+	  if (OBJECTS_CONFLICT_P (obj_a, obj_b)
+	      && !(start_regno1 + OBJECT_START (obj_a) + OBJECT_NREGS (obj_a)
+		     <= (start_regno2 + OBJECT_START (obj_b))
+		   || start_regno2 + OBJECT_START (obj_b) + OBJECT_NREGS (obj_b)
+			<= (start_regno1 + OBJECT_START (obj_a))))
+	      return false;
+
+      return true;
+    }
+  else
+    {
+      /* For normal case, make sure full_obj1 and full_obj2 can allocate to the
+	 same register.  */
+      ira_object_t full_obj1 = find_object (a1, 0, ALLOCNO_NREGS (a1));
+      ira_object_t full_obj2 = find_object (a2, 0, ALLOCNO_NREGS (a2));
+      return !OBJECTS_CONFLICT_P (full_obj1, full_obj2) && offset_equal;
+    }
+}
+
+/* Return true if ORIG_REG offset align in ALLOCNO_UNIT_SIZE (A) and times of
+   ALLOCNO_UNIT_SIZE (A). Use to forbidden bellow rtl which has a subreg move to
+   create copy (from testsuite/gcc.dg/vect/vect-simd-20.c on AArch64). Suppose
+   they are all allocated to the fourth register, that is, pseudo 127 is
+   allocated to w4, and pseudo 149 is allocated to x4 and x5. Then the third
+   instruction can be safely deleted without affecting the result of pseudo 149.
+   But when the second instruction is executed, the upper 32 bits of x4 will be
+   set to 0 (the behavior of the add instruction), that is to say, the result of
+   pseudo 149 is modified, and its 32~63 bits are set to 0, Not the desired
+   result.
+
+     (set (reg:SI 127)
+	  (subreg:SI (reg:TI 149) 0))
+     ...
+     (set (reg:SI 127)
+	  (plus:SI (reg:SI 127)
+		   (reg:SI 180)))
+     ...
+     (set (zero_extract:DI (subreg:DI (reg:TI 149) 0)
+			   (const_int 32 [0x20])
+			   (const_int 0 [0]))
+	  (subreg:DI (reg:SI 127) 0))  */
+static bool
+subreg_reg_align_and_times_p (ira_allocno_t a, rtx orig_reg)
+{
+  if (!has_subreg_object_p (a) || !SUBREG_P (orig_reg))
+    return true;
+
+  return multiple_p (SUBREG_BYTE (orig_reg), ALLOCNO_UNIT_SIZE (a))
+	 && multiple_p (GET_MODE_SIZE (GET_MODE (orig_reg)),
+			ALLOCNO_UNIT_SIZE (a));
+}
 
 /* Return X if X is a REG, otherwise it should be SUBREG of REG and
    the function returns the reg in this case.  *OFFSET will be set to
@@ -237,8 +327,9 @@  get_freq_for_shuffle_copy (int freq)
    SINGLE_INPUT_OP_HAS_CSTR_P is only meaningful when constraint_p
    is true, see function ira_get_dup_out_num for its meaning.  */
 static bool
-process_regs_for_copy (rtx reg1, rtx reg2, bool constraint_p, rtx_insn *insn,
-		       int freq, bool single_input_op_has_cstr_p = true)
+process_regs_for_copy (rtx orig_reg1, rtx orig_reg2, bool constraint_p,
+		       rtx_insn *insn, int freq,
+		       bool single_input_op_has_cstr_p = true)
 {
   int allocno_preferenced_hard_regno, index, offset1, offset2;
   int cost, conflict_cost, move_cost;
@@ -248,10 +339,10 @@  process_regs_for_copy (rtx reg1, rtx reg2, bool constraint_p, rtx_insn *insn,
   machine_mode mode;
   ira_copy_t cp;
 
-  gcc_assert (REG_SUBREG_P (reg1) && REG_SUBREG_P (reg2));
-  only_regs_p = REG_P (reg1) && REG_P (reg2);
-  reg1 = go_through_subreg (reg1, &offset1);
-  reg2 = go_through_subreg (reg2, &offset2);
+  gcc_assert (REG_SUBREG_P (orig_reg1) && REG_SUBREG_P (orig_reg2));
+  only_regs_p = REG_P (orig_reg1) && REG_P (orig_reg2);
+  rtx reg1 = go_through_subreg (orig_reg1, &offset1);
+  rtx reg2 = go_through_subreg (orig_reg2, &offset2);
   /* Set up hard regno preferenced by allocno.  If allocno gets the
      hard regno the copy (or potential move) insn will be removed.  */
   if (HARD_REGISTER_P (reg1))
@@ -270,13 +361,17 @@  process_regs_for_copy (rtx reg1, rtx reg2, bool constraint_p, rtx_insn *insn,
     {
       ira_allocno_t a1 = ira_curr_regno_allocno_map[REGNO (reg1)];
       ira_allocno_t a2 = ira_curr_regno_allocno_map[REGNO (reg2)];
+      ira_object_t obj1 = find_object (a1, orig_reg1);
+      ira_object_t obj2 = find_object (a2, orig_reg2);
 
-      if (!allocnos_conflict_for_copy_p (a1, a2)
-	  && offset1 == offset2
+      if (subreg_reg_align_and_times_p (a1, orig_reg1)
+	  && subreg_reg_align_and_times_p (a2, orig_reg2)
+	  && regs_non_conflict_for_copy_p (obj1, obj2, insn != NULL,
+					   offset1 == offset2)
 	  && ordered_p (GET_MODE_PRECISION (ALLOCNO_MODE (a1)),
 			GET_MODE_PRECISION (ALLOCNO_MODE (a2))))
 	{
-	  cp = ira_add_allocno_copy (a1, a2, freq, constraint_p, insn,
+	  cp = ira_add_allocno_copy (obj1, obj2, freq, constraint_p, insn,
 				     ira_curr_loop_tree_node);
 	  bitmap_set_bit (ira_curr_loop_tree_node->local_copies, cp->num);
 	  return true;
@@ -438,16 +533,15 @@  add_insn_allocno_copies (rtx_insn *insn)
   freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn));
   if (freq == 0)
     freq = 1;
-  if ((set = single_set (insn)) != NULL_RTX
-      && REG_SUBREG_P (SET_DEST (set)) && REG_SUBREG_P (SET_SRC (set))
-      && ! side_effects_p (set)
-      && find_reg_note (insn, REG_DEAD,
-			REG_P (SET_SRC (set))
-			? SET_SRC (set)
-			: SUBREG_REG (SET_SRC (set))) != NULL_RTX)
+  if ((set = single_set (insn)) != NULL_RTX && REG_SUBREG_P (SET_DEST (set))
+      && REG_SUBREG_P (SET_SRC (set)) && !side_effects_p (set)
+      && (find_reg_note (insn, REG_DEAD,
+			 REG_P (SET_SRC (set)) ? SET_SRC (set)
+					       : SUBREG_REG (SET_SRC (set)))
+	    != NULL_RTX
+	  || subreg_move_p (SET_DEST (set), SET_SRC (set))))
     {
-      process_regs_for_copy (SET_SRC (set), SET_DEST (set),
-			     false, insn, freq);
+      process_regs_for_copy (SET_SRC (set), SET_DEST (set), false, insn, freq);
       return;
     }
   /* Fast check of possibility of constraint or shuffle copies.  If
@@ -521,16 +615,23 @@  propagate_copies (void)
 
   FOR_EACH_COPY (cp, ci)
     {
-      a1 = cp->first;
-      a2 = cp->second;
+      a1 = OBJECT_ALLOCNO (cp->first);
+      a2 = OBJECT_ALLOCNO (cp->second);
       if (ALLOCNO_LOOP_TREE_NODE (a1) == ira_loop_tree_root)
 	continue;
       ira_assert ((ALLOCNO_LOOP_TREE_NODE (a2) != ira_loop_tree_root));
       parent_a1 = ira_parent_or_cap_allocno (a1);
       parent_a2 = ira_parent_or_cap_allocno (a2);
+      ira_object_t parent_obj1
+	= find_object_anyway (parent_a1, OBJECT_START (cp->first),
+			      OBJECT_NREGS (cp->first));
+      ira_object_t parent_obj2
+	= find_object_anyway (parent_a2, OBJECT_START (cp->second),
+			      OBJECT_NREGS (cp->second));
       ira_assert (parent_a1 != NULL && parent_a2 != NULL);
-      if (! allocnos_conflict_for_copy_p (parent_a1, parent_a2))
-	ira_add_allocno_copy (parent_a1, parent_a2, cp->freq,
+      if (regs_non_conflict_for_copy_p (parent_obj1, parent_obj2,
+					cp->insn != NULL, true))
+	ira_add_allocno_copy (parent_obj1, parent_obj2, cp->freq,
 			      cp->constraint_p, cp->insn, cp->loop_tree_node);
     }
 }
diff --git a/gcc/ira-emit.cc b/gcc/ira-emit.cc
index 9dc7f3c655e..30ff46980f5 100644
--- a/gcc/ira-emit.cc
+++ b/gcc/ira-emit.cc
@@ -1129,11 +1129,11 @@  add_range_and_copies_from_move_list (move_t list, ira_loop_tree_node_t node,
       update_costs (to, false, freq);
       cp = ira_add_allocno_copy (from, to, freq, false, move->insn, NULL);
       if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
-	fprintf (ira_dump_file, "    Adding cp%d:a%dr%d-a%dr%d\n",
-		 cp->num, ALLOCNO_NUM (cp->first),
-		 REGNO (allocno_emit_reg (cp->first)),
-		 ALLOCNO_NUM (cp->second),
-		 REGNO (allocno_emit_reg (cp->second)));
+	fprintf (ira_dump_file, "    Adding cp%d:a%dr%d-a%dr%d\n", cp->num,
+		 ALLOCNO_NUM (OBJECT_ALLOCNO (cp->first)),
+		 REGNO (allocno_emit_reg (OBJECT_ALLOCNO (cp->first))),
+		 ALLOCNO_NUM (OBJECT_ALLOCNO (cp->second)),
+		 REGNO (allocno_emit_reg (OBJECT_ALLOCNO (cp->second))));
 
       nr = ALLOCNO_NUM_OBJECTS (from);
       for (i = 0; i < nr; i++)
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index b9e24328867..963e533e448 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -229,6 +229,8 @@  struct ira_object
 {
   /* The allocno associated with this record.  */
   ira_allocno_t allocno;
+  /* Index in allocno->objects array */
+  unsigned int index;
   /* Vector of accumulated conflicting conflict_redords with NULL end
      marker (if OBJECT_CONFLICT_VEC_P is true) or conflict bit vector
      otherwise.  */
@@ -522,6 +524,7 @@  allocno_emit_reg (ira_allocno_t a)
 }
 
 #define OBJECT_ALLOCNO(O) ((O)->allocno)
+#define OBJECT_INDEX(O) ((O)->index)
 #define OBJECT_CONFLICT_ARRAY(O) ((O)->conflicts_array)
 #define OBJECT_CONFLICT_VEC(O) ((ira_object_t *)(O)->conflicts_array)
 #define OBJECT_CONFLICT_BITVEC(O) ((IRA_INT_TYPE *)(O)->conflicts_array)
@@ -591,9 +594,9 @@  struct ira_allocno_copy
 {
   /* The unique order number of the copy node starting with 0.  */
   int num;
-  /* Allocnos connected by the copy.  The first allocno should have
+  /* Objects connected by the copy.  The first allocno should have
      smaller order number than the second one.  */
-  ira_allocno_t first, second;
+  ira_object_t first, second;
   /* Execution frequency of the copy.  */
   int freq;
   bool constraint_p;
@@ -1043,6 +1046,9 @@  extern void ira_remove_allocno_prefs (ira_allocno_t);
 extern ira_copy_t ira_create_copy (ira_allocno_t, ira_allocno_t,
 				   int, bool, rtx_insn *,
 				   ira_loop_tree_node_t);
+extern ira_copy_t
+ira_add_allocno_copy (ira_object_t, ira_object_t, int, bool, rtx_insn *,
+		      ira_loop_tree_node_t);
 extern ira_copy_t ira_add_allocno_copy (ira_allocno_t, ira_allocno_t, int,
 					bool, rtx_insn *,
 					ira_loop_tree_node_t);
@@ -1056,6 +1062,7 @@  extern void ira_destroy (void);
 extern ira_object_t
 find_object (ira_allocno_t, int, int);
 extern ira_object_t find_object (ira_allocno_t, poly_int64, poly_int64);
+extern ira_object_t find_object (ira_allocno_t, rtx);
 ira_object_t
 find_object_anyway (ira_allocno_t a, int start, int nregs);
 extern void ira_copy_allocno_objects (ira_allocno_t, ira_allocno_t);
@@ -1084,6 +1091,8 @@  extern void ira_implicitly_set_insn_hard_regs (HARD_REG_SET *,
 /* ira-conflicts.cc */
 extern void ira_debug_conflicts (bool);
 extern void ira_build_conflicts (void);
+extern bool subreg_move_p (ira_object_t, ira_object_t);
+extern bool subreg_move_p (rtx, rtx);
 
 /* ira-color.cc */
 extern ira_allocno_t ira_soft_conflict (ira_allocno_t, ira_allocno_t);
diff --git a/gcc/ira.cc b/gcc/ira.cc
index b9159d089c3..739ef28af6e 100644
--- a/gcc/ira.cc
+++ b/gcc/ira.cc
@@ -2853,14 +2853,15 @@  print_redundant_copies (void)
       if (hard_regno >= 0)
 	continue;
       for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
-	if (cp->first == a)
+	if (OBJECT_ALLOCNO (cp->first) == a)
 	  next_cp = cp->next_first_allocno_copy;
 	else
 	  {
 	    next_cp = cp->next_second_allocno_copy;
 	    if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL
 		&& cp->insn != NULL_RTX
-		&& ALLOCNO_HARD_REGNO (cp->first) == hard_regno)
+		&& ALLOCNO_HARD_REGNO (OBJECT_ALLOCNO (cp->first))
+		     == hard_regno)
 	      fprintf (ira_dump_file,
 		       "        Redundant move from %d(freq %d):%d\n",
 		       INSN_UID (cp->insn), cp->freq, hard_regno);