From ca4c3bdde17ea1d97738434bcc2a01624cb9a5c1 Mon Sep 17 00:00:00 2001
From: Matthew Treinish <mtreinish@kortar.org>
Date: Mon, 21 Nov 2022 14:41:23 -0500
Subject: [PATCH 1/7] Use VF2 to find a partial layout for seeding a
 SabreLayout trial

This commit builds on the VF2PartialLayout pass which was an experiment
available as an external plugin here:

https://github.com/mtreinish/vf2_partial_layout

That pass used the vf2 algorithm in rustworkx to find the deepest
partial interaction graph of a circuit which is isomorphic with the
coupling graph and uses that mapping to apply an initial layout. The
issue with the performance of that pass was the selection of the qubits
outside the partial interaction graph. Selecting the mapping for those
qubits is similar to the same heuristic layout that SabreLayout is
trying to solve, just for a subset of qubits. In VF2PartialLayout a
simple nearest neighbor based approach was used for selecting qubits
from the coupling graph for any virtual qubits outside the partial
layout. In practice this ended up performing worse than SabreLayout.

To address the shortcomings of that pass this commit combines the
partial layout selection from that external plugin with SabreLayout.
The sabre layout algorithm starts by randomly selecting a layout and
then progressively working forwards and backwards across the circuit
and swap mapping it to find the permutation caused by inserted swaps.
Those permutations are then used to modify the random layout and
eventually an initial layout that minimizes the number of swaps needed is
selected. With this commit instead of using a completely random layout
for all the initial guesses this starts a single trial with the partial
layout found in the same way as VF2PartialLayout. Then the remaining
qubits are selected at random and the SabreLayout algorithm is run in
the same manner as before. This hopefully should improve the quality
of the results because we're starting from a partial layout that
doesn't require swaps for those qubits.

A similar (almost identical) approach was tried in #9174 except instead
of seeding a single trial with the partial layout it used the partial
layout for all the trials. In that case the results were not
generally better and the results were mixed. At the time my guess was
that using the partial layout constrained the search space too much and
was inducing more swaps to be needed. However, looking at the details in
issue #10160 this adapts #9174 to see if doing the partial layout in a
more limited manner has any impact there.
---
 crates/accelerate/src/sabre_layout.rs         |  47 +++-
 .../transpiler/passes/layout/sabre_layout.py  | 218 +++++++++++++++++-
 .../transpiler/preset_passmanagers/level1.py  |   2 +
 .../transpiler/preset_passmanagers/level2.py  |   1 +
 .../transpiler/preset_passmanagers/level3.py  |   1 +
 5 files changed, 260 insertions(+), 9 deletions(-)
diff --git a/crates/accelerate/src/sabre_layout.rs b/crates/accelerate/src/sabre_layout.rs
index 1fe1b7d53827..a2e894dd7667 100644
--- a/crates/accelerate/src/sabre_layout.rs
+++ b/crates/accelerate/src/sabre_layout.rs
@@ -41,6 +41,7 @@ pub fn sabre_layout_and_routing(
     num_swap_trials: usize,
     num_layout_trials: usize,
     seed: Option<u64>,
+    partial_layout: Option<Vec<Option<usize>>>,
 ) -> ([NLayout; 2], SwapMap, PyObject) {
     let run_in_parallel = getenv_use_multiple_threads();
     let outer_rng = match seed {
@@ -57,6 +58,7 @@ pub fn sabre_layout_and_routing(
             .into_par_iter()
             .enumerate()
             .map(|(index, seed_trial)| {
+                let partial = if index > 0 { &partial_layout } else { &None };
                 (
                     index,
                     layout_trial(
@@ -69,6 +71,7 @@ pub fn sabre_layout_and_routing(
                         max_iterations,
                         num_swap_trials,
                         run_in_parallel,
+                        partial.clone(),
                     ),
                 )
             })
@@ -83,7 +86,9 @@ pub fn sabre_layout_and_routing(
     } else {
         seed_vec
             .into_iter()
-            .map(|seed_trial| {
+            .enumerate()
+            .map(|(index, seed_trial)| {
+                let partial = if index > 0 { &partial_layout } else { &None };
                 layout_trial(
                     num_clbits,
                     &mut dag_nodes,
@@ -94,6 +99,7 @@ pub fn sabre_layout_and_routing(
                     max_iterations,
                     num_swap_trials,
                     run_in_parallel,
+                    partial.clone(),
                 )
             })
             .min_by_key(|result| result.1.map.values().map(|x| x.len()).sum::<usize>())
@@ -112,13 +118,46 @@ fn layout_trial(
     max_iterations: usize,
     num_swap_trials: usize,
     run_swap_in_parallel: bool,
+    partial_layout: Option<Vec<Option<usize>>>,
 ) -> ([NLayout; 2], SwapMap, Vec<usize>) {
     // Pick a random initial layout and fully populate ancillas in that layout too
     let num_physical_qubits = distance_matrix.shape()[0];
     let mut rng = Pcg64Mcg::seed_from_u64(seed);
-    let mut physical_qubits: Vec<usize> = (0..num_physical_qubits).collect();
-    physical_qubits.shuffle(&mut rng);
-    let mut initial_layout = NLayout::from_logical_to_physical(physical_qubits);
+    let mut physical_qubits: Vec<usize>;
+    match partial_layout {
+        Some(partial_layout_bits) => {
+            let used_bits: HashSet<usize> = partial_layout_bits
+                .iter()
+                .filter_map(|x| x.as_ref())
+                .copied()
+                .collect();
+            let mut free_bits: Vec<usize> = (0..num_physical_qubits)
+                .filter(|x| !used_bits.contains(x))
+                .collect();
+            free_bits.shuffle(&mut rng);
+            physical_qubits = partial_layout_bits
+                .iter()
+                .map(|x| match x {
+                    Some(phys) => *phys,
+                    None => free_bits.pop().unwrap(),
+                })
+                .collect();
+        }
+        None => {
+            physical_qubits = (0..num_physical_qubits).collect();
+            physical_qubits.shuffle(&mut rng);
+        }
+    };
+    let mut phys_to_logic = vec![0; num_physical_qubits];
+    physical_qubits
+        .iter()
+        .enumerate()
+        .for_each(|(logic, phys)| phys_to_logic[*phys] = logic);
+
+    let mut initial_layout = NLayout {
+        logic_to_phys: physical_qubits,
+        phys_to_logic,
+    };
     let mut rev_dag_nodes: Vec<(usize, Vec<usize>, HashSet<usize>)> =
         dag_nodes.iter().rev().cloned().collect();
     for _iter in 0..max_iterations {
diff --git a/qiskit/transpiler/passes/layout/sabre_layout.py b/qiskit/transpiler/passes/layout/sabre_layout.py
index 2702786cde3a..170a6a8f657e 100644
--- a/qiskit/transpiler/passes/layout/sabre_layout.py
+++ b/qiskit/transpiler/passes/layout/sabre_layout.py
@@ -13,8 +13,11 @@
 """Layout selection using the SABRE bidirectional search approach from Li et al.
 """
 
+from collections import defaultdict
 import copy
 import logging
+import time
+
 import numpy as np
 import rustworkx as rx
 
@@ -24,6 +27,7 @@
 from qiskit.transpiler.passes.layout.enlarge_with_ancilla import EnlargeWithAncilla
 from qiskit.transpiler.passes.layout.apply_layout import ApplyLayout
 from qiskit.transpiler.passes.layout import disjoint_utils
+from qiskit.transpiler.passes.layout import vf2_utils
 from qiskit.transpiler.passmanager import PassManager
 from qiskit.transpiler.layout import Layout
 from qiskit.transpiler.basepasses import TransformationPass
@@ -38,6 +42,8 @@
 from qiskit.transpiler.target import Target
 from qiskit.transpiler.coupling import CouplingMap
 from qiskit.tools.parallel import CPU_COUNT
+from qiskit.circuit.controlflow import ControlFlowOp, ForLoopOp
+from qiskit.converters import circuit_to_dag
 
 logger = logging.getLogger(__name__)
 
@@ -45,11 +51,16 @@
 class SabreLayout(TransformationPass):
     """Choose a Layout via iterative bidirectional routing of the input circuit.
 
-    Starting with a random initial `Layout`, the algorithm does a full routing
-    of the circuit (via the `routing_pass` method) to end up with a
-    `final_layout`. This final_layout is then used as the initial_layout for
-    routing the reverse circuit. The algorithm iterates a number of times until
-    it finds an initial_layout that reduces full routing cost.
+    The algorithm does a full routing of the circuit (via the `routing_pass`
+    method) to end up with a `final_layout`. This final_layout is then used as
+    the initial_layout for routing the reverse circuit. The algorithm iterates a
+    number of times until it finds an initial_layout that reduces full routing cost.
+
+    Prior to running the SABRE algorithm this transpiler pass will try to find the layout
+    for deepest layer that is has an isomorphic subgraph in the coupling graph. This is
+    done by progressively using the algorithm from :class:`~.VF2Layout` on the circuit
+    until a mapping is not found. This partial layout is then used to seed the SABRE algorithm
+    and then random physical bits are selected for the remaining elements in the mapping.
 
     This method exploits the reversibility of quantum circuits, and tries to
     include global circuit information in the choice of initial_layout.
@@ -85,6 +96,10 @@ def __init__(
         swap_trials=None,
         layout_trials=None,
         skip_routing=False,
+        vf2_partial_layout=True,
+        vf2_call_limit=None,
+        vf2_time_limit=None,
+        vf2_max_trials=None,
     ):
         """SabreLayout initializer.
 
@@ -121,6 +136,16 @@ def __init__(
                 will be returned in the property set. This is a tradeoff to run custom
                 routing with multiple layout trials, as using this option will cause
                 SabreLayout to run the routing stage internally but not use that result.
+            vf2_partial_layout (bool): Run vf2 partial layout
+            vf2_call_limit (int): The number of state visits to attempt in each execution of
+                VF2 to attempt to find a partial layout.
+            vf2_time_limit (float): The total time limit in seconds to run VF2 to find a partial
+                layout
+            vf2_max_trials (int): The maximum number of trials to run VF2 to find
+                a partial layout. If this is not specified the number of trials will be limited
+                based on the number of edges in the interaction graph or the coupling graph
+                (whichever is larger) if no other limits are set. If set to a value <= 0 no
+                limit on the number of trials will be set.
 
         Raises:
             TranspilerError: If both ``routing_pass`` and ``swap_trials`` or
@@ -158,6 +183,11 @@ def __init__(
                     self.coupling_map = copy.deepcopy(self.coupling_map)
                 self.coupling_map.make_symmetric()
             self._neighbor_table = NeighborTable(rx.adjacency_matrix(self.coupling_map.graph))
+        self.avg_error_map = None
+        self.vf2_partial_layout = vf2_partial_layout
+        self.call_limit = vf2_call_limit
+        self.time_limit = vf2_time_limit
+        self.max_trials = vf2_max_trials
 
     def run(self, dag):
         """Run the SabreLayout pass on `dag`.
@@ -321,6 +351,13 @@ def _inner_run(self, dag, coupling_map):
                     cargs,
                 )
             )
+        partial_layout = None
+        if self.vf2_partial_layout:
+            partial_layout_virtual_bits = self._vf2_partial_layout(
+                dag, coupling_map
+            ).get_virtual_bits()
+            partial_layout = [partial_layout_virtual_bits.get(i, None) for i in dag.qubits]
+
         ((initial_layout, final_layout), swap_map, gate_order) = sabre_layout_and_routing(
             len(dag.clbits),
             dag_list,
@@ -331,6 +368,7 @@ def _inner_run(self, dag, coupling_map):
             self.swap_trials,
             self.layout_trials,
             self.seed,
+            partial_layout,
         )
         # Apply initial layout selected.
         layout_dict = {}
@@ -385,3 +423,173 @@ def _compose_layouts(self, initial_layout, pass_final_layout, qregs):
         qubit_map = Layout.combine_into_edge_map(initial_layout, trivial_layout)
         final_layout = {v: pass_final_layout._v2p[qubit_map[v]] for v in initial_layout._v2p}
         return Layout(final_layout)
+
+    # TODO: Migrate this to rust as part of sabre_layout.rs after
+    # https://github.com/Qiskit/rustworkx/issues/741 is implemented and released
+    def _vf2_partial_layout(self, dag, coupling_map):
+        """Find a partial layout using vf2 on the deepest subgraph that is isomorphic to
+        the coupling graph."""
+        im_graph_node_map = {}
+        reverse_im_graph_node_map = {}
+        im_graph = rx.PyGraph(multigraph=False)
+        logger.debug("Buidling interaction graphs")
+        largest_im_graph = None
+        best_mapping = None
+        first_mapping = None
+        if self.avg_error_map is None:
+            self.avg_error_map = vf2_utils.build_average_error_map(self.target, None, coupling_map)
+
+        cm_graph, cm_nodes = vf2_utils.shuffle_coupling_graph(coupling_map, self.seed, False)
+        # To avoid trying to over optimize the result by default limit the number
+        # of trials based on the size of the graphs. For circuits with simple layouts
+        # like an all 1q circuit we don't want to sit forever trying every possible
+        # mapping in the search space if no other limits are set
+        if self.max_trials is None and self.call_limit is None and self.time_limit is None:
+            im_graph_edge_count = len(im_graph.edge_list())
+            cm_graph_edge_count = len(coupling_map.graph.edge_list())
+            self.max_trials = max(im_graph_edge_count, cm_graph_edge_count) + 15
+
+        start_time = time.time()
+
+        # A more efficient search pattern would be to do a binary search
+        # and find, but to conserve memory and avoid a large number of
+        # unecessary graphs this searchs from the beginning and continues
+        # until there is no vf2 match
+        def _visit(dag, weight, wire_map):
+            for node in dag.topological_op_nodes():
+                nonlocal largest_im_graph
+                largest_im_graph = im_graph.copy()
+                if getattr(node.op, "_directive", False):
+                    continue
+                if isinstance(node.op, ControlFlowOp):
+                    if isinstance(node.op, ForLoopOp):
+                        inner_weight = len(node.op.params[0]) * weight
+                    else:
+                        inner_weight = weight
+                    for block in node.op.blocks:
+                        inner_wire_map = {
+                            inner: wire_map[outer] for outer, inner in zip(node.qargs, block.qubits)
+                        }
+                        _visit(circuit_to_dag(block), inner_weight, inner_wire_map)
+                    continue
+                len_args = len(node.qargs)
+                qargs = [wire_map[q] for q in node.qargs]
+                if len_args == 1:
+                    if qargs[0] not in im_graph_node_map:
+                        weights = defaultdict(int)
+                        weights[node.name] += weight
+                        im_graph_node_map[qargs[0]] = im_graph.add_node(weights)
+                        reverse_im_graph_node_map[im_graph_node_map[qargs[0]]] = qargs[0]
+                    else:
+                        im_graph[im_graph_node_map[qargs[0]]][node.op.name] += weight
+                if len_args == 2:
+                    if qargs[0] not in im_graph_node_map:
+                        im_graph_node_map[qargs[0]] = im_graph.add_node(defaultdict(int))
+                        reverse_im_graph_node_map[im_graph_node_map[qargs[0]]] = qargs[0]
+                    if qargs[1] not in im_graph_node_map:
+                        im_graph_node_map[qargs[1]] = im_graph.add_node(defaultdict(int))
+                        reverse_im_graph_node_map[im_graph_node_map[qargs[1]]] = qargs[1]
+                    edge = (im_graph_node_map[qargs[0]], im_graph_node_map[qargs[1]])
+                    if im_graph.has_edge(*edge):
+                        im_graph.get_edge_data(*edge)[node.name] += weight
+                    else:
+                        weights = defaultdict(int)
+                        weights[node.name] += weight
+                        im_graph.add_edge(*edge, weights)
+                if len_args > 2:
+                    raise TranspilerError(
+                        "Encountered an instruction operating on more than 2 qubits, this pass "
+                        "only functions with 1 or 2 qubit operations."
+                    )
+                vf2_mapping = rx.vf2_mapping(
+                    cm_graph,
+                    im_graph,
+                    subgraph=True,
+                    id_order=False,
+                    induced=False,
+                    call_limit=self.call_limit,
+                )
+                try:
+                    nonlocal first_mapping
+                    first_mapping = next(vf2_mapping)
+                except StopIteration:
+                    break
+                nonlocal best_mapping
+                best_mapping = vf2_mapping
+                elapsed_time = time.time() - start_time
+                if (
+                    self.time_limit is not None
+                    and best_mapping is not None
+                    and elapsed_time >= self.time_limit
+                ):
+                    logger.debug(
+                        "SabreLayout VF2 heuristic has taken %s which exceeds configured max time: %s",
+                        elapsed_time,
+                        self.time_limit,
+                    )
+                    break
+
+        _visit(dag, 1, {bit: bit for bit in dag.qubits})
+        logger.debug("Finding best mappings of largest partial subgraph")
+        im_graph = largest_im_graph
+
+        def mapping_to_layout(layout_mapping):
+            return Layout({reverse_im_graph_node_map[k]: v for k, v in layout_mapping.items()})
+
+        layout_mapping = {im_i: cm_nodes[cm_i] for cm_i, im_i in first_mapping.items()}
+        chosen_layout = mapping_to_layout(layout_mapping)
+        chosen_layout_score = vf2_utils.score_layout(
+            self.avg_error_map,
+            layout_mapping,
+            im_graph_node_map,
+            reverse_im_graph_node_map,
+            im_graph,
+            False,
+        )
+        trials = 1
+        for mapping in best_mapping:  # pylint: disable=not-an-iterable
+            trials += 1
+            logger.debug("Running trial: %s", trials)
+            layout_mapping = {im_i: cm_nodes[cm_i] for cm_i, im_i in mapping.items()}
+            # If the graphs have the same number of nodes we don't need to score or do multiple
+            # trials as the score heuristic currently doesn't weigh nodes based on gates on a
+            # qubit so the scores will always all be the same
+            if len(cm_graph) == len(im_graph):
+                break
+            layout_score = vf2_utils.score_layout(
+                self.avg_error_map,
+                layout_mapping,
+                im_graph_node_map,
+                reverse_im_graph_node_map,
+                im_graph,
+                False,
+            )
+            logger.debug("Trial %s has score %s", trials, layout_score)
+            if chosen_layout is None:
+                chosen_layout = mapping_to_layout(layout_mapping)
+                chosen_layout_score = layout_score
+            elif layout_score < chosen_layout_score:
+                layout = mapping_to_layout(layout_mapping)
+                logger.debug(
+                    "Found layout %s has a lower score (%s) than previous best %s (%s)",
+                    layout,
+                    layout_score,
+                    chosen_layout,
+                    chosen_layout_score,
+                )
+                chosen_layout = layout
+                chosen_layout_score = layout_score
+            if self.max_trials and trials >= self.max_trials:
+                logger.debug("Trial %s is >= configured max trials %s", trials, self.max_trials)
+                break
+            elapsed_time = time.time() - start_time
+            if self.time_limit is not None and elapsed_time >= self.time_limit:
+                logger.debug(
+                    "VF2Layout has taken %s which exceeds configured max time: %s",
+                    elapsed_time,
+                    self.time_limit,
+                )
+                break
+        for reg in dag.qregs.values():
+            chosen_layout.add_register(reg)
+        return chosen_layout
diff --git a/qiskit/transpiler/preset_passmanagers/level1.py b/qiskit/transpiler/preset_passmanagers/level1.py
index db8b09b716b0..fb8f35bedb6e 100644
--- a/qiskit/transpiler/preset_passmanagers/level1.py
+++ b/qiskit/transpiler/preset_passmanagers/level1.py
@@ -156,6 +156,7 @@ def _vf2_match_not_found(property_set):
             layout_trials=5,
             skip_routing=pass_manager_config.routing_method is not None
             and routing_method != "sabre",
+            vf2_call_limit=int(5e4),
         )
     elif layout_method is None:
         _improve_layout = common.if_has_control_flow_else(
@@ -168,6 +169,7 @@ def _vf2_match_not_found(property_set):
                 layout_trials=5,
                 skip_routing=pass_manager_config.routing_method is not None
                 and routing_method != "sabre",
+                vf2_call_limit=int(5e4),
             ),
         ).to_flow_controller()
 
diff --git a/qiskit/transpiler/preset_passmanagers/level2.py b/qiskit/transpiler/preset_passmanagers/level2.py
index 743018881da3..e2b609682894 100644
--- a/qiskit/transpiler/preset_passmanagers/level2.py
+++ b/qiskit/transpiler/preset_passmanagers/level2.py
@@ -146,6 +146,7 @@ def _vf2_match_not_found(property_set):
             layout_trials=10,
             skip_routing=pass_manager_config.routing_method is not None
             and routing_method != "sabre",
+            vf2_call_limit=int(5e6),
         )
 
     # Choose routing pass
diff --git a/qiskit/transpiler/preset_passmanagers/level3.py b/qiskit/transpiler/preset_passmanagers/level3.py
index 7fe64eed521d..8f373618d683 100644
--- a/qiskit/transpiler/preset_passmanagers/level3.py
+++ b/qiskit/transpiler/preset_passmanagers/level3.py
@@ -152,6 +152,7 @@ def _vf2_match_not_found(property_set):
             layout_trials=20,
             skip_routing=pass_manager_config.routing_method is not None
             and routing_method != "sabre",
+            vf2_call_limit=int(3e7),
         )
 
     # Choose routing pass

From 6a9ba0bc34871eb5ba26548331d1fb613d06ffe6 Mon Sep 17 00:00:00 2001
From: Matthew Treinish <mtreinish@kortar.org>
Date: Tue, 30 May 2023 13:49:22 -0400
Subject: [PATCH 2/7] Drop partial layout scoring

This commit drops the vf2 layout scoring from the sabre layout partial
layout code. This code was just taken from the standalone vf2 passes
(VF2Layout and VF2PostLayout) which try multiple isomorphic subgraphs
and figure out which layout has the best error characteristics.
However, in the case of using a partial layout for sabre this is just
wasted CPU cycles because we're not finalizing the layout and when we
run sabre the actual layout used will be different. So we just need to
pick any isomorphic subgraph and use that as a partial starting point.
This commit removes all the multiple subgraph and scoring logic to
improve the runtime performance with this branch.
---
 .../transpiler/passes/layout/sabre_layout.py  | 52 -------------------
 1 file changed, 52 deletions(-)

diff --git a/qiskit/transpiler/passes/layout/sabre_layout.py b/qiskit/transpiler/passes/layout/sabre_layout.py
index 170a6a8f657e..d1f78bfb2bec 100644
--- a/qiskit/transpiler/passes/layout/sabre_layout.py
+++ b/qiskit/transpiler/passes/layout/sabre_layout.py
@@ -538,58 +538,6 @@ def mapping_to_layout(layout_mapping):
 
         layout_mapping = {im_i: cm_nodes[cm_i] for cm_i, im_i in first_mapping.items()}
         chosen_layout = mapping_to_layout(layout_mapping)
-        chosen_layout_score = vf2_utils.score_layout(
-            self.avg_error_map,
-            layout_mapping,
-            im_graph_node_map,
-            reverse_im_graph_node_map,
-            im_graph,
-            False,
-        )
-        trials = 1
-        for mapping in best_mapping:  # pylint: disable=not-an-iterable
-            trials += 1
-            logger.debug("Running trial: %s", trials)
-            layout_mapping = {im_i: cm_nodes[cm_i] for cm_i, im_i in mapping.items()}
-            # If the graphs have the same number of nodes we don't need to score or do multiple
-            # trials as the score heuristic currently doesn't weigh nodes based on gates on a
-            # qubit so the scores will always all be the same
-            if len(cm_graph) == len(im_graph):
-                break
-            layout_score = vf2_utils.score_layout(
-                self.avg_error_map,
-                layout_mapping,
-                im_graph_node_map,
-                reverse_im_graph_node_map,
-                im_graph,
-                False,
-            )
-            logger.debug("Trial %s has score %s", trials, layout_score)
-            if chosen_layout is None:
-                chosen_layout = mapping_to_layout(layout_mapping)
-                chosen_layout_score = layout_score
-            elif layout_score < chosen_layout_score:
-                layout = mapping_to_layout(layout_mapping)
-                logger.debug(
-                    "Found layout %s has a lower score (%s) than previous best %s (%s)",
-                    layout,
-                    layout_score,
-                    chosen_layout,
-                    chosen_layout_score,
-                )
-                chosen_layout = layout
-                chosen_layout_score = layout_score
-            if self.max_trials and trials >= self.max_trials:
-                logger.debug("Trial %s is >= configured max trials %s", trials, self.max_trials)
-                break
-            elapsed_time = time.time() - start_time
-            if self.time_limit is not None and elapsed_time >= self.time_limit:
-                logger.debug(
-                    "VF2Layout has taken %s which exceeds configured max time: %s",
-                    elapsed_time,
-                    self.time_limit,
-                )
-                break
         for reg in dag.qregs.values():
             chosen_layout.add_register(reg)
         return chosen_layout

From 30ed6caa79f1916dad54ba585a30fedfbd7844b4 Mon Sep 17 00:00:00 2001
From: Matthew Treinish <mtreinish@kortar.org>
Date: Sun, 30 Jul 2023 07:49:53 -0400
Subject: [PATCH 3/7] Use nearest neighbors for free bits with partial layout

This commit tweaks the algorithm for the initial layout guess when using
a partial layout to use the nearest neighbor qubits instead of just a
random order. This should lead to better results because there is less
back and forth needed to bring the qubits together.
---
 crates/accelerate/src/sabre_layout.rs | 71 +++++++++++++++++++++------
 1 file changed, 56 insertions(+), 15 deletions(-)

diff --git a/crates/accelerate/src/sabre_layout.rs b/crates/accelerate/src/sabre_layout.rs
index 7dc326799b1f..a6c6ec811180 100644
--- a/crates/accelerate/src/sabre_layout.rs
+++ b/crates/accelerate/src/sabre_layout.rs
@@ -54,7 +54,7 @@ pub fn sabre_layout_and_routing(
             .into_par_iter()
             .enumerate()
             .map(|(index, seed_trial)| {
-                let partial = if index > 0 { &partial_layout } else { &None };
+                let partial = if index == 0 { &partial_layout } else { &None };
                 (
                     index,
                     layout_trial(
@@ -83,7 +83,7 @@ pub fn sabre_layout_and_routing(
             .into_iter()
             .enumerate()
             .map(|(index, seed_trial)| {
-                let partial = if index > 0 { &partial_layout } else { &None };
+                let partial = if index == 0 { &partial_layout } else { &None };
                 layout_trial(
                     dag,
                     neighbor_table,
@@ -115,29 +115,70 @@ fn layout_trial(
     // Pick a random initial layout and fully populate ancillas in that layout too
     let num_physical_qubits = distance_matrix.shape()[0];
     let mut rng = Pcg64Mcg::seed_from_u64(seed);
-    let mut physical_qubits: Vec<usize>;
-    match partial_layout {
+    let physical_qubits: Vec<usize> = match partial_layout {
         Some(partial_layout_bits) => {
-            let used_bits: HashSet<usize> = partial_layout_bits
+            let mut used_bits: HashSet<usize> = partial_layout_bits
                 .iter()
                 .filter_map(|x| x.as_ref())
                 .copied()
                 .collect();
-            let mut free_bits: Vec<usize> = (0..num_physical_qubits)
-                .filter(|x| !used_bits.contains(x))
-                .collect();
-            free_bits.shuffle(&mut rng);
-            physical_qubits = partial_layout_bits
-                .iter()
-                .map(|x| match x {
-                    Some(phys) => *phys,
-                    None => free_bits.pop().unwrap(),
+            // Compute nearest neighbors to use for free bits
+            let mut free_bits_distance: Vec<(usize, f64)> = distance_matrix
+                .axis_iter(Axis(0))
+                .enumerate()
+                .filter_map(|(x, row)| {
+                    // If starting from free bit don't check distance
+                    if !used_bits.contains(&x) {
+                        None
+                    } else {
+                        let index_distance =
+                            row.into_iter().enumerate().filter_map(|(y, value)| {
+                                if used_bits.contains(&y) {
+                                    None
+                                } else {
+                                    Some((y, *value))
+                                }
+                            });
+                        Some(index_distance)
+                    }
                 })
+                .flatten()
                 .collect();
+            free_bits_distance.par_sort_by(|a, b| {
+                // Reverse arg order so lowest distance is at the end of Vec
+                // and when we pop below we get the closest nodes first
+                b.1.partial_cmp(&a.1).unwrap()
+            });
+
+            let mut get_free_bit = || -> usize {
+                // As the free_bits_distance Vec will have multiple
+                // entries for each bit we need to loop until we find
+                // an unused bit
+                let mut new_bit = free_bits_distance.pop().unwrap().0;
+                while used_bits.contains(&new_bit) {
+                    new_bit = free_bits_distance.pop().unwrap().0;
+                }
+                used_bits.insert(new_bit);
+                new_bit
+            };
+
+            (0..num_physical_qubits)
+                .map(|x| {
+                    if x >= partial_layout_bits.len() {
+                        get_free_bit()
+                    } else {
+                        match partial_layout_bits[x] {
+                            Some(phys) => phys,
+                            None => get_free_bit(),
+                        }
+                    }
+                })
+                .collect()
         }
         None => {
-            physical_qubits = (0..num_physical_qubits).collect();
+            let mut physical_qubits: Vec<usize> = (0..num_physical_qubits).collect();
             physical_qubits.shuffle(&mut rng);
+            physical_qubits
         }
     };
     let mut initial_layout = NLayout::from_logical_to_physical(physical_qubits);

From df6d5c22d3d0e9acfe607db2ab9b58fb4d9fb94c Mon Sep 17 00:00:00 2001
From: Matthew Treinish <mtreinish@kortar.org>
Date: Sun, 30 Jul 2023 09:13:23 -0400
Subject: [PATCH 4/7] Remove unnecessary clone of partial layout input

---
 crates/accelerate/src/sabre_layout.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/accelerate/src/sabre_layout.rs b/crates/accelerate/src/sabre_layout.rs
index a6c6ec811180..83927ac8afba 100644
--- a/crates/accelerate/src/sabre_layout.rs
+++ b/crates/accelerate/src/sabre_layout.rs
@@ -66,7 +66,7 @@ pub fn sabre_layout_and_routing(
                         max_iterations,
                         num_swap_trials,
                         run_in_parallel,
-                        partial.clone(),
+                        partial,
                     ),
                 )
             })
@@ -93,7 +93,7 @@ pub fn sabre_layout_and_routing(
                     max_iterations,
                     num_swap_trials,
                     run_in_parallel,
-                    partial.clone(),
+                    partial,
                 )
             })
             .min_by_key(|(_, result)| result.map.map.values().map(|x| x.len()).sum::<usize>())
@@ -110,7 +110,7 @@ fn layout_trial(
     max_iterations: usize,
     num_swap_trials: usize,
     run_swap_in_parallel: bool,
-    partial_layout: Option<Vec<Option<usize>>>,
+    partial_layout: &Option<Vec<Option<usize>>>,
 ) -> ([NLayout; 2], SabreResult) {
     // Pick a random initial layout and fully populate ancillas in that layout too
     let num_physical_qubits = distance_matrix.shape()[0];

From 5057e42895735d29cfbf79ca00e69352338f1318 Mon Sep 17 00:00:00 2001
From: Matthew Treinish <mtreinish@kortar.org>
Date: Sun, 30 Jul 2023 09:18:28 -0400
Subject: [PATCH 5/7] Fix lint

---
 qiskit/transpiler/passes/layout/sabre_layout.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/qiskit/transpiler/passes/layout/sabre_layout.py b/qiskit/transpiler/passes/layout/sabre_layout.py
index 64ba76a94d90..43bb494cef4b 100644
--- a/qiskit/transpiler/passes/layout/sabre_layout.py
+++ b/qiskit/transpiler/passes/layout/sabre_layout.py
@@ -466,7 +466,9 @@ def _visit(dag, weight, wire_map):
                         im_graph_node_map[qargs[0]] = im_graph.add_node(weights)
                         reverse_im_graph_node_map[im_graph_node_map[qargs[0]]] = qargs[0]
                     else:
-                        im_graph[im_graph_node_map[qargs[0]]][node.op.name] += weight
+                        im_graph[  # pylint: disable=unsubscriptable-object
+                            im_graph_node_map[qargs[0]]
+                        ][node.op.name] += weight
                 if len_args == 2:
                     if qargs[0] not in im_graph_node_map:
                         im_graph_node_map[qargs[0]] = im_graph.add_node(defaultdict(int))

From 3d628eacc5c14ce16b7acca536c631a54ecc6102 Mon Sep 17 00:00:00 2001
From: Matthew Treinish <mtreinish@kortar.org>
Date: Sun, 30 Jul 2023 12:33:13 -0400
Subject: [PATCH 6/7] Make partial layout trial an addition to requested
 layout trials

---
 crates/accelerate/src/sabre_layout.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/accelerate/src/sabre_layout.rs b/crates/accelerate/src/sabre_layout.rs
index 83927ac8afba..c2a552e93a6f 100644
--- a/crates/accelerate/src/sabre_layout.rs
+++ b/crates/accelerate/src/sabre_layout.rs
@@ -46,7 +46,7 @@ pub fn sabre_layout_and_routing(
     };
     let seed_vec: Vec<u64> = outer_rng
         .sample_iter(&rand::distributions::Standard)
-        .take(num_layout_trials)
+        .take(num_layout_trials + 1)
         .collect();
     let dist = distance_matrix.as_array();
     if run_in_parallel && num_layout_trials > 1 {
@@ -54,7 +54,7 @@ pub fn sabre_layout_and_routing(
             .into_par_iter()
             .enumerate()
             .map(|(index, seed_trial)| {
-                let partial = if index == 0 { &partial_layout } else { &None };
+                let partial = if index == num_layout_trials { &partial_layout } else { &None };
                 (
                     index,
                     layout_trial(
@@ -83,7 +83,7 @@ pub fn sabre_layout_and_routing(
             .into_iter()
             .enumerate()
             .map(|(index, seed_trial)| {
-                let partial = if index == 0 { &partial_layout } else { &None };
+                let partial = if index == num_layout_trials { &partial_layout } else { &None };
                 layout_trial(
                     dag,
                     neighbor_table,

From cf7d82389019ce995a4268fe19e61bc4cc86cb51 Mon Sep 17 00:00:00 2001
From: Matthew Treinish <mtreinish@kortar.org>
Date: Tue, 8 Aug 2023 08:09:46 -0400
Subject: [PATCH 7/7] Increase heuristic effort for sabre

This commit increases the heuristic effort for sabre layout and routing.
This is done through 2 changes. The first is that the depth of the
internal lookahead heuristic used in sabre swap has been increased from
20 to 72. This is just a stop-gap for a potential reworking of the
lookahead heuristic. In local testing for larger backends and deeper
circuits this is showing better output results than the current default
of 20 without any runtime impact. The other change is that the trial
counts (and max iterations) for each optimization level > 0 are
increased 5x. This has a runtime cost, but the performance of the rust
sabre implementation is fast enough that even when running the
algorithm with more trials it is not a bottleneck for typical
compilation.

Related to #10160
---
 crates/accelerate/src/sabre_swap/mod.rs         | 2 +-
 qiskit/transpiler/preset_passmanagers/level1.py | 6 +++---
 qiskit/transpiler/preset_passmanagers/level2.py | 6 +++---
 qiskit/transpiler/preset_passmanagers/level3.py | 6 +++---
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/crates/accelerate/src/sabre_swap/mod.rs b/crates/accelerate/src/sabre_swap/mod.rs
index 0d83f901bf1c..da43c3cea593 100644
--- a/crates/accelerate/src/sabre_swap/mod.rs
+++ b/crates/accelerate/src/sabre_swap/mod.rs
@@ -47,7 +47,7 @@ use swap_map::SwapMap;
 
 const BEST_EPSILON: f64 = 1e-10; // Epsilon used in minimum-score calculations.
 
-const EXTENDED_SET_SIZE: usize = 20; // Size of lookahead window.
+const EXTENDED_SET_SIZE: usize = 72; // Size of lookahead window.
 const DECAY_RATE: f64 = 0.001; // Decay coefficient for penalizing serial swaps.
 const DECAY_RESET_INTERVAL: u8 = 5; // How often to reset all decay rates to 1.
 const EXTENDED_SET_WEIGHT: f64 = 0.5; // Weight of lookahead window compared to front_layer.
diff --git a/qiskit/transpiler/preset_passmanagers/level1.py b/qiskit/transpiler/preset_passmanagers/level1.py
index fed34282a551..018c7c9d3f95 100644
--- a/qiskit/transpiler/preset_passmanagers/level1.py
+++ b/qiskit/transpiler/preset_passmanagers/level1.py
@@ -150,10 +150,10 @@ def _vf2_match_not_found(property_set):
     elif layout_method == "sabre":
         _improve_layout = SabreLayout(
             coupling_map_layout,
-            max_iterations=2,
+            max_iterations=10,
             seed=seed_transpiler,
-            swap_trials=5,
-            layout_trials=5,
+            swap_trials=25,
+            layout_trials=25,
             skip_routing=pass_manager_config.routing_method is not None
             and routing_method != "sabre",
             vf2_call_limit=int(5e4),
diff --git a/qiskit/transpiler/preset_passmanagers/level2.py b/qiskit/transpiler/preset_passmanagers/level2.py
index 2fdf82f1f17e..71053c394ca3 100644
--- a/qiskit/transpiler/preset_passmanagers/level2.py
+++ b/qiskit/transpiler/preset_passmanagers/level2.py
@@ -140,10 +140,10 @@ def _vf2_match_not_found(property_set):
     elif layout_method == "sabre":
         _choose_layout_1 = SabreLayout(
             coupling_map_layout,
-            max_iterations=2,
+            max_iterations=10,
             seed=seed_transpiler,
-            swap_trials=10,
-            layout_trials=10,
+            swap_trials=50,
+            layout_trials=50,
             skip_routing=pass_manager_config.routing_method is not None
             and routing_method != "sabre",
             vf2_call_limit=int(5e6),
diff --git a/qiskit/transpiler/preset_passmanagers/level3.py b/qiskit/transpiler/preset_passmanagers/level3.py
index 93855cff4eb6..33ff71884a2e 100644
--- a/qiskit/transpiler/preset_passmanagers/level3.py
+++ b/qiskit/transpiler/preset_passmanagers/level3.py
@@ -146,10 +146,10 @@ def _vf2_match_not_found(property_set):
     elif layout_method == "sabre":
         _choose_layout_1 = SabreLayout(
             coupling_map_layout,
-            max_iterations=4,
+            max_iterations=20,
             seed=seed_transpiler,
-            swap_trials=20,
-            layout_trials=20,
+            swap_trials=100,
+            layout_trials=100,
             skip_routing=pass_manager_config.routing_method is not None
             and routing_method != "sabre",
             vf2_call_limit=int(3e7),