From 0d7047101aafd8871873cb3b163050e4bfa898e5 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Fri, 30 Aug 2024 12:06:50 -0600 Subject: [PATCH] sync with internal repo1 (commit 7a1f1840f) and repo2 (commit 60b0672) --- benchmarks/README.md | 4 +- benchmarks/cuquantum_benchmarks/__init__.py | 4 +- benchmarks/cuquantum_benchmarks/_utils.py | 5 +- .../backends/backend_cutn.py | 72 +- .../backends/backend_pny.py | 75 +- .../backends/backend_qiskit.py | 8 +- .../cuquantum_benchmarks/benchmarks/qpe.py | 2 +- benchmarks/cuquantum_benchmarks/config.py | 2 +- .../frontends/frontend_dumper.py | 253 +++- benchmarks/cuquantum_benchmarks/run.py | 2 +- .../cuquantum_benchmarks/run_interface.py | 32 +- .../cuquantum_benchmarks_tests/test_run.py | 1 + .../cutensornet_tests/state_data.py | 161 +++ .../cutensornet_tests/state_tester.py | 1161 +++++++++++++++++ 14 files changed, 1653 insertions(+), 129 deletions(-) create mode 100644 python/tests/cuquantum_tests/cutensornet_tests/state_data.py create mode 100644 python/tests/cuquantum_tests/cutensornet_tests/state_tester.py diff --git a/benchmarks/README.md b/benchmarks/README.md index 9ee3894..ff5dd8c 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -50,7 +50,7 @@ For backends that support MPI parallelism, it is assumed that `MPI_COMM_WORLD` i Examples: - `cuquantum-benchmarks api --benchmark apply_matrix --targets 4,5 --controls 2,3 --nqubits 16`: Apply a random gate matrix controlled by qubits 2 & 3 to qubits 4 & 5 of a 16-qubit statevector using cuStateVec's `apply_matrix()` API -- `cuquantum-benchmarks circuit --frontend qiskit --backend cutn --benchmark qft --nqubits 8 --ngpus 1`: Construct a 8-qubit QFT circuit in Qiskit and run it with cuTensorNet on GPU +- `cuquantum-benchmarks circuit --frontend qiskit --backend cutn --compute-mode statevector --benchmark qft --nqubits 8 --ngpus 1`: Construct a 8-qubit QFT circuit in Qiskit and compute the statevector with cuTensorNet on GPU. 
Note that the `--compute-mode` can be specified only for `cutn` backend and supports `amplitude` (default), `statevector`, and `expectation`. - `cuquantum-benchmarks circuit --frontend cirq --backend qsim-mgpu --benchmark qaoa --nqubits 16 --ngpus 2`: Construct a 16-qubit QAOA circuit in Cirq and run it with the (multi-GPU) `qsim-mgpu` backend on 2 GPUs (requires cuQuantum Appliance) - `mpiexec -n 4 cuquantum-benchmarks circuit --frontend qiskit --backend cusvaer --benchmark quantum_volume --nqubits 32 --ngpus 1 --cusvaer-global-index-bits 1,1 --cusvaer-p2p-device-bits 1`: Construct a 32-qubit Quantum Volume circuit in Qiskit and run it with the (multi-GPU-multi-node) `cusvaer` backend on 2 nodes. Each node runs 2 MPI processes, each of which controls 1 GPU (requires cuQuantum Appliance) @@ -69,9 +69,9 @@ It is recommended to loop over all recorded `sim_config_hash` to gather perf dat Currently all environment variables are reserved for internal use only, and are subject to change in the future without notification. 
* `CUTENSORNET_DUMP_TN=txt` -* `CUTENSORNET_BENCHMARK_TARGET={amplitude,state_vector,expectation}` (pick one) * `CUTENSORNET_APPROX_TN_UTILS_PATH` * `CUQUANTUM_BENCHMARKS_DUMP_GATES` +* `CUQUANTUM_BENCHMARKS_TCS_FULL_TENSOR` ## Development Overview diff --git a/benchmarks/cuquantum_benchmarks/__init__.py b/benchmarks/cuquantum_benchmarks/__init__.py index a185f84..71f6333 100644 --- a/benchmarks/cuquantum_benchmarks/__init__.py +++ b/benchmarks/cuquantum_benchmarks/__init__.py @@ -1,5 +1,5 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES # # SPDX-License-Identifier: BSD-3-Clause -__version__ = '0.3.2' +__version__ = '0.4.0' diff --git a/benchmarks/cuquantum_benchmarks/_utils.py b/benchmarks/cuquantum_benchmarks/_utils.py index 5797be6..29d3756 100644 --- a/benchmarks/cuquantum_benchmarks/_utils.py +++ b/benchmarks/cuquantum_benchmarks/_utils.py @@ -21,8 +21,6 @@ import cupy as cp import numpy as np import nvtx -from cuquantum import cudaDataType, ComputeType -from cuquantum.cutensornet._internal.einsum_parser import create_size_dict import psutil @@ -62,6 +60,7 @@ def precision_str_to_dtype(precision, is_complex=True): def dtype_to_cuda_type(dtype): + from cuquantum import cudaDataType if dtype == np.complex64: return cudaDataType.CUDA_C_32F elif dtype == np.complex128: @@ -71,6 +70,7 @@ def dtype_to_cuda_type(dtype): def dtype_to_compute_type(dtype): + from cuquantum import ComputeType if dtype == np.complex64: return ComputeType.COMPUTE_32F elif dtype == np.complex128: @@ -80,6 +80,7 @@ def dtype_to_compute_type(dtype): def generate_size_dict_from_operands(einsum, operands): + from cuquantum.cutensornet._internal.einsum_parser import create_size_dict inputs = einsum.split("->")[0] inputs = inputs.split(",") assert len(inputs) == len(operands) diff --git a/benchmarks/cuquantum_benchmarks/backends/backend_cutn.py b/benchmarks/cuquantum_benchmarks/backends/backend_cutn.py index 
b7f5925..aad04d7 100644 --- a/benchmarks/cuquantum_benchmarks/backends/backend_cutn.py +++ b/benchmarks/cuquantum_benchmarks/backends/backend_cutn.py @@ -6,13 +6,19 @@ import os import time import warnings +from math import log10, log2 import numpy as np import cupy as cp -from cuquantum import contract, contract_path, CircuitToEinsum -from cuquantum import cutensornet as cutn from .backend import Backend + +try: + from cuquantum import contract, contract_path, CircuitToEinsum + from cuquantum import cutensornet as cutn +except ImportError: + cutn = None + from .._utils import convert_einsum_to_txt, generate_size_dict_from_operands, is_running_mpiexec @@ -24,6 +30,8 @@ class cuTensorNet(Backend): def __init__(self, ngpus, ncpu_threads, precision, **kwargs): + if cutn is None: + raise RuntimeError("cuquantum-python is not installed") if ngpus != 1: raise ValueError("the cutn backend must be run with --ngpus 1 (regardless if MPI is in use)") @@ -32,6 +40,7 @@ def __init__(self, ngpus, ncpu_threads, precision, **kwargs): self.nqubits = kwargs.pop('nqubits') self.rank = 0 self.handle = cutn.create() + self.meta = {} try: # cuQuantum Python 22.11+ supports nonblocking & auto-MPI opts = cutn.NetworkOptions(handle=self.handle, blocking="auto") @@ -50,47 +59,49 @@ def __init__(self, ngpus, ncpu_threads, precision, **kwargs): # cuQuantum Python 22.07 or below opts = cutn.NetworkOptions(handle=self.handle) self.network_opts = opts - self.n_samples = kwargs.pop('nhypersamples') + self.n_hyper_samples = kwargs.pop('nhypersamples') self.version = cutn.get_version() + self.meta["backend"] = f"cutn-v{self.version} precision={self.precision}" + self.meta['nhypersamples'] = self.n_hyper_samples + self.meta['cpu_threads'] = self.ncpu_threads + def __del__(self): cutn.destroy(self.handle) def preprocess_circuit(self, circuit, *args, **kwargs): circuit_filename = kwargs.pop('circuit_filename') - target = kwargs.pop('target') - pauli = kwargs.pop('pauli') - preprocess_data = {} + 
self.compute_mode = kwargs.pop('compute_mode') + self.pauli = kwargs.pop('pauli') + valid_choices = ['amplitude', 'expectation', 'statevector'] + if self.compute_mode not in valid_choices: + raise ValueError(f"The string '{self.compute_mode}' is not a valid option for --compute-mode argument. Valid options are: {valid_choices}") t1 = time.perf_counter() - if self.precision == 'single': circuit_converter = CircuitToEinsum(circuit, dtype='complex64', backend=cp) else: circuit_converter = CircuitToEinsum(circuit, dtype='complex128', backend=cp) - t2 = time.perf_counter() time_circ2einsum = t2 - t1 - logger.info(f'CircuitToEinsum took {time_circ2einsum} s') - + t1 = time.perf_counter() - if target == 'amplitude': + if self.compute_mode == 'amplitude': # any bitstring would give same TN topology, so let's just pick "000...0" self.expression, self.operands = circuit_converter.amplitude('0'*self.nqubits) - elif target == 'state_vector': + elif self.compute_mode == 'statevector': self.expression, self.operands = circuit_converter.state_vector() - elif target == 'expectation': + elif self.compute_mode == 'expectation': # new in cuQuantum Python 22.11 - assert pauli is not None - logger.info(f"compute expectation value for Pauli string: {pauli}") - self.expression, self.operands = circuit_converter.expectation(pauli) + assert self.pauli is not None + logger.info(f"compute expectation value for Pauli string: {self.pauli}") + self.expression, self.operands = circuit_converter.expectation(self.pauli) else: # TODO: add other CircuitToEinsum methods? 
- raise NotImplementedError(f"the target {target} is not supported") + raise NotImplementedError(f"the target {self.compute_mode} is not supported") t2 = time.perf_counter() time_tn = t2 - t1 - logger.info(f'{target}() took {time_tn} s') - + tn_format = os.environ.get('CUTENSORNET_DUMP_TN') if tn_format == 'txt': size_dict = generate_size_dict_from_operands( @@ -106,23 +117,20 @@ def preprocess_circuit(self, circuit, *args, **kwargs): t1 = time.perf_counter() path, opt_info = self.network.contract_path( # TODO: samples may be too large for small circuits - optimize={'samples': self.n_samples, 'threads': self.ncpu_threads}) + optimize={'samples': self.n_hyper_samples, 'threads': self.ncpu_threads}) t2 = time.perf_counter() time_path = t2 - t1 - logger.info(f'contract_path() took {time_path} s') - logger.debug(f'# samples: {self.n_samples}') - logger.debug(opt_info) - - self.path = path - self.opt_info = opt_info - preprocess_data = { - 'CircuitToEinsum': time_circ2einsum, - target: time_tn, - 'contract_path': time_path, - } - return preprocess_data + self.opt_info = opt_info # cuTensorNet returns "real-number" Flops. 
To get the true FLOP count, multiply it by 4 + self.meta['compute-mode'] = f'{self.compute_mode}()' + self.meta[f'circuit to einsum'] = f"{time_circ2einsum + time_tn} s" + + logger.info(f'data: {self.meta}') + logger.info(f'log10[FLOPS]: {log10(self.opt_info.opt_cost * 4)} log2[SIZE]: {log2(opt_info.largest_intermediate)} contract_path(): {time_path} s') + pre_data = {'circuit to einsum time': time_circ2einsum + time_tn, 'contract path time': time_path, + 'log2[LargestInter]': log2(opt_info.largest_intermediate), 'log10[FLOPS]': log10(self.opt_info.opt_cost * 4) } + return pre_data def run(self, circuit, nshots=0): if self.rank == 0 and nshots > 0: warnings.warn("the cutn backend does not support sampling") diff --git a/benchmarks/cuquantum_benchmarks/backends/backend_pny.py b/benchmarks/cuquantum_benchmarks/backends/backend_pny.py index a94b7c5..a4be34a 100644 --- a/benchmarks/cuquantum_benchmarks/backends/backend_pny.py +++ b/benchmarks/cuquantum_benchmarks/backends/backend_pny.py @@ -43,51 +43,66 @@ def find_version(self, identifier): if identifier == "pennylane-lightning-gpu": if self.ngpus == 1: try: - import pennylane_lightning_gpu - except ImportError as e: - raise RuntimeError("PennyLane-Lightning-GPU plugin is not installed") from e + from pennylane_lightning.lightning_gpu import LightningGPU + return LightningGPU.version + except ImportError: + try: # pre pennylane_lightning 0.33.0 version + import pennylane_lightning_gpu + return pennylane_lightning_gpu.__version__ + except ImportError: + raise RuntimeError("PennyLane-Lightning-GPU plugin is not installed") else: raise ValueError(f"cannot specify --ngpus > 1 for the backend {identifier}") - ver = pennylane_lightning_gpu.__version__ elif identifier == "pennylane-lightning-kokkos": try: - import pennylane_lightning_kokkos - except ImportError as e: - raise RuntimeError("PennyLane-Lightning-Kokkos plugin is not installed") from e - ver = pennylane_lightning_kokkos.__version__ + from 
pennylane_lightning.lightning_kokkos import LightningKokkos + return LightningKokkos.version + except ImportError: + try: # pre pennylane_lightning 0.33.0 version + import pennylane_lightning_kokkos + return pennylane_lightning_kokkos.__version__ + except ImportError: + raise RuntimeError("PennyLane-Lightning-Kokkos plugin is not installed") elif identifier == "pennylane-lightning-qubit": try: from pennylane_lightning import lightning_qubit + return lightning_qubit.__version__ except ImportError as e: raise RuntimeError("PennyLane-Lightning plugin is not installed") from e - ver = lightning_qubit.__version__ else: # identifier == "pennylane" - ver = pennylane.__version__ - return ver - + return pennylane.__version__ + def _make_qnode(self, circuit, nshots=1024, **kwargs): if self.identifier == "pennylane-lightning-gpu": dev = pennylane.device("lightning.gpu", wires=self.nqubits, shots=nshots, c_dtype=self.dtype) elif self.identifier == "pennylane-lightning-kokkos": # there's no way for us to query what execution space (=backend) that kokkos supports at runtime, # so let's just set up Kokkos::InitArguments and hope kokkos to do the right thing... 
+ dev = None try: - import pennylane_lightning_kokkos - except ImportError as e: - raise RuntimeError("PennyLane-Lightning-Kokkos plugin is not installed") from e - args = pennylane_lightning_kokkos.lightning_kokkos.InitArguments() - args.num_threads = self.ncpu_threads - args.disable_warnings = int(logger.getEffectiveLevel() != logging.DEBUG) - ## Disable MPI because it's unclear if pennylane actually supports it (at least it's untested) - # # if we're running MPI, we want to know now and get it init'd before kokkos is - # MPI = is_running_mpi() - # if MPI: - # comm = MPI.COMM_WORLD - # args.ndevices = min(comm.Get_size(), self.ngpus) # note: kokkos uses 1 GPU per process - dev = pennylane.device( + if self.ncpu_threads > 1 : + warnings.warn(f"--ncputhreads is ignored for {self.identifier}", stacklevel=2) + dev = pennylane.device( "lightning.kokkos", wires=self.nqubits, shots=nshots, c_dtype=self.dtype, - sync=False, - kokkos_args=args) + sync=False) + except ImportError: + try: # pre pennylane_lightning 0.33.0 version + from pennylane_lightning_kokkos.lightning_kokkos import InitArguments + args = InitArguments() + args.num_threads = self.ncpu_threads + args.disable_warnings = int(logger.getEffectiveLevel() != logging.DEBUG) + ## Disable MPI because it's unclear if pennylane actually supports it (at least it's untested) + # # if we're running MPI, we want to know now and get it init'd before kokkos is + # MPI = is_running_mpi() + # if MPI: + # comm = MPI.COMM_WORLD + # args.ndevices = min(comm.Get_size(), self.ngpus) # note: kokkos uses 1 GPU per process + dev = pennylane.device( + "lightning.kokkos", wires=self.nqubits, shots=nshots, c_dtype=self.dtype, + sync=False, + kokkos_args=args) + except ImportError: + raise RuntimeError("Could not load PennyLane-Lightning-Kokkos plugin. 
Is it installed?") elif self.identifier == "pennylane-lightning-qubit": if self.ngpus != 0: raise ValueError(f"cannot specify --ngpus for the backend {self.identifier}") @@ -98,10 +113,12 @@ def _make_qnode(self, circuit, nshots=1024, **kwargs): elif self.identifier == "pennylane": if self.ngpus != 0: raise ValueError(f"cannot specify --ngpus for the backend {self.identifier}") - dev = pennylane.device("default.qubit", wires=self.nqubits, shots=nshots, c_dtype=self.dtype) + if self.dtype == np.complex64: + raise ValueError("As of version 0.33.0, Pennylane's default.qubit device only supports double precision.") + dev = pennylane.device("default.qubit", wires=self.nqubits, shots=nshots) else: raise ValueError(f"the backend {self.identifier} is not recognized") - + qnode = pennylane.QNode(circuit, device=dev) return qnode diff --git a/benchmarks/cuquantum_benchmarks/backends/backend_qiskit.py b/benchmarks/cuquantum_benchmarks/backends/backend_qiskit.py index cc1e240..0f93292 100644 --- a/benchmarks/cuquantum_benchmarks/backends/backend_qiskit.py +++ b/benchmarks/cuquantum_benchmarks/backends/backend_qiskit.py @@ -44,8 +44,12 @@ def find_version(self, identifier): if identifier == 'cusvaer': return version('cusvaer') - if hasattr(qiskit_aer, "__version__"): - return qiskit_aer.__version__ + if hasattr(qiskit, "__version__") and qiskit.__version__ >= "1.0.0": + try: + from qiskit_aer import __version__ as aer_version + return aer_version + except ImportError as e: + raise RuntimeError("qiskit-aer (or qiskit-aer-gpu) is not installed") from e else: return qiskit.__qiskit_version__['qiskit-aer'] diff --git a/benchmarks/cuquantum_benchmarks/benchmarks/qpe.py b/benchmarks/cuquantum_benchmarks/benchmarks/qpe.py index 8ce2783..94e7b68 100644 --- a/benchmarks/cuquantum_benchmarks/benchmarks/qpe.py +++ b/benchmarks/cuquantum_benchmarks/benchmarks/qpe.py @@ -18,7 +18,7 @@ def generateGatesSequence(nqubits, config): # Example instantiation of QPE circuit paramterized by nqubits 
phase = 1/3 - U = np.mat([[1, 0], [0, np.exp(np.pi * 1j * phase)]]) + U = np.asmatrix([[1, 0], [0, np.exp(np.pi * 1j * phase)]]) in_nqubits = 1 unfold = config['unfold'] measure = config['measure'] diff --git a/benchmarks/cuquantum_benchmarks/config.py b/benchmarks/cuquantum_benchmarks/config.py index 5572b23..b5554e7 100644 --- a/benchmarks/cuquantum_benchmarks/config.py +++ b/benchmarks/cuquantum_benchmarks/config.py @@ -201,7 +201,7 @@ 'nfused': None, 'ngpus': 0, 'ncputhreads': 1, - 'precision': 'single', + 'precision': 'double', }, }, diff --git a/benchmarks/cuquantum_benchmarks/frontends/frontend_dumper.py b/benchmarks/cuquantum_benchmarks/frontends/frontend_dumper.py index 74217d2..844cb50 100644 --- a/benchmarks/cuquantum_benchmarks/frontends/frontend_dumper.py +++ b/benchmarks/cuquantum_benchmarks/frontends/frontend_dumper.py @@ -4,6 +4,7 @@ import cmath import logging +import os from math import pi import numpy as np @@ -20,29 +21,43 @@ class Dumper(Frontend): """Special frontend for dumping the gate sequence as pure text to disk. - Each gate (or operation) would be stored as 3 lines, with elements separated by 1 space: + The first line of the file is the number of qudits. + + The second line of the file contains a sequence of integers, one integer per qudit, + specifying qudit dimensions (qubit dimension equals 2). + + Following the header is the gate data. Each gate (or operation) would be stored as 3 lines, with elements separated by 1 space: 1. n_targets n_controls 2. targets controls 3. contiguity actual_matrix_data + + Regarding item 3, there are two available options specified using the CUQUANTUM_BENCHMARKS_TCS_FULL_TENSOR environment variable. + When set to 0 (which is the default), only the target matrix is included in the output. + Conversely, when set to 1, the output includes the full matrix, including control qubits, and when set to 2, both formats are generated separately. Note that the qubit IDs are zero-based.
The matrix data is flattened to a 1D contiguous array of length 2**(2*n_targets). The contiguity is a single character "C" (for C-major, or row-major) or "F" (for Fortran-major, or column-major) for how to interpret the matrix. All complex numbers are stored as two real numbers (ex: 0.5-0.1j -> "0.5 -0.1"). - As an example, a CCX gate acting on qubit 0 and controlled by qubits 2 & 4 is stored as + As an example, a CCX gate acting on qubit 0 and controlled by qubits 2 & 4, when CUQUANTUM_BENCHMARKS_TCS_FULL_TENSOR=0, is stored as ''' + 5\n + 2 2 2 2 2\n\n 1 2\n 0 2 4\n - C 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0\n + C 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0\n\n ''' Currently the measurement operation at the end of the gate sequence is not stored. An empty line can be used to separate different gates/operations and improve readability, but it is not required. + + Run command example: CUQUANTUM_BENCHMARKS_DUMP_GATES=1 CUQUANTUM_BENCHMARKS_TCS_FULL_TENSOR=2 cuquantum-benchmarks circuit + --frontend cirq --backend cirq --benchmark qaoa --nqubits 8 """ def __init__(self, nqubits, config): @@ -53,18 +68,17 @@ def __init__(self, nqubits, config): self.circuit_filename = circuit_filename.replace('.pickle', '_raw.txt') self.nqubits = nqubits self.order = 'C' # TODO - self.digits = 12 # TODO + self.digits = 14 # TODO + self.tcs_full = int(os.environ.get('CUQUANTUM_BENCHMARKS_TCS_FULL_TENSOR', 0)) def _dump_op(self, op, targets, controls=()): + np.set_printoptions(threshold=np.inf) + flattened_op = op.astype(self.dtype).reshape(-1, order=self.order).flatten().view(self.dtype.char.lower()) op = np.array2string( - op.astype(self.dtype).reshape(-1, order=self.order).view(self.dtype.char.lower()), + flattened_op, max_line_width=np.inf, precision=self.digits, ) - if isinstance(targets, int): - targets = (targets,) - if isinstance(controls, int): - controls = (controls,) op_data = f"{len(targets)} {len(controls)}\n" for t in targets: @@ -86,66 +100,219 @@ def _get_rotation_matrix(self, theta, phi, 
lam): matrix = np.asarray(matrix) return matrix - def generateCircuit(self, gateSeq): - circuit = '' + def generateSpecifiedCircuit(self, gateSeq): + circuit = str(self.nqubits) + '\n' + for i in range(self.nqubits): + circuit += '2 ' + circuit += '\n\n' for g in gateSeq: - if g.id == 'h': - circuit += self._dump_op( - np.asarray([[1, 1], [1, -1]])/np.sqrt(2), g.targets) + if isinstance(g.targets, int): + targets = [g.targets] + else: + targets = g.targets + if isinstance(g.controls, int): + controls = [g.controls] + else: + controls = g.controls + if g.id == 'h': + circuit += self._dump_op(np.asarray([[1, 1], [1, -1]])/np.sqrt(2), targets) + elif g.id == 'x': - circuit += self._dump_op( - np.asarray([[0, 1], [1, 0]]), g.targets) + circuit += self._dump_op(np.asarray([[0, 1], [1, 0]]), targets) + + elif g.id == 'rz': + circuit += self._dump_op(self._get_rotation_matrix(0, g.params, 0), targets) + + elif g.id == 'rx': + circuit += self._dump_op(self._get_rotation_matrix(g.params, -pi/2, pi/2), targets) + + elif g.id == 'ry': + circuit += self._dump_op(self._get_rotation_matrix(g.params, 0, 0), targets) + + elif g.id == 'swap': + assert len(targets) == 2 + matrix = np.eye(4, dtype=self.dtype) + matrix[1:3, 1:3] = [[0, 1], [1, 0]] + circuit += self._dump_op(np.asarray(matrix), targets) + + elif g.id == 'u': + circuit += self._dump_op(np.asarray(g.matrix), targets) elif g.id == 'cnot': - # TODO: use 4*4 version (merge targets & controls)? - circuit += self._dump_op( - np.asarray([[0, 1], [1, 0]]), g.targets, g.controls) + if self.tcs_full: + circuit += self._dump_op( + np.asarray([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0]]), controls+targets) + else: + circuit += self._dump_op( + np.asarray([[0, 1], [1, 0]]), targets, controls) elif g.id == 'cz': - # TODO: use 4*4 version (merge targets & controls)? 
- circuit += self._dump_op( - np.asarray([[1, 0], [0, -1]]), g.targets, g.controls) + if self.tcs_full: + circuit += self._dump_op( + np.asarray([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, -1]]), controls+targets) + else: + circuit += self._dump_op( + np.asarray([[1, 0], [0, -1]]), targets, controls) + + elif g.id == 'czpowgate': + if self.tcs_full: + matrix = np.eye(4, dtype=self.dtype) + matrix[3, 3] = cmath.exp(1j*pi*g.params) + circuit += self._dump_op(np.asarray(matrix), controls+targets) + else: + matrix = np.eye(2, dtype=self.dtype) + matrix[1, 1] = cmath.exp(1j*pi*g.params) + circuit += self._dump_op(np.asarray(matrix), targets, controls) + + elif g.id == 'cu': + if self.tcs_full: + matrix = np.eye(4, dtype=self.dtype) + matrix[2:4, 2:4] = g.matrix + circuit += self._dump_op(np.asarray(matrix), controls+targets) + else: + circuit += self._dump_op(np.asarray(g.matrix), targets, controls) + + elif g.id == 'mcu': + if self.tcs_full: + n = 1 << (len(targets) + len(controls)) + matrix = np.eye(n, dtype=self.dtype) + matrix[n-2:n, n-2:n] = g.matrix + circuit += self._dump_op(np.asarray(matrix), controls+targets) + else: + circuit += self._dump_op(np.asarray(g.matrix), targets, controls) + + elif g.id == 'measure': + pass # treated as no-op for now + + else: + raise NotImplementedError(f"the gate type {g.id} is not defined") + return circuit + + def generateBothCircuits(self, gateSeq): + circuit1 = str(self.nqubits) + '\n' + for i in range(self.nqubits): + circuit1 += '2 ' + circuit1 += '\n\n' + + circuit2 = str(self.nqubits) + '\n' + for i in range(self.nqubits): + circuit2 += '2 ' + circuit2 += '\n\n' + + for g in gateSeq: + if isinstance(g.targets, int): + targets = [g.targets] + else: + targets = g.targets + if isinstance(g.controls, int): + controls = [g.controls] + else: + controls = g.controls + + if g.id == 'h': + h = self._dump_op(np.asarray([[1, 1], [1, -1]])/np.sqrt(2), targets) + circuit1 += h + circuit2 += h + + elif g.id == 'x': + x = 
self._dump_op(np.asarray([[0, 1], [1, 0]]), targets) + circuit1 += x + circuit2 += x elif g.id == 'rz': - circuit += self._dump_op( - self._get_rotation_matrix(0, g.params, 0), g.targets) + rz = self._dump_op(self._get_rotation_matrix(0, g.params, 0), targets) + circuit1 += rz + circuit2 += rz elif g.id == 'rx': - circuit += self._dump_op( - self._get_rotation_matrix(g.params, -pi/2, pi/2), g.targets) + rx = self._dump_op(self._get_rotation_matrix(g.params, -pi/2, pi/2), targets) + circuit1 += rx + circuit2 += rx elif g.id == 'ry': - circuit += self._dump_op( - self._get_rotation_matrix(g.params, 0, 0), g.targets) - - elif g.id == 'czpowgate': - matrix = np.eye(2, dtype=self.dtype) - matrix[1, 1] = cmath.exp(1j*pi*g.params) - circuit += self._dump_op(matrix, g.targets, g.controls) - + ry = self._dump_op(self._get_rotation_matrix(g.params, 0, 0), targets) + circuit1 += ry + circuit2 += ry + elif g.id == 'swap': - assert len(g.targets) == 2 + assert len(targets) == 2 matrix = np.eye(4, dtype=self.dtype) matrix[1:3, 1:3] = [[0, 1], [1, 0]] - circuit += self._dump_op(matrix, g.targets) - - elif g.id == 'cu': - circuit += self._dump_op(g.matrix, g.targets, g.controls) + circuit1 += self._dump_op(np.asarray(matrix), targets) + circuit2 += self._dump_op(np.asarray(matrix), targets) elif g.id == 'u': - circuit += self._dump_op(g.matrix, g.targets) + circuit1 += self._dump_op(np.asarray(g.matrix), targets) + circuit2 += self._dump_op(np.asarray(g.matrix), targets) + + elif g.id == 'cnot': + circuit1 += self._dump_op(np.asarray([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0]]), controls+targets) + circuit2 += self._dump_op(np.asarray([[0, 1], [1, 0]]), targets, controls) + elif g.id == 'cz': + circuit1 += self._dump_op(np.asarray([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, -1]]), controls+targets) + circuit2 += self._dump_op(np.asarray([[1, 0], [0, -1]]), targets, controls) + + elif g.id == 'czpowgate': + matrix1 = np.eye(4, dtype=self.dtype) + matrix1[3, 3] 
= cmath.exp(1j*pi*g.params) + circuit1 += self._dump_op(np.asarray(matrix1), controls+targets) + matrix2 = np.eye(2, dtype=self.dtype) + matrix2[1, 1] = cmath.exp(1j*pi*g.params) + circuit2 += self._dump_op(np.asarray(matrix2), targets, controls) + + elif g.id == 'cu': + matrix1 = np.eye(4, dtype=self.dtype) + matrix1[2:4, 2:4] = g.matrix + circuit1 += self._dump_op(np.asarray(matrix1), controls+targets) + circuit2 += self._dump_op(np.asarray(g.matrix), targets, controls) + + elif g.id == 'mcu': + n = 1 << (len(targets) + len(controls)) + matrix1 = np.eye(n, dtype=self.dtype) + matrix1[n-2:n, n-2:n] = g.matrix + circuit1 += self._dump_op(matrix1, controls+targets) + circuit2 += self._dump_op(np.asarray(g.matrix), targets, controls) + elif g.id == 'measure': pass # treated as no-op for now else: raise NotImplementedError(f"the gate type {g.id} is not defined") + return circuit1, circuit2 + + def generateCircuit(self, gateSeq): + circuit = [] + circuit1 = [] + circuit2 = [] + + if self.tcs_full == 2: # Generate both full matrix and target matrix version of circuits. 
This is useful for random benchmark to generate the same circuits + circuit1, circuit2 = self.generateBothCircuits(gateSeq) + + else: + circuit = self.generateSpecifiedCircuit(gateSeq) def dump(): - logger.info(f"dumping (raw) circuit as {self.circuit_filename} ...") - with open(self.circuit_filename, 'w') as f: - f.write(circuit) + # Split the file name and extension + parts = self.circuit_filename.rsplit('.', 1) + if self.tcs_full==2: + file_name1 = parts[0]+'_tcs_full.tcs' + file_name2 = parts[0]+'_tcs_target.tcs' + logger.info(f"dumping (raw) circuit as {file_name1} ...") + with open(file_name1, 'w') as f1: + f1.write(circuit1) + logger.info(f"dumping (raw) circuit as {file_name2} ...") + with open(file_name2, 'w') as f2: + f2.write(circuit2) + else: + if self.tcs_full==1: + file_name = parts[0]+'_tcs_full.tcs' + else: + file_name = parts[0]+'_tcs_target.tcs' + logger.info(f"dumping (raw) circuit as {file_name} ...") + with open(file_name, 'w') as f: + f.write(circuit) call_by_root(dump) diff --git a/benchmarks/cuquantum_benchmarks/run.py b/benchmarks/cuquantum_benchmarks/run.py index f73f40d..f3302f4 100644 --- a/benchmarks/cuquantum_benchmarks/run.py +++ b/benchmarks/cuquantum_benchmarks/run.py @@ -132,7 +132,7 @@ backend_cutn = parser_circuit.add_argument_group('cutn-specific options') backend_cutn.add_argument('--nhypersamples', type=int, default=32, help='set the number of hypersamples for the pathfinder to explore') - +backend_cutn.add_argument('--compute-mode', type=str, required=False, help='set the mode for computation') # "cuquantum-benchmarks api" subcommand parser_api = subparsers.add_parser( diff --git a/benchmarks/cuquantum_benchmarks/run_interface.py b/benchmarks/cuquantum_benchmarks/run_interface.py index c127410..db07372 100644 --- a/benchmarks/cuquantum_benchmarks/run_interface.py +++ b/benchmarks/cuquantum_benchmarks/run_interface.py @@ -4,6 +4,7 @@ import functools import logging +import warnings import math import nvtx import os @@ -41,13 
+42,14 @@ def __init__(self, **kwargs): 'cusvaer_data_transfer_buffer_bits', 'cusvaer_comm_plugin_type', 'cusvaer_comm_plugin_soname', # cutn options - 'nhypersamples'): + 'nhypersamples', + 'compute_mode'): v = kwargs.pop(k) - if k.startswith('cusvaer') or v is not None: + if k.startswith('cusvaer') or k == 'compute_mode' or v is not None: setattr(self, k, v) else: setattr(self, k, backend_config['config'][k]) - + # To be parsed in run() self._benchmarks = kwargs.pop("benchmarks") self._nqubits = kwargs.pop("nqubits") @@ -72,7 +74,7 @@ def run(self): if self._nqubits is None: gpu_prop = cp.cuda.runtime.getDeviceProperties(cp.cuda.Device().id) max_n_qubits = math.floor(math.log2(gpu_prop['totalGlobalMem'] / (8 if self.precision == 'single' else 16))) - nqubits_list = list(range(16, max_n_qubits + 4, 4)) + nqubits_list = list(range(4, max_n_qubits + 4, 4)) else: nqubits_list = [self._nqubits] @@ -194,14 +196,15 @@ def timer(self, backend, circuit, nshots): return perf_time / self.nrepeats, cuda_time / self.nrepeats, post_time / self.nrepeats, post_process def _fix_filename_for_cutn(self, circuit_filename, nqubits): - target = pauli = None + pauli = None if self.backend == 'cutn': - target = os.environ.get('CUTENSORNET_BENCHMARK_TARGET', 'amplitude') - circuit_filename += f'_{target}' - if target == 'expectation': + if self.compute_mode is None: + self.compute_mode = 'amplitude' # default value + circuit_filename += f'_{self.compute_mode}' + if self.compute_mode == 'expectation': pauli = random.choices(('I', 'X', 'Y', 'Z'), k=nqubits) circuit_filename += f"_{''.join(pauli)}" - return circuit_filename, target, pauli + return circuit_filename, pauli def extract_frontend_version(self): if self.frontend == 'qiskit': @@ -256,8 +259,8 @@ def _run(self): circuit_filename += f'_p{p}' if measure: circuit_filename += '_measure' - circuit_filename, target, pauli = self._fix_filename_for_cutn(circuit_filename, self.nqubits) - self.cutn_target = target + + circuit_filename, pauli 
= self._fix_filename_for_cutn(circuit_filename, self.nqubits) # get circuit circuit = self.get_circuit(circuit_filename) @@ -300,9 +303,9 @@ def _run(self): preprocess_data = backend.preprocess_circuit( circuit, - # only cutn needs these, TODO: backend config + # only TN-based backends need these, TODO: backend config circuit_filename=os.path.join(self.cache_dir, circuit_filename), - target=target, + compute_mode=self.compute_mode, pauli=pauli ) @@ -393,7 +396,8 @@ def canonicalize_benchmark_data(self, frontend_version, backend_version, run_env sim_config["backend"]["cusvaer_global_index_bits"] = self.cusvaer_global_index_bits sim_config["backend"]["cusvaer_p2p_device_bits"] = self.cusvaer_p2p_device_bits elif self.backend == "cutn": - sim_config["backend"]["target"] = self.cutn_target + sim_config["backend"]["compute_mode"] = self.compute_mode + sim_config["backend"]["nhypersamples"] = self.nhypersamples sim_config_hash = sim_config.get_hash() self.benchmark_data = {**self.benchmark_data, **sim_config} diff --git a/benchmarks/tests/cuquantum_benchmarks_tests/test_run.py b/benchmarks/tests/cuquantum_benchmarks_tests/test_run.py index 7857466..3f08a86 100644 --- a/benchmarks/tests/cuquantum_benchmarks_tests/test_run.py +++ b/benchmarks/tests/cuquantum_benchmarks_tests/test_run.py @@ -220,6 +220,7 @@ def test_benchmark(self, combo, nqubits, benchmark, precision, tmp_path, visible cmd += ['--cusvaer-global-index-bits', '--cusvaer-p2p-device-bits'] if backend == 'cutn': cmd += ['--nhypersamples', '2'] + cmd += ['--compute-mode', 'amplitude'] for cmd_prefix in tests: result = subprocess.run(cmd_prefix+cmd, env=env, capture_output=True) diff --git a/python/tests/cuquantum_tests/cutensornet_tests/state_data.py b/python/tests/cuquantum_tests/cutensornet_tests/state_data.py new file mode 100644 index 0000000..a33526c --- /dev/null +++ b/python/tests/cuquantum_tests/cutensornet_tests/state_data.py @@ -0,0 +1,161 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES +#
+# SPDX-License-Identifier: BSD-3-Clause + +import itertools +import pytest + +import cupy as cp +try: + import cirq + from cuquantum.cutensornet._internal import circuit_parser_utils_cirq +except ImportError: + cirq = circuit_parser_utils_cirq = None +try: + import qiskit +except ImportError: + qiskit = None +from cuquantum.cutensornet.experimental import NetworkState, NetworkOperator + +from .circuit_data import cirq_circuits, get_qiskit_unitary_gate, qiskit_circuits +from .test_utils import DEFAULT_RNG + +DEFAULT_NUM_RANDOM_LAYERS = 2 + + +def get_cirq_random_2q_gate(rng=DEFAULT_RNG): + random_state = int(rng.integers(0, high=2023)) + random_unitary = cirq.testing.random_unitary(4, random_state=random_state) # random 2-qubit gate + random_gate = cirq.MatrixGate(random_unitary) + return random_gate + + +def gen_random_layered_cirq_circuit(qubits, num_random_layers=DEFAULT_NUM_RANDOM_LAYERS): + n_qubits = len(qubits) + operations = [] + for n in range(num_random_layers): + for i in range(n%2, n_qubits-1, 2): + operations.append(get_cirq_random_2q_gate().on(qubits[i], qubits[i+1])) + return cirq.Circuit(operations) + + +def cirq_insert_random_layers(circuit, num_random_layers=DEFAULT_NUM_RANDOM_LAYERS): + if num_random_layers == 0: + return circuit + qubits = sorted(circuit.all_qubits()) + circuit = circuit_parser_utils_cirq.remove_measurements(circuit) + pre_circuit = gen_random_layered_cirq_circuit(qubits, num_random_layers=num_random_layers) + return pre_circuit.concat_ragged(circuit) + +cirq_circuits_mps = [cirq_insert_random_layers(circuit) for circuit in cirq_circuits] + + +def gen_random_layered_qiskit_circuit(qubits, num_random_layers=DEFAULT_NUM_RANDOM_LAYERS): + n_qubits = len(qubits) + circuit = qiskit.QuantumCircuit(qubits) + for n in range(num_random_layers): + for i in range(n%2, n_qubits-1, 2): + circuit.append(get_qiskit_unitary_gate(), qubits[i:i+2]) + return circuit + + +def qiskit_insert_random_layers(circuit, 
num_random_layers=DEFAULT_NUM_RANDOM_LAYERS): + if num_random_layers == 0: + return circuit + qubits = circuit.qubits + circuit.remove_final_measurements() + pre_circuit = gen_random_layered_qiskit_circuit(qubits, num_random_layers=num_random_layers) + circuit.data = pre_circuit.data + circuit.data + return circuit + +qiskit_circuits_mps = [qiskit_insert_random_layers(circuit) for circuit in qiskit_circuits] + +testing_circuits_mps = cirq_circuits_mps + qiskit_circuits_mps + +qudits_to_test = ( + 4, + 6, + 8, + (3, 3, 3, 3, 3), + (2, 3, 2, 3, 4, 2), + (2, 5, 3, 2) +) + +# NOTE: as of cuTensorNet v2.4.0, exact qudits simulation is not supported when there are non-adjacent two qudit gates in the system +state_settings = ( + # adjacent_double_layer, mpo_bond_dim, mpo_num_sites, mpo_geometry, ct_target_place, initial_mps_dim + (True, 2, 4, 'adjacent-ordered', "first", None), + (True, 2, 4, 'random-ordered', "middle", 2), + (True, 2, 4, 'random', "last", 3), + (False, 3, None, 'adjacent-ordered', "first", None), + (False, 3, None, 'random-ordered', "last", 2), + (False, 3, None, 'random', "middle", None) +) + +# NOTE: tests based on random operands from StateFactory may be sentitive to numerical noise +approx_mps_options = ( + {'max_extent': 2, 'canonical_center': 0, 'algorithm': 'gesvdj', 'gesvdj_max_sweeps': 100, 'normalization': 'LInf'}, # fixed extent truncation + {'abs_cutoff': 0.1, 'canonical_center': 3, 'rel_cutoff': 0.2, 'discarded_weight_cutoff': 0.1}, # value based truncation + {'max_extent': 4, 'normalization': 'L1', 'abs_cutoff': 0.1}, + {'max_extent': 3, 'canonical_center': 1, 'rel_cutoff': 0.1, 'normalization': 'L2'} +) + +@pytest.fixture(scope="session") +def factory_backend_cycle(): + return itertools.cycle(('numpy', 'cupy', 'torch', 'torch-cpu')) + +@pytest.fixture(scope="function") +def factory_backend(factory_backend_cycle): + return next(factory_backend_cycle) + +@pytest.fixture(scope="session") +def svd_algorithm_cycle(): + # restrict to 
gesvd/gesvdj algorithm to avoid accuracy fallout + return itertools.cycle(('gesvd', 'gesvdj')) + +@pytest.fixture(scope="function") +def svd_algorithm(svd_algorithm_cycle): + return next(svd_algorithm_cycle) + +STATE_UPDATE_CONFIGS = ({}, {'max_extent': 2}, {'rel_cutoff': 0.12}) + +def create_vqc_states(config): + # specify the dimensions of the tensor network state + n_state_modes = 6 + state_mode_extents = (2, ) * n_state_modes + dtype = 'complex128' + + # create random operators + cp.random.seed(4) # seed is chosen such that the value based truncation in STATE_UPDATE_CONFIGS will yield different output MPS extents for state_a and state_b + random_complex = lambda *args, **kwargs: cp.random.random(*args, **kwargs) + 1.j * cp.random.random(*args, **kwargs) + op_one_body = random_complex((2, 2,)) + op_two_body_x = random_complex((2, 2, 2, 2)) + op_two_body_y = random_complex((2, 2, 2, 2)) + + state_a = NetworkState(state_mode_extents, dtype=dtype, config=config) + state_b = NetworkState(state_mode_extents, dtype=dtype, config=config) + + # apply one body tensor operators to the tensor network state + for i in range(n_state_modes): + modes_one_body = (i, ) + # op_one_body are fixed, therefore setting immutable to True + tensor_id_a = state_a.apply_tensor_operator(modes_one_body, op_one_body, immutable=True) + tensor_id_b = state_b.apply_tensor_operator(modes_one_body, op_one_body, immutable=True) + assert tensor_id_a == tensor_id_b + + two_body_op_ids = [] + # apply two body tensor operators to the tensor network state + for i in range(6): + for site in range(i, n_state_modes, 2): + if site + 1 < n_state_modes: + modes_two_body = (site, site+1) + # op_two_body differs between state_a and state_b, therefore setting immutable to False + tensor_id_a = state_a.apply_tensor_operator(modes_two_body, op_two_body_x, immutable=False) + tensor_id_b = state_b.apply_tensor_operator(modes_two_body, op_two_body_y, immutable=False) + assert tensor_id_a == tensor_id_b + 
two_body_op_ids.append(tensor_id_a) + + pauli_string = {'IXIXIX': 0.5, 'IYIYIY': 0.2, 'IZIZIZ': 0.3, 'IIIIII': 0.1, 'ZIZIZI': 0.4, 'XIXIXI': 0.2} + operator = NetworkOperator.from_pauli_strings(pauli_string, dtype='complex128') + + return (state_a, op_two_body_x), (state_b, op_two_body_y), operator, two_body_op_ids \ No newline at end of file diff --git a/python/tests/cuquantum_tests/cutensornet_tests/state_tester.py b/python/tests/cuquantum_tests/cutensornet_tests/state_tester.py new file mode 100644 index 0000000..5788360 --- /dev/null +++ b/python/tests/cuquantum_tests/cutensornet_tests/state_tester.py @@ -0,0 +1,1161 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES +# +# SPDX-License-Identifier: BSD-3-Clause + +import numpy as np + +import opt_einsum as oe + +from cuquantum import CircuitToEinsum +from cuquantum.cutensornet.experimental import NetworkState, TNConfig, MPSConfig, NetworkOperator +from cuquantum.cutensornet._internal.decomposition_utils import compute_mid_extent +from cuquantum.cutensornet._internal.utils import infer_object_package +from cuquantum.cutensornet._internal import tensor_wrapper + +from .approxTN_utils import gate_decompose, tensor_decompose, SVD_TOLERANCE, verify_unitary +from .circuit_tester import BaseTester, get_random_pauli_strings, get_engine_iters +from .circuit_utils import _BaseComputeEngine, ConverterComputeEngine, get_mps_tolerance +from .test_utils import DEFAULT_RNG, EMPTY_DICT, TensorBackend, get_or_create_tensor_backend, atol_mapper, rtol_mapper, get_dtype_name, get_state_internal_backend_device + + +# valid simulation setting for reference MPS class +MPS_VALID_CONFIGS = {'max_extent', 'abs_cutoff', 'rel_cutoff', 'discarded_weight_cutoff', 'normalization', 'canonical_center', 'mpo'} + +def is_converter_mps_compatible(converter): + for _, qubits in converter.gates: + if len(qubits) > 2: + return False + return True + +def verify_mps_canonicalization(mps_tensors, canonical_center): + dtype = 
get_dtype_name(mps_tensors[0].dtype) + if canonical_center is None: + return True + is_canonical = True + for i, t in enumerate(mps_tensors): + if t.ndim == 3: + modes = 'ipj' + elif t.ndim == 2: + if i == 0: + modes = 'pj' + elif i == (len(mps_tensors) - 1): + modes = 'ip' + else: + raise ValueError + else: + raise ValueError + if i < canonical_center: + shared_mode = 'j' + elif i > canonical_center: + shared_mode = 'i' + else: + continue + is_canonical = is_canonical and verify_unitary(t, modes, shared_mode, + SVD_TOLERANCE[dtype], tensor_name=f"Site {i} canonicalization") + return is_canonical + + +def get_device_id(options): + if isinstance(options, dict): + device_id = options.get('device_id', 0) + else: + device_id = getattr(options, 'device_id', None) + return device_id + +def get_random_network_operator(state_dims, *, backend='cupy', rng=DEFAULT_RNG, num_repeats=2, dtype='complex128', options=None): + device_id = get_device_id(options) + backend = TensorBackend(backend=backend, device_id=device_id) + operator_obj = NetworkOperator(state_dims, dtype=dtype, options=options) + def get_random_modes(): + num_rand_modes = rng.integers(2, len(state_dims)) + rand_modes = list(range(len(state_dims))) + rng.shuffle(rand_modes) + return rand_modes[:num_rand_modes] + + # adding tensor product + for _ in range(num_repeats): + coefficient = rng.random(1).item() + if dtype.startswith('complex'): + coefficient += 1j * rng.random(1).item() + prod_modes = get_random_modes() + prod_tensors = [] + for i in prod_modes: + shape = (state_dims[i], ) * 2 + t = backend.random(shape, dtype, rng=rng) + prod_tensors.append(t) + prod_modes = [[i] for i in prod_modes] + operator_obj.append_product(coefficient, prod_modes, prod_tensors) + # adding MPOs + for _ in range(num_repeats): + coefficient = rng.random(1).item() + if dtype.startswith('complex'): + coefficient += 1j * rng.random(1).item() + mpo_modes = get_random_modes() + num_mpo_modes = len(mpo_modes) + mpo_tensors = [] + 
bond_prev = None + for i, m in enumerate(mpo_modes): + bond_next = rng.integers(2, 5) + dim = state_dims[m] + if i == 0: + shape = (dim, bond_next, dim) + elif i == num_mpo_modes - 1: + shape = (bond_prev, dim, dim) + else: + shape = (bond_prev, dim, bond_next, dim) + t = backend.random(shape, dtype, rng=rng) + mpo_tensors.append(t) + bond_prev = bond_next + operator_obj.append_mpo(coefficient, mpo_modes, mpo_tensors) + return operator_obj + + +class StateFactory: + def __init__( + self, + qudits, + dtype, + layers='SDCMDS', + rng=None, + backend='cupy', + adjacent_double_layer=True, + mpo_bond_dim=2, + mpo_num_sites=None, + mpo_geometry="adjacent-ordered", + ct_target_place="last", # Controlled-Tensor: ct + initial_mps_dim=None, + options=None, + ): + if isinstance(qudits, (int, np.integer)): + self.num_qudits = qudits + self.state_dims = (2, ) * self.num_qudits + else: + self.num_qudits = len(qudits) + self.state_dims = qudits + + self.device_id = get_device_id(options) + self.backend = TensorBackend(backend=backend, device_id=self.device_id) + self.dtype = get_dtype_name(dtype) + + assert set(layers).issubset(set('SDCM')) + self.layers = layers + + if rng is None: + rng = np.random.default_rng(2024) + self.rng = rng + self.adjacent_double_layer = adjacent_double_layer + + # settings for MPO layer + self.mpo_bond_dim = mpo_bond_dim + if mpo_num_sites is None: + self.mpo_num_sites = self.num_qudits + else: + mpo_num_sites = min(self.num_qudits, mpo_num_sites) + assert mpo_num_sites >= 2 + self.mpo_num_sites = mpo_num_sites + assert mpo_geometry in {"adjacent-ordered", "random-ordered", "random"} + self.mpo_geometry = mpo_geometry + assert ct_target_place in {"first", "middle", "last"} + self.ct_target_place = ct_target_place + self.initial_mps_dim = initial_mps_dim + self.psi = None + self._sequence = [] + + def get_initial_state(self): + if self.psi is None: + self.psi = [] + if self.initial_mps_dim is None: + for d in self.state_dims: + t = self.backend.zeros(d, 
dtype=self.dtype) + t[0] = 1 + self.psi.append(t) + else: + virtual_dim = self.initial_mps_dim + for i, d in enumerate(self.state_dims): + if i==0: + shape = (d, virtual_dim) + elif i == self.num_qudits - 1: + shape = (virtual_dim, d) + else: + shape = (virtual_dim, d, virtual_dim) + self.psi.append(self.backend.random(shape, self.dtype, rng=self.rng)) + return self.psi + + @property + def sequence(self): + if not self._sequence: + self._generate_raw_sequence() + return self._sequence + + def _generate_raw_sequence(self): + double_layer_offset = 0 + for layer in self.layers: + if layer == 'S': + self._append_single_qudit_layer() + elif layer == 'D': + self._append_double_qudit_layer(offset=double_layer_offset) + # intertwine double layers + double_layer_offset = 1 - double_layer_offset + elif layer == 'C': + self._append_controlled_tensor_mpo_layer() + elif layer == 'M': + self._append_mpo_layer() + else: + raise ValueError(f"layer type {layer} not supported") + + def append_sequence(self, sequence): + self._sequence.append(sequence) + + def _append_single_qudit_layer(self): + for i in range(self.num_qudits): + shape = (self.state_dims[i], ) * 2 + t = self.backend.random(shape, self.dtype, rng=self.rng) + t = t + t.conj().T + t /= self.backend.norm(t) + self._sequence.append((t, (i,), None)) + + def _append_double_qudit_layer(self, offset=0): + for i in range(offset, self.num_qudits-1, 2): + j = i + 1 if self.adjacent_double_layer else self.rng.integers(i+1, self.num_qudits) + shape = (self.state_dims[i], self.state_dims[j])* 2 + t = self.backend.random(shape, self.dtype, rng=self.rng) + try: + t = t + t.conj().transpose(2,3,0,1) + except TypeError: + t = t + t.conj().permute(2,3,0,1) + t /= self.backend.norm(t) + self._sequence.append((t, (i, j), None)) + + def _append_mpo_layer(self): + if self.mpo_geometry == "adjacent-ordered": + start_site = self.rng.integers(0, self.num_qudits-self.mpo_num_sites+1) + modes = list(range(start_site, 
start_site+self.mpo_num_sites)) + elif self.mpo_geometry in {"random-ordered", "random"}: + modes = list(range(self.num_qudits)) + self.rng.shuffle(modes) + modes = modes[:self.mpo_num_sites] + if self.mpo_geometry == "random-ordered": + modes = sorted(modes) + else: + raise ValueError(f"mpo geometry {self.mpo_geometry} not supported") + mpo_tensors = [] + for i, q in enumerate(modes): + phys_dim = self.state_dims[q] + if i == 0: + shape = (phys_dim, self.mpo_bond_dim, phys_dim) + transpose_order = (2,1,0) + elif i == self.mpo_num_sites - 1: + shape = (self.mpo_bond_dim, phys_dim, phys_dim) + transpose_order = (0,2,1) + else: + shape = (self.mpo_bond_dim, phys_dim, ) * 2 + transpose_order = (0,3,2,1) + t = self.backend.random(shape, self.dtype, rng=self.rng) + try: + t = t + t.conj().transpose(*transpose_order) + except TypeError: + t = t + t.conj().permute(*transpose_order) + t /= self.backend.norm(t) + mpo_tensors.append(t) + self._sequence.append((mpo_tensors, modes, None)) + + def _append_controlled_tensor_mpo_layer(self): + if self.mpo_geometry == "adjacent-ordered": + start_site = self.rng.integers(0, self.num_qudits-self.mpo_num_sites+1) + modes = list(range(start_site, start_site+self.mpo_num_sites)) + elif self.mpo_geometry in {"random-ordered", "random"}: + modes = list(range(self.num_qudits)) + self.rng.shuffle(modes) + modes = modes[:self.mpo_num_sites] + modes = sorted(modes) + else: + raise ValueError(f"controlled-tensor-mpo geometry {self.mpo_geometry} not supported") + + target_modes = [] + control_modes = [] + if self.ct_target_place == "first": + target_modes = [modes[0]] + control_modes = modes[1:] + elif self.ct_target_place == "middle": + if(len(modes) < 3): + raise ValueError(f"To apply target in the middle, #qubits should be > 2") + idx = self.rng.integers(1, len(modes)-1) + target_modes = [modes[idx]] + modes.pop(idx) + control_modes = modes + elif self.ct_target_place == "last": + target_modes = [modes[-1]] + control_modes = modes[:-1] + 
else: + raise ValueError(f"controlled-tensor target place {self.ct_target_place} not supported") + + # control values + control_values = [] + control_modes = sorted(control_modes) + for cm in control_modes: + v = self.rng.integers(0, self.state_dims[cm]) + control_values.append(v) + # target data + target_phys_dim = self.state_dims[target_modes[0]] # Currently we support only single-qubit target + shape = (target_phys_dim, target_phys_dim) + transpose_order = (1, 0) + t = self.backend.random(shape, self.dtype, rng=self.rng) + try: + t = t + t.conj().transpose(*transpose_order) + except TypeError: + t = t + t.conj().permute(*transpose_order) + t /= self.backend.norm(t) + self._sequence.append((t, target_modes, (control_modes, control_values))) + + def compute_control_tensor(self, control_dim, control_val, rank, direction): + c1_rank3 = self.backend.asarray([1, 0, 0, 0, 0, 0, 0, 1]).reshape(2, 2, 2) + c0_rank3_dwon = self.backend.asarray([0, 0, 1, 0, 0, 1, 0, 0]).reshape(2, 2, 2) + c0_rank3_up = self.backend.asarray([0, 0, 0, 1, 1, 0, 0, 0]).reshape(2, 2, 2) + c1_rank4_down = self.backend.asarray([1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1]).reshape(2, 2, 2, 2) + c1_rank4_up = self.backend.asarray([1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]).reshape(2, 2, 2, 2) + c0_rank4_down = self.backend.asarray([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0]).reshape(2, 2, 2, 2) + c0_rank4_up = self.backend.asarray([1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0]).reshape(2, 2, 2, 2) + + if control_dim == 2: + if rank == "rank-3": + if control_val == 1: + return c1_rank3 + elif control_val == 0 and direction == "down": + return c0_rank3_dwon + elif control_val == 0 and direction == "up": + return c0_rank3_up + else: + raise ValueError(f" Failed! 
Computing control tensor with rank {rank} and direction {direction} is not supported.") + + + elif rank == "rank-4": + if control_val == 1 and direction == "down": + return c1_rank4_down + elif control_val == 1 and direction == "up": + return c1_rank4_up + elif control_val == 0 and direction == "down": + return c0_rank4_down + elif control_val == 0 and direction == "up": + return c0_rank4_up + else: + raise ValueError(f" Failed! Computing control tensor with rank {rank} and direction {direction} is not supported.") + else: + raise ValueError(f" Failed! Control tensor can be rank-3 or rank-4, {rank} is not supported.") + + else: # It is a qudit with arbitrary extent/dimension + if rank == "rank-3": + if direction == "down": + shape = (control_dim, 2, control_dim) + ctrl_tensor = self.backend.zeros(shape) + for i in range(control_dim): + if i == control_val: + b = 1 + else: + b = 0 + ctrl_tensor[i][b][i] = 1 + return ctrl_tensor + elif direction == "up": + shape = (2, control_dim, control_dim) + ctrl_tensor = self.backend.zeros(shape) + for i in range(control_dim): + if i == control_val: + b = 1 + else: + b = 0 + ctrl_tensor[b][i][i] = 1 + return ctrl_tensor + else: + raise ValueError(f" Failed! Computing control tensor with direction {direction} is not supported.") + + elif rank == "rank-4": + if direction == "down": + shape = (2, control_dim, 2, control_dim) + ctrl_tensor = self.backend.zeros(shape) + for b1 in range(2): + for i in range(control_dim): + if b1 == 1 and i == control_val: + b2 = 1 + else: + b2 = 0 + ctrl_tensor[b1][i][b2][i] = 1 + return ctrl_tensor + elif direction == "up": + shape = (2, control_dim, 2, control_dim) + ctrl_tensor = self.backend.zeros(shape) + for b2 in range(2): + for i in range(control_dim): + if b2 == 1 and i == control_val: + b1 = 1 + else: + b1 = 0 + ctrl_tensor[b1][i][b2][i] = 1 + return ctrl_tensor + else: + raise ValueError(f" Failed! 
Computing control tensor with direction {direction} is not supported.") + + else: + raise ValueError(f" Failed! Control tensor can be rank-3 or rank-4, {rank} is not supported.") + + def compute_ct_mpo_tensors(self, control_modes, control_vals, target_modes, target_data): + # Note: Input target_data has mode ABC...abc where ABCD are bra modes and abcd are ket modes + # output MPO tensors (provide to apply_mpo) should have mode panA where p and n refers to modes connecting to previous and next site + tensors = [] + modes = control_modes + target_modes + modes = sorted(modes) + n_qudits = len(modes) + target_tensor_applied = 0 + for i, q in enumerate(modes): + if i == 0: # rank-3 + if q == target_modes[0]: # target tensor + target = target_data.reshape(1, -1)[0] + t_dim = self.state_dims[q] + c0 = [0] * (t_dim ** 2) + for i in range(t_dim): + idx = i + (i * t_dim) + c0[idx] = 1 + c0 = self.backend.asarray(c0) + t_temp = self.backend.vstack([c0, target]) + # modes (n, A, a), i.e, virtual, bra, ket + t_temp = t_temp.reshape(2, t_dim, t_dim) + # convert to modes (a, n, A), i.e, ket, virtual, bra + try: + t_temp = t_temp.transpose(2, 0, 1) + except TypeError: + t_temp = t_temp.permute(2, 0, 1) + t = self.backend.asarray(t_temp, dtype=self.dtype) + target_tensor_applied = 1 + tensors.append(t) + else: # control tensor + c = self.compute_control_tensor(self.state_dims[q], control_vals[0], "rank-3", "down") + c = self.backend.asarray(c, dtype=self.dtype) + tensors.append(c) + + elif i == n_qudits-1: # rank-3 + if q == target_modes[0]: # target tensor + target = target_data.reshape(1, -1)[0] + t_dim = self.state_dims[q] + c0 = [0] * (t_dim ** 2) + for i in range(t_dim): + idx = i + (i * t_dim) + c0[idx] = 1 + c0 = self.backend.asarray(c0) + # modes (p, A, a), i.e, virtual, bra, ket + t_temp = self.backend.vstack([c0, target]) + t_temp = t_temp.reshape(2, t_dim, t_dim) + # convert to modes (p, a, A), i.e, virtual, ket, bra + try: + t_temp = t_temp.transpose(0, 2, 1) + except 
TypeError: + t_temp = t_temp.permute(0, 2, 1) + t = self.backend.asarray(t_temp, dtype=self.dtype) + target_tensor_applied = 1 + tensors.append(t) + else: # control tensor + c = self.compute_control_tensor(self.state_dims[q], control_vals[i-1], "rank-3", "up") + c = self.backend.asarray(c, dtype=self.dtype) + tensors.append(c) + + else: # rank-4 + if q == target_modes[0]: # target tensor + target = target_data.reshape(1, -1)[0] + t_dim = self.state_dims[q] + c0 = [0] * (t_dim ** 2) + for i in range(t_dim): + idx = i + (i * t_dim) + c0[idx] = 1 + c0 = self.backend.asarray(c0) + t_temp = self.backend.vstack([c0, c0, c0, target]) + # modes (p, n, A, a) + t_temp = t_temp.reshape(2, 2, t_dim, t_dim) + # convert to (p, a, n, A) + try: + t_temp = t_temp.transpose(0, 3, 1, 2) + except TypeError: + t_temp = t_temp.permute(0, 3, 1, 2) + t = self.backend.asarray(t_temp, dtype=self.dtype) + target_tensor_applied = 1 + tensors.append(t) + else: # control tensor + if target_tensor_applied == 1: + c = self.compute_control_tensor(self.state_dims[q], control_vals[i-1], "rank-4", "up") + c = self.backend.asarray(c, dtype=self.dtype) + tensors.append(c) + else: + c = self.compute_control_tensor(self.state_dims[q], control_vals[i], "rank-4", "down") + c = self.backend.asarray(c, dtype=self.dtype) + tensors.append(c) + + return tensors + + + def get_sv_contraction_expression(self): + operands = [] + # initial qudit mode + qudit_modes = list(range(self.num_qudits)) + mode_frontier = self.num_qudits + + initial_state = self.get_initial_state() + if self.initial_mps_dim is None: + for i, t in enumerate(initial_state): + operands += [t, [qudit_modes[i]]] + else: + prev_mode = None + for i, t in enumerate(initial_state): + if i == 0: + modes = (qudit_modes[i], mode_frontier) + elif i == self.num_qudits - 1: + modes = (prev_mode, qudit_modes[i]) + else: + modes = (prev_mode, qudit_modes[i], mode_frontier) + prev_mode = mode_frontier + mode_frontier += 1 + operands += [t, modes] + + for op, 
qudits, control_info in self.sequence: + if control_info is not None: + # convert control tensor into MPO + ctrl_modes, ctrl_vals = control_info + op = self.compute_ct_mpo_tensors(ctrl_modes, ctrl_vals, qudits, op) + qudits = qudits + ctrl_modes + qudits = sorted(qudits) + control_info = None + n_qudits = len(qudits) + if isinstance(op, (list, tuple)): + # for MPO + prev_mode = None + for i, q in enumerate(qudits): + mode_in = qudit_modes[q] + qudit_modes[q] = mode_frontier + mode_frontier += 1 + if i == 0: + modes = [mode_in, mode_frontier, qudit_modes[q]] + prev_mode = mode_frontier + mode_frontier += 1 + elif i == n_qudits - 1: + modes = [prev_mode, mode_in, qudit_modes[q]] + else: + modes = [prev_mode, mode_in, mode_frontier, qudit_modes[q]] + prev_mode = mode_frontier + mode_frontier +=1 + operands += [op[i], modes] + else: + modes_in = [qudit_modes[q] for q in qudits] + modes_out = [] + for q in qudits: + qudit_modes[q] = mode_frontier + modes_out.append(mode_frontier) + mode_frontier +=1 + operands += [op, modes_out + modes_in] + + operands.append(qudit_modes) + return operands + + def to_network_state(self, config=None, options=None): + network_state = NetworkState(self.state_dims, dtype=self.dtype, config=config, options=options) + if self.initial_mps_dim is not None: + network_state.set_initial_mps(self.get_initial_state()) + for op, modes, control_info in self.sequence: + if control_info is None: + if isinstance(op, (list, tuple)): + # MPO + network_state.apply_mpo(modes, op) + else: + # GATE + network_state.apply_tensor_operator(modes, op) + else: + # Controlled-Tensor + # NetworkState currently only support immutable controlled tensors + network_state.apply_tensor_operator(modes, op, control_modes=control_info[0], control_values=control_info[1], immutable=True) + return network_state + + +class MPS(_BaseComputeEngine): + + def __init__( + self, + qudits, + backend, + qudit_dims=2, + dtype='complex128', + mps_tensors=None, + 
mpo_application='approximate', + canonical_center=None, + sample_rng=DEFAULT_RNG, + **svd_options + ): + self._qudits = list(qudits) + self.backend = get_or_create_tensor_backend(backend) + if isinstance(qudit_dims, (int, np.integer)): + self.state_dims = (qudit_dims, ) * len(qudits) + else: + assert len(qudit_dims) == len(qudits), "qudit_dims must be either an integer or a sequence of integers with the same size as qudits" + self.state_dims = tuple(qudit_dims) + self.dtype = get_dtype_name(dtype) + self.n = len(qudits) + if mps_tensors is None: + self.mps_tensors = [] + for i in range(self.n): + data = [1, ] + [0, ] * (self.state_dims[i] - 1) + t = self.backend.asarray(data, dtype=dtype).reshape(1, self.state_dims[i], 1) + self.mps_tensors.append(t) + else: + # avoid in place modification + self.mps_tensors = mps_tensors.copy() + # potentially insert dummy labels for boundary tensors for consistent notation in this class + if self.mps_tensors[0].ndim == 2: + self.mps_tensors[0] = self.mps_tensors[0].reshape(1, *self.mps_tensors[0].shape) + if self.mps_tensors[-1].ndim == 2: + new_shape = self.mps_tensors[-1].shape + (1, ) + self.mps_tensors[-1] = self.mps_tensors[-1].reshape(*new_shape) + self._minimal_compression(0, self.n-1, True) + if canonical_center is not None: + assert canonical_center >= 0 and canonical_center < self.n + self.canonical_center = canonical_center + self.sample_rng = sample_rng + for key in svd_options.keys(): + if key not in MPS_VALID_CONFIGS: + raise ValueError(f"{key} not supported") + self.svd_options = {'partition': 'UV'} + self.svd_options.update(svd_options) + max_extent = self.svd_options.pop('max_extent', None) + self.is_exact_svd = self.svd_options.get('normalization', None) is None + for key in ('abs_cutoff', 'rel_cutoff', 'discarded_weight_cutoff'): + self.is_exact_svd = self.is_exact_svd and self.svd_options.get(key, None) in (0, None) + self.max_extents = [] + for i in range(self.n-1): + max_shared_extent = 
min(np.prod(self.state_dims[:i+1]), np.prod(self.state_dims[i+1:])) + if max_extent is None: + self.max_extents.append(max_shared_extent) + else: + self.max_extents.append(min(max_extent, max_shared_extent)) + assert mpo_application in {'exact', 'approximate'} + self.mpo_application = mpo_application + self._tolerance = None + self.sv = None + self.norm = None + + @property + def qudits(self): + return self._qudits + + @property + def qubits(self): + # overload + return self.qudits + + def __getitem__(self, key): + assert key >= 0 and key < self.n + return self.mps_tensors[key] + + def __setitem__(self, key, val): + assert key>=0 and key < self.n + self.mps_tensors[key] = val + # resetting SV and norm + self.sv = self.norm = None + + @property + def tolerance(self): + if self._tolerance is None: + self._tolerance = get_mps_tolerance(self.dtype) + return self._tolerance + + def _compute_state_vector(self): + inputs = [] + output_modes = [] + for i, o in enumerate(self.mps_tensors): + modes = [2*i, 2*i+1, 2*i+2] + inputs.extend([o, modes]) + output_modes.append(2*i+1) + inputs.append(output_modes) + return oe.contract(*inputs) + + def compute_expectation(self, tn_operator): + def _get_array(wrapped_tensor): + if self.backend.full_name == 'numpy': + return wrapped_tensor.tensor.get() + elif self.backend.full_name == 'torch-cpu': + return wrapped_tensor.to('cpu') + return wrapped_tensor.tensor + if isinstance(tn_operator, NetworkOperator): + expec = 0 + for (mpo_tensors, modes, coeff) in tn_operator.mpos: + mpo_tensors = [_get_array(o) for o in mpo_tensors] + expec += coeff * self._get_mpo_expectation(mpo_tensors, modes) + for (prod_tensors, modes, coeff) in tn_operator.tensor_products: + prod_tensors = [_get_array(o) for o in prod_tensors] + expec += coeff * self._get_product_expectation(prod_tensors, modes) + return expec + else: + # pauli string or pauli string dictionaries + return super().compute_expectation(tn_operator) + + def _get_mpo_expectation(self, 
    # NOTE(review): this region was recovered from a unified-diff hunk; code tokens are
    # byte-faithful to the patch, only comments/docstrings were added. The `def` line of
    # the first method is cut off by the hunk boundary and is reconstructed by symmetry
    # with _get_product_expectation below — confirm against the full file.
    def _get_mpo_expectation(self, mpo_tensors, mpo_modes):
        """Contract <psi| MPO |psi> by dense einsum over the full state vector.

        Builds one opt_einsum contraction: statevector, then each MPO tensor
        chained via a virtual bond mode, then the conjugated statevector with
        the bra modes substituted in place of the contracted ket modes.
        """
        sv = self.compute_state_vector()
        modes = list(range(self.n))
        mode_frontier = self.n
        # current_modes[m] tracks which einsum mode currently labels site m on the bra side
        current_modes = modes.copy()
        operands = [sv, modes]
        prev_mode = None
        for i, m in enumerate(mpo_modes):
            ket_mode = current_modes[m]
            current_modes[m] = bra_mode = mode_frontier
            mode_frontier += 1
            next_mode = mode_frontier  # virtual bond connecting to the next MPO tensor
            mode_frontier += 1
            # boundary MPO tensors are rank-3 (no left/right bond); interior ones rank-4
            if i==0:
                operands += [mpo_tensors[i], (ket_mode, next_mode, bra_mode)]
            elif i == len(mpo_modes) - 1:
                operands += [mpo_tensors[i], (prev_mode, ket_mode, bra_mode)]
            else:
                operands += [mpo_tensors[i], (prev_mode, ket_mode, next_mode, bra_mode)]
            prev_mode = next_mode
        operands += [sv.conj(), current_modes]
        return oe.contract(*operands).item()

    def _get_product_expectation(self, prod_tensors, prod_modes):
        """Contract <psi| O_1 x O_2 x ... |psi> for a tensor product of local operators.

        Each prod_tensors[i] acts on the sites listed in prod_modes[i]; its einsum
        subscripts are (ket_modes + bra_modes) in that order.
        """
        sv = self.compute_state_vector()
        modes = list(range(self.n))
        mode_frontier = self.n
        current_modes = modes.copy()
        operands = [sv, modes]
        for i, ms in enumerate(prod_modes):
            ket_modes = []
            bra_modes = []
            for m in ms:
                ket_modes.append(current_modes[m])
                bra_modes.append(mode_frontier)
                current_modes[m] = mode_frontier
                mode_frontier += 1
            operands += [prod_tensors[i], ket_modes + bra_modes]
        operands += [sv.conj(), current_modes]
        return oe.contract(*operands).item()

    def _swap(self, i, direction, exact=False):
        """Swap the physical indices of site i with its left/right neighbor.

        Contracts the two site tensors with their physical modes exchanged and
        re-splits them with an SVD. When ``exact`` is True the split keeps the
        full mid extent; otherwise it is truncated per ``self.svd_options`` and
        ``self.max_extents``. Extra trailing modes (e.g. a dangling MPO bond)
        are carried through via the 'xyzw'/'XYZW' subscript padding.
        """
        assert direction in {'left', 'right'}
        if direction == 'left':
            a, b = i-1, i
        else:
            a, b = i, i+1
        assert a >= 0 and b <= self.n-1
        # i/j/l: virtual bonds, P/Q: physical modes; pad with extra modes if tensors are rank>3
        input_a = 'iPj' + 'xyzw'[:self[a].ndim-3]
        input_b = 'jQl' + 'XYZW'[:self[b].ndim-3]
        intm = 'iPQl' + input_a[3:] + input_b[3:]
        # outputs exchange P and Q between the two sites (that's the swap)
        output_a = 'iQj' + input_b[3:]
        output_b = 'jPl' + input_a[3:]
        tmp = self.backend.einsum(f'{input_a},{input_b}->{intm}', self[a], self[b])
        decompose_expr = f'{intm}->{output_a},{output_b}'
        size_dict = dict(zip(input_a, self[a].shape))
        size_dict.update(dict(zip(input_b, self[b].shape)))
        mid_extent = compute_mid_extent(size_dict, (input_a, input_b), (output_a, output_b))

        if exact:
            svd_options = {'max_extent': mid_extent, 'partition': 'UV'}
        else:
            svd_options = self.svd_options.copy()
            svd_options['max_extent'] = min(self.max_extents[a], mid_extent)

        self[a], _, self[b] = tensor_decompose(decompose_expr, tmp, method='svd', **svd_options)

    def _canonicalize_site(self, i, direction, max_extent=None, **svd_options):
        """Push the canonical center across bond i in the given direction.

        Uses QR when no truncation is needed (cheaper, exact); otherwise
        contracts the two sites and re-splits with a truncated SVD whose
        singular values are absorbed into the tensor on the ``direction`` side
        (partition 'V' for right, 'U' for left).
        """
        if direction == 'right':
            left, right = i, i+1
            partition = 'V'
            assert i >=0 and i < self.n - 1
        elif direction == 'left':
            left, right = i-1, i
            partition = 'U'
            assert i >0 and i <= self.n - 1
        else:
            raise ValueError
        ti, tj = self[left], self[right]
        # the largest bond a QR split can produce on each side without loss
        qr_min_extent_left = min(np.prod(ti.shape[:2]), ti.shape[-1])
        qr_min_extent_right = min(np.prod(tj.shape[1:]), tj.shape[0])
        min_exact_e = min(qr_min_extent_left, qr_min_extent_right)
        if max_extent is not None:
            max_extent = min(max_extent, min_exact_e)
        else:
            max_extent = min_exact_e
        svd_options.pop('partition', None)
        # QR fast path: only valid when no SVD truncation options remain and
        # the requested extent equals what QR would produce anyway
        if not svd_options and direction == 'right' and max_extent == qr_min_extent_left:
            self[left], r = tensor_decompose('ipj->ipx,xj', ti, method='qr')
            self[right] = self.backend.einsum('xj,jql->xql', r, tj)
        elif not svd_options and direction == 'left' and max_extent == qr_min_extent_right:
            self[right], r = tensor_decompose('jql->xql,jx', tj, method='qr')
            self[left] = self.backend.einsum('jx,ipj->ipx', r, ti)
        else:
            svd_options['partition'] = partition
            tmp = self.backend.einsum('ipj,jql->ipql', ti, tj)
            self[left], _, self[right] = tensor_decompose('ipql->ipj,jql', tmp, method='svd', max_extent=max_extent, **svd_options)

    def _minimal_compression(self, start, end, check_minimal=False):
        """Compress bonds in [start, end] down to their minimal exact extents.

        Performs a right sweep then a left sweep of exact canonicalizations.
        With ``check_minimal`` set, a bond already at its minimal extent is
        skipped to avoid redundant decompositions.
        """
        for i in range(start, end+1):
            if i == self.n - 1:
                break
            if check_minimal:
                left_extent, shared_extent = np.prod(self[i].shape[:2]), self[i].shape[-1]
                right_extent = np.prod(self[i+1].shape[1:])
                if shared_extent == min(left_extent, right_extent, shared_extent):
                    continue
            self._canonicalize_site(i, 'right')

        for i in range(end, start-1, -1):
            if i==0: break
            if check_minimal:
                left_extent, shared_extent = np.prod(self[i-1].shape[:2]), self[i-1].shape[-1]
                right_extent = np.prod(self[i].shape[1:])
                if shared_extent == min(left_extent, right_extent, shared_extent):
                    continue
            self._canonicalize_site(i, 'left')

    def _apply_gate_1q(self, i, operand):
        """Apply a single-site gate (bra-mode-first layout 'Pp') to site i in place."""
        self[i] = self.backend.einsum('ipj,Pp->iPj', self[i], operand)

    def _apply_gate_2q(self, i, j, operand):
        """Apply a two-site gate to sites i and j.

        Non-adjacent sites are brought together by swap gates, the gate is
        applied via a truncated gate-split decomposition, then the swaps are
        undone in reverse order.
        """
        if i > j:
            # normalize to i < j by transposing the gate's site ordering;
            # torch tensors use .permute where numpy/cupy use .transpose
            try:
                operand = operand.transpose(1,0,3,2)
            except TypeError:
                operand = operand.permute(1,0,3,2)
            return self._apply_gate_2q(j, i, operand)
        elif i == j:
            raise ValueError(f"gate acting on the same site {i} twice")
        elif i == j - 1:
            size_dict = dict(zip('ipjjqkPQpq', self[i].shape+self[j].shape+operand.shape))
            mid_extent = compute_mid_extent(size_dict, ('ipj','jqk','PQpq'), ('iPj','jQk'))
            max_extent = min(mid_extent, self.max_extents[i])
            self[i], _, self[j] = gate_decompose('ipj,jqk,PQpq->iPj,jQk', self[i], self[j], operand, max_extent=max_extent, **self.svd_options)
        else:
            # insert swap gates recursively, alternating from both ends so the
            # two sites meet in the middle
            swaps = []
            while (j != i+1):
                self._swap(i, 'right', False)
                swaps.append([i, 'right'])
                i += 1
                if (j == i+1):
                    break
                self._swap(j, 'left', False)
                swaps.append([j, 'left'])
                j -= 1
            self._apply_gate_2q(i, j, operand)
            # undo the swaps to restore the original site ordering
            for x, direction in swaps[::-1]:
                self._swap(x, direction)

    def apply_gate(self, qudits, operand):
        """Apply a 1- or 2-qudit gate given qudit ids (mapped to site indices).

        Raises NotImplementedError for gates acting on more than two qudits.
        """
        sites = [self.qudits.index(q) for q in qudits]
        if len(sites) == 1:
            return self._apply_gate_1q(*sites, operand)
        elif len(sites) == 2:
            return self._apply_gate_2q(*sites, operand)
        else:
            raise NotImplementedError("Only single- and two- qubit gate supported")

    def apply_mpo(self, qudits, mpo_operands):
        """Apply an MPO to the given qudits via iterative contract-decompose.

        Swaps are inserted when MPO sites are not adjacent; ``qudits_order``
        tracks the logical qudit living at each physical site so all swaps can
        be reverted at the end. When ``self.mpo_application == 'exact'`` every
        split keeps the full extent and a final minimal compression pass is run.
        """
        # map from site to the associated qudit id
        qudits_order = list(range(self.n))
        sites = [self.qudits.index(q) for q in qudits]
        # Step 1: Boundary Contraction
        #
        # ---X---Y---Z---W---            Y---Z---W---
        #    |   |   |   |     ->     | /   |   |   |
        # ---A---B---C---D---      ---a---B---C---D---
        #
        a = sites[0]
        self[a] = self.backend.einsum('ipj,pxP->iPjx', self[a], mpo_operands[0])
        exact_mpo = self.mpo_application == 'exact'
        svd_options = {'partition': 'UV'} if exact_mpo else self.svd_options.copy()
        def record_swap(i, direction):
            # utility function to record the current qudits_order after swap operation, no computation is done
            j = i + 1 if direction == 'right' else i - 1
            tmp = qudits_order[i]
            qudits_order[i] = qudits_order[j]
            qudits_order[j] = tmp
        # Step 2: Contract-Decompose for all remaining sites, swaps inserted if needed
        #
        #     Y---Z---W---                 Z---W---
        #  | /   |   |   |     ->      | | /  |   |
        # ---a---B---C---D---       ---a---b---C---D---
        num_mpo_sites = len(qudits)
        for i in range(num_mpo_sites-1):
            operand = mpo_operands[i+1]
            qa = sites[i]
            qb = sites[i+1]
            q0 = qudits_order.index(qa)
            q1 = qudits_order.index(qb)
            forward_order = q1 > q0
            q0, q1 = sorted([q0, q1])
            # bring the two MPO sites adjacent, alternating swaps from both ends
            while (q1 != q0 + 1):
                self._swap(q0, 'right', exact_mpo)
                record_swap(q0, 'right')
                q0 += 1
                if (q1 == q0+1):
                    break
                self._swap(q1, 'left', exact_mpo)
                record_swap(q1, 'left')
                q1 -= 1
            if not forward_order:
                # revert to original ordering
                q0, q1 = q1, q0
            explict_swap = False
            if i != num_mpo_sites - 2:
                q2 = qudits_order.index(sites[i+2])
                dis_02 = abs(q2-q0)
                dis_12 = abs(q2-q1)
                # if next mpo tensor is closer to q0, use contract decompose to perform swap
                explict_swap = dis_02 < dis_12
                if explict_swap:
                    record_swap(q0, 'right' if q1 > q0 else 'left')
            # choose einsum/decompose expressions based on sweep direction,
            # whether this is the last MPO tensor (no outgoing MPO bond 'y'),
            # and whether the decompose also performs the swap
            if forward_order:
                if i == num_mpo_sites - 2:
                    expr = 'iPjx,jql,xqQ->iPQl'
                    decompose_expr = 'iPQl->iPx,xQl'
                else:
                    expr = 'iPjx,jql,xqyQ->iPQly'
                    decompose_expr = 'iPQly->iQjy,jPl' if explict_swap else 'iPQly->iPj,jQly'
            else:
                if i == num_mpo_sites - 2:
                    expr = 'jPlx,iqj,xqQ->iPQl'
                    decompose_expr = 'iPQl->jPl,iQj'
                else:
                    expr = 'jPlx,iqj,xqyQ->iPQly'
                    decompose_expr = 'iPQly->jQly,iPj' if explict_swap else 'iPQly->jPl,iQjy'
            tmp = self.backend.einsum(expr, self[q0], self[q1], operand)
            inputs = expr.split('->')[0].split(',')
            outputs = decompose_expr.split('->')[1].split(',')
            size_dict = dict(zip(inputs[0], self[q0].shape))
            size_dict.update(dict(zip(inputs[1], self[q1].shape)))
            size_dict.update(dict(zip(inputs[2], operand.shape)))
            mid_extent = compute_mid_extent(size_dict, inputs, outputs)
            if exact_mpo:
                max_extent = mid_extent
            else:
                max_extent = min(mid_extent, self.max_extents[min(q0,q1)])

            self[q0], _, self[q1] = tensor_decompose(decompose_expr, tmp, method='svd', max_extent=max_extent, **svd_options)

        # swap operations to revert back to the original ordering
        for i in range(self.n):
            site = qudits_order.index(i)
            while (site != i):
                self._swap(site, 'left', exact_mpo)
                record_swap(site, 'left')
                site -= 1
        # sanity check to make sure original ordering is obtained
        assert qudits_order == list(range(self.n))
        if self.mpo_application == 'exact':
            # when MPO is applied in exact fashion, bond may exceed the maximal required
            self._minimal_compression(min(qudits), max(qudits), False)

    @classmethod
    def from_converter(cls, converter, **kwargs):
        """Build an MPS by replaying a CircuitToEinsum converter's gate sequence.

        Returns None if the circuit contains a gate on more than two qubits
        (unsupported by this reference MPS); callers check for None.
        """
        dtype = get_dtype_name(converter.dtype)
        mps = cls(converter.qubits, converter.backend.__name__, dtype=dtype, **kwargs)
        for operand, qubits in converter.gates:
            if len(qubits) > 2:
                return None
            mps.apply_gate(qubits, operand)
        mps.canonicalize()
        return mps

    @classmethod
    def from_circuit(cls, circuit, backend, dtype='complex128', **kwargs):
        """Build an MPS directly from a circuit via CircuitToEinsum."""
        converter = CircuitToEinsum(circuit, backend=backend, dtype=dtype)
        return cls.from_converter(converter, **kwargs)

    @classmethod
    def from_factory(cls, factory, **kwargs):
        """Build an MPS by replaying a state factory's (op, modes, control_info) sequence.

        Controlled ops are lowered to an MPO over the union of control and
        target modes via the factory's compute_ct_mpo_tensors helper.
        """
        qudits = list(range(factory.num_qudits))
        if factory.initial_mps_dim is not None:
            mps_tensors = factory.get_initial_state()
        else:
            mps_tensors = None
        qudit_dims = factory.state_dims
        mps = cls(qudits, factory.backend, qudit_dims=qudit_dims, mps_tensors=mps_tensors,
                  dtype=factory.dtype, **kwargs)
        for op, modes, control_info in factory.sequence:
            if control_info is None:
                if isinstance(op, (list, tuple)):
                    # MPO
                    mps.apply_mpo(modes, op)
                else:
                    # Gate
                    mps.apply_gate(modes, op)
            else:
                ctrl_modes, ctrl_vals = control_info
                ct_tensors = factory.compute_ct_mpo_tensors(ctrl_modes, ctrl_vals, modes, op)
                new_modes = modes + ctrl_modes
                new_modes = sorted(new_modes)
                mps.apply_mpo(new_modes, ct_tensors)
        mps.canonicalize()
        return mps

    def print(self):
        # debug helper: print the virtual bond extents between neighboring sites
        print([o.shape[2] for o in self.mps_tensors[:-1]])

    def canonicalize(self):
        """Enforce bond-extent limits and (optionally) a canonical center.

        With no canonical center, only bonds exceeding their max extent are
        truncated in a single right sweep. Otherwise sweep right up to the
        center and left down to it, leaving the state center-canonical.
        """
        center = self.canonical_center
        if center is None:
            for i in range(self.n-1):
                shared_e = self[i].shape[-1]
                max_e = self.max_extents[i]
                if (shared_e > max_e):
                    self._canonicalize_site(i, 'right', max_extent=max_e, **self.svd_options)
            return

        for i in range(center):
            self._canonicalize_site(i, 'right', max_extent=self.max_extents[i], **self.svd_options)

        for i in range(self.n-1, center, -1):
            self._canonicalize_site(i, 'left', max_extent=self.max_extents[i-1], **self.svd_options)


class ExactStateAPITester(BaseTester):
    """Cross-checks exact (untruncated) NetworkState/MPS engines against a
    dense einsum reference engine built from the same circuit."""

    @classmethod
    def from_circuit(cls, circuit, dtype, mps_options=EMPTY_DICT, backend='cupy', handle=None, rng=DEFAULT_RNG, **kwargs):
        """Construct the tester: one ConverterComputeEngine reference plus
        NetworkState targets (TN and exact-MPS configs) and, when the circuit
        is MPS-compatible, the pure-Python MPS reference above.

        Raises ValueError if mps_options carries anything besides 'algorithm',
        since approximate settings would invalidate exact comparison.
        """
        converter = CircuitToEinsum(circuit, dtype, backend=backend)
        reference_engine = ConverterComputeEngine(converter, backend=backend, handle=handle, sample_rng=rng)
        target_engines = []
        algorithm = mps_options.pop('algorithm', 'gesvd')
        if mps_options:
            raise ValueError("Exact tests should exclude approximate settings")
        #TODO: add sv type TN computation
        for config in (TNConfig(), MPSConfig(algorithm=algorithm, mpo_application='exact')):
            if isinstance(config, MPSConfig) and not is_converter_mps_compatible(converter):
                # NOTE: MPS simulation are only functioning if no multicontrol gates exist in the circuit.
                continue
            engine = NetworkState.from_converter(converter, config=config, options={'handle': handle})
            target_engines.append(engine)
        mps_cp_engine = MPS.from_converter(converter, sample_rng=rng, mpo_application='exact')
        if mps_cp_engine is not None:
            target_engines.append(mps_cp_engine)
        return cls(reference_engine, target_engines, converter=converter, rng=rng, **kwargs)

    def test_misc(self):
        # an exact simulation of a unitary circuit must preserve norm 1
        norm = self.reference_engine.compute_norm()
        assert np.allclose(norm, 1, **self.reference_engine.tolerance)

    def test_expectation(self):
        """Compare Pauli-string expectation values across all engines.

        Only runs for complex dtypes (Pauli Y is complex-valued).
        """
        if self.reference_engine.dtype.startswith('complex'):
            pauli_strings = get_random_pauli_strings(self.n_qubits, 6, rng=self.rng)
            expec1 = self.reference_engine.compute_expectation(pauli_strings)
            for engine in self.target_engines:
                if isinstance(engine, (NetworkState, MPS)):
                    expec2 = engine.compute_expectation(pauli_strings)
                    message = f"{engine.__class__.__name__} maxDiff={abs(expec1-expec2)}"
                    assert np.allclose(expec1, expec2, **engine.tolerance), message
                else:
                    raise ValueError


class ApproximateMPSTester(ExactStateAPITester):
    """Cross-checks an approximate (truncated) NetworkState MPS simulation
    against the pure-Python MPS reference using the same truncation options."""

    @classmethod
    def from_converter(cls, converter, mps_options, handle=None, rng=DEFAULT_RNG, **kwargs):
        """Tester with an MPS reference built from a circuit converter and a
        NetworkState target configured with the full mps_options."""
        reference_svd_options = {}
        for key, value in mps_options.items():
            # not all options are supported in reference MPS
            if key in MPS_VALID_CONFIGS:
                reference_svd_options[key] = value

        reference_engine = MPS.from_converter(converter, sample_rng=rng, **reference_svd_options)
        target_engines = [NetworkState.from_converter(converter, config=mps_options, options={'handle': handle})]
        return cls(reference_engine, target_engines, converter=converter, rng=rng, **kwargs)

    @classmethod
    def from_factory(cls, factory, mps_options, handle=None, rng=DEFAULT_RNG, **kwargs):
        """Same as from_converter, but driven by a state factory (supports
        qudit dimensions beyond 2); no converter is attached."""
        reference_svd_options = {}
        for key, value in mps_options.items():
            # not all options are supported in reference MPS
            if key in MPS_VALID_CONFIGS:
                reference_svd_options[key] = value

        reference_engine = MPS.from_factory(factory, sample_rng=rng, **reference_svd_options)
        target_engines = [factory.to_network_state(config=mps_options, options={'handle': handle})]
        return cls(reference_engine, target_engines, converter=None, state_dims=factory.state_dims, rng=rng, **kwargs)

    def test_sampling(self):
        super().test_sampling()
        # fixed-seed sampling must be reproducible for NetworkState engines
        for engine in self.all_engines:
            if isinstance(engine, NetworkState):
                samples_0 = engine.compute_sampling(88, seed=23)
                samples_1 = engine.compute_sampling(88, seed=23)
                # determinism check
                assert samples_0 == samples_1

    def test_expectation(self):
        # Pauli-string comparison only makes sense on qubit (dim-2) systems
        if self.is_qubit_system:
            super().test_expectation()

        # general network-operator expectation works for arbitrary qudit dims
        operator = get_random_network_operator(self.state_dims, backend=self.backend.__name__, rng=self.rng, dtype=self.reference_engine.dtype)
        expec1 = self.reference_engine.compute_expectation(operator)
        for engine in self.target_engines:
            for _ in get_engine_iters(engine):
                expec2 = engine.compute_expectation(operator)
                message = f"{engine.__class__.__name__} maxDiff={abs(expec1-expec2)}"
                assert np.allclose(expec1, expec2, **engine.tolerance), message

    def test_misc(self):
        # both reference and target MPS tensors must satisfy the requested canonical form
        canonical_center = self.reference_engine.canonical_center
        verify_mps_canonicalization(self.reference_engine.mps_tensors, canonical_center)
        mps_tensors = self.target_engines[0].compute_output_state()
        verify_mps_canonicalization(mps_tensors, canonical_center)


class NetworkStateFunctionalityTester:
    """Basic functionality tests"""

    def __init__(self, state, backend, is_normalized=False):
        # state: the NetworkState under test; backend: e.g. 'cupy', 'torch-cpu'
        self.state = state
        self.backend = backend
        # package name is the backend string before any '-' suffix (e.g. 'torch-cpu' -> 'torch')
        self.package = self.backend.split('-')[0]
        self.is_normalized = is_normalized
        self.n = state.n
        self.dtype = state.dtype
        self.tolerance = {'atol': atol_mapper[self.dtype],
                          'rtol': rtol_mapper[self.dtype]}

    def _check_tensor(self, tensor, shape=None):
        """Assert a returned tensor has the expected shape (if given), package,
        dtype, and device placement for the configured backend."""
        if shape is not None:
            assert tensor.shape == tuple(shape)
        assert infer_object_package(tensor) == self.package
        assert str(tensor.dtype).split('.')[-1] == self.dtype
        o = tensor_wrapper.wrap_operand(tensor)
        # CPU backends report no device id; GPU backends must match the state's device
        assert o.device_id == (None if self.backend in {'numpy', 'torch-cpu'} else self.state.device_id)

    def run_tests(self):
        """Smoke-test every compute_* API of the state and cross-check simple invariants."""
        # SV
        if isinstance(self.state.config, MPSConfig):
            mps_tensors = self.state.compute_output_state()
            for o in mps_tensors:
                self._check_tensor(o)

        sv = self.state.compute_state_vector()
        self._check_tensor(sv, shape=self.state.state_mode_extents)

        # amplitude
        amplitude = self.state.compute_amplitude('0' * self.n)
        # NOTE(review): the result of this allclose is discarded — almost
        # certainly a missing `assert`; flagged only, not changed here.
        np.allclose(sv.ravel()[0].item(), amplitude, **self.tolerance)

        # batched_amplitude
        fixed = {0:1, 1:0} if self.n > 2 else {0:1}
        batched_amplitude = self.state.compute_batched_amplitudes(fixed)
        self._check_tensor(batched_amplitude, shape=self.state.state_mode_extents[len(fixed):])

        # RDM
        rdm = self.state.compute_reduced_density_matrix((0, ), fixed={1: 0})
        self._check_tensor(rdm, shape=(self.state.state_mode_extents[0], ) * 2)

        # sampling
        modes = (0, 1) if self.n >= 2 else (0, )
        samples_0 = self.state.compute_sampling(123, modes=modes, seed=3)
        samples_1 = self.state.compute_sampling(123, modes=modes, seed=3)
        assert samples_0 == samples_1  # deterministic checking
        assert all([len(bitstring) == len(modes) for bitstring in samples_0.keys()])
        assert sum(samples_0.values()) == 123

        norm_ref = 1 if self.is_normalized else (abs(sv) ** 2).sum().item()

        # expectation
        # identity-Pauli expectation equals the norm; only valid on qubit systems with complex dtype
        if (set(self.state.state_mode_extents) == set([2, ])) and self.state.dtype.startswith('complex'):
            expectation = self.state.compute_expectation('I' * self.n)
            assert np.allclose(expectation, norm_ref, **self.tolerance)

        # norm
        norm = self.state.compute_norm()
        assert np.allclose(norm, norm_ref, **self.tolerance)