diff --git a/examples/llama/generator.py b/examples/llama/generator.py
index 17096af09..85af8e2cf 100644
--- a/examples/llama/generator.py
+++ b/examples/llama/generator.py
@@ -49,11 +49,7 @@ def __init__(
 
         self.runtime: ark.Runtime = None
 
-    def launch(
-        self,
-        pth_path: str,
-        tok_path: str,
-    ):
+    def launch(self, pth_path: str, tok_path: str):
         # Load a pretrained tokenizer
         self.tokenizer = Tokenizer(model_path=tok_path)
         self.args.vocab_size = self.tokenizer.n_words
@@ -163,9 +159,7 @@ def run(self, prompt: str):
 
     gen.launch(args.pth_path, args.tok_path)
 
-    prompt_list = [
-        "Where is the capital of France?",
-    ]
+    prompt_list = ["Where is the capital of France?"]
     for i, prompt in enumerate(prompt_list):
         output = gen.run(prompt)
         print(f"---\nPrompt[{i}]: {prompt}\nOutput[{i}]: {output}")
diff --git a/examples/llama/model.py b/examples/llama/model.py
index c19630077..611295748 100644
--- a/examples/llama/model.py
+++ b/examples/llama/model.py
@@ -204,8 +204,7 @@ def forward(self, x):
         for i in range(len(output_shape)):
             output_shape_bytes *= output_shape[i]
         output_parallel_reshape = ark.reshape(
-            output_parallel,
-            [output_shape_bytes],
+            output_parallel, [output_shape_bytes]
         )
         output_reshape = ark.all_reduce(
             output_parallel_reshape, self.local_rank, self.world_size
diff --git a/python/ark/data_type.py b/python/ark/data_type.py
index e6691ecac..d6dd0b70b 100644
--- a/python/ark/data_type.py
+++ b/python/ark/data_type.py
@@ -7,34 +7,13 @@
 
 
 _REGISTRY_DATA_TYPE = {
-    "fp32": {
-        "np": numpy.float32,
-        "doc": """32-bit floating point.""",
-    },
-    "fp16": {
-        "np": numpy.float16,
-        "doc": """16-bit floating point.""",
-    },
-    "bf16": {
-        "np": None,
-        "doc": """bfloat16 floating point.""",
-    },
-    "int32": {
-        "np": numpy.int32,
-        "doc": """32-bit signed integer.""",
-    },
-    "uint32": {
-        "np": numpy.uint32,
-        "doc": """32-bit unsigned integer.""",
-    },
-    "int8": {
-        "np": numpy.int8,
-        "doc": """8-bit signed integer.""",
-    },
-    "uint8": {
-        "np": numpy.uint8,
-        "doc": """8-bit unsigned integer.""",
-    },
+    "fp32": {"np": numpy.float32, "doc": """32-bit floating point."""},
+    "fp16": {"np": numpy.float16, "doc": """16-bit floating point."""},
+    "bf16": {"np": None, "doc": """bfloat16 floating point."""},
+    "int32": {"np": numpy.int32, "doc": """32-bit signed integer."""},
+    "uint32": {"np": numpy.uint32, "doc": """32-bit unsigned integer."""},
+    "int8": {"np": numpy.int8, "doc": """8-bit signed integer."""},
+    "uint8": {"np": numpy.uint8, "doc": """8-bit unsigned integer."""},
     "byte": {
         "np": numpy.ubyte,
         "doc": """
diff --git a/python/ark/model.py b/python/ark/model.py
index 9c8e4c848..97c8b9fdb 100644
--- a/python/ark/model.py
+++ b/python/ark/model.py
@@ -194,9 +194,7 @@ def reshape(
 
 @register_op
 def identity(
-    input: Tensor,
-    deps: List[Tensor] = [],
-    name: str = "identity",
+    input: Tensor, deps: List[Tensor] = [], name: str = "identity"
 ) -> Tensor:
     """
     Returns an identical tensor of `input` with execution dependencies `deps`.
@@ -214,10 +212,7 @@ def identity(
 
 @register_op
 def sharding(
-    input: Tensor,
-    axis: int,
-    dim_per_shard: int,
-    name: str = "sharding",
+    input: Tensor, axis: int, dim_per_shard: int, name: str = "sharding"
 ) -> List[Tensor]:
     """
     Shard `input` along `axis` into `dim_per_shard`-dimensional shards.
@@ -239,10 +234,7 @@ def sharding(
 
 @register_op
 def reduce_sum(
-    input: Tensor,
-    axis: int,
-    output: Tensor = None,
-    name: str = "reduce_sum",
+    input: Tensor, axis: int, output: Tensor = None, name: str = "reduce_sum"
 ) -> Tensor:
     """
     Performs reduction along the `axis` of the `input` tensor and
@@ -260,10 +252,7 @@ def reduce_sum(
 
 @register_op
 def reduce_mean(
-    input: Tensor,
-    axis: int,
-    output: Tensor = None,
-    name: str = "reduce_mean",
+    input: Tensor, axis: int, output: Tensor = None, name: str = "reduce_mean"
 ) -> Tensor:
     """
     Performs reduction along the `axis` of the `input` tensor and
@@ -279,10 +268,7 @@ def reduce_mean(
 
 @register_op
 def reduce_max(
-    input: Tensor,
-    axis: int,
-    output: Tensor = None,
-    name: str = "reduce_max",
+    input: Tensor, axis: int, output: Tensor = None, name: str = "reduce_max"
 ) -> Tensor:
     """
     Performs reduction along the `axis` of the `input` tensor and
@@ -298,9 +284,7 @@ def reduce_max(
 
 @register_op
 def layernorm(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "layernorm",
+    input: Tensor, output: Tensor = None, name: str = "layernorm"
 ) -> Tensor:
     """
     Applies layer normalization to the `input` tensor and returns
@@ -316,9 +300,7 @@ def layernorm(
 
 @register_op
 def rmsnorm(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "rmsnorm",
+    input: Tensor, output: Tensor = None, name: str = "rmsnorm"
 ) -> Tensor:
     """
     Applies RMS (Root Mean Square Layer Normalization) normalization
@@ -334,9 +316,7 @@ def rmsnorm(
 
 @register_op
 def softmax(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "softmax",
+    input: Tensor, output: Tensor = None, name: str = "softmax"
 ) -> Tensor:
     """
     Applies softmax to the `input` tensor on the last dimension.
@@ -351,10 +331,7 @@ def softmax(
 
 @register_op
 def transpose(
-    input: Tensor,
-    perm: list,
-    output: Tensor = None,
-    name: str = "transpose",
+    input: Tensor, perm: list, output: Tensor = None, name: str = "transpose"
 ) -> Tensor:
     """
     Transposes the `input` tensor according to the given `perm` permutation.
@@ -453,10 +430,7 @@ def im2col(
 
 @register_op
 def scale(
-    input: Tensor,
-    val: float,
-    output: Tensor = None,
-    name: str = "scale",
+    input: Tensor, val: float, output: Tensor = None, name: str = "scale"
 ) -> Tensor:
     """
     Multiplies the `input` tensor by a scalar `val`, element-wise.
@@ -465,21 +439,12 @@ def scale(
     """
     if output is not None:
         output = output._tensor
-    _tensor = Model.get_model().scale(
-        input._tensor,
-        val,
-        output,
-        name,
-    )
+    _tensor = Model.get_model().scale(input._tensor, val, output, name)
     return Tensor(_tensor)
 
 
 @register_op
-def exp(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "exp",
-) -> Tensor:
+def exp(input: Tensor, output: Tensor = None, name: str = "exp") -> Tensor:
     """
     Calculates the exponential of the `input` tensor, element-wise.
     Usage:
@@ -492,11 +457,7 @@
 
 
 @register_op
-def sqrt(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "sqrt",
-) -> Tensor:
+def sqrt(input: Tensor, output: Tensor = None, name: str = "sqrt") -> Tensor:
     """
     Calculates the square root of the `input` tensor, element-wise.
     Usage:
@@ -510,10 +471,7 @@
 
 @register_op
 def rope(
-    input: Tensor,
-    other: Tensor,
-    output: Tensor = None,
-    name: str = "rope",
+    input: Tensor, other: Tensor, output: Tensor = None, name: str = "rope"
 ) -> Tensor:
     """
     Performs rotary position embedding (RoPE) on the `input` tensor
@@ -527,11 +485,7 @@ def rope(
 
 
 @register_op
-def relu(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "relu",
-) -> Tensor:
+def relu(input: Tensor, output: Tensor = None, name: str = "relu") -> Tensor:
     """
     Applies the ReLU activation function to the `input` tensor,
     element-wise.
@@ -545,11 +499,7 @@ def relu(
 
 
 @register_op
-def gelu(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "gelu",
-) -> Tensor:
+def gelu(input: Tensor, output: Tensor = None, name: str = "gelu") -> Tensor:
     """
     Applies the Gaussian Error Linear Unit (GELU) activation
     function to the `input` tensor, element-wise. GELU is a smooth
@@ -566,9 +516,7 @@ def gelu(
 
 @register_op
 def sigmoid(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "sigmoid",
+    input: Tensor, output: Tensor = None, name: str = "sigmoid"
 ) -> Tensor:
     """
     Applies the Sigmoid activation function to the `input` tensor,
@@ -584,10 +532,7 @@ def sigmoid(
 
 @register_op
 def add(
-    input: Tensor,
-    other: Tensor,
-    output: Tensor = None,
-    name: str = "add",
+    input: Tensor, other: Tensor, output: Tensor = None, name: str = "add"
 ) -> Tensor:
     """
     Performs an element-wise addition operator between the `input`
@@ -603,10 +548,7 @@ def add(
 
 @register_op
 def sub(
-    input: Tensor,
-    other: Tensor,
-    output: Tensor = None,
-    name: str = "sub",
+    input: Tensor, other: Tensor, output: Tensor = None, name: str = "sub"
 ) -> Tensor:
     """
     Performs an element-wise subtraction operator between the `input`
@@ -622,10 +564,7 @@ def sub(
 
 @register_op
 def mul(
-    input: Tensor,
-    other: Tensor,
-    output: Tensor = None,
-    name: str = "mul",
+    input: Tensor, other: Tensor, output: Tensor = None, name: str = "mul"
 ) -> Tensor:
     """
     Performs an element-wise multiplication operator between the
@@ -641,10 +580,7 @@ def mul(
 
 @register_op
 def div(
-    input: Tensor,
-    other: Tensor,
-    output: Tensor = None,
-    name: str = "div",
+    input: Tensor, other: Tensor, output: Tensor = None, name: str = "div"
 ) -> Tensor:
     """
     Performs an element-wise division operator between the
@@ -660,11 +596,7 @@ def div(
 
 @register_op
 def send(
-    input: Tensor,
-    id: int,
-    dst_rank: int,
-    bytes: int = 0,
-    name: str = "send",
+    input: Tensor, id: int, dst_rank: int, bytes: int = 0, name: str = "send"
 ) -> Tensor:
     """
     Sends a tensor to a destination GPU (`dst_rank`). Multiple
@@ -679,33 +611,19 @@ def send(
 
     # on GPU1: tns = ark.recv(1, 0, bytes)
     """
-    _tensor = Model.get_model().send(
-        input._tensor,
-        id,
-        dst_rank,
-        bytes,
-        name,
-    )
+    _tensor = Model.get_model().send(input._tensor, id, dst_rank, bytes, name)
     return Tensor(_tensor)
 
 
 @register_op
 def send_done(
-    input: Tensor,
-    id: int,
-    dst_rank: int,
-    name: str = "send_done",
+    input: Tensor, id: int, dst_rank: int, name: str = "send_done"
 ) -> Tensor:
     """
     Blocks the execution until the corresponding 'send' operator
     with the specified `id` is completed.
""" - _tensor = Model.get_model().send_done( - input._tensor, - id, - dst_rank, - name, - ) + _tensor = Model.get_model().send_done(input._tensor, id, dst_rank, name) return Tensor(_tensor) @@ -724,13 +642,7 @@ def recv( """ if output is not None: output = output._tensor - _tensor = Model.get_model().recv( - id, - src_rank, - bytes, - output, - name, - ) + _tensor = Model.get_model().recv(id, src_rank, bytes, output, name) return Tensor(_tensor) @@ -756,12 +668,7 @@ def send_mm( output = output._tensor _tensor = Model.get_model().send_mm( - input._tensor, - id, - gpu_dst, - bytes, - output, - name, + input._tensor, id, gpu_dst, bytes, output, name ) return Tensor(_tensor) @@ -782,12 +689,7 @@ def recv_mm( if output is not None: output = output._tensor _tensor = Model.get_model().recv_mm( - input._tensor, - id, - gpu_src, - bytes, - output, - name, + input._tensor, id, gpu_src, bytes, output, name ) return Tensor(_tensor) @@ -825,11 +727,7 @@ def all_gather( """ output = [output_shard._tensor for output_shard in output] tensor_shards = Model.get_model().all_gather( - input._tensor, - rank, - world_size, - output, - name, + input._tensor, rank, world_size, output, name ) return [Tensor(_tensor) for _tensor in tensor_shards] @@ -854,11 +752,7 @@ def all_reduce( if output is not None: output = output._tensor _tensor = Model.get_model().all_reduce( - input._tensor, - rank, - world_size, - output, - name, + input._tensor, rank, world_size, output, name ) return Tensor(_tensor) @@ -874,28 +768,17 @@ def embedding( if output is not None: output = output._tensor _tensor = Model.get_model().embedding( - input._tensor, - weight._tensor, - output, - name, + input._tensor, weight._tensor, output, name ) return Tensor(_tensor) @register_op def cast( - input: Tensor, - dtype: DataType, - output: Tensor = None, - name: str = "cast", + input: Tensor, dtype: DataType, output: Tensor = None, name: str = "cast" ) -> Tensor: """Type casting.""" if output is not None: output = output._tensor - _tensor = Model.get_model().cast( - input._tensor, - dtype.ttype(), - output, - name, - ) + _tensor = Model.get_model().cast(input._tensor, dtype.ttype(), output, name) return Tensor(_tensor)