diff --git a/examples/llama/generator.py b/examples/llama/generator.py
index 17096af09..85af8e2cf 100644
--- a/examples/llama/generator.py
+++ b/examples/llama/generator.py
@@ -49,11 +49,7 @@ def __init__(
 
         self.runtime: ark.Runtime = None
 
-    def launch(
-        self,
-        pth_path: str,
-        tok_path: str,
-    ):
+    def launch(self, pth_path: str, tok_path: str):
         # Load a pretrained tokenizer
         self.tokenizer = Tokenizer(model_path=tok_path)
         self.args.vocab_size = self.tokenizer.n_words
@@ -163,9 +159,7 @@ def run(self, prompt: str):
 
     gen.launch(args.pth_path, args.tok_path)
 
-    prompt_list = [
-        "Where is the capital of France?",
-    ]
+    prompt_list = ["Where is the capital of France?"]
     for i, prompt in enumerate(prompt_list):
         output = gen.run(prompt)
         print(f"---\nPrompt[{i}]: {prompt}\nOutput[{i}]: {output}")
diff --git a/examples/llama/model.py b/examples/llama/model.py
index c19630077..611295748 100644
--- a/examples/llama/model.py
+++ b/examples/llama/model.py
@@ -204,8 +204,7 @@ def forward(self, x):
         for i in range(len(output_shape)):
             output_shape_bytes *= output_shape[i]
         output_parallel_reshape = ark.reshape(
-            output_parallel,
-            [output_shape_bytes],
+            output_parallel, [output_shape_bytes]
         )
         output_reshape = ark.all_reduce(
             output_parallel_reshape, self.local_rank, self.world_size
diff --git a/python/ark/data_type.py b/python/ark/data_type.py
index e6691ecac..d6dd0b70b 100644
--- a/python/ark/data_type.py
+++ b/python/ark/data_type.py
@@ -7,34 +7,13 @@
 
 
 _REGISTRY_DATA_TYPE = {
-    "fp32": {
-        "np": numpy.float32,
-        "doc": """32-bit floating point.""",
-    },
-    "fp16": {
-        "np": numpy.float16,
-        "doc": """16-bit floating point.""",
-    },
-    "bf16": {
-        "np": None,
-        "doc": """bfloat16 floating point.""",
-    },
-    "int32": {
-        "np": numpy.int32,
-        "doc": """32-bit signed integer.""",
-    },
-    "uint32": {
-        "np": numpy.uint32,
-        "doc": """32-bit unsigned integer.""",
-    },
-    "int8": {
-        "np": numpy.int8,
-        "doc": """8-bit signed integer.""",
-    },
-    "uint8": {
-        "np": numpy.uint8,
-        "doc": """8-bit unsigned integer.""",
-    },
+    "fp32": {"np": numpy.float32, "doc": """32-bit floating point."""},
+    "fp16": {"np": numpy.float16, "doc": """16-bit floating point."""},
+    "bf16": {"np": None, "doc": """bfloat16 floating point."""},
+    "int32": {"np": numpy.int32, "doc": """32-bit signed integer."""},
+    "uint32": {"np": numpy.uint32, "doc": """32-bit unsigned integer."""},
+    "int8": {"np": numpy.int8, "doc": """8-bit signed integer."""},
+    "uint8": {"np": numpy.uint8, "doc": """8-bit unsigned integer."""},
     "byte": {
         "np": numpy.ubyte,
         "doc": """
diff --git a/python/ark/model.py b/python/ark/model.py
index 9c8e4c848..97c8b9fdb 100644
--- a/python/ark/model.py
+++ b/python/ark/model.py
@@ -194,9 +194,7 @@ def reshape(
 
 @register_op
 def identity(
-    input: Tensor,
-    deps: List[Tensor] = [],
-    name: str = "identity",
+    input: Tensor, deps: List[Tensor] = [], name: str = "identity"
 ) -> Tensor:
     """
     Returns an identical tensor of `input` with execution dependencies `deps`.
@@ -214,10 +212,7 @@ def identity(
 
 @register_op
 def sharding(
-    input: Tensor,
-    axis: int,
-    dim_per_shard: int,
-    name: str = "sharding",
+    input: Tensor, axis: int, dim_per_shard: int, name: str = "sharding"
 ) -> List[Tensor]:
     """
     Shard `input` along `axis` into `dim_per_shard`-dimensional shards.
@@ -239,10 +234,7 @@ def sharding(
 
 @register_op
 def reduce_sum(
-    input: Tensor,
-    axis: int,
-    output: Tensor = None,
-    name: str = "reduce_sum",
+    input: Tensor, axis: int, output: Tensor = None, name: str = "reduce_sum"
 ) -> Tensor:
     """
     Performs reduction along the `axis` of the `input` tensor and
@@ -260,10 +252,7 @@ def reduce_sum(
 
 @register_op
 def reduce_mean(
-    input: Tensor,
-    axis: int,
-    output: Tensor = None,
-    name: str = "reduce_mean",
+    input: Tensor, axis: int, output: Tensor = None, name: str = "reduce_mean"
 ) -> Tensor:
     """
     Performs reduction along the `axis` of the `input` tensor and
@@ -279,10 +268,7 @@ def reduce_mean(
 
 @register_op
 def reduce_max(
-    input: Tensor,
-    axis: int,
-    output: Tensor = None,
-    name: str = "reduce_max",
+    input: Tensor, axis: int, output: Tensor = None, name: str = "reduce_max"
 ) -> Tensor:
     """
     Performs reduction along the `axis` of the `input` tensor and
@@ -298,9 +284,7 @@ def reduce_max(
 
 @register_op
 def layernorm(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "layernorm",
+    input: Tensor, output: Tensor = None, name: str = "layernorm"
 ) -> Tensor:
     """
     Applies layer normalization to the `input` tensor and returns
@@ -316,9 +300,7 @@ def layernorm(
 
 @register_op
 def rmsnorm(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "rmsnorm",
+    input: Tensor, output: Tensor = None, name: str = "rmsnorm"
 ) -> Tensor:
     """
     Applies RMS (Root Mean Square Layer Normalization) normalization
@@ -334,9 +316,7 @@ def rmsnorm(
 
 @register_op
 def softmax(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "softmax",
+    input: Tensor, output: Tensor = None, name: str = "softmax"
 ) -> Tensor:
     """
     Applies softmax to the `input` tensor on the last dimension.
@@ -351,10 +331,7 @@ def softmax(
 
 @register_op
 def transpose(
-    input: Tensor,
-    perm: list,
-    output: Tensor = None,
-    name: str = "transpose",
+    input: Tensor, perm: list, output: Tensor = None, name: str = "transpose"
 ) -> Tensor:
     """
     Transposes the `input` tensor according to the given `perm` permutation.
@@ -453,10 +430,7 @@ def im2col(
 
 @register_op
 def scale(
-    input: Tensor,
-    val: float,
-    output: Tensor = None,
-    name: str = "scale",
+    input: Tensor, val: float, output: Tensor = None, name: str = "scale"
 ) -> Tensor:
     """
     Multiplies the `input` tensor by a scalar `val`, element-wise.
@@ -465,21 +439,12 @@ def scale(
     """
     if output is not None:
         output = output._tensor
-    _tensor = Model.get_model().scale(
-        input._tensor,
-        val,
-        output,
-        name,
-    )
+    _tensor = Model.get_model().scale(input._tensor, val, output, name)
     return Tensor(_tensor)
 
 
 @register_op
-def exp(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "exp",
-) -> Tensor:
+def exp(input: Tensor, output: Tensor = None, name: str = "exp") -> Tensor:
     """
     Calculates the exponential of the `input` tensor, element-wise.
     Usage:
@@ -492,11 +457,7 @@
 
 
 @register_op
-def sqrt(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "sqrt",
-) -> Tensor:
+def sqrt(input: Tensor, output: Tensor = None, name: str = "sqrt") -> Tensor:
     """
     Calculates the square root of the `input` tensor, element-wise.
     Usage:
@@ -510,10 +471,7 @@
 
 @register_op
 def rope(
-    input: Tensor,
-    other: Tensor,
-    output: Tensor = None,
-    name: str = "rope",
+    input: Tensor, other: Tensor, output: Tensor = None, name: str = "rope"
 ) -> Tensor:
     """
     Performs rotary position embedding (RoPE) on the `input` tensor
@@ -527,11 +485,7 @@ def rope(
 
 
 @register_op
-def relu(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "relu",
-) -> Tensor:
+def relu(input: Tensor, output: Tensor = None, name: str = "relu") -> Tensor:
     """
     Applies the ReLU activation function to the `input` tensor,
     element-wise.
@@ -545,11 +499,7 @@ def relu(
 
 
 @register_op
-def gelu(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "gelu",
-) -> Tensor:
+def gelu(input: Tensor, output: Tensor = None, name: str = "gelu") -> Tensor:
     """
     Applies the Gaussian Error Linear Unit (GELU) activation
     function to the `input` tensor, element-wise. GELU is a smooth
@@ -566,9 +516,7 @@ def gelu(
 
 @register_op
 def sigmoid(
-    input: Tensor,
-    output: Tensor = None,
-    name: str = "sigmoid",
+    input: Tensor, output: Tensor = None, name: str = "sigmoid"
 ) -> Tensor:
     """
     Applies the Sigmoid activation function to the `input` tensor,
@@ -584,10 +532,7 @@ def sigmoid(
 
 @register_op
 def add(
-    input: Tensor,
-    other: Tensor,
-    output: Tensor = None,
-    name: str = "add",
+    input: Tensor, other: Tensor, output: Tensor = None, name: str = "add"
 ) -> Tensor:
     """
     Performs an element-wise addition operator between the `input`
@@ -603,10 +548,7 @@ def add(
 
 @register_op
 def sub(
-    input: Tensor,
-    other: Tensor,
-    output: Tensor = None,
-    name: str = "sub",
+    input: Tensor, other: Tensor, output: Tensor = None, name: str = "sub"
 ) -> Tensor:
     """
     Performs an element-wise subtraction operator between the `input`
@@ -622,10 +564,7 @@ def sub(
 
 @register_op
 def mul(
-    input: Tensor,
-    other: Tensor,
-    output: Tensor = None,
-    name: str = "mul",
+    input: Tensor, other: Tensor, output: Tensor = None, name: str = "mul"
 ) -> Tensor:
     """
     Performs an element-wise multiplication operator between the
@@ -641,10 +580,7 @@ def mul(
 
 @register_op
 def div(
-    input: Tensor,
-    other: Tensor,
-    output: Tensor = None,
-    name: str = "div",
+    input: Tensor, other: Tensor, output: Tensor = None, name: str = "div"
 ) -> Tensor:
     """
     Performs an element-wise division operator between the
@@ -660,11 +596,7 @@ def div(
 
 @register_op
 def send(
-    input: Tensor,
-    id: int,
-    dst_rank: int,
-    bytes: int = 0,
-    name: str = "send",
+    input: Tensor, id: int, dst_rank: int, bytes: int = 0, name: str = "send"
 ) -> Tensor:
     """
     Sends a tensor to a destination GPU (`dst_rank`). Multiple
@@ -679,33 +611,19 @@ def send(
 
     # on GPU1: tns = ark.recv(1, 0, bytes)
     """
-    _tensor = Model.get_model().send(
-        input._tensor,
-        id,
-        dst_rank,
-        bytes,
-        name,
-    )
+    _tensor = Model.get_model().send(input._tensor, id, dst_rank, bytes, name)
     return Tensor(_tensor)
 
 
 @register_op
 def send_done(
-    input: Tensor,
-    id: int,
-    dst_rank: int,
-    name: str = "send_done",
+    input: Tensor, id: int, dst_rank: int, name: str = "send_done"
 ) -> Tensor:
     """
     Blocks the execution until the corresponding 'send' operator
     with the specified `id` is completed.
""" - _tensor = Model.get_model().send_done( - input._tensor, - id, - dst_rank, - name, - ) + _tensor = Model.get_model().send_done(input._tensor, id, dst_rank, name) return Tensor(_tensor) @@ -724,13 +642,7 @@ def recv( """ if output is not None: output = output._tensor - _tensor = Model.get_model().recv( - id, - src_rank, - bytes, - output, - name, - ) + _tensor = Model.get_model().recv(id, src_rank, bytes, output, name) return Tensor(_tensor) @@ -756,12 +668,7 @@ def send_mm( output = output._tensor _tensor = Model.get_model().send_mm( - input._tensor, - id, - gpu_dst, - bytes, - output, - name, + input._tensor, id, gpu_dst, bytes, output, name ) return Tensor(_tensor) @@ -782,12 +689,7 @@ def recv_mm( if output is not None: output = output._tensor _tensor = Model.get_model().recv_mm( - input._tensor, - id, - gpu_src, - bytes, - output, - name, + input._tensor, id, gpu_src, bytes, output, name ) return Tensor(_tensor) @@ -825,11 +727,7 @@ def all_gather( """ output = [output_shard._tensor for output_shard in output] tensor_shards = Model.get_model().all_gather( - input._tensor, - rank, - world_size, - output, - name, + input._tensor, rank, world_size, output, name ) return [Tensor(_tensor) for _tensor in tensor_shards] @@ -854,11 +752,7 @@ def all_reduce( if output is not None: output = output._tensor _tensor = Model.get_model().all_reduce( - input._tensor, - rank, - world_size, - output, - name, + input._tensor, rank, world_size, output, name ) return Tensor(_tensor) @@ -874,28 +768,17 @@ def embedding( if output is not None: output = output._tensor _tensor = Model.get_model().embedding( - input._tensor, - weight._tensor, - output, - name, + input._tensor, weight._tensor, output, name ) return Tensor(_tensor) @register_op def cast( - input: Tensor, - dtype: DataType, - output: Tensor = None, - name: str = "cast", + input: Tensor, dtype: DataType, output: Tensor = None, name: str = "cast" ) -> Tensor: """Type casting.""" if output is not None: output = output._tensor - _tensor = Model.get_model().cast( - input._tensor, - dtype.ttype(), - output, - name, - ) + _tensor = Model.get_model().cast(input._tensor, dtype.ttype(), output, name) return Tensor(_tensor)