
Commit 04c3145
Merge branch 'chhwang/minor-updates' into chhwang/llama
chhwang committed Sep 18, 2023
2 parents 7c29085 + 41b7ac2 commit 04c3145
Showing 7 changed files with 15 additions and 8 deletions.
6 changes: 5 additions & 1 deletion ark/gpu/gpu_compile.cc
@@ -12,6 +12,7 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include "cpu_timer.h"
 #include "env.h"
 #include "gpu/gpu_compile.h"
 #include "gpu/gpu_logging.h"
@@ -218,7 +219,8 @@ const string gpu_compile(const vector<string> &codes,
 "-o " << item.second << ".cubin "
 << cu_file_path << " 2>&1";
 // clang-format on
-LOG(INFO, "Compiling ", cu_file_path);
+double start = cpu_timer();
+LOG(INFO, "Compiling: ", cu_file_path);
 LOG(DEBUG, exec_cmd.str());
 // Run the command.
 array<char, 4096> buffer;
@@ -235,6 +237,8 @@
 if (exec_print_str.size() > 0) {
 LOG(ERROR, endl, exec_print_str, endl);
 }
+LOG(INFO, "Compile succeed: ", cu_file_path, " (",
+cpu_timer() - start, " seconds)");
 });
 string cu_file_path = items[0].second + ".cu";
 string cubin_file_path = items[0].second + ".cubin";
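The timing added above depends on a cpu_timer() helper pulled in through the new cpu_timer.h include. That header is not part of this commit, so the following is only a minimal sketch of the assumed behavior: a function returning wall-clock seconds as a double, so that cpu_timer() - start measures the nvcc invocation in seconds.

```cpp
// Minimal sketch of a cpu_timer()-style helper. ARK's real ark/cpu_timer.h is
// not shown in this diff; this version only assumes the observable behavior
// implied by the call sites: monotonic wall-clock time in seconds as a double.
#include <chrono>

double cpu_timer() {
    using clock = std::chrono::steady_clock;
    // Seconds since an arbitrary (but monotonic) epoch; only differences are
    // meaningful, e.g. cpu_timer() - start for an elapsed compile time.
    return std::chrono::duration<double>(clock::now().time_since_epoch()).count();
}
```

A steady (monotonic) clock is assumed here so that system clock adjustments cannot make the reported compile time negative.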
1 change: 1 addition & 0 deletions ark/gpu/gpu_mgr.cc
@@ -371,6 +371,7 @@ void GpuMgrCtx::freeze()
 
 //
 if (total_bytes > 0) {
+LOG(INFO, "Allocating ", total_bytes, " bytes of GPU memory");
 this->data_mem.init(total_bytes);
 // init the data mem
 CULOG(cuMemsetD32(this->data_mem.ref(), 0, total_bytes >> 2));
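For context, the new log line reports the size that the existing code below it then allocates and zero-fills. The zero-fill uses cuMemsetD32, which counts 32-bit words rather than bytes, hence the total_bytes >> 2. The standalone driver-API sketch below illustrates only that relationship; it is not ARK's GpuMem/CULOG code, and all local names are illustrative.

```cpp
// Standalone CUDA driver-API sketch of the allocate-then-zero pattern seen in
// GpuMgrCtx::freeze(). Only the cuMemsetD32(ptr, 0, bytes >> 2) relationship
// mirrors the ARK code above.
#include <cuda.h>
#include <cstdio>

#define CHECK(call)                                                      \
    do {                                                                 \
        CUresult err_ = (call);                                          \
        if (err_ != CUDA_SUCCESS) {                                      \
            std::fprintf(stderr, "%s failed (%d)\n", #call, (int)err_);  \
            return 1;                                                    \
        }                                                                \
    } while (0)

int main() {
    CHECK(cuInit(0));
    CUdevice dev;
    CHECK(cuDeviceGet(&dev, 0));
    CUcontext ctx;
    CHECK(cuCtxCreate(&ctx, 0, dev));

    size_t total_bytes = 1 << 20;  // example size, assumed a multiple of 4
    CUdeviceptr buf;
    CHECK(cuMemAlloc(&buf, total_bytes));
    // cuMemsetD32 takes a count of 32-bit values, so divide the byte size by 4.
    CHECK(cuMemsetD32(buf, 0, total_bytes >> 2));

    CHECK(cuMemFree(buf));
    CHECK(cuCtxDestroy(ctx));
    return 0;
}
```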
2 changes: 1 addition & 1 deletion ark/ops/ops_layernorm.cc
@@ -12,7 +12,7 @@ extern const OpConfigMap LayernormConfigMap;
 LayernormOp::LayernormOp(OpPrecType prec_type, Tensor *input, Tensor *output,
 const std::string &name)
 : Op{OP_LAYERNORM, prec_type, {input}, {output}, {},
-name, &LayernormConfigMap, -1}
+name, &LayernormConfigMap, -1, true}
 {
 }

2 changes: 1 addition & 1 deletion ark/ops/ops_rmsnorm.cc
@@ -12,7 +12,7 @@ extern const OpConfigMap LayernormConfigMap;
 RMSnormOp::RMSnormOp(OpPrecType prec_type, Tensor *input, Tensor *output,
 const std::string &name)
 : Op{OP_RMSNORM, prec_type, {input}, {output}, {},
-name, &LayernormConfigMap, -1}
+name, &LayernormConfigMap, -1, true}
 {
 }

6 changes: 4 additions & 2 deletions ark/ops/ops_sendrecv_mm.cc
@@ -20,7 +20,8 @@ SendMMOp::SendMMOp(OpPrecType prec_type, Tensor *input, Tensor *recvbuf,
 {{id, gpu_dst, bytes}},
 name,
 &SendRecvMMConfigMap,
--1}
+-1,
+true}
 {
 }

@@ -81,7 +82,8 @@ RecvMMOp::RecvMMOp(OpPrecType prec_type, Tensor *input, Tensor *recvbuf,
 {{id, gpu_src, bytes}},
 name,
 &SendRecvMMConfigMap,
--1}
+-1,
+true}
 {
 }

4 changes: 2 additions & 2 deletions ark/ops/ops_softmax.cc
@@ -11,8 +11,8 @@ extern const OpConfigMap SoftmaxConfigMap;
 
 SoftmaxOp::SoftmaxOp(OpPrecType prec_type, Tensor *input, Tensor *output,
 const std::string &name)
-: Op{OP_SOFTMAX, prec_type, {input}, {output},
-{}, name, &SoftmaxConfigMap, -1}
+: Op{OP_SOFTMAX, prec_type, {input}, {output}, {},
+name, &SoftmaxConfigMap, -1, true}
 {
 }

2 changes: 1 addition & 1 deletion ark/ops/ops_transpose.cc
@@ -12,7 +12,7 @@ extern const OpConfigMap TransposeConfigMap;
 TransposeOp::TransposeOp(OpPrecType prec_type, Tensor *input, Tensor *output,
 int tp_type, const std::string &name)
 : Op{OP_TRANSPOSE, prec_type, {input}, {output}, {{tp_type}},
-name, &TransposeConfigMap, -1}
+name, &TransposeConfigMap, -1, true}
 {
 }

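Every operator change in this commit is the same one-line edit: the derived constructor now forwards an extra trailing true to the base Op initializer after the existing -1. The base Op constructor itself is not shown in this diff, so the self-contained example below only mirrors the shape of the change with hypothetical names (BaseOp, DemoOp, flag); it is not ARK's Op class.

```cpp
// Self-contained illustration of the call-site pattern used across the
// operator files above: a derived type forwards one extra trailing bool to its
// base-class initializer. All names are hypothetical; only the shape of the
// change (appending `true` after the `-1`) mirrors this commit.
#include <iostream>
#include <string>

struct BaseOp {
    BaseOp(const std::string &name, int gran_lev, bool flag = false)
        : name_(name), gran_lev_(gran_lev), flag_(flag) {}
    std::string name_;
    int gran_lev_;
    bool flag_;
};

struct DemoOp : BaseOp {
    // Before: BaseOp{name, -1}   After: BaseOp{name, -1, true}
    explicit DemoOp(const std::string &name) : BaseOp{name, -1, true} {}
};

int main() {
    DemoOp op{"demo"};
    std::cout << op.name_ << " flag=" << std::boolalpha << op.flag_ << '\n';
    return 0;
}
```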
