From 358a4053d3d2d849f2aca7764347678f0a36b5c8 Mon Sep 17 00:00:00 2001
From: "xiang song(charlie.song)" <classicxsong@gmail.com>
Date: Mon, 27 Nov 2023 10:41:42 -0800
Subject: [PATCH] [Document] Update doc about saved prediction results and
 embeddings (#665)

*Issue #, if available:*

*Description of changes:*


By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.

Co-authored-by: Xiang Song <xiangsx@amazon.com>
---
 docs/source/tutorials/quick-start.rst        | 18 ++++++------------
 python/graphstorm/gconstruct/remap_result.py | 10 +++++-----
 2 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/docs/source/tutorials/quick-start.rst b/docs/source/tutorials/quick-start.rst
index a1b319dacc..c0cb9cc41f 100644
--- a/docs/source/tutorials/quick-start.rst
+++ b/docs/source/tutorials/quick-start.rst
@@ -140,7 +140,7 @@ The inference command is:
                --save-prediction-path /tmp/ogbn-arxiv-nc/predictions/ \
                --restore-model-path /tmp/ogbn-arxiv-nc/models/epoch-7/
 
-This inference command predicts the classes of nodes in the testing set and saves the results, a Pytorch tensor file named "**predict-00000.pt**", into the ``/tmp/ogbn-arxiv-nc/predictions/`` folder.
+This inference command predicts the classes of nodes in the testing set and saves the results as a list of parquet files named **predict-00000_00000.parquet**, **predict-00001_00000.parquet**, ..., in the ``/tmp/ogbn-arxiv-nc/predictions/node/`` folder. Each parquet file has two columns: the ``nid`` column stores node IDs and the ``pred`` column stores prediction results.
 
 Inference on link prediction is similar as shown in the command below.
 
@@ -159,7 +159,7 @@ Inference on link prediction is similar as shown in the command below.
             --save-embed-path /tmp/ogbn-arxiv-lp/predictions/ \
             --restore-model-path /tmp/ogbn-arxiv-lp/models/epoch-2/
 
-The inference outputs include a **"emb_info.json"** metadata file and the prediction result file, **"embed-00000.pt"** in the ``/tmp/ogbn-arxiv-lp/predictions/`` folder.
+The inference outputs the saved embeddings as a list of parquet files named **embed-00000_00000.parquet**, **embed-00001_00000.parquet**, ..., in the ``/tmp/ogbn-arxiv-lp/predictions/node/`` folder. Each parquet file has two columns: the ``nid`` column stores node IDs and the ``emb`` column stores embeddings.
 
 Generating Embedding
 --------------------
@@ -201,18 +201,12 @@ The saved result will be like:
     /tmp/saved_embed
         emb_info.json
         node_type1/
-            embed_nids-00000.pt
-            embed_nids-00001.pt
-            ...
-            embed-00000.pt
-            embed-00001.pt
+            embed-00000_00000.parquet
+            embed-00000_00001.parquet
             ...
         node_type2/
-            embed_nids-00000.pt
-            embed_nids-00001.pt
-            ...
-            embed-00000.pt
-            embed-00001.pt
+            embed-00000_00000.parquet
+            embed-00000_00001.parquet
             ...
 
 **That is it!** You have learnt how to use GraphStorm in three steps.
diff --git a/python/graphstorm/gconstruct/remap_result.py b/python/graphstorm/gconstruct/remap_result.py
index 7e7958500b..870bf01609 100644
--- a/python/graphstorm/gconstruct/remap_result.py
+++ b/python/graphstorm/gconstruct/remap_result.py
@@ -210,12 +210,12 @@ def remap_node_emb(emb_ntypes, node_emb_dir,
         --------
         # embedddings:
         #   ntype0:
-        #     emb_part00000_00000.parquet
-        #     emb_part00000_00001.parquet
+        #     embed-00000_00000.parquet
+        #     embed-00000_00001.parquet
         #     ...
         #   ntype1:
-        #     emb_part00000_00000.parquet
-        #     emb_part00000_00001.parquet
+        #     embed-00000_00000.parquet
+        #     embed-00000_00001.parquet
         #     ...
 
         Parameters
@@ -400,7 +400,7 @@ def remap_edge_pred(pred_etypes, pred_dir,
         #    dst_nids-00001.pt
         #    ...
 
-        The output emb files will be
+        The output prediction files will be
         #    predict-00000_00000.parquet
         #    predict-00000_00001.parquet
         #    ...