example workflow code for model implementation #17

Merged · 3 commits · Jun 21, 2024
31 changes: 31 additions & 0 deletions README.md
@@ -10,8 +10,39 @@ As the maintainer of this project, please make a few updates:
- Understanding the security reporting process in SECURITY.MD
- Remove this section from the README

## Configuration

Most of the workflow is controlled by environment variables.
You can manually export the variables defined in the `.env` file in your shell before running the Python script:
```sh
# Export each variable in the .env file.
# Note that this differs from a plain `source .env`, which does not export the variables.
export $(grep -v '^#' .env | xargs)
# Run the Python script
python your_script.py
```
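
Equivalently, you can enable auto-export while sourcing the file (a small shell convenience sketch, not something the project requires):

```sh
# Temporarily auto-export every variable assigned while sourcing .env
set -a
source .env
set +a
```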

## Naming convention

### File naming convention

| Name | Description |
| -- | -- |
| `conf.py` | The configuration for the module, app, and project |

<!-- TODO: renaming files -->
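
Since the workflow is driven by environment variables, a `conf.py` typically just centralizes how those variables are read. Below is a minimal, hypothetical sketch; the setting names (`OPENAI_API_KEY`, `RDAGENT_LOG_LEVEL`) are placeholders, not the project's actual configuration fields:

```python
# conf.py -- hypothetical sketch of a module-level configuration object
import os
from dataclasses import dataclass


@dataclass
class ModuleConf:
    # Placeholder settings; a real module defines its own fields.
    api_key: str = os.getenv("OPENAI_API_KEY", "")
    log_level: str = os.getenv("RDAGENT_LOG_LEVEL", "INFO")


CONF = ModuleConf()
```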


## Contributing

### Guidance
This project welcomes contributions and suggestions.
You can find issues in the issues list or by simply running `grep -r "TODO:"`.

Making contributions is not hard. Solving an issue (perhaps just answering a question raised in the issues list), reporting or fixing a bug, improving the documentation, or even fixing a typo are all important contributions to RDAgent.


### Policy

This project welcomes contributions and suggestions. Most contributions require you to agree to a
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
39 changes: 39 additions & 0 deletions rdagent/app/model_implementation/README.md
@@ -0,0 +1,39 @@

# Preparation

## Install PyTorch
A CPU-only build is enough to verify the implementation.

Please install PyTorch based on your system.
Here is an example on my system:
```bash
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip3 install torch_geometric
```

# Tasks

## Task Extraction
From paper to task:
```bash
python rdagent/app/model_implementation/task_extraction.py
# It may be based on rdagent/document_reader/document_reader.py
```

## Complete workflow
From paper to implementation; a sketch of what such a script could look like follows below.
```bash
# Similar to
# rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
```
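
Reusing the task loader and generator that `rdagent/app/model_implementation/eval.py` (added in this PR) already imports, a minimal, hypothetical version could look like this (the JSON path is a placeholder):

```python
# Hypothetical paper-to-implementation workflow sketch; not part of this PR.
from rdagent.model_implementation.one_shot import ModelTaskGen
from rdagent.model_implementation.task import ModelTaskLoderJson

# Load the model tasks extracted from the paper (placeholder path).
task_l = ModelTaskLoderJson("path/to/extracted_tasks.json").load()

# Generate an implementation for each task.
impl_l = ModelTaskGen().generate(task_l)

# Each implementation can then be executed or evaluated (see eval.py below).
for impl in impl_l:
    print(impl.target_task)
```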

## Paper benchmark
```bash
python rdagent/app/model_implementation/eval.py
```

TODO:
- Is the evaluation reasonable?

## Evolving
29 changes: 29 additions & 0 deletions rdagent/app/model_implementation/eval.py
@@ -0,0 +1,29 @@
from pathlib import Path

DIRNAME = Path(__file__).absolute().resolve().parent

from rdagent.model_implementation.benchmark.eval import ModelImpValEval
from rdagent.model_implementation.one_shot import ModelTaskGen
from rdagent.model_implementation.task import ModelImpLoader, ModelTaskLoderJson

mtl = ModelTaskLoderJson("TODO: A Path to json")

task_l = mtl.load()

mtg = ModelTaskGen()

impl_l = mtg.generate(task_l)

# TODO: Align it with the benchmark framework after @wenjun refines the evaluation part.
# Currently, we just handcraft a workflow for fast evaluation.

mil = ModelImpLoader(DIRNAME.parent.parent / "model_implementation" / "benchmark" / "gt_code")

mie = ModelImpValEval()
# Evaluation:
eval_l = []
for impl in impl_l:
    gt_impl = mil.load(impl.target_task)
    eval_l.append(mie.evaluate(gt_impl, impl))

print(eval_l)
12 changes: 10 additions & 2 deletions rdagent/core/implementation.py
@@ -1,13 +1,21 @@
from abc import ABC, abstractmethod
from typing import List
from typing import List, Sequence

from rdagent.core.task import (
    BaseTask,
    TaskImplementation,
)

class TaskGenerator(ABC):
    @abstractmethod
    def generate(self, *args, **kwargs) -> List[TaskImplementation]:
    def generate(self, task_l: Sequence[BaseTask]) -> Sequence[TaskImplementation]:
        """
        A TaskGenerator should take in a sequence of tasks.

        The schedule of the different tasks is crucial for the final performance,
        because it affects the learning process.
        """
        raise NotImplementedError("generate method is not implemented.")

    def collect_feedback(self, feedback_obj_l: List[object]):
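
For context, a concrete subclass of the interface above might look like the following sketch. The `EchoTaskGen` name and its trivial generation logic are illustrative assumptions, not part of this PR:

```python
# Illustrative sketch only: a trivial TaskGenerator that wraps each task in a
# given TaskImplementation class, without any real code-generation logic.
from typing import Sequence

from rdagent.core.implementation import TaskGenerator
from rdagent.core.task import BaseTask, TaskImplementation


class EchoTaskGen(TaskGenerator):
    def __init__(self, imp_cls: type) -> None:
        # imp_cls is assumed to be a concrete TaskImplementation subclass.
        self.imp_cls = imp_cls

    def generate(self, task_l: Sequence[BaseTask]) -> Sequence[TaskImplementation]:
        # Keep the original task order; a real generator may reschedule tasks,
        # since the schedule affects the learning process.
        return [self.imp_cls(task) for task in task_l]
```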
111 changes: 103 additions & 8 deletions rdagent/core/task.py
@@ -1,27 +1,121 @@
from abc import ABC, abstractmethod
from typing import Tuple
from pathlib import Path
from typing import Generic, Optional, Sequence, Tuple, TypeVar
import pandas as pd

"""
This file contains all the data classes for the rdagent task.
"""


class BaseTask(ABC):
    # Put `name` here as the primary key
    # TODO: put `name` here as the primary key
    # Please refer to rdagent/model_implementation/task.py for the implementation
    # I think the task version applies to the base class.
    pass

ASpecificTask = TypeVar("ASpecificTask", bound=BaseTask)


class TaskImplementation(ABC):
    def __init__(self, target_task: BaseTask) -> None:
class TaskImplementation(ABC, Generic[ASpecificTask]):

    def __init__(self, target_task: ASpecificTask) -> None:
        self.target_task = target_task

    @abstractmethod
    def execute(self, *args, **kwargs) -> Tuple[str, pd.DataFrame]:
        raise NotImplementedError("__call__ method is not implemented.")
    def execute(self, data=None, config: dict = {}) -> object:
        """
        The execution of the implementation can be dynamic.

        So we may pass in the data and config dynamically.
        """
        raise NotImplementedError("execute method is not implemented.")

    @abstractmethod
    def execute_desc(self):
        """
        Return the description of how we will execute the code in the folder.
        """
        raise NotImplementedError("This type of input is not supported")

    # TODO:
    # After execution, it should return some results.
    # Some evaluators will take the results as input and produce an evaluation as output.


ASpecificTaskImp = TypeVar("ASpecificTaskImp", bound=TaskImplementation)


class ImpLoader(ABC, Generic[ASpecificTask, ASpecificTaskImp]):

    @abstractmethod
    def load(self, task: ASpecificTask) -> ASpecificTaskImp:
        raise NotImplementedError("load method is not implemented.")


class FBTaskImplementation(TaskImplementation):
"""
File-based task implementation

The implemented task will be a folder which contains related elements.
- Data
- Code Implementation
- Output
- After execution, it will generate the final output as file.

A typical way to run the pipeline of FBTaskImplementation will be
(We didn't add it as a method due to that we may pass arguments into `prepare` or `execute` based on our requirements.)

.. code-block:: python

def run_pipline(self, **files: str):
self.prepare()
self.inject_code(**files)
self.execute()

"""
# TODO:
# FileBasedFactorImplementation should inherient from it.
# Why not directly reuse FileBasedFactorImplementation.
# Because it has too much concerete dependencies.
# e.g. dataframe, factors

path: Optional[Path]

@abstractmethod
def prepare(self, *args, **kwargs):
"""
Prepare all the files except the injected code
- Data
- Documentation
- TODO: env? Env is implicitly defined by the document?

typical usage of `*args, **kwargs`:
Different methods shares the same data. The data are passed by the arguments.
"""

def inject_code(self, **files: str):
"""
Inject the code into the folder.
{
"model.py": "<model code>"
}
"""
for k, v in files.items():
with open(self.path / k, "w") as f:
f.write(v)

    def get_files(self) -> list[Path]:
        """
        Get the environment description.

        To be general, we only return a list of filenames.
        How to summarize the environment is the responsibility of the TaskGenerator.
        """
        return list(self.path.iterdir())


class TestCase:

    def __init__(
        self,
        target_task: BaseTask,
@@ -32,6 +126,7 @@ def __init__(


class TaskLoader:

    @abstractmethod
    def load(self, *args, **kwargs) -> BaseTask | list[BaseTask]:
    def load(self, *args, **kwargs) -> Sequence[BaseTask]:
        raise NotImplementedError("load method is not implemented.")
3 changes: 3 additions & 0 deletions rdagent/factor_implementation/evolving/factor.py
@@ -30,6 +30,8 @@


class FactorImplementTask(BaseTask):
    # TODO: generalize the attributes into the BaseTask
    # - factor_* -> *
    def __init__(
        self,
        factor_name,
@@ -39,6 +41,7 @@ def __init__(
        variables: dict = {},
        resource: str = None,
    ) -> None:
        # TODO: remove the useless factor_formulation_description
        self.factor_name = factor_name
        self.factor_description = factor_description
        self.factor_formulation = factor_formulation
61 changes: 61 additions & 0 deletions rdagent/model_implementation/benchmark/eval.py
@@ -0,0 +1,61 @@
# TODO: inherit from the benchmark base class
import torch
from rdagent.model_implementation.task import ModelTaskImpl


def get_data_conf(init_val):
    # TODO: design this step in the workflow
    in_dim = 1000
    in_channels = 128
    exec_config = {"model_eval_param_init": init_val}
    node_feature = torch.randn(in_dim, in_channels)
    edge_index = torch.randint(0, in_dim, (2, 2000))
    return (node_feature, edge_index), exec_config


class ModelImpValEval:
"""
Evaluate the similarity of the model structure by changing the input and observate the output.

Assumption:
- If the model structure is similar, the output will change in similar way when we change the input.
- we try to initialize the model param in similar value. So only the model structure is different.
"""

    def evaluate(self, gt: ModelTaskImpl, gen: ModelTaskImpl):
        round_n = 10

        eval_pairs: list[tuple] = []

        # run different input values
        for _ in range(round_n):
            # run different model initial parameters.
            for init_val in [-0.2, -0.1, 0.1, 0.2]:
                data, exec_config = get_data_conf(init_val)
                gt_res = gt.execute(data=data, config=exec_config)
                res = gen.execute(data=data, config=exec_config)
                eval_pairs.append((res, gt_res))

        # flatten and concatenate the outputs
        res_batch, gt_res_batch = [], []
        for res, gt_res in eval_pairs:
            res_batch.append(res.reshape(-1))
            gt_res_batch.append(gt_res.reshape(-1))
        res_batch = torch.stack(res_batch)
        gt_res_batch = torch.stack(gt_res_batch)

        res_batch = res_batch.detach().numpy()
        gt_res_batch = gt_res_batch.detach().numpy()

        # Pearson correlation of each hidden output
        def norm(x):
            return (x - x.mean(axis=0)) / x.std(axis=0)

        dim_corr = (norm(res_batch) * norm(gt_res_batch)).mean(axis=0)  # the correlation of each hidden output

        # aggregate all the correlations
        avr_corr = dim_corr.mean()
        # FIXME:
        # It is too high (e.g. 0.944).
        # Check whether this is actually a good evaluation!
        # Maybe identical initial params result in extremely high correlation regardless of the model structure.
        return avr_corr