Skip to content

Commit

Permalink
Merge pull request #798 from JaeseungYeom/sample_list_jag
Browse files Browse the repository at this point in the history
Sample list framework for the JAG Conduit data reader as well as a generic data store that is implemented for the JAG Conduit data reader.
  • Loading branch information
bvanessen authored Feb 8, 2019
2 parents 2da57eb + f6876c5 commit 0b107d7
Show file tree
Hide file tree
Showing 47 changed files with 2,940 additions and 787 deletions.
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,15 @@ include(SetupCXX)
################################################################

# Required dependencies
# Locate cereal in config mode (NO_MODULE). The first attempt only looks
# in the locations hinted by CEREAL_DIR (CMake variable or environment
# variable); NO_DEFAULT_PATH disables CMake's default search locations.
find_package(CEREAL NO_MODULE
HINTS ${CEREAL_DIR} $ENV{CEREAL_DIR}
PATH_SUFFIXES share/cmake/cereal
NO_DEFAULT_PATH)
# Fall back to the default search paths. REQUIRED makes configuration
# fail if cereal still cannot be found.
if (NOT CEREAL_FOUND)
find_package(CEREAL NO_MODULE REQUIRED)
endif ()
# Record the result under the LBANN_HAS_* naming used for other dependencies.
set(LBANN_HAS_CEREAL ${CEREAL_FOUND})
# The imported target is just called "cereal". Super.

# Setup the linear algebra library
find_package(Hydrogen 1.1.0 NO_MODULE QUIET
Expand Down Expand Up @@ -423,6 +432,7 @@ target_include_directories(lbann PUBLIC

# Use the IMPORTED targets when possible.
target_link_libraries(lbann PUBLIC LbannProto)
target_link_libraries(lbann PUBLIC cereal)
target_link_libraries(lbann PUBLIC OpenMP::OpenMP_CXX)
target_link_libraries(lbann PUBLIC MPI::MPI_CXX)
target_link_libraries(lbann PUBLIC protobuf::libprotobuf)
Expand Down Expand Up @@ -610,6 +620,7 @@ append_str_tf(_str
LBANN_GNU_LINUX
LBANN_HAS_HYDROGEN
LBANN_HAS_OPENCV
LBANN_HAS_CEREAL
LBANN_HAS_CUDA
LBANN_HAS_CUDNN
LBANN_HAS_NCCL2
Expand Down
8 changes: 8 additions & 0 deletions ReleaseNotes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ I/O & data readers:
- Added support for data set metadata file that provides both schema
and normalization values unique to a given data set. Demonstrated
use in JAG Conduit data reader.
- Added support for an index-list-based approach for describing the
samples to use in training and testing. Note that this is
currently only supported in the JAG Conduit data reader.
- Created a general-purpose data store that operates on generic
Conduit node data structures. This should provide an extensible
and generic approach for holding and exchanging data between
epochs. Note that this is currently only supported in the JAG
Conduit data reader.

Build system:

Expand Down
18 changes: 17 additions & 1 deletion cmake/configure_files/LBANNConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ set(LBANN_DATATYPE @LBANN_DATATYPE@)
set(LBANN_DETERMINISTIC @LBANN_DETERMINISTIC@)
set(LBANN_GNU_LINUX @LBANN_GNU_LINUX@)
set(LBANN_HAS_ALUMINUM @LBANN_HAS_ALUMINUM@)
set(LBANN_HAS_CEREAL @LBANN_HAS_CEREAL@)
set(LBANN_HAS_CNPY @LBANN_HAS_CNPY@)
set(LBANN_HAS_CONDUIT @LBANN_HAS_CONDUIT@)
set(LBANN_HAS_CUDA @LBANN_HAS_CUDA@)
Expand All @@ -54,7 +55,22 @@ set(LBANN_TOPO_AWARE @LBANN_TOPO_AWARE@)

# Setup dependencies

# Start with Hydrogen. We can probably inherit Aluminum-ness from
# First, CEREAL.
# Only look for cereal when LBANN_HAS_CEREAL is set.
if (LBANN_HAS_CEREAL)
# 1) Try only the locations hinted by CEREAL_DIR (CMake or environment
#    variable); NO_DEFAULT_PATH disables CMake's default search locations.
find_package(CEREAL NO_MODULE
HINTS ${CEREAL_DIR} $ENV{CEREAL_DIR}
PATH_SUFFIXES share/cmake/cereal
NO_DEFAULT_PATH)
# 2) Try CMake's default search paths (not REQUIRED, so a miss is not
#    fatal yet).
if (NOT CEREAL_FOUND)
find_package(CEREAL NO_MODULE)
endif ()
# 3) Last resort: use the @CEREAL_DIR@ placeholder value (presumably the
#    cereal location substituted in when this file is configured -- TODO
#    confirm) and make the lookup REQUIRED so a miss is a hard error.
if (NOT CEREAL_FOUND AND NOT CEREAL_DIR)
set(CEREAL_DIR "@CEREAL_DIR@")
find_package(CEREAL NO_MODULE REQUIRED)
endif ()
endif ()

# Next, Hydrogen. We can probably inherit Aluminum-ness from
# there, as well as MPI and OpenMP.
if (LBANN_HAS_HYDROGEN)
if (NOT Hydrogen_DIR AND NOT HYDROGEN_DIR)
Expand Down
1 change: 1 addition & 0 deletions cmake/configure_files/lbann_config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#cmakedefine LBANN_GNU_LINUX

#cmakedefine LBANN_HAS_CEREAL
#cmakedefine LBANN_HAS_OPENCV
#cmakedefine LBANN_HAS_TBINF
#cmakedefine LBANN_HAS_CNPY
Expand Down
4 changes: 4 additions & 0 deletions docs/BuildingLBANN.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ The following basic tools are **required**.
+ A C++11-compliant compiler.
+ OpenMP, version 3.0 or newer.
+ An MPI-3.0 implementation.
+ [CEREAL](https://github.com/USCiLab/cereal) is used to handle
complex serialization tasks.
+ [CMake](https://cmake.org), version 3.9 or newer.

The following LLNL-maintained packages are **required**.
Expand Down Expand Up @@ -263,6 +265,8 @@ The latter option is recommended.
the Aluminum installation prefix _or_ the AluminumConfig.cmake
file. If Hydrogen has not been built with Aluminum support, set
`LBANN_WITH_ALUMINUM=ON` to enable Aluminum support.
+ `CEREAL_DIR`: The path to _either_ the CEREAL installation prefix
_or_ the cereal-config.cmake file.
+ `CNPY_DIR`: The path to the CNPY installation prefix. Must set
`LBANN_WITH_CNPY=ON` to enable CNPY support.
+ `Conduit_DIR` or `CONDUIT_DIR`: The path to _either_ the
Expand Down
31 changes: 26 additions & 5 deletions include/lbann/data_readers/data_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class generic_data_reader : public lbann_image_preprocessor {
m_world_master_mini_batch_adjustment(0),
m_num_parallel_readers(0), m_rank_in_model(0),
m_max_files_to_load(0),
m_file_dir(""), m_data_fn(""), m_label_fn(""),
m_file_dir(""), m_data_index_list(""), m_data_fn(""), m_label_fn(""),
m_shuffle(shuffle), m_absolute_sample_count(0), m_validation_percent(0.0),
m_use_percent(1.0),
m_master(false),
Expand Down Expand Up @@ -152,6 +152,18 @@ class generic_data_reader : public lbann_image_preprocessor {
*/
std::string get_local_file_dir() const;

/**
* Set the index list for your data (images, etc).
* The index list contains an enumeration of all samples in the
* data set.
*/
void set_data_index_list(std::string s);

/**
* Returns the complete index list for your data set.
*/
std::string get_data_index_list() const;

/**
* Set the filename for your data (images, etc).
* This may either be a complete filepath, or a subdirectory;
Expand Down Expand Up @@ -344,7 +356,7 @@ class generic_data_reader : public lbann_image_preprocessor {
}
/// True if the data reader's current position is valid, i.e. it is
/// strictly less than the value reported by get_num_data().
virtual bool position_valid() const {
  return (m_current_pos < get_num_data());
}
/// True if the data reader's current position is not valid but within # ranks per model
/// of the end of the data set (e.g. it is a rank with no valid data on the last iteration)
Expand Down Expand Up @@ -574,6 +586,12 @@ class generic_data_reader : public lbann_image_preprocessor {
/// returns true if the data set is partitioned
bool is_partitioned() const { return m_is_partitioned; }

/// Does the data reader have a unique index list per model?
/// This base implementation always answers false.
virtual bool has_list_per_model() const { return false; }
/// Does the data reader have a unique index list per trainer?
/// This base implementation always answers false.
virtual bool has_list_per_trainer() const { return false; }


/** \brief Given directory to store checkpoint files, write state to file and add to number of bytes written */
bool save_to_checkpoint_shared(persist& p, const char *name);

Expand Down Expand Up @@ -669,9 +687,7 @@ class generic_data_reader : public lbann_image_preprocessor {
/// Returns the data store held in m_data_store.
/// Reports the problem through LBANN_ERROR if no data store has been set.
generic_data_store * get_data_store() const {
  if (m_data_store == nullptr) {
    LBANN_ERROR("m_data_store is nullptr");
  }
  return m_data_store;
}
Expand Down Expand Up @@ -711,6 +727,10 @@ class generic_data_reader : public lbann_image_preprocessor {
/// support of data store functionality
void set_data_store(generic_data_store *g);

virtual bool data_store_active() const;

virtual bool priming_data_store() const;

/// Stores the given model pointer in m_model.
void set_model(model *m) { m_model = m; }

/// experimental; used to ensure all readers for jag_conduit_hdf5
Expand Down Expand Up @@ -833,6 +853,7 @@ class generic_data_reader : public lbann_image_preprocessor {
size_t m_max_files_to_load;
std::string m_file_dir;
std::string m_local_file_dir;
std::string m_data_index_list;
std::string m_data_fn;
std::string m_label_fn;
bool m_shuffle;
Expand Down
Loading

0 comments on commit 0b107d7

Please sign in to comment.