Skip to content

Commit

Permalink
[arcane,accelerator] Ajoute implémentation de l'algorithme de 'Scan' …
Browse files Browse the repository at this point in the history
…en mode multi-thread.
  • Loading branch information
grospelliergilles committed Dec 29, 2024
1 parent 3ca87c2 commit f146d53
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 1 deletion.
10 changes: 9 additions & 1 deletion arcane/src/arcane/accelerator/GenericScanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "arcane/accelerator/RunCommandLaunchInfo.h"
#include "arcane/accelerator/RunCommandLoop.h"
#include "arcane/accelerator/ScanImpl.h"
#include "arcane/accelerator/MultiThreadAlgo.h"

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
Expand Down Expand Up @@ -141,7 +142,14 @@ class ScannerImpl
} break;
#endif
case eExecutionPolicy::Thread:
// Pas encore implémenté en multi-thread
// Si le nombre de valeurs est 1 on utilise la version séquentielle.
// TODO: il serait judicieux de faire cela aussi pour des valeurs plus importantes
// car en général sur les petites boucles le multi-threading est contre productif.
if (nb_item > 1) {
MultiThreadAlgo scanner;
scanner.doScan<IsExclusive, DataType>(launch_info.loopRunInfo(), nb_item, input_data, output_data, init_value, op);
break;
}
[[fallthrough]];
case eExecutionPolicy::Sequential: {
DataType sum = init_value;
Expand Down
20 changes: 20 additions & 0 deletions arcane/src/arcane/accelerator/MultiThreadAlgo.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
//-----------------------------------------------------------------------------
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
// See the top-level COPYRIGHT file for details.
// SPDX-License-Identifier: Apache-2.0
//-----------------------------------------------------------------------------
/*---------------------------------------------------------------------------*/
/* MultiThreadAlgo.cc (C) 2000-2024 */
/* */
/* Implémentation des algorithmes accélérateurs en mode multi-thread. */
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

#include "arcane/accelerator/MultiThreadAlgo.h"

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
166 changes: 166 additions & 0 deletions arcane/src/arcane/accelerator/MultiThreadAlgo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
//-----------------------------------------------------------------------------
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
// See the top-level COPYRIGHT file for details.
// SPDX-License-Identifier: Apache-2.0
//-----------------------------------------------------------------------------
/*---------------------------------------------------------------------------*/
/* MultiThreadAlgo.h (C) 2000-2024 */
/* */
/* Implémentation des algorithmes accélérateurs en mode multi-thread. */
/*---------------------------------------------------------------------------*/
#ifndef ARCANE_ACCELERATOR_MULTITHREADALGO_H
#define ARCANE_ACCELERATOR_MULTITHREADALGO_H
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

#include "arcane/utils/SmallArray.h"

#include "arcane/core/Concurrency.h"

#include "arcane/accelerator/AcceleratorGlobal.h"

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

namespace Arcane::Accelerator::impl
{

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/*!
 * \brief Advanced algorithms in multi-thread mode.
 *
 * For now, only the 'Scan' operation is implemented.
 */
class MultiThreadAlgo
{
 public:

  /*!
   * \brief Multi-threaded scan algorithm.
   *
   * \note This class is internal to Arcane. The public API is available
   * through the GenericScanner class.
   *
   * This basic algorithm uses two passes. The iteration range is split
   * into N blocks; N is currently hard-coded to 10 (see the TODO in the body):
   * - the first pass computes, in parallel, the reduction of the input
   *   values of each block (starting from Operator::defaultValue()).
   * - the second pass computes, in parallel, the final values of each block.
   *   The starting value of a block is \a init_value combined with the
   *   partial values of all the preceding blocks.
   *
   * The computation always gives the same result for a given number of blocks.
   *
   * In the second pass, each input element is read before the matching
   * output element is written, so the exclusive scan does not depend on
   * the value it has just stored when \a output refers to the same storage
   * as \a input.
   *
   * Nothing is done if \a nb_value is not strictly positive.
   *
   * \tparam IsExclusive if true, output[i] does not include input[i].
   * \param run_info information used to launch the parallel loops.
   * \param nb_value number of values to scan.
   * \param input indexable source of the values (read with operator[]).
   * \param output indexable destination of the values (written with operator[]).
   * \param init_value starting value of the scan.
   * \param op binary operator, called as op(input_value, accumulated_value).
   *
   * TODO: Use padding when storing the per-block partial values.
   * TODO: Provide specialized versions when DataType is a basic type
   * such as 'Int32', 'Int64', 'float' or 'double'.
   */
  template <bool IsExclusive, typename DataType, typename Operator,
            typename InputIterator, typename OutputIterator>
  void doScan(ForLoopRunInfo run_info, Int32 nb_value,
              InputIterator input, OutputIterator output,
              DataType init_value, Operator op)
  {
    // Nothing to scan: do not launch the two parallel loops for nothing.
    if (nb_value <= 0)
      return;

    // Reduces the 'block_size' input values starting at 'first_index'.
    auto multiple_getter_func = [=](Int32 first_index, Int32 block_size) -> DataType {
      DataType partial_value = Operator::defaultValue();
      for (Int32 x = 0; x < block_size; ++x)
        partial_value = op(input[x + first_index], partial_value);
      return partial_value;
    };

    // Writes the scan of the 'block_size' values starting at 'first_index',
    // using 'previous_sum' as the starting value of the block.
    auto multiple_setter_func = [=](DataType previous_sum, Int32 first_index, Int32 block_size) {
      for (Int32 x = 0; x < block_size; ++x) {
        // Read the input before any write to the output: in exclusive mode
        // the output is written first, which would clobber the input value
        // if both refer to the same storage.
        const DataType current_value = input[x + first_index];
        if constexpr (IsExclusive) {
          output[x + first_index] = previous_sum;
          previous_sum = op(current_value, previous_sum);
        }
        else {
          previous_sum = op(current_value, previous_sum);
          output[x + first_index] = previous_sum;
        }
      }
    };

    // TODO: compute this value automatically.
    const Int32 nb_block = 10;

    // Array holding the partial value of each block.
    // TODO: use padding to avoid cache conflicts between threads.
    SmallArray<DataType> partial_values(nb_block);
    Span<DataType> out_partial_values = partial_values;

    // First pass: handles the blocks [a, a+n) and stores their partial value.
    auto partial_value_func = [=](Int32 a, Int32 n) {
      for (Int32 i = 0; i < n; ++i) {
        Int32 interval_index = i + a;

        Int32 input_index = 0;
        Int32 nb_value_in_interval = 0;
        _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);

        DataType partial_value = multiple_getter_func(input_index, nb_value_in_interval);

        out_partial_values[interval_index] = partial_value;
      }
    };

    // Use a grain size of 1 for the loops over the blocks.
    ParallelLoopOptions loop_options(run_info.options().value_or(ParallelLoopOptions{}));
    loop_options.setGrainSize(1);
    run_info.addOptions(loop_options);

    // Compute the partial values of the nb_block blocks.
    Arcane::arcaneParallelFor(0, nb_block, run_info, partial_value_func);

    // Second pass: handles the blocks [a, a+n) and writes their final values.
    auto final_sum_func = [=](Int32 a, Int32 n) {
      for (Int32 i = 0; i < n; ++i) {
        Int32 interval_index = i + a;

        // Starting value of this block: 'init_value' combined, in block
        // order, with the partial values of all the preceding blocks.
        DataType previous_sum = init_value;
        for (Int32 z = 0; z < interval_index; ++z)
          previous_sum = op(out_partial_values[z], previous_sum);

        Int32 input_index = 0;
        Int32 nb_value_in_interval = 0;
        _subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);

        multiple_setter_func(previous_sum, input_index, nb_value_in_interval);
      }
    };

    // Compute the final values.
    Arcane::arcaneParallelFor(0, nb_block, run_info, final_sum_func);
  }

 private:

  /*!
   * \brief Computes the sub-range of index \a interval_index when
   * \a size elements are split into \a nb_interval intervals.
   *
   * Every interval holds (size / nb_interval) elements, except the last
   * one which also takes the remaining elements.
   *
   * \a out_begin_index and \a out_interval_size are both set to 0 if
   * \a nb_interval is not strictly positive or if \a interval_index is
   * outside [0, nb_interval).
   */
  template <typename SizeType>
  static void _subInterval(SizeType size, SizeType interval_index, SizeType nb_interval,
                           SizeType* out_begin_index, SizeType* out_interval_size)
  {
    *out_begin_index = 0;
    *out_interval_size = 0;
    if (nb_interval <= 0)
      return;
    if (interval_index < 0 || interval_index >= nb_interval)
      return;
    SizeType isize = size / nb_interval;
    SizeType ibegin = interval_index * isize;
    // The last interval takes the remaining elements.
    if ((interval_index + 1) == nb_interval)
      isize = size - ibegin;
    *out_begin_index = ibegin;
    *out_interval_size = isize;
  }
};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

} // namespace Arcane::Accelerator::impl

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

#endif

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
2 changes: 2 additions & 0 deletions arcane/src/arcane/accelerator/srcs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ set( ARCANE_SOURCES
MaterialVariableViews.h
MaterialVariableViews.cc
MemoryCopier.cc
MultiThreadAlgo.h
MultiThreadAlgo.cc
NumArray.h
NumArrayViews.h
NumArrayViews.cc
Expand Down

0 comments on commit f146d53

Please sign in to comment.