-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[arcane,accelerator] Ajoute implémentation de l'algorithme de 'Scan' en mode multi-thread.
- Loading branch information
1 parent
3ca87c2
commit f146d53
Showing
4 changed files
with
197 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*- | ||
//----------------------------------------------------------------------------- | ||
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) | ||
// See the top-level COPYRIGHT file for details. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
//----------------------------------------------------------------------------- | ||
/*---------------------------------------------------------------------------*/ | ||
/* MultiThreadAlgo.cc (C) 2000-2024 */ | ||
/* */ | ||
/* Implémentation des algorithmes accélérateurs en mode multi-thread. */ | ||
/*---------------------------------------------------------------------------*/ | ||
/*---------------------------------------------------------------------------*/ | ||
|
||
#include "arcane/accelerator/MultiThreadAlgo.h" | ||
|
||
/*---------------------------------------------------------------------------*/ | ||
/*---------------------------------------------------------------------------*/ | ||
|
||
/*---------------------------------------------------------------------------*/ | ||
/*---------------------------------------------------------------------------*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*- | ||
//----------------------------------------------------------------------------- | ||
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) | ||
// See the top-level COPYRIGHT file for details. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
//----------------------------------------------------------------------------- | ||
/*---------------------------------------------------------------------------*/ | ||
/* MultiThreadAlgo.h (C) 2000-2024 */ | ||
/* */ | ||
/* Implémentation des algorithmes accélérateurs en mode multi-thread. */ | ||
/*---------------------------------------------------------------------------*/ | ||
#ifndef ARCANE_ACCELERATOR_MULTITHREADALGO_H | ||
#define ARCANE_ACCELERATOR_MULTITHREADALGO_H | ||
/*---------------------------------------------------------------------------*/ | ||
/*---------------------------------------------------------------------------*/ | ||
|
||
#include "arcane/utils/SmallArray.h" | ||
|
||
#include "arcane/core/Concurrency.h" | ||
|
||
#include "arcane/accelerator/AcceleratorGlobal.h" | ||
|
||
/*---------------------------------------------------------------------------*/ | ||
/*---------------------------------------------------------------------------*/ | ||
|
||
namespace Arcane::Accelerator::impl | ||
{ | ||
|
||
/*---------------------------------------------------------------------------*/ | ||
/*---------------------------------------------------------------------------*/ | ||
/*! | ||
 * \brief Algorithmes avancés en mode multi-thread. | ||
* | ||
* Pour l'instant seule l'opération de Scan est implémentée. | ||
*/ | ||
class MultiThreadAlgo | ||
{ | ||
public: | ||
|
||
/*! | ||
* \brief Algorithme de scan multi-thread. | ||
* | ||
* \note Cette classe est interne à Arcane. La version de l'API publique | ||
* est accessible via la classe GenericScanner. | ||
* | ||
* Cet algorithme basique utilise deux passes pour le calcul. | ||
* L'interval d'itération est divisé en N blocs. On prend N = 2*nb_thread. | ||
* - la première passe calcule en parallèle le résultat du scan pour tous | ||
* les éléments d'un bloc. | ||
* - la deuxième passe calcule la valeurs finale. | ||
* | ||
* Le calcul donne toujours la même valeur pour un nombre de bloc donné. | ||
* | ||
* TODO: Utilise du padding pour conserver les valeurs partielles par bloc. | ||
* TODO: Faire des versions spécialisées si DataType est un type | ||
* de base tel que 'Int32', 'Int64', 'float' ou 'double'. | ||
*/ | ||
template <bool IsExclusive, typename DataType, typename Operator, | ||
typename InputIterator, typename OutputIterator> | ||
void doScan(ForLoopRunInfo run_info, Int32 nb_value, | ||
InputIterator input, OutputIterator output, | ||
DataType init_value, Operator op) | ||
{ | ||
//std::cout << "DO_SCAN MULTI_THREAD nb_value=" << nb_value << " init_value=" << init_value << "\n"; | ||
auto multiple_getter_func = [=](Int32 input_index, Int32 nb_value) -> DataType { | ||
DataType partial_value = Operator::defaultValue(); | ||
for (Int32 x = 0; x < nb_value; ++x) | ||
partial_value = op(input[x + input_index], partial_value); | ||
return partial_value; | ||
}; | ||
|
||
auto multiple_setter_func = [=](DataType previous_sum, Int32 input_index, Int32 nb_value) { | ||
for (Int32 x = 0; x < nb_value; ++x) { | ||
if constexpr (IsExclusive) { | ||
output[x + input_index] = previous_sum; | ||
previous_sum = op(input[x + input_index], previous_sum); | ||
} | ||
else { | ||
previous_sum = op(input[x + input_index], previous_sum); | ||
output[x + input_index] = previous_sum; | ||
} | ||
} | ||
}; | ||
// TODO: calculer automatiquement cette valeur. | ||
const Int32 nb_block = 10; | ||
|
||
// Tableau pour conserver les valeurs partielles des blocs. | ||
// TODO: Utiliser un padding pour éviter des conflits de cache entre les threads. | ||
SmallArray<DataType> partial_values(nb_block); | ||
Span<DataType> out_partial_values = partial_values; | ||
|
||
auto partial_value_func = [=](Int32 a, Int32 n) { | ||
for (Int32 i = 0; i < n; ++i) { | ||
Int32 interval_index = i + a; | ||
|
||
Int32 input_index = 0; | ||
Int32 nb_value_in_interval = 0; | ||
_subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval); | ||
|
||
DataType partial_value = multiple_getter_func(input_index, nb_value_in_interval); | ||
|
||
out_partial_values[interval_index] = partial_value; | ||
} | ||
}; | ||
|
||
ParallelLoopOptions loop_options(run_info.options().value_or(ParallelLoopOptions{})); | ||
loop_options.setGrainSize(1); | ||
run_info.addOptions(loop_options); | ||
|
||
// Calcule les sommes partielles pour nb_block | ||
Arcane::arcaneParallelFor(0, nb_block, run_info, partial_value_func); | ||
|
||
auto final_sum_func = [=](Int32 a, Int32 n) { | ||
for (Int32 i = 0; i < n; ++i) { | ||
Int32 interval_index = i + a; | ||
|
||
DataType previous_sum = init_value; | ||
for (Int32 z = 0; z < interval_index; ++z) | ||
previous_sum = op(out_partial_values[z], previous_sum); | ||
|
||
Int32 input_index = 0; | ||
Int32 nb_value_in_interval = 0; | ||
_subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval); | ||
|
||
multiple_setter_func(previous_sum, input_index, nb_value_in_interval); | ||
} | ||
}; | ||
|
||
// Calcule les valeurs finales | ||
Arcane::arcaneParallelFor(0, nb_block, run_info, final_sum_func); | ||
} | ||
|
||
private: | ||
|
||
template <typename SizeType> | ||
static void _subInterval(SizeType size, SizeType interval_index, SizeType nb_interval, | ||
SizeType* out_begin_index, SizeType* out_interval_size) | ||
{ | ||
*out_begin_index = 0; | ||
*out_interval_size = 0; | ||
if (nb_interval <= 0) | ||
return; | ||
if (interval_index < 0 || interval_index >= nb_interval) | ||
return; | ||
SizeType isize = size / nb_interval; | ||
SizeType ibegin = interval_index * isize; | ||
// Pour le dernier interval, prend les elements restants | ||
if ((interval_index + 1) == nb_interval) | ||
isize = size - ibegin; | ||
*out_begin_index = ibegin; | ||
*out_interval_size = isize; | ||
} | ||
}; | ||
|
||
/*---------------------------------------------------------------------------*/ | ||
/*---------------------------------------------------------------------------*/ | ||
|
||
} // namespace Arcane::Accelerator::impl | ||
|
||
/*---------------------------------------------------------------------------*/ | ||
/*---------------------------------------------------------------------------*/ | ||
|
||
#endif | ||
|
||
/*---------------------------------------------------------------------------*/ | ||
/*---------------------------------------------------------------------------*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters