10#include <unordered_map>
58template <
class T,
int D>
61 using IndexType = viennahrle::Index<D>;
62 using ConstSparseIterator =
63 viennahrle::ConstSparseIterator<typename Domain<T, D>::DomainType>;
84 enum class Boundary { NONE, REACTION, AMBIENT, MASK };
86 struct BoundaryIntersection {
87 Boundary boundary = Boundary::NONE;
91 template <
class ValueType>
struct StencilPoint {
98 Vec3D<T> velocity{0., 0., 0.};
101 std::array<T, 9> strainRateTensor{};
102 std::array<T, 9> stressTensor{};
103 T vonMisesStress = 0.;
106 SmartPointer<Domain<T, D>> reactionInterface =
nullptr;
107 SmartPointer<Domain<T, D>> ambientInterface =
nullptr;
108 SmartPointer<Domain<T, D>> maskInterface =
nullptr;
109 SmartPointer<OxidationDiffusion<T, D>> diffusionField =
nullptr;
110 SmartPointer<VelocityField<T>> maskVelocityField =
nullptr;
113 int reactionSign = 1;
114 int ambientSign = -1;
117 IndexType requestedMinIndex{};
118 IndexType requestedMaxIndex{};
119 unsigned iterations = 0;
120 T residual = std::numeric_limits<T>::max();
123 unsigned lastPressureIters_ = 0;
124 T lastPressureResidual_ = 0.;
125 unsigned lastStokesIters_ = 0;
126 T lastStokesResidual_ = 0.;
127 T avgExpansionSpeed_ = 0.;
128 bool avgExpansionSpeedComputed =
false;
130 bool nodesDirty_ =
true;
131 std::array<T, D> maxVelocity_{};
132 bool useRequestedBounds =
false;
134 deviatoricStressHistory;
137 std::vector<Vec3D<T>> previousVelocity_;
138 std::vector<T> previousPressure_;
139 bool hasPreviousSolution_ =
false;
141 static bool isFiniteVec(
const Vec3D<T> &value) {
142 for (
unsigned i = 0; i < 3; ++i)
143 if (!std::isfinite(value[i]))
148 static bool isFiniteTensor(
const std::array<T, 9> &value) {
149 for (
const auto component : value)
150 if (!std::isfinite(component))
156 std::vector<Boundary> faceBCTypes_;
157 std::vector<T> faceBCDists_;
167#ifdef VIENNALS_GPU_BICGSTAB
171 std::vector<double> pressCoeffGpu_;
172 std::vector<uint32_t> pressNeighId32_;
173 std::vector<double> actualDiagGpu_;
174 gpu::GpuBiCGSTABBuffers *gpuPressBufs_ =
nullptr;
179 std::vector<double> stokesCoeffGpu_;
180 std::vector<uint32_t> stokesNeighId32_;
181 std::vector<double> stokesDiagGpu_;
182 gpu::GpuBiCGSTABBuffers *gpuStokesBufs_ =
nullptr;
191 gpu::GpuBiCGSTABBuffers *gpuHarmonicBufs_ =
nullptr;
205 : reactionInterface(passedReactionInterface),
206 ambientInterface(passedAmbientInterface),
207 diffusionField(passedDiffusionField),
208 deformationParameters(passedDeformationParameters),
209 oxidationParameters(passedOxidationParameters) {}
211 template <
class... Args>
static auto New(Args &&...args) {
212 return SmartPointer<OxidationDeformation>::New(std::forward<Args>(args)...);
216#ifdef VIENNALS_GPU_BICGSTAB
217 gpu::freeGpuBuffers(gpuPressBufs_);
218 gpu::freeGpuBuffers(gpuStokesBufs_);
219 gpu::freeGpuBuffers(gpuHarmonicBufs_);
225 gpuPreconditioner_ = prec;
229 reactionInterface = passedInterface;
235 ambientInterface = passedInterface;
241 int passedMaskSign = 1) {
242 maskInterface = passedInterface;
243 maskSign = (passedMaskSign < 0) ? -1 : 1;
249 maskInterface =
nullptr;
256 maskVelocityField = passedVelocityField;
261 maskVelocityField =
nullptr;
267 diffusionField = passedDiffusionField;
272 oxidationParameters = passedParameters;
278 deformationParameters = passedParameters;
283 reactionSign = (passedReactionSign < 0) ? -1 : 1;
284 ambientSign = (passedAmbientSign < 0) ? -1 : 1;
289 const IndexType &passedMaxIndex) {
290 requestedMinIndex = passedMinIndex;
291 requestedMaxIndex = passedMaxIndex;
292 useRequestedBounds =
true;
298 useRequestedBounds =
false;
309 if (reactionInterface ==
nullptr || ambientInterface ==
nullptr ||
310 diffusionField ==
nullptr) {
311 Logger::getInstance()
312 .addError(
"OxidationDeformation: Missing interface or "
324 Logger::getInstance()
325 .addWarning(
"OxidationDeformation: no oxide nodes found after "
326 "buildNodes(). Verify that the reaction and ambient "
327 "level sets enclose a non-empty oxide band.")
329 hasPreviousSolution_ =
335 }
else if (hasPreviousSolution_ &&
336 previousVelocity_.size() ==
nodes.size() &&
337 previousPressure_.size() ==
nodes.size()) {
343 for (std::size_t i = 0; i <
nodes.size(); ++i) {
344 nodes[i].velocity = previousVelocity_[i];
345 nodes[i].pressure = previousPressure_[i];
349 const std::size_t nn =
nodes.size();
350 Timer<> tHarmonic, tMechanics;
357 Logger::getInstance()
358 .addTiming(
" deformation n=" + std::to_string(nn) +
" harmonic",
360 .addTiming(
" deformation n=" + std::to_string(nn) +
364 avgExpansionSpeedComputed =
false;
366 maxVelocity_.fill(
T(0));
367 for (
const auto &node :
nodes) {
368 for (
unsigned d = 0; d <
D; ++d)
369 maxVelocity_[d] = std::max(maxVelocity_[d], std::abs(node.velocity[d]));
372 for (
unsigned d = 0; d <
D; ++d)
373 maxVelocity_[d] = std::max(maxVelocity_[d], unresolvedMax[d]);
376 previousVelocity_.resize(
nodes.size());
377 previousPressure_.resize(
nodes.size());
378 for (std::size_t i = 0; i <
nodes.size(); ++i) {
379 previousVelocity_[i] =
nodes[i].velocity;
380 previousPressure_[i] =
nodes[i].pressure;
382 hasPreviousSolution_ =
true;
389 unsigned long )
final {
393 if (deformationParameters.material >= 0 &&
394 material != deformationParameters.material)
399 for (
unsigned d = 0; d <
D; ++d)
400 norm2 += velocity[d] * velocity[d];
401 if (norm2 > std::numeric_limits<T>::epsilon())
408 const Vec3D<T> &normalVector,
409 unsigned long )
final {
414 const Vec3D<T> & )
final {
415 if (deformationParameters.material >= 0 &&
416 material != deformationParameters.material)
418 return maxVelocity_[direction];
422 template <
class ValueType,
class NodeAccessor>
423 ValueType getField(
const IndexType &index, ValueType fallback,
424 NodeAccessor accessor)
const {
427 return accessor(
nodes[nodeId]);
432 return accessor(
nodes[nearby]);
435 template <
class ValueType,
class NodeAccessor>
436 ValueType getField(
const Vec3D<T> &coordinate, ValueType fallback,
437 NodeAccessor accessor)
const {
444 if constexpr (!std::is_same_v<ValueType, Vec3D<T>> &&
445 !std::is_same_v<ValueType, T>) {
447 for (
unsigned i = 0; i <
D; ++i)
448 index[i] = std::llround(coordinate[i] /
gridDelta);
449 return getField(index, fallback, accessor);
451 using IdxScalar = std::decay_t<decltype(std::declval<IndexType>()[0])>;
454 for (
unsigned d = 0; d <
D; ++d) {
456 const T c_flo = std::floor(c);
457 lo[d] =
static_cast<IdxScalar
>(c_flo);
462 T totalWeight =
T(0);
463 for (
int corner = 0; corner < (1 <<
D); ++corner) {
466 for (
unsigned d = 0; d <
D; ++d) {
467 if ((corner >> d) & 1) {
479 result = result + accessor(
nodes[nodeId]) * w;
483 if (totalWeight <
T(1e-14))
485 if (totalWeight <
T(1) -
T(1e-6))
486 result = result * (
T(1) / totalWeight);
493 return getField(coordinate, Vec3D<T>{0., 0., 0.},
494 [](
const Node &n) {
return n.velocity; });
498 return getField(index, Vec3D<T>{0., 0., 0.},
499 [](
const Node &n) {
return n.velocity; });
503 return getField(coordinate,
T(0), [](
const Node &n) {
return n.pressure; });
507 return getField(index,
T(0), [](
const Node &n) {
return n.pressure; });
511 return getField(coordinate,
T(0),
512 [](
const Node &n) {
return n.strainTrace; });
516 return getField(index,
T(0), [](
const Node &n) {
return n.strainTrace; });
520 return getField(coordinate, std::array<T, 9>{},
521 [](
const Node &n) {
return n.strainRateTensor; });
525 return getField(index, std::array<T, 9>{},
526 [](
const Node &n) {
return n.strainRateTensor; });
530 return getField(coordinate, std::array<T, 9>{},
531 [](
const Node &n) {
return n.stressTensor; });
535 return getField(index, std::array<T, 9>{},
536 [](
const Node &n) {
return n.stressTensor; });
540 return getField(coordinate,
T(0),
541 [](
const Node &n) {
return n.vonMisesStress; });
545 return getField(index,
T(0),
546 [](
const Node &n) {
return n.vonMisesStress; });
554 return std::isfinite(residual) &&
555 residual <= deformationParameters.mechanicsTolerance &&
556 std::isfinite(lastPressureResidual_) &&
557 lastPressureResidual_ <= deformationParameters.pressureTolerance &&
558 std::isfinite(lastStokesResidual_) &&
559 lastStokesResidual_ <= deformationParameters.stokesTolerance &&
563 for (
const auto &node :
nodes) {
564 if (!isFiniteVec(node.velocity) || !std::isfinite(node.pressure) ||
565 !std::isfinite(node.strainTrace) ||
566 !isFiniteTensor(node.strainRateTensor) ||
567 !isFiniteTensor(node.stressTensor) ||
568 !std::isfinite(node.vonMisesStress))
575 if (!avgExpansionSpeedComputed) {
577 avgExpansionSpeedComputed =
true;
579 return avgExpansionSpeed_;
582 for (
const auto &node :
nodes)
583 callback(node.index, node.pressure);
591 if (
nodes.empty() || ambientInterface ==
nullptr)
594 using VD =
typename PointData<T>::VectorDataType;
595 VD velocity, stressR0, stressR1, stressR2;
597 ConstSparseIterator it(ambientInterface->getDomain());
598 for (; !it.isFinished(); ++it) {
601 const IndexType idx = it.getStartIndices();
605 const auto &n =
nodes[nId];
607 isFiniteVec(n.velocity) ? n.velocity : Vec3D<T>{T(0), T(0), T(0)});
608 const auto sIt = deviatoricStressHistory.find(idx);
609 if (sIt != deviatoricStressHistory.end() &&
610 isFiniteTensor(sIt->second)) {
611 const auto &s = sIt->second;
612 stressR0.push_back({s[0], s[1], s[2]});
613 stressR1.push_back({s[3], s[4], s[5]});
614 stressR2.push_back({s[6], s[7], s[8]});
616 stressR0.push_back({
T(0),
T(0),
T(0)});
617 stressR1.push_back({
T(0),
T(0),
T(0)});
618 stressR2.push_back({
T(0),
T(0),
T(0)});
621 velocity.push_back({
T(0),
T(0),
T(0)});
622 stressR0.push_back({
T(0),
T(0),
T(0)});
623 stressR1.push_back({
T(0),
T(0),
T(0)});
624 stressR2.push_back({
T(0),
T(0),
T(0)});
628 auto &pd = ambientInterface->getPointData();
629 pd.insertReplaceVectorData(std::move(velocity),
"OxVelocity");
630 pd.insertReplaceVectorData(std::move(stressR0),
"OxStressR0");
631 pd.insertReplaceVectorData(std::move(stressR1),
"OxStressR1");
632 pd.insertReplaceVectorData(std::move(stressR2),
"OxStressR2");
639 void seedFromLevelSet() {
640 if (ambientInterface ==
nullptr ||
nodes.empty())
643 auto &pd = ambientInterface->getPointData();
644 const int vIdx = pd.getVectorDataIndex(
"OxVelocity");
645 const int r0Idx = pd.getVectorDataIndex(
"OxStressR0");
646 const int r1Idx = pd.getVectorDataIndex(
"OxStressR1");
647 const int r2Idx = pd.getVectorDataIndex(
"OxStressR2");
648 const int pIdx = pd.getScalarDataIndex(
"OxPressure");
650 const bool hasVelocity = (vIdx != -1);
651 const bool hasStress = (r0Idx != -1 && r1Idx != -1 && r2Idx != -1);
652 const bool hasPressure = (pIdx != -1);
654 if (!hasVelocity && !hasStress && !hasPressure)
657 const auto *vd = hasVelocity ? pd.getVectorData(vIdx) :
nullptr;
658 const auto *r0d = hasStress ? pd.getVectorData(r0Idx) :
nullptr;
659 const auto *r1d = hasStress ? pd.getVectorData(r1Idx) :
nullptr;
660 const auto *r2d = hasStress ? pd.getVectorData(r2Idx) :
nullptr;
661 const auto *ppd = hasPressure ? pd.getScalarData(pIdx) :
nullptr;
663 previousVelocity_.assign(
nodes.size(), Vec3D<T>{});
664 previousPressure_.assign(
nodes.size(),
T(0));
667 for (; !it.isFinished(); ++it) {
670 const auto ptId = it.getPointId();
671 const IndexType idx = it.getStartIndices();
675 if (vd && ptId <
static_cast<decltype(ptId)
>(vd->size()) &&
676 isFiniteVec((*vd)[ptId]))
677 previousVelocity_[ni] = (*vd)[ptId];
678 if (ppd && ptId <
static_cast<decltype(ptId)
>(ppd->size()) &&
679 std::isfinite((*ppd)[ptId]))
680 previousPressure_[ni] = (*ppd)[ptId];
683 if (hasStress && ptId <
static_cast<decltype(ptId)
>(r0d->size()) &&
684 ptId <
static_cast<decltype(ptId)
>(r1d->size()) &&
685 ptId <
static_cast<decltype(ptId)
>(r2d->size())) {
686 const auto &row0 = (*r0d)[ptId];
687 const auto &row1 = (*r1d)[ptId];
688 const auto &row2 = (*r2d)[ptId];
689 std::array<T, 9> s{row0[0], row0[1], row0[2], row1[0], row1[1],
690 row1[2], row2[0], row2[1], row2[2]};
691 if (isFiniteTensor(s))
692 deviatoricStressHistory[idx] = s;
696 if (hasVelocity || hasPressure) {
697 hasPreviousSolution_ =
true;
700 for (std::size_t i = 0; i <
nodes.size(); ++i) {
701 nodes[i].velocity = previousVelocity_[i];
702 nodes[i].pressure = previousPressure_[i];
707#ifdef VIENNALS_GPU_BICGSTAB
712 void buildPressureGpuGeometry() {
713 const std::size_t n =
nodes.size();
714 const T eps = std::numeric_limits<T>::epsilon();
715 pressCoeffGpu_.assign(2 *
D * n, 0.0);
716 pressNeighId32_.assign(2 *
D * n, gpu::kNoNode);
717 actualDiagGpu_.assign(n, 0.0);
719 for (std::size_t
id = 0;
id < n; ++id) {
720 if (touchesAmbient_[
id]) {
721 actualDiagGpu_[id] = 1.0;
724 for (
unsigned dir = 0; dir <
D; ++dir) {
725 const unsigned fiNeg = dir * 2u;
726 const unsigned fiPos = dir * 2u + 1u;
727 IndexType nbNeg =
nodes[id].index;
729 IndexType nbPos =
nodes[id].index;
731 const std::size_t jNeg =
733 const std::size_t jPos =
736 auto effDist = [&](
unsigned fi, std::size_t j) ->
T {
739 const Boundary bt = faceBCTypes_[fi * n + id];
740 return (bt != Boundary::NONE) ? faceBCDists_[fi * n + id] :
gridDelta;
743 const T dNeg = effDist(fiNeg, jNeg);
744 const T dPos = effDist(fiPos, jPos);
745 const T dSum = dNeg + dPos;
749 auto processFace = [&](
unsigned fi, std::size_t j,
T d) {
750 const T c =
T(2) / (d * dSum);
751 if (j !=
noNode && !touchesAmbient_[j]) {
752 pressCoeffGpu_[fi * n + id] =
static_cast<double>(c);
753 pressNeighId32_[fi * n + id] =
static_cast<uint32_t
>(j);
754 actualDiagGpu_[id] += c;
756 faceBCTypes_[fi * n +
id] == Boundary::AMBIENT) {
761 actualDiagGpu_[id] += c;
764 processFace(fiNeg, jNeg, dNeg);
765 processFace(fiPos, jPos, dPos);
767 if (actualDiagGpu_[
id] <= eps)
768 actualDiagGpu_[id] = 1.0;
777 void buildStokesGpuGeometry() {
778 const std::size_t n =
nodes.size();
779 const T eps = std::numeric_limits<T>::epsilon();
780 stokesCoeffGpu_.assign(2 *
D * n, 0.0);
781 stokesNeighId32_.assign(2 *
D * n, gpu::kNoNode);
782 stokesDiagGpu_.assign(
D * n, 0.0);
784 auto addDiag = [&](std::size_t id,
T c) {
785 for (
unsigned comp = 0; comp <
D; ++comp)
786 stokesDiagGpu_[comp * n +
id] +=
static_cast<double>(c);
789 for (std::size_t
id = 0;
id < n; ++id) {
790 for (
unsigned dir = 0; dir <
D; ++dir) {
791 const unsigned fiNeg = dir * 2u;
792 const unsigned fiPos = dir * 2u + 1u;
793 IndexType nbNeg =
nodes[id].index;
795 IndexType nbPos =
nodes[id].index;
797 const bool negInBounds =
inBounds(nbNeg);
798 const bool posInBounds =
inBounds(nbPos);
799 const std::size_t jNeg =
801 const std::size_t jPos =
806 auto stokesFaceDist = [&](
unsigned fi,
bool inb, std::size_t j) ->
T {
809 const Boundary bt = faceBCTypes_[fi * n + id];
810 return (bt != Boundary::NONE) ? faceBCDists_[fi * n + id] :
gridDelta;
813 const T dNeg = stokesFaceDist(fiNeg, negInBounds, jNeg);
814 const T dPos = stokesFaceDist(fiPos, posInBounds, jPos);
815 const T dSum = dNeg + dPos;
819 auto processFace = [&](
unsigned fi,
bool inb, std::size_t j,
T d) {
820 const T c =
T(2) / (d * dSum);
823 stokesCoeffGpu_[fi * n + id] =
static_cast<double>(c);
824 stokesNeighId32_[fi * n + id] =
static_cast<uint32_t
>(j);
831 const Boundary bt = faceBCTypes_[fi * n + id];
832 if (bt == Boundary::REACTION || bt == Boundary::MASK)
837 processFace(fiNeg, negInBounds, jNeg, dNeg);
838 processFace(fiPos, posInBounds, jPos, dPos);
840 for (
unsigned comp = 0; comp <
D; ++comp)
841 if (stokesDiagGpu_[comp * n +
id] <=
static_cast<double>(eps))
842 stokesDiagGpu_[comp * n + id] = 1.0;
859 void buildHarmonicGpuGeometry() {
860 const std::size_t n =
nodes.size();
861 harmonicCoeffGpu_.assign(2 *
D * n, 0.0);
862 harmonicDiagGpu_.assign(n, 0.0);
863 for (std::size_t
id = 0;
id < n; ++id) {
864 for (
unsigned fi = 0; fi < 2 *
D; ++fi) {
865 if (stokesNeighId32_[fi * n +
id] != gpu::kNoNode) {
867 harmonicCoeffGpu_[fi * n + id] = 1.0;
868 harmonicDiagGpu_[id] += 1.0;
875 const Boundary bt = faceBCTypes_[fi * n + id];
876 if (bt == Boundary::REACTION || bt == Boundary::MASK)
877 harmonicDiagGpu_[id] += 1.0;
880 if (harmonicDiagGpu_[
id] < 1e-10)
881 harmonicDiagGpu_[id] = 1.0;
888 void setupDeformationGpuBuffers() {
889 const std::size_t n =
nodes.size();
891 gpu::freeGpuBuffers(gpuPressBufs_);
892 gpuPressBufs_ =
nullptr;
893 gpu::freeGpuBuffers(gpuStokesBufs_);
894 gpuStokesBufs_ =
nullptr;
900 gpu::freeGpuBuffers(gpuPressBufs_);
901 gpuPressBufs_ =
nullptr;
904 gpu::allocGpuBuffers(
static_cast<uint32_t
>(n), 2 *
D, useIlu0);
905 if (!gpuPressBufs_) {
906 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, but "
907 "pressure solver CUDA buffers could not be "
908 "allocated or the CUDA context could not be "
912 if (!gpu::gpuUploadNeighborIds(gpuPressBufs_, pressNeighId32_.data(),
914 gpu::freeGpuBuffers(gpuPressBufs_);
915 gpuPressBufs_ =
nullptr;
916 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, but "
917 "uploading pressure GPU neighbor IDs failed." +
920 if (useIlu0 && !gpu::gpuSetupCSR(gpuPressBufs_, pressNeighId32_.data(),
921 static_cast<uint32_t
>(n), 2 *
D)) {
922 gpu::freeGpuBuffers(gpuPressBufs_);
923 gpuPressBufs_ =
nullptr;
924 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, but "
925 "CUSPARSE setup for the pressure GPU BiCGSTAB "
931 gpu::freeGpuBuffers(gpuStokesBufs_);
932 gpuStokesBufs_ =
nullptr;
935 gpu::allocGpuBuffers(
static_cast<uint32_t
>(n), 2 *
D, useIlu0);
936 if (!gpuStokesBufs_) {
937 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, but "
938 "Stokes solver CUDA buffers could not be "
939 "allocated or the CUDA context could not be "
943 if (!gpu::gpuUploadNeighborIds(gpuStokesBufs_, stokesNeighId32_.data(),
945 gpu::freeGpuBuffers(gpuStokesBufs_);
946 gpuStokesBufs_ =
nullptr;
947 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, but "
948 "uploading Stokes GPU neighbor IDs failed." +
951 if (useIlu0 && !gpu::gpuSetupCSR(gpuStokesBufs_, stokesNeighId32_.data(),
952 static_cast<uint32_t
>(n), 2 *
D)) {
953 gpu::freeGpuBuffers(gpuStokesBufs_);
954 gpuStokesBufs_ =
nullptr;
955 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, but "
956 "CUSPARSE setup for the Stokes GPU BiCGSTAB "
963 gpu::freeGpuBuffers(gpuHarmonicBufs_);
964 gpuHarmonicBufs_ =
nullptr;
967 gpu::allocGpuBuffers(
static_cast<uint32_t
>(n), 2 *
D, useIlu0);
968 if (!gpuHarmonicBufs_) {
969 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, but "
970 "harmonic solver CUDA buffers could not be "
974 if (!gpu::gpuUploadNeighborIds(gpuHarmonicBufs_, stokesNeighId32_.data(),
976 gpu::freeGpuBuffers(gpuHarmonicBufs_);
977 gpuHarmonicBufs_ =
nullptr;
978 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, but "
979 "uploading harmonic GPU neighbor IDs failed." +
983 !gpu::gpuSetupCSR(gpuHarmonicBufs_, stokesNeighId32_.data(),
984 static_cast<uint32_t
>(n), 2 *
D)) {
985 gpu::freeGpuBuffers(gpuHarmonicBufs_);
986 gpuHarmonicBufs_ =
nullptr;
987 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, but "
988 "CUSPARSE setup for the harmonic GPU BiCGSTAB "
995 logDeformationBackend(
"GPU BiCGSTAB",
996 "pressure/Stokes/harmonic, preconditioner=" +
997 std::string(useIlu0 ?
"ILU0" :
"Jacobi"));
1001#ifdef VIENNALS_GPU_BICGSTAB
1002 static std::string gpuErrorDetail() {
1003 const char *detail = gpu::gpuGetLastErrorMessage();
1004 if (detail && detail[0] !=
'\0')
1005 return std::string(
" Detail: ") + detail;
1010 void logDeformationBackend(
const std::string &backend,
1011 const std::string &detail)
const {
1012 if (!Logger::hasInfo())
1014 const std::string msg =
"OxidationDeformation: using " + backend +
1015 " for mechanics pressure/Stokes solves (nodes=" +
1016 std::to_string(
nodes.size()) +
1017 (detail.empty() ? std::string() :
", " + detail) +
1019 if (msg == lastLoggedBackend_)
1021 lastLoggedBackend_ = msg;
1022 Logger::getInstance().addInfo(msg).print();
1028 reactionInterface, ambientInterface, maskInterface, useRequestedBounds,
1029 requestedMinIndex, requestedMaxIndex,
1030 deformationParameters.maxGridPoints,
"OxidationDeformation");
1037 ConstSparseIterator reactionIt(reactionInterface->getDomain());
1038 ConstSparseIterator ambientIt(ambientInterface->getDomain());
1043 const T reactionPhi =
valueAt(reactionIt, index);
1044 const T ambientPhi =
valueAt(ambientIt, index);
1047 const std::size_t
id =
nodes.size();
1049 nodes.push_back({index});
1057 const std::size_t n =
nodes.size();
1058 faceBCTypes_.assign(2 *
D * n, Boundary::NONE);
1059 faceBCDists_.assign(2 *
D * n,
T(1));
1060 touchesAmbient_.assign(n, uint8_t(0));
1061 for (std::size_t
id = 0;
id < n; ++id) {
1062 const auto &node =
nodes[id];
1063 bool touchesAmbient =
false;
1064 bool touchesSolidBoundary =
false;
1065 for (
unsigned dir = 0; dir <
D; ++dir) {
1066 for (
int off : {-1, 1}) {
1067 const unsigned fi = dir * 2u + (off == 1 ? 1u : 0u);
1068 IndexType nb = node.index;
1074 faceBCTypes_[fi * n + id] = bi.boundary;
1075 faceBCDists_[fi * n + id] = bi.distance;
1076 if (bi.boundary == Boundary::AMBIENT)
1077 touchesAmbient =
true;
1078 else if (bi.boundary == Boundary::MASK ||
1079 bi.boundary == Boundary::REACTION)
1080 touchesSolidBoundary =
true;
1087 touchesAmbient_[id] =
1088 (touchesAmbient && !touchesSolidBoundary) ? uint8_t(1) : uint8_t(0);
1091#ifdef VIENNALS_GPU_BICGSTAB
1092 buildPressureGpuGeometry();
1093 buildStokesGpuGeometry();
1094 buildHarmonicGpuGeometry();
1095 setupDeformationGpuBuffers();
1098 VIENNACORE_LOG_ERROR(
"OxidationDeformation: explicit GPU mode was "
1099 "requested, but ViennaLS was built without "
1100 "VIENNALS_GPU_BICGSTAB.");
1109 template <
class SolverT>
1111 const std::vector<Vec3D<SolverT>> &v,
1112 Vec3D<T> &sum)
const {
1113 const auto &node =
nodes[nodeId];
1114 sum = {
T(0),
T(0),
T(0)};
1116 const auto toT = [](
const Vec3D<SolverT> &w) -> Vec3D<T> {
1117 return {
static_cast<T>(w[0]),
static_cast<T>(w[1]),
static_cast<T>(w[2])};
1120 for (
unsigned direction = 0; direction <
D; ++direction) {
1121 for (
int offset : {-1, 1}) {
1122 IndexType neighbor = node.index;
1123 neighbor[direction] += offset;
1130 const std::size_t neighborId =
lookupNode(neighbor);
1131 if (neighborId !=
noNode) {
1136 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
1137 const Boundary boundary = faceBCTypes_[fi *
nodes.size() + nodeId];
1138 if (boundary == Boundary::REACTION) {
1140 }
else if (boundary == Boundary::MASK) {
1151 template <
class SolverT>
1153 const std::vector<Vec3D<T>> &b,
1154 std::vector<Vec3D<SolverT>> &Av)
const {
1155 const T diagVal =
static_cast<T>(2 *
D);
1156#pragma omp parallel for schedule(static)
1157 for (std::size_t i = 0; i <
nodes.size(); ++i) {
1160 for (
unsigned c = 0; c <
D; ++c)
1161 Av[i][c] =
static_cast<SolverT
>(diagVal * v[i][c] - sum[c] + b[i][c]);
1173 const std::size_t n =
nodes.size();
1174 const T diagVal =
static_cast<T>(2 *
D);
1179 std::vector<Vec3D<T>> b(n);
1181 const std::vector<Vec3D<SolverT>> zeros(
1182 n, Vec3D<SolverT>{SolverT(0), SolverT(0), SolverT(0)});
1183#pragma omp parallel for schedule(static)
1184 for (std::size_t i = 0; i < n; ++i)
1189 std::vector<Vec3D<SolverT>> x(n);
1190 for (std::size_t i = 0; i < n; ++i)
1191 for (
unsigned c = 0; c <
D; ++c) {
1192 const T value =
nodes[i].velocity[c];
1193 x[i][c] =
static_cast<SolverT
>(std::isfinite(value) ? value :
T(0));
1196#ifdef VIENNALS_GPU_BICGSTAB
1197 if (gpu::gpuIsValid(gpuHarmonicBufs_)) {
1198 const std::size_t nf = 2u *
D * n;
1199 if (harmonicDiagGpu_.size() != n || harmonicCoeffGpu_.size() != nf) {
1200 VIENNACORE_LOG_ERROR(
"OxidationDeformation: harmonic GPU geometry has "
1201 "the wrong size for the current node set.");
1204 Timer<> tUpload, tSolve;
1205 std::vector<Vec3D<SolverT>> xSolved(n);
1206 unsigned maxGpuIterations = 0;
1207 double maxGpuResidual = 0.0;
1209 for (
unsigned c = 0; c <
D; ++c) {
1210 std::vector<double> bGpu(n), xGpu(n);
1211 for (std::size_t i = 0; i < n; ++i) {
1212 bGpu[i] =
static_cast<double>(b[i][c]);
1213 xGpu[i] =
static_cast<double>(x[i][c]);
1217 const bool gpuUploadOk =
1218 (c == 0) ? gpu::gpuUploadSolverArrays(
1219 gpuHarmonicBufs_, harmonicDiagGpu_.data(),
1220 bGpu.data(), harmonicCoeffGpu_.data(),
1221 static_cast<uint32_t
>(n), harmonicCoeffGpu_.size())
1222 : gpu::gpuUploadRhs(gpuHarmonicBufs_, bGpu.data(),
1223 static_cast<uint32_t
>(n));
1226 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, "
1227 "but uploading harmonic solver arrays failed." +
1231 unsigned gpuIterations = 0;
1232 double gpuResidual = 0.0;
1234 const bool gpuConverged = gpu::gpuSolveBiCGSTAB(
1235 gpuHarmonicBufs_, xGpu.data(),
1236 static_cast<double>(std::numeric_limits<SolverT>::epsilon()),
1237 deformationParameters.harmonicIterations,
1238 static_cast<double>(deformationParameters.tolerance), gpuIterations,
1242 if (!gpuConverged || !std::isfinite(gpuResidual)) {
1243 VIENNACORE_LOG_ERROR(
1244 "OxidationDeformation: harmonic GPU BiCGSTAB failed or produced "
1245 "a non-finite residual for component " +
1246 std::to_string(c) +
" (iters=" + std::to_string(gpuIterations) +
1247 ", residual=" + std::to_string(gpuResidual) +
").");
1250 maxGpuIterations = std::max(maxGpuIterations, gpuIterations);
1251 maxGpuResidual = std::max(maxGpuResidual, gpuResidual);
1252 for (std::size_t i = 0; i < n; ++i)
1253 xSolved[i][c] =
static_cast<SolverT
>(xGpu[i]);
1256 for (std::size_t i = 0; i < n; ++i)
1257 for (
unsigned c = 0; c <
D; ++c)
1258 nodes[i].velocity[c] =
static_cast<T>(xSolved[i][c]);
1259 iterations = maxGpuIterations;
1260 residual = maxGpuResidual;
1262 if (Logger::hasTiming()) {
1263 Logger::getInstance()
1264 .addTiming(
"harmonic n=" + std::to_string(n) +
1265 " iters=" + std::to_string(iterations) +
" res=" +
1266 std::to_string(residual) +
" [GPU] GPU BiCGSTAB",
1270 if (Logger::hasDebug()) {
1271 Logger::getInstance()
1272 .addTiming(
"harmonic n=" + std::to_string(n) +
" [GPU] GPU upload",
1281 const Vec3D<SolverT> zero3{SolverT(0), SolverT(0), SolverT(0)};
1282 std::vector<Vec3D<SolverT>> Ax(n);
1284 std::vector<Vec3D<SolverT>> r(n), r_hat(n);
1285 for (std::size_t i = 0; i < n; ++i)
1286 for (
unsigned c = 0; c <
D; ++c) {
1287 r[i][c] =
static_cast<SolverT
>(b[i][c] - Ax[i][c]);
1288 r_hat[i][c] = r[i][c];
1292 std::vector<Vec3D<SolverT>> pv(n, zero3), sv(n, zero3), y(n), z(n), s(n),
1294 T rho =
T(1), alpha =
T(1), omega =
T(1);
1296 auto vecDot = [&](
const std::vector<Vec3D<SolverT>> &a,
1297 const std::vector<Vec3D<SolverT>> &bv) {
1299 for (std::size_t i = 0; i < n; ++i)
1300 for (
unsigned c = 0; c <
D; ++c)
1301 sum +=
static_cast<T>(a[i][c]) *
static_cast<T>(bv[i][c]);
1305 auto vecMaxAbs = [&](
const std::vector<Vec3D<SolverT>> &vin) {
1307 for (std::size_t i = 0; i < n; ++i)
1308 for (
unsigned c = 0; c <
D; ++c)
1309 m = std::max(m, std::abs(
static_cast<T>(vin[i][c])));
1313 const T b_norm = [&] {
1315 for (std::size_t i = 0; i < n; ++i)
1316 for (
unsigned c = 0; c <
D; ++c)
1317 m = std::max(m, std::abs(b[i][c]));
1318 return (m <
T(1e-100)) ?
T(1) : m;
1321 for (; iterations < deformationParameters.harmonicIterations;
1323 const T rho_new = vecDot(r_hat, r);
1324 if (!std::isfinite(rho_new) || std::abs(rho_new) <
T(1e-100))
1326 if (!std::isfinite(rho) || !std::isfinite(alpha) ||
1327 !std::isfinite(omega) || std::abs(omega) <
T(1e-100))
1330 const T beta = (rho_new / rho) * (alpha / omega);
1331 if (!std::isfinite(beta))
1335 for (std::size_t i = 0; i < n; ++i)
1336 for (
unsigned c = 0; c <
D; ++c)
1337 pv[i][c] =
static_cast<SolverT
>(r[i][c] +
1338 beta * (pv[i][c] - omega * sv[i][c]));
1341 for (std::size_t i = 0; i < n; ++i)
1342 for (
unsigned c = 0; c <
D; ++c)
1343 y[i][c] =
static_cast<SolverT
>(
static_cast<T>(pv[i][c]) / diagVal);
1347 const T r_hat_v = vecDot(r_hat, sv);
1348 if (!std::isfinite(r_hat_v) || std::abs(r_hat_v) <
T(1e-100))
1351 alpha = rho_new / r_hat_v;
1352 if (!std::isfinite(alpha))
1355 for (std::size_t i = 0; i < n; ++i)
1356 for (
unsigned c = 0; c <
D; ++c)
1357 s[i][c] =
static_cast<SolverT
>(r[i][c] - alpha * sv[i][c]);
1359 residual = vecMaxAbs(s);
1360 if (!std::isfinite(residual))
1362 if (residual < deformationParameters.tolerance * b_norm) {
1363 for (std::size_t i = 0; i < n; ++i)
1364 for (
unsigned c = 0; c <
D; ++c)
1365 x[i][c] =
static_cast<SolverT
>(x[i][c] + alpha * y[i][c]);
1371 for (std::size_t i = 0; i < n; ++i)
1372 for (
unsigned c = 0; c <
D; ++c)
1373 z[i][c] =
static_cast<SolverT
>(
static_cast<T>(s[i][c]) / diagVal);
1377 const T t_s = vecDot(t, s);
1378 const T t_t = vecDot(t, t);
1379 if (!std::isfinite(t_s) || !std::isfinite(t_t))
1381 omega = (t_t >
T(1e-100)) ? t_s / t_t :
T(0);
1382 if (!std::isfinite(omega))
1385 for (std::size_t i = 0; i < n; ++i)
1386 for (
unsigned c = 0; c <
D; ++c) {
1388 static_cast<SolverT
>(x[i][c] + alpha * y[i][c] + omega * z[i][c]);
1389 r[i][c] =
static_cast<SolverT
>(s[i][c] - omega * t[i][c]);
1392 residual = vecMaxAbs(r);
1393 if (!std::isfinite(residual))
1395 if (residual < deformationParameters.tolerance * b_norm) {
1401 bool finiteSolution =
true;
1402 for (std::size_t i = 0; i < n; ++i)
1403 for (
unsigned c = 0; c <
D; ++c)
1404 if (!std::isfinite(
static_cast<T>(x[i][c])))
1405 finiteSolution =
false;
1407 if (finiteSolution) {
1408 for (std::size_t i = 0; i < n; ++i)
1409 for (
unsigned c = 0; c <
D; ++c)
1410 nodes[i].velocity[c] =
static_cast<T>(x[i][c]);
1412 residual = std::numeric_limits<T>::infinity();
1414 if (residual > deformationParameters.tolerance * b_norm)
1415 VIENNACORE_LOG_WARNING(
1416 "solveVelocity (harmonic): BiCGSTAB did not converge after " +
1417 std::to_string(iterations) +
"/" +
1418 std::to_string(deformationParameters.harmonicIterations) +
1419 " iterations (residual=" + std::to_string(residual / b_norm) +
1420 ", tolerance=" + std::to_string(deformationParameters.tolerance) +
1431 const std::size_t n =
nodes.size();
1432 std::vector<Vec3D<T>> diagV(n, Vec3D<T>{
T(0),
T(0),
T(0)});
1435 const std::vector<Vec3D<T>> zeros(n, Vec3D<T>{
T(0),
T(0),
T(0)});
1436 std::vector<Vec3D<T>> tmp(n);
1437#pragma omp parallel for schedule(static)
1438 for (std::size_t i = 0; i < n; ++i) {
1441 for (
unsigned comp = 0; comp <
D; ++comp)
1442 diagV[i][comp] = diag;
1449 T mechanicsResidual = 0.;
1467 const std::vector<Vec3D<T>> diagV =
1470 for (
unsigned iteration = 0;
1471 iteration < deformationParameters.mechanicsIterations; ++iteration) {
1479 Timer<> tStokes, tPressure;
1483 if (!std::isfinite(lastStokesResidual_)) {
1484 mechanicsResidual = std::numeric_limits<T>::infinity();
1492 if (!std::isfinite(lastPressureResidual_)) {
1493 mechanicsResidual = std::numeric_limits<T>::infinity();
1502 if (!std::isfinite(mechanicsResidual)) {
1503 mechanicsResidual = std::numeric_limits<T>::infinity();
1507 if (Logger::hasDebug())
1508 Logger::getInstance()
1510 " mechanics[" + std::to_string(iteration) +
1511 "] stokes iters=" + std::to_string(lastStokesIters_) +
1513 std::to_string(deformationParameters.stokesIterations) +
1514 " res=" + std::to_string(lastStokesResidual_),
1517 " mechanics[" + std::to_string(iteration) +
1518 "] pressure iters=" + std::to_string(lastPressureIters_) +
1520 std::to_string(deformationParameters.pressureIterations) +
1521 " res=" + std::to_string(lastPressureResidual_) +
1522 " coupling=" + std::to_string(mechanicsResidual),
1526 if (mechanicsResidual < deformationParameters.mechanicsTolerance)
1532 residual = mechanicsResidual;
1533 if (residual > deformationParameters.mechanicsTolerance)
1534 VIENNACORE_LOG_WARNING(
1535 "solveMechanics: did not converge after " +
1536 std::to_string(deformationParameters.mechanicsIterations) +
1537 " iterations (residual=" + std::to_string(residual) +
", tolerance=" +
1538 std::to_string(deformationParameters.mechanicsTolerance) +
")");
1548 const std::vector<Vec3D<T>> &diagV) {
1551 if (deformationParameters.viscosity <= std::numeric_limits<T>::epsilon())
1554 const std::size_t n =
nodes.size();
1555 const T invEta =
T(1) / deformationParameters.viscosity;
1557#pragma omp parallel for schedule(static)
1558 for (std::size_t i = 0; i < n; ++i) {
1559 for (
unsigned dir = 0; dir <
D; ++dir) {
1560 const T ai = diagV[i][dir];
1561 if (ai <= std::numeric_limits<T>::epsilon())
1567 const unsigned fi = dir * 2u;
1568 IndexType nb =
nodes[i].index;
1573 dpMinus =
nodes[j].pressure - pressureOld[j];
1577 dMinus = faceBCDists_[fi * n + i];
1588 const unsigned fi = dir * 2u + 1u;
1589 IndexType nb =
nodes[i].index;
1594 dpPlus =
nodes[j].pressure - pressureOld[j];
1598 dPlus = faceBCDists_[fi * n + i];
1606 const T dpCenter =
nodes[i].pressure - pressureOld[i];
1609 const T correction =
1610 gradDP * invEta / ai * deformationParameters.relaxation;
1611 if (std::isfinite(correction) && std::isfinite(
nodes[i].velocity[dir]))
1612 nodes[i].velocity[dir] -= correction;
1620 template <
class SolverT>
1622 const std::vector<SolverT> &p,
1623 const std::vector<T> &ambientBP,
T &diag,
1625 if (touchesAmbient_[nodeId]) {
1627 rhs = ambientBP[nodeId];
1632 for (
unsigned direction = 0; direction <
D; ++direction) {
1637 const T dSum = plus.distance + minus.distance;
1638 const T plusCoeff =
T(2) / (plus.distance * dSum);
1639 const T minusCoeff =
T(2) / (minus.distance * dSum);
1640 rhs += plusCoeff * plus.value + minusCoeff * minus.value;
1641 diag += plusCoeff + minusCoeff;
1646 template <
class SolverT>
1649 const std::vector<T> &precomputedDiag,
1650 const std::vector<T> &pBC, std::vector<SolverT> &Av)
const {
1651#pragma omp parallel for schedule(static)
1652 for (std::size_t i = 0; i <
nodes.size(); ++i) {
1655 Av[i] =
static_cast<SolverT
>(precomputedDiag[i] * v[i] - rhs + pBC[i]);
1665 const std::size_t n =
nodes.size();
1666 const T eps = std::numeric_limits<T>::epsilon();
1668 std::vector<T> divergence(n), ambientBP(n);
1669#pragma omp parallel for schedule(static)
1670 for (std::size_t i = 0; i < n; ++i) {
1675 auto warnBadPressureAssembly = [](
const std::string &stage,
1676 std::size_t nodeId,
const IndexType &idx,
1678 VIENNACORE_LOG_WARNING(
1679 "solvePressure: non-finite/overflow " + stage +
1680 " at node=" + std::to_string(nodeId) +
" index=(" +
1681 std::to_string(idx[0]) +
"," + std::to_string(idx[1]) +
1682 (
D == 3 ?
"," + std::to_string(idx[2]) : std::string()) +
1683 ") value=" + std::to_string(value));
1686 const T solverMax =
static_cast<T>(std::numeric_limits<SolverT>::max());
1687 for (std::size_t i = 0; i < n; ++i) {
1688 if (!std::isfinite(divergence[i]) ||
1689 std::abs(divergence[i]) > solverMax) {
1690 warnBadPressureAssembly(
"divergence", i,
nodes[i].index, divergence[i]);
1693 if (!std::isfinite(ambientBP[i]) || std::abs(ambientBP[i]) > solverMax) {
1694 warnBadPressureAssembly(
"ambient pressure boundary", i,
nodes[i].index,
1701 std::vector<T> diag(n), pBC(n);
1703 const std::vector<SolverT> zeros(n, SolverT(0));
1704#pragma omp parallel for schedule(static)
1705 for (std::size_t i = 0; i < n; ++i)
1709 for (std::size_t i = 0; i < n; ++i) {
1710 if (!std::isfinite(diag[i]) || std::abs(diag[i]) > solverMax) {
1711 warnBadPressureAssembly(
"pressure diagonal", i,
nodes[i].index,
1715 if (!std::isfinite(pBC[i]) || std::abs(pBC[i]) > solverMax) {
1716 warnBadPressureAssembly(
"pressure boundary rhs", i,
nodes[i].index,
1722 std::vector<T> b(n);
1724 for (std::size_t i = 0; i < n; ++i) {
1725 b[i] = pBC[i] + deformationParameters.bulkModulus * divergence[i];
1726 b_norm = std::max(b_norm, std::abs(b[i]));
1728 for (std::size_t i = 0; i < n; ++i) {
1729 if (!std::isfinite(b[i]) || std::abs(b[i]) > solverMax) {
1730 warnBadPressureAssembly(
"pressure rhs", i,
nodes[i].index, b[i]);
1734 if (b_norm <
T(1e-100))
1737 std::vector<SolverT> x(n);
1738 for (std::size_t i = 0; i < n; ++i) {
1739 T guess = touchesAmbient_[i] ? ambientBP[i] :
nodes[i].pressure;
1740 if (!std::isfinite(guess))
1741 guess = deformationParameters.ambientPressure;
1742 x[i] =
static_cast<SolverT
>(guess);
1745#ifdef VIENNALS_GPU_BICGSTAB
1746 if (gpu::gpuIsValid(gpuPressBufs_)) {
1747 const std::size_t nf = 2u *
D * n;
1748 if (actualDiagGpu_.size() != n || pressCoeffGpu_.size() != nf) {
1749 VIENNACORE_LOG_ERROR(
"OxidationDeformation: pressure GPU geometry has "
1750 "the wrong size for the current node set.");
1753 Timer<> tUpload, tSolve;
1754 std::vector<double> bGpu(n), xGpu(n);
1755 for (std::size_t i = 0; i < n; ++i) {
1756 bGpu[i] =
static_cast<double>(b[i]);
1757 xGpu[i] =
static_cast<double>(x[i]);
1761 const bool gpuUploadOk = gpu::gpuUploadSolverArrays(
1762 gpuPressBufs_, actualDiagGpu_.data(), bGpu.data(),
1763 pressCoeffGpu_.data(),
static_cast<uint32_t
>(n),
1764 pressCoeffGpu_.size());
1767 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, but "
1768 "uploading pressure solver arrays or factorizing "
1773 unsigned gpuIterations = 0;
1774 double gpuResidual = 0.0;
1776 const bool gpuConverged = gpu::gpuSolveBiCGSTAB(
1777 gpuPressBufs_, xGpu.data(),
static_cast<double>(eps),
1778 deformationParameters.pressureIterations,
1779 static_cast<double>(deformationParameters.pressureTolerance),
1780 gpuIterations, gpuResidual);
1786 if (!gpuConverged || !std::isfinite(gpuResidual)) {
1787 VIENNACORE_LOG_ERROR(
1788 "OxidationDeformation: pressure GPU BiCGSTAB failed or produced "
1789 "a non-finite residual (iters=" +
1790 std::to_string(gpuIterations) +
1791 ", residual=" + std::to_string(gpuResidual) +
").");
1795 const T beta = deformationParameters.pressureRelaxation;
1796 const T oneMinB =
T(1) - beta;
1797 for (std::size_t i = 0; i < n; ++i)
1799 oneMinB *
nodes[i].pressure + beta *
static_cast<T>(xGpu[i]);
1801 lastPressureIters_ = gpuIterations;
1802 lastPressureResidual_ = gpuResidual / b_norm;
1804 if (Logger::hasDebug()) {
1805 const std::string tag =
1806 "pressure n=" + std::to_string(n) +
1807 " iters=" + std::to_string(lastPressureIters_) +
1808 " res=" + std::to_string(lastPressureResidual_) +
" [GPU]";
1809 Logger::getInstance()
1810 .addTiming(tag +
" GPU upload", tUpload)
1811 .addTiming(tag +
" GPU BiCGSTAB", tSolve)
1833 std::vector<T> pressCoeff(2 *
D * n,
T(0));
1834 std::vector<std::size_t> pressNeighId(2 *
D * n,
noNode);
1835 std::vector<T> actualDiag(n,
T(0));
1837 for (std::size_t
id = 0;
id < n; ++id) {
1838 if (touchesAmbient_[
id]) {
1839 actualDiag[id] =
T(1);
1842 for (
unsigned dir = 0; dir <
D; ++dir) {
1843 const unsigned fiNeg = dir * 2u;
1844 const unsigned fiPos = dir * 2u + 1u;
1845 IndexType nbNeg =
nodes[id].index;
1847 IndexType nbPos =
nodes[id].index;
1849 const std::size_t jNeg =
1851 const std::size_t jPos =
1859 auto effectiveDist = [&](
unsigned fi, std::size_t j) ->
T {
1862 const Boundary bt = faceBCTypes_[fi * n + id];
1863 if (bt != Boundary::NONE)
1864 return faceBCDists_[fi * n + id];
1868 const T dNeg = effectiveDist(fiNeg, jNeg);
1869 const T dPos = effectiveDist(fiPos, jPos);
1870 const T dSum = dNeg + dPos;
1874 if (jNeg !=
noNode && !touchesAmbient_[jNeg]) {
1875 const T c =
T(2) / (dNeg * dSum);
1876 pressCoeff[fiNeg * n + id] = c;
1877 pressNeighId[fiNeg * n + id] = jNeg;
1878 actualDiag[id] += c;
1879 }
else if (jNeg !=
noNode ||
1880 faceBCTypes_[fiNeg * n +
id] == Boundary::AMBIENT) {
1885 actualDiag[id] +=
T(2) / (dNeg * dSum);
1887 if (jPos !=
noNode && !touchesAmbient_[jPos]) {
1888 const T c =
T(2) / (dPos * dSum);
1889 pressCoeff[fiPos * n + id] = c;
1890 pressNeighId[fiPos * n + id] = jPos;
1891 actualDiag[id] += c;
1892 }
else if (jPos !=
noNode ||
1893 faceBCTypes_[fiPos * n +
id] == Boundary::AMBIENT) {
1894 actualDiag[id] +=
T(2) / (dPos * dSum);
1899 if (actualDiag[
id] <= eps)
1900 actualDiag[id] =
T(1);
1924 std::vector<T> ilu_diag(n);
1925 for (std::size_t
id = 0;
id < n; ++id) {
1926 if (touchesAmbient_[
id]) {
1927 ilu_diag[id] =
T(1);
1930 ilu_diag[id] = actualDiag[id];
1931 for (
unsigned dir = 0; dir <
D; ++dir) {
1932 const unsigned fi_L = dir * 2u;
1933 const unsigned fi_U =
1935 const std::size_t j = pressNeighId[fi_L * n + id];
1936 if (j ==
noNode || ilu_diag[j] <= eps)
1945 pressCoeff[fi_L * n + id] * pressCoeff[fi_U * n + j] / ilu_diag[j];
1947 if (ilu_diag[
id] <= eps)
1948 ilu_diag[id] = actualDiag[id];
1951 auto applyIlu = [&](
const std::vector<SolverT> &in,
1952 std::vector<SolverT> &out) {
1953 std::vector<T> y(n);
1955 for (std::size_t i = 0; i < n; ++i) {
1956 T val =
static_cast<T>(in[i]);
1957 for (
unsigned dir = 0; dir <
D; ++dir) {
1958 const unsigned fi_L = dir * 2u;
1959 const std::size_t j = pressNeighId[fi_L * n + i];
1964 val += (pressCoeff[fi_L * n + i] / ilu_diag[j]) * y[j];
1969 for (std::size_t i = n; i-- > 0;) {
1971 for (
unsigned dir = 0; dir <
D; ++dir) {
1972 const unsigned fi_U = dir * 2u + 1u;
1973 const std::size_t j = pressNeighId[fi_U * n + i];
1978 val += pressCoeff[fi_U * n + i] *
static_cast<T>(out[j]);
1980 out[i] =
static_cast<SolverT
>(val / ilu_diag[i]);
1984 std::vector<SolverT> Ax(n);
1986 for (std::size_t i = 0; i < n; ++i) {
1987 if (!std::isfinite(
static_cast<T>(Ax[i])) ||
1988 std::abs(
static_cast<T>(Ax[i])) > solverMax) {
1989 warnBadPressureAssembly(
"initial pressure matvec", i,
nodes[i].index,
1990 static_cast<T>(Ax[i]));
1994 std::vector<SolverT> r(n), r_hat(n), p(n, SolverT(0)), v(n, SolverT(0)),
1995 y(n), z(n), s(n), t(n);
1996 for (std::size_t i = 0; i < n; ++i) {
1997 r[i] =
static_cast<SolverT
>(b[i] - Ax[i]);
2001 T rho =
T(1), alpha =
T(1), omega =
T(1);
2002 T pressureResidual =
T(0);
2003 unsigned pressureIter = 0;
2004 bool pressureBreakdown =
false;
2005 for (std::size_t i = 0; i < n; ++i) {
2006 const T ri =
static_cast<T>(r[i]);
2007 if (!std::isfinite(ri)) {
2008 pressureBreakdown =
true;
2011 pressureResidual = std::max(pressureResidual, std::abs(ri));
2014 for (; !pressureBreakdown &&
2015 pressureIter < deformationParameters.pressureIterations;
2018 for (std::size_t i = 0; i < n; ++i)
2019 rho_new +=
static_cast<T>(r_hat[i]) *
static_cast<T>(r[i]);
2021 if (!std::isfinite(rho_new)) {
2022 pressureBreakdown =
true;
2025 if (std::abs(rho_new) <
T(1e-100))
2027 if (!std::isfinite(rho) || !std::isfinite(alpha) ||
2028 !std::isfinite(omega) || std::abs(omega) <
T(1e-100)) {
2029 pressureBreakdown =
true;
2033 const T beta = (rho_new / rho) * (alpha / omega);
2034 if (!std::isfinite(beta)) {
2035 pressureBreakdown =
true;
2040 for (std::size_t i = 0; i < n; ++i)
2041 p[i] =
static_cast<SolverT
>(r[i] + beta * (p[i] - omega * v[i]));
2048 for (std::size_t i = 0; i < n; ++i)
2049 r_hat_v +=
static_cast<T>(r_hat[i]) *
static_cast<T>(v[i]);
2050 if (!std::isfinite(r_hat_v)) {
2051 pressureBreakdown =
true;
2054 if (std::abs(r_hat_v) <
T(1e-100))
2057 alpha = rho_new / r_hat_v;
2058 if (!std::isfinite(alpha)) {
2059 pressureBreakdown =
true;
2063 for (std::size_t i = 0; i < n; ++i)
2064 s[i] =
static_cast<SolverT
>(r[i] - alpha * v[i]);
2066 pressureResidual =
T(0);
2067 for (std::size_t i = 0; i < n; ++i)
2069 std::max(pressureResidual, std::abs(
static_cast<T>(s[i])));
2070 if (!std::isfinite(pressureResidual)) {
2071 pressureBreakdown =
true;
2074 if (pressureResidual < deformationParameters.pressureTolerance * b_norm) {
2075 for (std::size_t i = 0; i < n; ++i)
2076 x[i] =
static_cast<SolverT
>(x[i] + alpha * y[i]);
2084 T t_s =
T(0), t_t =
T(0);
2085 for (std::size_t i = 0; i < n; ++i) {
2086 t_s +=
static_cast<T>(t[i]) *
static_cast<T>(s[i]);
2087 t_t +=
static_cast<T>(t[i]) *
static_cast<T>(t[i]);
2089 if (!std::isfinite(t_s) || !std::isfinite(t_t)) {
2090 pressureBreakdown =
true;
2093 omega = (t_t >
T(1e-100)) ? t_s / t_t :
T(0);
2094 if (!std::isfinite(omega)) {
2095 pressureBreakdown =
true;
2099 for (std::size_t i = 0; i < n; ++i) {
2100 x[i] =
static_cast<SolverT
>(x[i] + alpha * y[i] + omega * z[i]);
2101 r[i] =
static_cast<SolverT
>(s[i] - omega * t[i]);
2104 pressureResidual =
T(0);
2105 for (std::size_t i = 0; i < n; ++i)
2107 std::max(pressureResidual, std::abs(
static_cast<T>(r[i])));
2108 if (!std::isfinite(pressureResidual)) {
2109 pressureBreakdown =
true;
2112 if (pressureResidual < deformationParameters.pressureTolerance * b_norm)
2116 if (pressureBreakdown)
2117 pressureResidual = std::numeric_limits<T>::infinity();
2119 bool finiteSolution = !pressureBreakdown;
2120 for (std::size_t i = 0; i < n; ++i)
2121 if (!std::isfinite(
static_cast<T>(x[i])))
2122 finiteSolution =
false;
2124 lastPressureIters_ = pressureIter;
2125 lastPressureResidual_ = pressureResidual / b_norm;
2126 if (finiteSolution) {
2127 const T beta = deformationParameters.pressureRelaxation;
2128 const T oneMinB =
T(1) - beta;
2129 for (std::size_t i = 0; i < n; ++i)
2131 oneMinB *
nodes[i].pressure + beta *
static_cast<T>(x[i]);
2133 lastPressureResidual_ = std::numeric_limits<T>::infinity();
2135 if (lastPressureResidual_ > deformationParameters.pressureTolerance)
2136 VIENNACORE_LOG_WARNING(
2137 "solvePressure: BiCGSTAB did not converge after " +
2138 std::to_string(lastPressureIters_) +
"/" +
2139 std::to_string(deformationParameters.pressureIterations) +
2140 " iterations (residual=" + std::to_string(lastPressureResidual_) +
2142 std::to_string(deformationParameters.pressureTolerance) +
")");
2147 template <
class SolverT>
2149 const std::vector<Vec3D<SolverT>> &v,
T &diag,
2150 Vec3D<T> &rhs)
const {
2152 rhs = {
T(0),
T(0),
T(0)};
2153 for (
unsigned direction = 0; direction <
D; ++direction) {
2156 const T dSum = plus.distance + minus.distance;
2157 const T plusCoeff =
T(2) / (plus.distance * dSum);
2158 const T minusCoeff =
T(2) / (minus.distance * dSum);
2161 diag += plusCoeff + minusCoeff;
2166 if (deformationParameters.viscosity <= std::numeric_limits<T>::epsilon())
2173 const std::size_t n =
nodes.size();
2174 const T eps = std::numeric_limits<T>::epsilon();
2177 std::vector<T> diag(n);
2178 std::vector<Vec3D<T>> vBC(n), forcing(n);
2180 const std::vector<Vec3D<SolverT>> zeros(
2181 n, Vec3D<SolverT>{SolverT(0), SolverT(0), SolverT(0)});
2182#pragma omp parallel for schedule(static)
2183 for (std::size_t i = 0; i < n; ++i) {
2190 std::vector<Vec3D<T>> b(n);
2192 for (std::size_t i = 0; i < n; ++i) {
2193 for (
unsigned c = 0; c <
D; ++c) {
2194 b[i][c] = vBC[i][c] - forcing[i][c] / deformationParameters.viscosity;
2195 b_norm = std::max(b_norm, std::abs(b[i][c]));
2198 if (b_norm <
T(1e-100))
2203 std::vector<Vec3D<SolverT>> x(n);
2206 for (std::size_t i = 0; i < n; ++i)
2207 for (
unsigned c = 0; c <
D; ++c) {
2208 const T value = vel[i][c];
2209 x[i][c] =
static_cast<SolverT
>(std::isfinite(value) ? value :
T(0));
2213#ifdef VIENNALS_GPU_BICGSTAB
2214 if (gpu::gpuIsValid(gpuStokesBufs_)) {
2215 const std::size_t nf = 2u *
D * n;
2216 if (stokesDiagGpu_.size() !=
D * n || stokesCoeffGpu_.size() != nf) {
2217 VIENNACORE_LOG_ERROR(
"OxidationDeformation: Stokes GPU geometry has "
2218 "the wrong size for the current node set.");
2221 Timer<> tUpload, tSolve;
2222 std::vector<Vec3D<SolverT>> xSolved(n);
2223 unsigned maxGpuIterations = 0;
2224 double maxGpuResidual = 0.0;
2226 for (
unsigned c = 0; c <
D; ++c) {
2227 std::vector<double> bGpu(n), xGpu(n);
2228 for (std::size_t i = 0; i < n; ++i) {
2229 bGpu[i] =
static_cast<double>(b[i][c]);
2230 xGpu[i] =
static_cast<double>(x[i][c]);
2234 const bool gpuUploadOk = gpu::gpuUploadSolverArrays(
2235 gpuStokesBufs_, stokesDiagGpu_.data() + c * n, bGpu.data(),
2236 stokesCoeffGpu_.data(),
static_cast<uint32_t
>(n),
2237 stokesCoeffGpu_.size());
2240 VIENNACORE_LOG_ERROR(
"OxidationDeformation: GPU mode was selected, "
2241 "but uploading Stokes solver arrays failed." +
2245 unsigned gpuIterations = 0;
2246 double gpuResidual = 0.0;
2248 const bool gpuConverged = gpu::gpuSolveBiCGSTAB(
2249 gpuStokesBufs_, xGpu.data(),
static_cast<double>(eps),
2250 deformationParameters.stokesIterations,
2251 static_cast<double>(deformationParameters.stokesTolerance),
2252 gpuIterations, gpuResidual);
2255 if (!gpuConverged || !std::isfinite(gpuResidual)) {
2256 VIENNACORE_LOG_ERROR(
2257 "OxidationDeformation: Stokes GPU BiCGSTAB failed or produced "
2258 "a non-finite residual for component " +
2259 std::to_string(c) +
" (iters=" + std::to_string(gpuIterations) +
2260 ", residual=" + std::to_string(gpuResidual) +
").");
2263 maxGpuIterations = std::max(maxGpuIterations, gpuIterations);
2264 maxGpuResidual = std::max(maxGpuResidual, gpuResidual);
2265 for (std::size_t i = 0; i < n; ++i)
2266 xSolved[i][c] =
static_cast<SolverT
>(xGpu[i]);
2269 for (std::size_t i = 0; i < n; ++i)
2270 for (
unsigned c = 0; c <
D; ++c)
2271 nodes[i].velocity[c] =
static_cast<T>(xSolved[i][c]);
2273 lastStokesIters_ = maxGpuIterations;
2274 lastStokesResidual_ = maxGpuResidual / b_norm;
2276 if (Logger::hasDebug()) {
2277 const std::string tag =
"stokes n=" + std::to_string(n) +
2278 " iters=" + std::to_string(lastStokesIters_) +
2279 " res=" + std::to_string(lastStokesResidual_) +
2281 Logger::getInstance()
2282 .addTiming(tag +
" GPU upload", tUpload)
2283 .addTiming(tag +
" GPU BiCGSTAB", tSolve)
2292 auto stokesMatvec = [&](
const std::vector<Vec3D<SolverT>> &vin,
2293 std::vector<Vec3D<SolverT>> &Av) {
2294#pragma omp parallel for schedule(static)
2295 for (std::size_t i = 0; i < n; ++i) {
2299 for (
unsigned c = 0; c <
D; ++c)
2301 static_cast<SolverT
>(diag[i] * vin[i][c] - rhs[c] + vBC[i][c]);
2306 auto vecDot = [&](
const std::vector<Vec3D<SolverT>> &a,
2307 const std::vector<Vec3D<SolverT>> &bv) {
2309 for (std::size_t i = 0; i < n; ++i)
2310 for (
unsigned c = 0; c <
D; ++c) {
2311 const T av =
static_cast<T>(a[i][c]);
2312 const T bvVal =
static_cast<T>(bv[i][c]);
2313 if (!std::isfinite(av) || !std::isfinite(bvVal))
2314 return std::numeric_limits<T>::quiet_NaN();
2320 auto vecMaxAbs = [&](
const std::vector<Vec3D<SolverT>> &vin) {
2322 for (std::size_t i = 0; i < n; ++i)
2323 for (
unsigned c = 0; c <
D; ++c) {
2324 const T value =
static_cast<T>(vin[i][c]);
2325 if (!std::isfinite(value))
2326 return std::numeric_limits<T>::infinity();
2327 m = std::max(m, std::abs(value));
2333 const Vec3D<SolverT> zero3{SolverT(0), SolverT(0), SolverT(0)};
2334 std::vector<Vec3D<SolverT>> Ax(n), r(n), r_hat(n);
2335 std::vector<Vec3D<SolverT>> pv(n, zero3), sv(n, zero3), y(n), z(n), s(n),
2337 stokesMatvec(x, Ax);
2338 for (std::size_t i = 0; i < n; ++i)
2339 for (
unsigned c = 0; c <
D; ++c) {
2340 r[i][c] =
static_cast<SolverT
>(b[i][c] - Ax[i][c]);
2341 r_hat[i][c] = r[i][c];
2344 T rho =
T(1), alpha =
T(1), omega =
T(1);
2345 T velocityResidual =
T(0);
2346 unsigned stokesIter = 0;
2347 bool stokesBreakdown =
false;
2348 velocityResidual = vecMaxAbs(r);
2350 for (; stokesIter < deformationParameters.stokesIterations; ++stokesIter) {
2351 const T rho_new = vecDot(r_hat, r);
2352 if (!std::isfinite(rho_new)) {
2353 stokesBreakdown =
true;
2356 if (std::abs(rho_new) <
T(1e-100))
2358 if (!std::isfinite(rho) || !std::isfinite(alpha) ||
2359 !std::isfinite(omega) || std::abs(omega) <
T(1e-100)) {
2360 stokesBreakdown =
true;
2364 const T beta = (rho_new / rho) * (alpha / omega);
2365 if (!std::isfinite(beta)) {
2366 stokesBreakdown =
true;
2371 for (std::size_t i = 0; i < n; ++i)
2372 for (
unsigned c = 0; c <
D; ++c)
2373 pv[i][c] =
static_cast<SolverT
>(r[i][c] +
2374 beta * (pv[i][c] - omega * sv[i][c]));
2376 for (std::size_t i = 0; i < n; ++i)
2377 for (
unsigned c = 0; c <
D; ++c) {
2378 const T pvc = pv[i][c];
2379 const T pcDiag = precondDiag[i][c];
2380 y[i][c] =
static_cast<SolverT
>((pcDiag > eps) ? pvc / pcDiag : pvc);
2383 stokesMatvec(y, sv);
2385 const T r_hat_v = vecDot(r_hat, sv);
2386 if (!std::isfinite(r_hat_v)) {
2387 stokesBreakdown =
true;
2390 if (std::abs(r_hat_v) <
T(1e-100))
2393 alpha = rho_new / r_hat_v;
2394 if (!std::isfinite(alpha)) {
2395 stokesBreakdown =
true;
2399 for (std::size_t i = 0; i < n; ++i)
2400 for (
unsigned c = 0; c <
D; ++c)
2401 s[i][c] =
static_cast<SolverT
>(r[i][c] - alpha * sv[i][c]);
2403 velocityResidual = vecMaxAbs(s);
2404 if (!std::isfinite(velocityResidual)) {
2405 stokesBreakdown =
true;
2408 if (velocityResidual < deformationParameters.stokesTolerance * b_norm) {
2409 for (std::size_t i = 0; i < n; ++i)
2410 for (
unsigned c = 0; c <
D; ++c)
2411 x[i][c] =
static_cast<SolverT
>(x[i][c] + alpha * y[i][c]);
2415 for (std::size_t i = 0; i < n; ++i)
2416 for (
unsigned c = 0; c <
D; ++c) {
2417 const T sc = s[i][c];
2418 const T pcDiag = precondDiag[i][c];
2419 z[i][c] =
static_cast<SolverT
>((pcDiag > eps) ? sc / pcDiag : sc);
2424 const T t_s = vecDot(t, s);
2425 const T t_t = vecDot(t, t);
2426 if (!std::isfinite(t_s) || !std::isfinite(t_t)) {
2427 stokesBreakdown =
true;
2430 omega = (t_t >
T(1e-100)) ? t_s / t_t :
T(0);
2431 if (!std::isfinite(omega)) {
2432 stokesBreakdown =
true;
2436 for (std::size_t i = 0; i < n; ++i)
2437 for (
unsigned c = 0; c <
D; ++c) {
2439 static_cast<SolverT
>(x[i][c] + alpha * y[i][c] + omega * z[i][c]);
2440 r[i][c] =
static_cast<SolverT
>(s[i][c] - omega * t[i][c]);
2443 velocityResidual = vecMaxAbs(r);
2444 if (!std::isfinite(velocityResidual)) {
2445 stokesBreakdown =
true;
2448 if (velocityResidual < deformationParameters.stokesTolerance * b_norm)
2452 if (stokesBreakdown)
2453 velocityResidual = std::numeric_limits<T>::infinity();
2455 bool finiteSolution = !stokesBreakdown;
2456 for (std::size_t i = 0; i < n; ++i)
2457 for (
unsigned c = 0; c <
D; ++c)
2458 if (!std::isfinite(
static_cast<T>(x[i][c])))
2459 finiteSolution =
false;
2461 if (finiteSolution) {
2462 for (std::size_t i = 0; i < n; ++i)
2463 for (
unsigned c = 0; c <
D; ++c)
2464 nodes[i].velocity[c] =
static_cast<T>(x[i][c]);
2467 lastStokesIters_ = stokesIter;
2468 lastStokesResidual_ = finiteSolution ? velocityResidual / b_norm
2469 : std::numeric_limits<T>::infinity();
2470 if (lastStokesResidual_ > deformationParameters.stokesTolerance)
2471 VIENNACORE_LOG_WARNING(
2472 "solveStokesVelocity: BiCGSTAB did not converge after " +
2473 std::to_string(lastStokesIters_) +
"/" +
2474 std::to_string(deformationParameters.stokesIterations) +
2475 " iterations (residual=" + std::to_string(lastStokesResidual_) +
2477 std::to_string(deformationParameters.stokesTolerance) +
")");
2481 std::vector<Vec3D<T>> velocities;
2482 velocities.reserve(
nodes.size());
2483 for (
const auto &node :
nodes)
2484 velocities.push_back(node.velocity);
2489 std::vector<T> pressures;
2490 pressures.reserve(
nodes.size());
2491 for (
const auto &node :
nodes)
2492 pressures.push_back(node.pressure);
2496 template <
class SolverT>
2499 const std::vector<T> &ambientBoundaryPressure,
2500 std::size_t nodeId,
unsigned direction,
2502 const auto &node =
nodes[nodeId];
2503 IndexType neighbor = node.index;
2504 neighbor[direction] += offset;
2507 return {
static_cast<T>(pressure[nodeId]),
gridDelta};
2510 if (neighborId !=
noNode) {
2511 if (touchesAmbient_[neighborId])
2512 return {ambientBoundaryPressure[neighborId],
gridDelta};
2513 return {
static_cast<T>(pressure[neighborId]),
gridDelta};
2516 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
2517 const std::size_t nn =
nodes.size();
2518 const Boundary faceType = faceBCTypes_[fi * nn + nodeId];
2519 const T faceDist = faceBCDists_[fi * nn + nodeId];
2520 if (faceType == Boundary::AMBIENT)
2521 return {ambientBoundaryPressure[nodeId], faceDist};
2527 if (faceType == Boundary::REACTION)
2528 return {
static_cast<T>(pressure[nodeId]), faceDist};
2529 if (faceType == Boundary::MASK)
2531 static_cast<T>(pressure[nodeId])),
2534 return {
static_cast<T>(pressure[nodeId]),
gridDelta};
2537 template <
class SolverT>
2538 StencilPoint<Vec3D<T>>
2540 std::size_t nodeId,
unsigned direction,
2542 const auto &node =
nodes[nodeId];
2543 IndexType neighbor = node.index;
2544 neighbor[direction] += offset;
2546 const auto toT = [](
const Vec3D<SolverT> &v) -> Vec3D<T> {
2547 return {
static_cast<T>(v[0]),
static_cast<T>(v[1]),
static_cast<T>(v[2])};
2551 return {toT(velocity[nodeId]),
gridDelta};
2554 if (neighborId !=
noNode)
2555 return {toT(velocity[neighborId]),
gridDelta};
2557 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
2558 const std::size_t nn =
nodes.size();
2559 const Boundary faceType = faceBCTypes_[fi * nn + nodeId];
2560 const T faceDist = faceBCDists_[fi * nn + nodeId];
2561 if (faceType == Boundary::REACTION)
2563 if (faceType == Boundary::AMBIENT)
2565 faceDist, toT(velocity[nodeId])),
2567 if (faceType == Boundary::MASK)
2571 return {toT(velocity[nodeId]),
gridDelta};
2577 const auto &node =
nodes[nodeId];
2578 IndexType neighbor = node.index;
2579 neighbor[direction] += offset;
2585 if (neighborId !=
noNode)
2588 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
2589 const std::size_t nn =
nodes.size();
2590 const Boundary faceType = faceBCTypes_[fi * nn + nodeId];
2591 const T faceDist = faceBCDists_[fi * nn + nodeId];
2592 if (faceType == Boundary::AMBIENT)
2594 if (faceType == Boundary::REACTION)
2595 return {node.pressure, faceDist};
2596 if (faceType == Boundary::MASK)
2607 const auto &node =
nodes[nodeId];
2608 IndexType neighbor = node.index;
2609 neighbor[direction] += offset;
2615 if (neighborId !=
noNode)
2618 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
2619 const std::size_t nn =
nodes.size();
2620 const Boundary faceType = faceBCTypes_[fi * nn + nodeId];
2621 const T faceDist = faceBCDists_[fi * nn + nodeId];
2622 if (faceType == Boundary::REACTION)
2624 if (faceType == Boundary::AMBIENT)
2626 faceDist, node.velocity),
2628 if (faceType == Boundary::MASK)
2637 const auto count = std::min(previous.size(),
nodes.size());
2638 for (std::size_t i = 0; i < count; ++i) {
2639 for (
unsigned j = 0; j <
D; ++j) {
2640 maxChange = std::max(maxChange,
2641 std::abs(
nodes[i].velocity[j] - previous[i][j]));
2642 maxVelocity = std::max(maxVelocity, std::abs(
nodes[i].velocity[j]));
2646 if (maxVelocity <= std::numeric_limits<T>::epsilon())
2648 return maxChange / maxVelocity;
2654 const auto count = std::min(previous.size(),
nodes.size());
2655 for (std::size_t i = 0; i < count; ++i) {
2657 std::max(maxChange, std::abs(
nodes[i].pressure - previous[i]));
2658 maxPressure = std::max(maxPressure, std::abs(
nodes[i].pressure));
2661 if (maxPressure <= std::numeric_limits<T>::epsilon())
2663 return maxChange / maxPressure;
2669 const auto deviatoricRate =
2674 (relaxationTime <= std::numeric_limits<T>::epsilon())
2676 : std::exp(-deformationParameters.stressTimeStep / relaxationTime);
2678 std::array<T, 9> deviatoricStress{};
2679 for (
unsigned i = 0; i < 9; ++i) {
2680 const T viscousStress =
2681 T(2) * deformationParameters.viscosity * deviatoricRate[i];
2682 deviatoricStress[i] =
2683 decay * previousStress[i] + (
T(1) - decay) * viscousStress;
2686 return deviatoricStress;
2693 return deformationParameters.ambientPressure +
2698 int ,
T fallbackPressure)
const {
2699 return fallbackPressure;
2703 unsigned direction,
int offset,
2705 const Vec3D<T> &interiorVelocity)
const {
2706 Vec3D<T> boundaryVelocity = interiorVelocity;
2711 Vec3D<T> deviatoricTraction{0., 0., 0.};
2712 for (
unsigned component = 0; component <
D; ++component) {
2713 for (
unsigned j = 0; j <
D; ++j)
2714 deviatoricTraction[component] +=
2715 deviatoricStress[
tensorIndex(component, j)] * normal[j];
2718 for (
unsigned component = 0; component <
D; ++component) {
2719 const T normalTraction =
2720 pressure * normal[component] - deviatoricTraction[component];
2721 const T faceDerivative = normalTraction * normal[direction] /
2722 std::max(deformationParameters.viscosity,
2723 std::numeric_limits<T>::epsilon());
2724 boundaryVelocity[component] +=
2725 static_cast<T>(offset) * distance * faceDerivative;
2728 return boundaryVelocity;
2732 const Vec3D<T> &interiorVelocity)
const {
2733 if (maskVelocityField !=
nullptr) {
2734 Vec3D<T> coordinate{0., 0., 0.};
2735 for (
unsigned i = 0; i <
D; ++i)
2737 return maskVelocityField->getVectorVelocity(
2738 coordinate, deformationParameters.material, {0., 0., 0.}, 0);
2740 return {0., 0., 0.};
2744 avgExpansionSpeed_ = 0.;
2748 ConstSparseIterator reactionIt(reactionInterface->getDomain());
2749 ConstSparseIterator ambientIt(ambientInterface->getDomain());
2751 std::size_t count = 0;
2753 for (
const auto &node :
nodes) {
2754 bool touchesReactionBoundary =
false;
2755 for (
unsigned direction = 0; direction <
D; ++direction) {
2756 for (
int offset : {-1, 1}) {
2757 IndexType neighbor = node.index;
2758 neighbor[direction] += offset;
2766 neighbor) == Boundary::REACTION) {
2767 touchesReactionBoundary =
true;
2771 if (touchesReactionBoundary)
2775 if (touchesReactionBoundary) {
2776 Vec3D<T> coordinate{0., 0., 0.};
2777 for (
unsigned i = 0; i <
D; ++i)
2778 coordinate[i] = node.index[i] *
gridDelta;
2779 avgExpansionSpeed_ +=
2780 (oxidationParameters.expansionCoefficient -
T(1)) *
2781 std::abs(diffusionField->getScalarVelocity(coordinate, 0,
2788 avgExpansionSpeed_ /=
static_cast<T>(count);
2792 Vec3D<T> coordinate{0., 0., 0.};
2793 for (
unsigned i = 0; i <
D; ++i)
2797 reactionSign * expansionVelocity);
2801 if (diffusionField ==
nullptr || ambientInterface ==
nullptr)
2802 return {0., 0., 0.};
2805 for (
unsigned i = 0; i <
D; ++i)
2806 index[i] = std::llround(coordinate[i] /
gridDelta);
2808 ConstSparseIterator ambientIt(ambientInterface->getDomain());
2814 Vec3D<T> maxVelocity{0., 0., 0.};
2815 if (ambientInterface ==
nullptr || diffusionField ==
nullptr)
2818 ConstSparseIterator ambientIt(ambientInterface->getDomain());
2819 for (; !ambientIt.isFinished(); ++ambientIt) {
2820 if (!ambientIt.isDefined())
2823 Vec3D<T> coordinate{0., 0., 0.};
2824 const auto &index = ambientIt.getStartIndices();
2825 for (
unsigned d = 0; d <
D; ++d)
2829 for (
unsigned d = 0; d <
D; ++d)
2830 maxVelocity[d] = std::max(maxVelocity[d], std::abs(velocity[d]));
2837 for (
unsigned i = 0; i <
D; ++i) {
2844 Vec3D<T> gradient{0., 0., 0.};
2845 for (
unsigned i = 0; i <
D; ++i)
2853 for (
unsigned i = 0; i <
D; ++i)
2854 forcing[i] -= stressDivergence[i];
2859 Vec3D<T> divergence{0., 0., 0.};
2860 for (
unsigned component = 0; component <
D; ++component) {
2861 for (
unsigned direction = 0; direction <
D; ++direction) {
2862 IndexType pos = index;
2863 IndexType neg = index;
2864 pos[direction] += 1;
2865 neg[direction] -= 1;
2868 divergence[component] +=
2881 const std::size_t nodeId =
lookupNode(index);
2885 std::array<T, 9> deviatoric =
nodes[nodeId].stressTensor;
2886 for (
unsigned i = 0; i < 3; ++i)
2893 return deformationParameters.ambientPressure;
2895 const std::size_t nodeId =
lookupNode(index);
2897 return deformationParameters.ambientPressure;
2898 return nodes[nodeId].pressure;
2902 return (oxidationParameters.expansionCoefficient -
T(1)) *
2903 std::abs(diffusionField->getScalarVelocity(coordinate, 0,
2908 ConstSparseIterator reactionIt(reactionInterface->getDomain());
2913 if (boundary == Boundary::AMBIENT) {
2914 ConstSparseIterator ambientIt(ambientInterface->getDomain());
2917 if (boundary == Boundary::MASK && maskInterface !=
nullptr) {
2918 ConstSparseIterator maskIt(maskInterface->getDomain());
2922 ConstSparseIterator reactionIt(reactionInterface->getDomain());
2927 const IndexType &index)
const {
2928 Vec3D<T> normal{0., 0., 0.};
2931 for (
unsigned i = 0; i <
D; ++i) {
2932 IndexType pos = index;
2933 IndexType neg = index;
2942 norm += normal[i] * normal[i];
2945 if (norm <= std::numeric_limits<T>::epsilon()) {
2946 normal = Vec3D<T>{0., 0., 0.};
2951 norm = std::sqrt(norm);
2952 for (
unsigned i = 0; i <
D; ++i)
2958#pragma omp parallel for schedule(static)
2959 for (std::size_t i = 0; i <
nodes.size(); ++i)
2966 (relaxationTime <= std::numeric_limits<T>::epsilon())
2968 : std::exp(-deformationParameters.stressTimeStep / relaxationTime);
2972 std::vector<std::pair<IndexType, std::array<T, 9>>> historyEntries(
2974#pragma omp parallel for schedule(static)
2975 for (std::size_t i = 0; i <
nodes.size(); ++i) {
2976 auto &node =
nodes[i];
2978 const auto deviatoricRate =
2982 std::array<T, 9> deviatoricStress{};
2983 for (
unsigned j = 0; j < 9; ++j) {
2984 const T viscousStress =
2985 T(2) * deformationParameters.viscosity * deviatoricRate[j];
2986 deviatoricStress[j] =
2987 decay * previousStress[j] + (
T(1) - decay) * viscousStress;
2990 node.stressTensor = deviatoricStress;
2991 for (
unsigned j = 0; j < 3; ++j)
2992 node.stressTensor[
tensorIndex(j, j)] -= node.pressure;
2995 historyEntries[i] = {node.index, deviatoricStress};
3000 nextHistory.reserve(
nodes.size());
3001 for (
const auto &entry : historyEntries)
3002 nextHistory[entry.first] = entry.second;
3003 deviatoricStressHistory.swap(nextHistory);
3007 std::array<T, 9> tensor{};
3008 for (
unsigned i = 0; i <
D; ++i) {
3009 for (
unsigned j = 0; j <
D; ++j) {
3018 unsigned direction)
const {
3019 const std::size_t nodeId =
lookupNode(index);
3026 minus.value[component],
nodes[nodeId].velocity[component],
3027 plus.value[component], minus.distance, plus.distance);
3031 const std::size_t nodeId =
lookupNode(index);
3038 minus.distance, plus.distance);
3043 std::array<T, 9> result = tensor;
3044 const T mean = trace /
T(3);
3045 for (
unsigned i = 0; i < 3; ++i)
3051 const auto found = deviatoricStressHistory.find(index);
3052 if (found == deviatoricStressHistory.end())
3054 return found->second;
3058 if (deformationParameters.stressRelaxationTime >
T(0))
3059 return deformationParameters.stressRelaxationTime;
3060 if (deformationParameters.shearModulus > std::numeric_limits<T>::epsilon())
3061 return deformationParameters.viscosity /
3062 deformationParameters.shearModulus;
3067 T doubleContraction = 0.;
3068 for (
unsigned i = 0; i < 3; ++i) {
3069 for (
unsigned j = 0; j < 3; ++j) {
3070 const T value =
T(0.5) * (deviatoricStress[
tensorIndex(i, j)] +
3072 doubleContraction += value * value;
3075 return std::sqrt(
T(1.5) * doubleContraction);
3080 for (
unsigned i = 0; i < 3; ++i) {
3081 for (
unsigned j = 0; j < 3; ++j)
3082 result += normal[i] * tensor[
tensorIndex(i, j)] * normal[j];
3088 ConstSparseIterator &ambientIt,
3089 ConstSparseIterator &maskIt,
3090 const IndexType &inside,
3091 const IndexType &outside)
const {
3097 ConstSparseIterator &ambientIt,
3098 ConstSparseIterator &maskIt,
3099 const IndexType &inside,
3100 const IndexType &outside)
const {
3101 const T reactionInside =
valueAt(reactionIt, inside);
3102 const T reactionOutside =
valueAt(reactionIt, outside);
3103 const T ambientInside =
valueAt(ambientIt, inside);
3104 const T ambientOutside =
valueAt(ambientIt, outside);
3108 const bool reactionCrosses =
crosses(reactionInside, reactionOutside);
3109 const bool ambientCrosses =
crosses(ambientInside, ambientOutside);
3110 const bool maskCrosses =
3111 maskInterface !=
nullptr &&
crosses(maskInside, maskOutside);
3113 if (!reactionCrosses && !ambientCrosses && !maskCrosses)
3115 if (reactionCrosses && !ambientCrosses && !maskCrosses)
3116 return {Boundary::REACTION,
3118 if (!reactionCrosses && ambientCrosses && !maskCrosses)
3120 maskInside, maskOutside,
3122 if (!reactionCrosses && !ambientCrosses && maskCrosses)
3125 const T reactionDistance =
3127 : std::numeric_limits<T>::max();
3128 const T ambientDistance =
3130 : std::numeric_limits<T>::max();
3131 const T maskDistance = maskCrosses
3133 : std::numeric_limits<T>::max();
3134 if (reactionDistance <= ambientDistance && reactionDistance <= maskDistance)
3135 return {Boundary::REACTION, reactionDistance};
3136 if (ambientDistance != std::numeric_limits<T>::max()) {
3137 const auto maskedAmbient =
3139 if (maskedAmbient.boundary == Boundary::MASK)
3140 return maskedAmbient;
3142 if (maskDistance <= ambientDistance)
3143 return {Boundary::MASK, maskDistance};
3144 return {Boundary::AMBIENT, ambientDistance};
3148 ConstSparseIterator &ambientIt,
3149 ConstSparseIterator &maskIt,
const IndexType &index,
3150 Boundary requestedBoundary)
const {
3151 for (
unsigned direction = 0; direction <
D; ++direction) {
3152 for (
int offset : {-1, 1}) {
3153 IndexType neighbor = index;
3154 neighbor[direction] += offset;
3175 constexpr T eps =
T(1e-9);
3176 return reactionSign * reactionPhi >= -eps &&
3177 ambientSign * ambientPhi >= -eps;
3181 if (maskInterface ==
nullptr)
3186 bool isInsideMask(ConstSparseIterator &maskIt,
const IndexType &index)
const {
3187 if (maskInterface ==
nullptr)
3189 return maskSign *
valueAt(maskIt, index) >= 0.;
3193 if (maskInterface ==
nullptr)
3194 return std::numeric_limits<T>::max();
3195 return valueAt(maskIt, index);
3201 return {Boundary::MASK, distance};
3206 if (maskInterface !=
nullptr &&
3207 static_cast<T>(maskSign) * maskOutside >=
T(0))
3208 return {Boundary::MASK, distance};
3209 return {Boundary::AMBIENT, distance};
3213 if (maskInterface ==
nullptr)
3215 const T fraction = std::clamp(distance /
gridDelta,
T(0),
T(1));
3218 const T maskPhi = insidePhi + fraction * (outsidePhi - insidePhi);
3219 return static_cast<T>(maskSign) * maskPhi >=
T(0);
3224 insidePhi, outsidePhi,
3225 deformationParameters.minMechanicsBoundaryDistance,
gridDelta);
3229 T minusDistance,
T plusDistance) {
3230 const T denominator =
3231 minusDistance * plusDistance * (minusDistance + plusDistance);
3232 if (denominator <= std::numeric_limits<T>::epsilon())
3235 return (-plusDistance * plusDistance * minusValue +
3236 (plusDistance * plusDistance - minusDistance * minusDistance) *
3238 minusDistance * minusDistance * plusValue) /
3242 static constexpr unsigned tensorIndex(
unsigned row,
unsigned column) {
3243 return 3 * row + column;
constexpr int D
Definition Epitaxy.cpp:12
double T
Definition Epitaxy.cpp:13
Class containing all information about the level set, including the dimensions of the domain,...
Definition lsDomain.hpp:27
Solves the oxidant diffusion step of the Suvorov et al. (10.1007/s10825-006-0003-z) oxidation model o...
Definition lsOxidationDiffusion.hpp:92
Common Cartesian-grid infrastructure shared by the three oxidation solver classes (diffusion,...
Definition lsOxidationSolverBase.hpp:90
static constexpr std::size_t noNode
Definition lsOxidationSolverBase.hpp:96
bool crosses(T a, T b) const
Definition lsOxidationSolverBase.hpp:110
std::size_t lookupNode(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:137
std::size_t linearIndex(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:143
void initNodeLookup()
Definition lsOxidationSolverBase.hpp:130
bool inBounds(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:123
std::array< std::size_t, D > strides
Definition lsOxidationSolverBase.hpp:101
T gridDelta
Definition lsOxidationSolverBase.hpp:102
std::vector< std::size_t > nodeLookupFlat
Definition lsOxidationSolverBase.hpp:97
bool initializeGridFromInterfaces(SmartPointer< Domain< T, D > > reactionInterface, SmartPointer< Domain< T, D > > ambientInterface, SmartPointer< Domain< T, D > > maskInterface, bool useRequestedBounds, const IndexType &requestedMinIndex, const IndexType &requestedMaxIndex, std::size_t maxGridPoints, const std::string &solverName)
Definition lsOxidationSolverBase.hpp:207
std::array< std::size_t, D > extents
Definition lsOxidationSolverBase.hpp:100
viennahrle::ConstSparseIterator< typename Domain< T, D >::DomainType > ConstSparseIterator
Definition lsOxidationSolverBase.hpp:93
T valueAt(ConstSparseIterator &it, const IndexType &index) const
Definition lsOxidationSolverBase.hpp:118
bool increment(IndexType &index) const
Definition lsOxidationSolverBase.hpp:152
IndexType minIndex
Definition lsOxidationSolverBase.hpp:98
viennahrle::Index< D > IndexType
Definition lsOxidationSolverBase.hpp:92
std::size_t findNearbyNode(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:166
IndexType maxIndex
Definition lsOxidationSolverBase.hpp:99
T levelSetCrossingDistance(T insidePhi, T outsidePhi, T minBoundaryFraction, T gridDelta)
Definition lsOxidationSolverBase.hpp:76
T clampLevelSetPhi(T v)
Clamp HRLE far-field sentinels (±DBL_MAX) to ±1 before differencing to prevent DBL_MAX² overflow that...
Definition lsOxidationSolverBase.hpp:71
Vec3D< T > vecScaled(const Vec3D< T > &source, T factor)
Definition lsOxidationSolverBase.hpp:40
void vecAddTo(Vec3D< T > &target, const Vec3D< T > &source)
Definition lsOxidationSolverBase.hpp:64
Definition lsAdvect.hpp:41
GpuMode
Selects the BiCGSTAB back-end for the diffusion solve. GPU failures are reported and not silently fal...
Definition lsOxidationDiffusion.hpp:26
@ Gpu
Always use GPU; fail if unavailable or unsuccessful.
Definition lsOxidationDiffusion.hpp:28
@ Cpu
Always use CPU (default).
Definition lsOxidationDiffusion.hpp:27
GpuPreconditioner
Selects the preconditioner used by the GPU BiCGSTAB solver. Jacobi matches the CPU solver's precondit...
Definition lsOxidationDiffusion.hpp:35
@ ILU0
Definition lsOxidationDiffusion.hpp:35
@ Jacobi
Definition lsOxidationDiffusion.hpp:35
Parameters for the steady oxidant diffusion model used by OxidationDiffusion.
Definition lsOxidationDiffusion.hpp:39
Definition lsOxidationSolverBase.hpp:34