93 using IndexType = viennahrle::Index<D>;
94 using ConstSparseIterator =
95 viennahrle::ConstSparseIterator<typename Domain<T, D>::DomainType>;
98 static constexpr T boltzmannConstant =
T(1.380649e-23);
99 static constexpr T minStressFactor =
T(1.e-6);
100 static constexpr T maxStressFactor =
T(1.e6);
120 enum class Boundary { NONE, REACTION, AMBIENT, MASK };
124 T concentration = 0.;
125 Vec3D<T> siNormal = {0., 0.,
131 T nodeCoefficient = 1.;
135 SmartPointer<Domain<T, D>> reactionInterface =
nullptr;
136 SmartPointer<Domain<T, D>> ambientInterface =
nullptr;
137 SmartPointer<Domain<T, D>> maskInterface =
nullptr;
139 int reactionSign = 1;
140 int ambientSign = -1;
142 IndexType requestedMinIndex{};
143 IndexType requestedMaxIndex{};
144 unsigned iterations = 0;
145 T residual = std::numeric_limits<T>::max();
146 T normalizedResidual_ = std::numeric_limits<T>::max();
147 bool lastSolveConverged_ =
false;
148 T maxScalarVelocity_ = 0.;
150 bool nodesDirty_ =
true;
153 bool useRequestedBounds =
false;
154 bool warmStartable_ =
156 std::unordered_map<std::size_t, T> pressureLookup;
157 std::unordered_map<std::size_t, T> concentrationCache_;
158 std::vector<Node> nodes;
162 std::vector<Boundary> faceBCTypes_;
163 std::vector<T> faceBCDists_;
166 std::vector<std::size_t> neighborIds_;
171#ifdef VIENNALS_GPU_BICGSTAB
175 gpu::GpuBiCGSTABBuffers *gpuBufs_ =
nullptr;
197 : reactionInterface(passedReactionInterface),
198 ambientInterface(passedAmbientInterface), parameters(passedParameters) {
202#ifdef VIENNALS_GPU_BICGSTAB
203 gpu::freeGpuBuffers(gpuBufs_);
208 template <
class... Args>
static auto New(Args &&...args) {
209 return SmartPointer<OxidationDiffusion>::New(std::forward<Args>(args)...);
220 reactionInterface = passedInterface;
226 ambientInterface = passedInterface;
232 int passedMaskSign = 1) {
233 maskInterface = passedInterface;
234 maskSign = (passedMaskSign < 0) ? -1 : 1;
240 maskInterface =
nullptr;
246 parameters = passedParameters;
254 for (
unsigned i = 0; i <
D; ++i)
255 index[i] = std::llround(coordinate[i] /
gridDelta);
260 pressureLookup.clear();
266 if (std::isfinite(pressure))
267 pressureLookup[key] = pressure;
269 pressureLookup.erase(key);
275 for (
unsigned i = 0; i <
D; ++i)
276 index[i] = std::llround(coordinate[i] /
gridDelta);
283 reactionSign = (passedReactionSign < 0) ? -1 : 1;
284 ambientSign = (passedAmbientSign < 0) ? -1 : 1;
291 const IndexType &passedMaxIndex) {
292 requestedMinIndex = passedMinIndex;
293 requestedMaxIndex = passedMaxIndex;
294 useRequestedBounds =
true;
300 useRequestedBounds =
false;
306 if (reactionInterface ==
nullptr || ambientInterface ==
nullptr) {
307 Logger::getInstance()
308 .addError(
"OxidationDiffusion: Missing level-set "
315 if (!initialiseGrid())
320 Logger::getInstance()
321 .addWarning(
"OxidationDiffusion: no oxide nodes found after "
322 "buildNodes(). Verify that the reaction and ambient "
323 "level sets enclose a non-empty oxide band.")
328 concentrationCache_.clear();
329 for (
const auto &node : nodes)
333 maxScalarVelocity_ = 0.;
334 ConstSparseIterator reactionIt(reactionInterface->getDomain());
335 for (
const auto &node : nodes) {
336 const auto sample = reactionBoundarySampleFromNode(reactionIt, node);
341 std::abs(parameters.velocitySign) * rate * sample.concentration /
342 (parameters.oxidantMoleculeDensity * parameters.expansionCoefficient);
343 maxScalarVelocity_ = std::max(maxScalarVelocity_, vel);
350 const Vec3D<T> &normalVector,
351 unsigned long )
final {
355 if (parameters.material >= 0 && material != parameters.material)
359 for (
unsigned i = 0; i <
D; ++i)
360 index[i] = std::llround(coordinate[i] /
gridDelta);
362 const auto boundarySample = reactionBoundarySample(index);
363 const IndexType rateIndex =
364 boundarySample.found ? boundarySample.nodeIndex : index;
365 const T concentration = boundarySample.found
366 ? boundarySample.concentration
370 (parameters.oxidantMoleculeDensity *
371 parameters.expansionCoefficient);
375 const Vec3D<T> & )
final {
376 if (parameters.material >= 0 && material != parameters.material)
380 std::abs(parameters.velocitySign) * parameters.reactionRate *
381 parameters.equilibriumConcentration /
382 (parameters.oxidantMoleculeDensity * parameters.expansionCoefficient);
384 return std::max(maxScalarVelocity_, bulkVel);
389 for (
unsigned i = 0; i <
D; ++i)
390 index[i] = std::llround(coordinate[i] /
gridDelta);
400 return nodes[nearby].concentration;
402 return nodes[nodeId].concentration;
407 for (
unsigned i = 0; i <
D; ++i)
408 index[i] = std::llround(coordinate[i] /
gridDelta);
413 const auto sample = reactionBoundarySample(index);
423 for (
const auto &node : nodes)
424 if (!std::isfinite(node.concentration))
430 return concentrationCache_;
434 concentrationCache_ = std::move(cache);
442 gpuPreconditioner_ = preconditioner;
454 if (nodes.empty() || ambientInterface ==
nullptr)
457 std::vector<T> concentrations;
458 ConstSparseIterator it(ambientInterface->getDomain());
459 for (; !it.isFinished(); ++it) {
462 const IndexType idx = it.getStartIndices();
471 const T value = nodeId !=
noNode ? nodes[nodeId].concentration :
T(0);
472 concentrations.push_back(std::isfinite(value) ? value :
T(0));
474 ambientInterface->getPointData().insertReplaceScalarData(
475 std::move(concentrations),
"OxConcentration");
481 if (pressureLookup.empty() || ambientInterface ==
nullptr)
484 std::vector<T> pressures;
485 ConstSparseIterator it(ambientInterface->getDomain());
486 for (; !it.isFinished(); ++it) {
489 const IndexType idx = it.getStartIndices();
491 const T value = pIt != pressureLookup.end() ? pIt->second :
T(0);
492 pressures.push_back(std::isfinite(value) ? value :
T(0));
494 ambientInterface->getPointData().insertReplaceScalarData(
495 std::move(pressures),
"OxPressure");
507 ReactionBoundarySample
510 for (
unsigned i = 0; i <
D; ++i)
511 index[i] = std::llround(coordinate[i] /
gridDelta);
512 return reactionBoundarySample(index);
522 (parameters.oxidantMoleculeDensity * parameters.expansionCoefficient));
526 bool initialiseGrid() {
528 reactionInterface, ambientInterface, maskInterface, useRequestedBounds,
529 requestedMinIndex, requestedMaxIndex, parameters.
maxGridPoints,
530 "OxidationDiffusion");
541 warmStartable_ = !concentrationCache_.empty();
542 if (!warmStartable_) {
545 const int cIdx = ambientInterface->getPointData().getScalarDataIndex(
548 ambientInterface->getPointData().getScalarDataIndex(
"OxPressure");
550 (cIdx != -1) ? ambientInterface->getPointData().getScalarData(cIdx)
553 (pIdx != -1) ? ambientInterface->getPointData().getScalarData(pIdx)
555 if (cd !=
nullptr || pd !=
nullptr) {
557 for (; !it.isFinished(); ++it) {
560 const auto ptId = it.getPointId();
561 const std::size_t key =
563 if (cd !=
nullptr && ptId <
static_cast<decltype(ptId)
>(cd->size()) &&
564 std::isfinite((*cd)[ptId]))
565 concentrationCache_[key] = (*cd)[ptId];
566 if (pd !=
nullptr && ptId <
static_cast<decltype(ptId)
>(pd->size()) &&
567 std::isfinite((*pd)[ptId]))
568 pressureLookup[key] = (*pd)[ptId];
570 warmStartable_ = !concentrationCache_.empty();
574 ConstSparseIterator reactionIt(reactionInterface->getDomain());
575 ConstSparseIterator ambientIt(ambientInterface->getDomain());
576 auto maskIt = makeMaskIterator();
580 const T reactionPhi =
valueAt(reactionIt, index);
581 const T ambientPhi =
valueAt(ambientIt, index);
582 if (isInsideOxide(reactionPhi, ambientPhi) &&
583 !isInsideMask(maskIt, index)) {
584 const std::size_t
id = nodes.size();
586 T seedConc = parameters.equilibriumConcentration;
589 if (cacheIt != concentrationCache_.end())
590 seedConc = cacheIt->second;
591 if (!std::isfinite(seedConc))
592 seedConc = parameters.equilibriumConcentration;
593 Node newNode{index, seedConc};
594 if (parameters.reactionRateRatio111 !=
T(1))
595 newNode.siNormal = computeSiNormal(index, reactionIt);
596 nodes.push_back(newNode);
606 const std::size_t n = nodes.size();
607 faceBCTypes_.assign(2 *
D * n, Boundary::NONE);
608 faceBCDists_.assign(2 *
D * n,
T(1));
609 ConstSparseIterator faceReactionIt(reactionInterface->getDomain());
610 ConstSparseIterator faceAmbientIt(ambientInterface->getDomain());
611 auto faceMaskIt = makeMaskIterator();
612 for (std::size_t
id = 0;
id < n; ++id) {
613 const auto &node = nodes[id];
614 for (
unsigned dir = 0; dir <
D; ++dir) {
615 for (
int off : {-1, 1}) {
616 const unsigned fi = dir * 2u + (off == 1 ? 1u : 0u);
617 IndexType nb = node.index;
621 const auto bc = classifyBoundary(faceReactionIt, faceAmbientIt,
622 faceMaskIt, node.index, nb);
623 faceBCTypes_[fi * n + id] = bc.first;
624 faceBCDists_[fi * n + id] = bc.second;
631 neighborIds_.assign(2 *
D * n,
noNode);
632 for (std::size_t
id = 0;
id < n; ++id) {
633 for (
unsigned dir = 0; dir <
D; ++dir) {
634 for (
int off : {-1, 1}) {
635 const unsigned fi = dir * 2u + (off == 1 ? 1u : 0u);
636 IndexType nb = nodes[id].index;
644 bool loggedBackend =
false;
645#ifdef VIENNALS_GPU_BICGSTAB
647 gpu::freeGpuBuffers(gpuBufs_);
653 gpuBufs_ = gpu::allocGpuBuffers(
static_cast<uint32_t
>(n), 2 *
D, useIlu0);
657 const std::size_t nf = 2u *
D * n;
658 std::vector<uint32_t> nb32(nf);
659 for (std::size_t k = 0; k < nf; ++k)
660 nb32[k] = (neighborIds_[k] ==
noNode)
662 :
static_cast<uint32_t
>(neighborIds_[k]);
663 if (!gpu::gpuUploadNeighborIds(gpuBufs_, nb32.data(), nf)) {
664 gpu::freeGpuBuffers(gpuBufs_);
666 VIENNACORE_LOG_ERROR(
"OxidationDiffusion: GPU mode was selected, but "
667 "uploading GPU neighbor IDs failed." +
670 if (useIlu0 && !gpu::gpuSetupCSR(gpuBufs_, nb32.data(),
671 static_cast<uint32_t
>(n), 2 *
D)) {
672 gpu::freeGpuBuffers(gpuBufs_);
674 VIENNACORE_LOG_ERROR(
"OxidationDiffusion: GPU mode was selected, but "
675 "CUSPARSE setup for the GPU BiCGSTAB solver "
679 logDiffusionBackend(
"GPU BiCGSTAB",
681 std::string(useIlu0 ?
"ILU0" :
"Jacobi"));
682 loggedBackend =
true;
684 VIENNACORE_LOG_ERROR(
"OxidationDiffusion: GPU mode was selected, but "
685 "CUDA buffers could not be "
686 "allocated or the CUDA context could not be "
692 if (!loggedBackend) {
693#ifdef VIENNALS_GPU_BICGSTAB
694 logDiffusionBackend(
"CPU BiCGSTAB",
"GPU mode not selected");
696 logDiffusionBackend(
"CPU BiCGSTAB",
697 "ViennaLS was built without GPU BiCGSTAB support");
711 void computeFaceCoeffs(std::vector<T> &faceCoeffs)
const {
712 const std::size_t n = nodes.size();
713 faceCoeffs.assign(2 *
D * n,
T(0));
714 const T eps = std::numeric_limits<T>::epsilon();
715 const std::vector<T> zeros(n,
T(0));
717 for (std::size_t
id = 0;
id < n; ++id) {
718 const T D_eff = getEffectiveDiffusionCoefficient(nodes[
id].index);
719 for (
unsigned dir = 0; dir <
D; ++dir) {
720 const unsigned fiNeg = dir * 2u;
721 const unsigned fiPos = dir * 2u + 1u;
725 const auto neg = makeStencilSide(
id, zeros, dir, -1, D_eff);
726 const auto pos = makeStencilSide(
id, zeros, dir, 1, D_eff);
727 const T distNeg = neg.distance;
728 const T distPos = pos.distance;
729 const T distSum = distNeg + distPos;
733 if (neighborIds_[fiNeg * n +
id] !=
noNode && distNeg > eps)
734 faceCoeffs[fiNeg * n + id] =
T(2) * D_eff / (distNeg * distSum);
735 if (neighborIds_[fiPos * n +
id] !=
noNode && distPos > eps)
736 faceCoeffs[fiPos * n + id] =
T(2) * D_eff / (distPos * distSum);
744 template <
class SolverT>
745 void computeStencilAt(std::size_t nodeId,
const std::vector<SolverT> &x,
746 T &diag,
T &rhs)
const {
749 const T D_eff = getEffectiveDiffusionCoefficient(nodes[nodeId].index);
750 for (
unsigned direction = 0; direction <
D; ++direction) {
751 const auto neg = makeStencilSide(nodeId, x, direction, -1, D_eff);
752 const auto pos = makeStencilSide(nodeId, x, direction, 1, D_eff);
753 addAxisContribution(rhs, diag, neg, pos, D_eff);
760 template <
class SolverT>
761 void matvec(
const std::vector<SolverT> &v,
762 const std::vector<T> &precomputedDiag,
const std::vector<T> &b,
763 std::vector<SolverT> &Av)
const {
764#pragma omp parallel for schedule(static)
765 for (std::size_t i = 0; i < nodes.size(); ++i) {
767 computeStencilAt(i, v, diag, rhs);
768 Av[i] =
static_cast<SolverT
>(precomputedDiag[i] *
v[i] - rhs + b[i]);
772 void solveDiffusion() {
775 normalizedResidual_ = 0.;
776 lastSolveConverged_ =
false;
778 lastSolveConverged_ =
true;
784 using SolverT = float;
786 const std::size_t n = nodes.size();
787 const T eps = std::numeric_limits<T>::epsilon();
793 std::vector<T> diag(n), b(n);
795 const std::vector<SolverT> zeros(n, SolverT(0));
796#pragma omp parallel for schedule(static)
797 for (std::size_t i = 0; i < n; ++i)
798 computeStencilAt(i, zeros, diag[i], b[i]);
803 bool finiteSystem =
true;
804 for (std::size_t i = 0; i < n; ++i) {
806 finiteSystem && std::isfinite(diag[i]) && std::isfinite(b[i]);
807 b_norm = std::max(b_norm, std::abs(b[i]));
809 if (b_norm <
T(1e-100))
812 residual = std::numeric_limits<T>::infinity();
813 normalizedResidual_ = residual;
814 Logger::getInstance()
815 .addWarning(
"solveDiffusion: assembled non-finite matrix/RHS; "
816 "rejecting this coupled trial.")
823 auto fallbackInitialGuess = [&](std::size_t i) ->
T {
824 if (std::isfinite(diag[i]) && std::isfinite(b[i]) && diag[i] > eps) {
825 const T guess = b[i] / diag[i];
826 if (std::isfinite(guess))
832 std::vector<SolverT> x(n);
833 for (std::size_t i = 0; i < n; ++i) {
835 warmStartable_ ? nodes[i].concentration : fallbackInitialGuess(i);
836 if (!std::isfinite(guess))
837 guess = fallbackInitialGuess(i);
838 x[i] =
static_cast<SolverT
>(guess);
839 if (!std::isfinite(x[i]))
842 const auto initialGuess = x;
844#ifdef VIENNALS_GPU_BICGSTAB
851 if (gpu::gpuIsValid(gpuBufs_)) {
852 Timer<> tPrep, tUpload, tSolve;
854 std::vector<double> diagGpu(n), bGpu(n);
855 for (std::size_t i = 0; i < n; ++i) {
856 diagGpu[i] =
static_cast<double>(diag[i]);
857 bGpu[i] =
static_cast<double>(b[i]);
861 std::vector<T> faceCoeffs;
862 computeFaceCoeffs(faceCoeffs);
863 std::vector<double> coeffGpu(faceCoeffs.size());
864 for (std::size_t k = 0; k < faceCoeffs.size(); ++k)
865 coeffGpu[k] =
static_cast<double>(faceCoeffs[k]);
867 std::vector<double> xGpu(n);
868 for (std::size_t i = 0; i < n; ++i)
869 xGpu[i] =
static_cast<double>(x[i]);
873 const bool gpuUploadOk = gpu::gpuUploadSolverArrays(
874 gpuBufs_, diagGpu.data(), bGpu.data(), coeffGpu.data(),
875 static_cast<uint32_t
>(n), faceCoeffs.size());
878 if (Logger::hasDebug()) {
879 const std::string tag =
880 "diffusion n=" + std::to_string(n) +
" [GPU upload failed]";
881 Logger::getInstance()
882 .addTiming(tag +
" diag/b precompute", tDiag)
883 .addTiming(tag +
" GPU prep+faceCoeffs", tPrep)
884 .addTiming(tag +
" GPU upload", tUpload)
887 VIENNACORE_LOG_ERROR(
"OxidationDiffusion: GPU mode was selected, but "
888 "uploading GPU solver arrays or factorizing ILU "
895 const bool gpuConverged = gpu::gpuSolveBiCGSTAB(
896 gpuBufs_, xGpu.data(),
static_cast<double>(eps),
897 parameters.maxIterations,
static_cast<double>(parameters.tolerance),
898 iterations, residual);
901 const bool finiteGpuSolution =
903 std::all_of(xGpu.begin(), xGpu.end(),
904 [](
double value) { return std::isfinite(value); });
905 const unsigned gpuIterations = iterations;
906 const T gpuResidual = residual;
908 if (finiteGpuSolution) {
911 for (std::size_t i = 0; i < n; ++i)
912 nodes[i].concentration =
static_cast<T>(xGpu[i]);
913 normalizedResidual_ = gpuResidual / b_norm;
914 lastSolveConverged_ = std::isfinite(normalizedResidual_) &&
915 normalizedResidual_ <= parameters.tolerance;
917 if (Logger::hasTiming()) {
918 const std::string tag =
"diffusion n=" + std::to_string(n) +
919 " iters=" + std::to_string(iterations) +
920 " res=" + std::to_string(residual) +
" [GPU]";
921 Logger::getInstance()
922 .addTiming(tag +
" GPU BiCGSTAB", tSolve)
925 if (Logger::hasDebug()) {
926 const std::string tag =
"diffusion n=" + std::to_string(n) +
" [GPU]";
927 Logger::getInstance()
928 .addTiming(tag +
" diag/b precompute", tDiag)
929 .addTiming(tag +
" GPU prep+faceCoeffs", tPrep)
930 .addTiming(tag +
" GPU upload", tUpload)
936 if (Logger::hasDebug()) {
937 const std::string tag =
"diffusion n=" + std::to_string(n) +
938 " iters=" + std::to_string(gpuIterations) +
939 " res=" + std::to_string(gpuResidual) +
941 Logger::getInstance()
942 .addTiming(tag +
" diag/b precompute", tDiag)
943 .addTiming(tag +
" GPU prep+faceCoeffs", tPrep)
944 .addTiming(tag +
" GPU upload", tUpload)
945 .addTiming(tag +
" GPU BiCGSTAB", tSolve)
948 VIENNACORE_LOG_ERROR(
949 "OxidationDiffusion: GPU mode was selected, but GPU BiCGSTAB "
950 "failed, did not converge, or produced non-finite concentrations "
952 std::to_string(gpuIterations) +
953 ", residual=" + std::to_string(gpuResidual) +
").");
957 VIENNACORE_LOG_ERROR(
"OxidationDiffusion: explicit GPU mode was "
958 "requested, but ViennaLS was built without "
959 "VIENNALS_GPU_BICGSTAB.");
964 std::vector<SolverT> Ax(n);
965 matvec(x, diag, b, Ax);
966 std::vector<SolverT> r(n), r_hat(n);
967 for (std::size_t i = 0; i < n; ++i) {
968 r[i] =
static_cast<SolverT
>(b[i] - Ax[i]);
977 std::vector<SolverT> p(n, SolverT(0)),
v(n, SolverT(0)), y(n), z(n), s(n),
979 T rho =
T(1), alpha =
T(1), omega =
T(1);
980 bool bicgstabBreakdown =
false;
981 for (std::size_t i = 0; i < n; ++i) {
982 const T ri =
static_cast<T>(r[i]);
983 if (!std::isfinite(ri)) {
984 bicgstabBreakdown =
true;
987 residual = std::max(residual, std::abs(ri));
990 for (; !bicgstabBreakdown && iterations < parameters.maxIterations;
993 for (std::size_t i = 0; i < n; ++i)
994 rho_new +=
static_cast<T>(r_hat[i]) *
static_cast<T>(r[i]);
996 if (!std::isfinite(rho_new)) {
997 bicgstabBreakdown =
true;
1000 if (std::abs(rho_new) <
T(1e-100))
1002 if (!std::isfinite(rho) || !std::isfinite(alpha) ||
1003 !std::isfinite(omega) || std::abs(omega) <
T(1e-100)) {
1004 bicgstabBreakdown =
true;
1008 const T beta = (rho_new / rho) * (alpha / omega);
1009 if (!std::isfinite(beta)) {
1010 bicgstabBreakdown =
true;
1015 for (std::size_t i = 0; i < n; ++i)
1016 p[i] =
static_cast<SolverT
>(r[i] + beta * (p[i] - omega * v[i]));
1018 for (std::size_t i = 0; i < n; ++i) {
1020 y[i] =
static_cast<SolverT
>((diag[i] > eps) ? pi / diag[i] : pi);
1023 matvec(y, diag, b, v);
1026 for (std::size_t i = 0; i < n; ++i)
1027 r_hat_v +=
static_cast<T>(r_hat[i]) *
static_cast<T>(
v[i]);
1028 if (!std::isfinite(r_hat_v)) {
1029 bicgstabBreakdown =
true;
1032 if (std::abs(r_hat_v) <
T(1e-100))
1035 alpha = rho_new / r_hat_v;
1036 if (!std::isfinite(alpha)) {
1037 bicgstabBreakdown =
true;
1041 for (std::size_t i = 0; i < n; ++i)
1042 s[i] =
static_cast<SolverT
>(r[i] - alpha * v[i]);
1045 for (std::size_t i = 0; i < n; ++i)
1046 residual = std::max(residual, std::abs(
static_cast<T>(s[i])));
1047 if (!std::isfinite(residual)) {
1048 bicgstabBreakdown =
true;
1051 if (residual < parameters.tolerance * b_norm) {
1052 for (std::size_t i = 0; i < n; ++i)
1053 x[i] =
static_cast<SolverT
>(x[i] + alpha * y[i]);
1058 for (std::size_t i = 0; i < n; ++i) {
1060 z[i] =
static_cast<SolverT
>((diag[i] > eps) ? si / diag[i] : si);
1063 matvec(z, diag, b, t);
1065 T t_s =
T(0), t_t =
T(0);
1066 for (std::size_t i = 0; i < n; ++i) {
1067 t_s +=
static_cast<T>(t[i]) *
static_cast<T>(s[i]);
1068 t_t +=
static_cast<T>(t[i]) *
static_cast<T>(t[i]);
1070 if (!std::isfinite(t_s) || !std::isfinite(t_t)) {
1071 bicgstabBreakdown =
true;
1074 omega = (t_t >
T(1e-100)) ? t_s / t_t :
T(0);
1075 if (!std::isfinite(omega)) {
1076 bicgstabBreakdown =
true;
1080 for (std::size_t i = 0; i < n; ++i) {
1081 x[i] =
static_cast<SolverT
>(x[i] + alpha * y[i] + omega * z[i]);
1082 r[i] =
static_cast<SolverT
>(s[i] - omega * t[i]);
1086 for (std::size_t i = 0; i < n; ++i)
1087 residual = std::max(residual, std::abs(
static_cast<T>(r[i])));
1088 if (!std::isfinite(residual)) {
1089 bicgstabBreakdown =
true;
1092 if (residual < parameters.tolerance * b_norm) {
1098 if (bicgstabBreakdown)
1099 residual = std::numeric_limits<T>::infinity();
1101 const bool finiteCpuSolution =
1102 !bicgstabBreakdown &&
1103 std::all_of(x.begin(), x.end(),
1104 [](SolverT value) { return std::isfinite(value); });
1105 if (finiteCpuSolution) {
1106 for (std::size_t i = 0; i < n; ++i)
1107 nodes[i].concentration = x[i];
1109 for (std::size_t i = 0; i < n; ++i)
1110 nodes[i].concentration = initialGuess[i];
1111 residual = std::numeric_limits<T>::infinity();
1112 Logger::getInstance()
1113 .addWarning(
"solveDiffusion: CPU BiCGSTAB produced non-finite "
1114 "concentrations; keeping the sanitized initial guess.")
1117 normalizedResidual_ = residual / b_norm;
1118 lastSolveConverged_ = finiteCpuSolution &&
1119 std::isfinite(normalizedResidual_) &&
1120 normalizedResidual_ <= parameters.tolerance;
1123 if (Logger::hasTiming()) {
1124 const std::string path =
" [CPU]";
1125 const std::string tag =
"diffusion n=" + std::to_string(n) +
1126 " iters=" + std::to_string(iterations) +
1127 " res=" + std::to_string(residual) + path;
1128 Logger::getInstance().addTiming(tag +
" CPU BiCGSTAB", tCpuSolve).print();
1130 if (Logger::hasDebug()) {
1131 const std::string path =
" [CPU]";
1132 Logger::getInstance()
1133 .addTiming(
"diffusion n=" + std::to_string(n) + path +
1134 " diag/b precompute",
1138 if (residual > parameters.tolerance * b_norm)
1139 VIENNACORE_LOG_WARNING(
1140 "solveDiffusion: BiCGSTAB did not converge after " +
1141 std::to_string(iterations) +
"/" +
1142 std::to_string(parameters.maxIterations) +
1143 " iterations (residual=" + std::to_string(residual / b_norm) +
1144 ", tolerance=" + std::to_string(parameters.tolerance) +
")");
1147#ifdef VIENNALS_GPU_BICGSTAB
1148 static std::string gpuErrorDetail() {
1149 const char *detail = gpu::gpuGetLastErrorMessage();
1150 if (detail && detail[0] !=
'\0')
1151 return std::string(
" Detail: ") + detail;
1156 void logDiffusionBackend(
const std::string &backend,
1157 const std::string &detail)
const {
1158 if (!Logger::hasInfo())
1160 const std::string msg =
1161 "OxidationDiffusion: using " + backend +
1162 " for diffusion solve (nodes=" + std::to_string(nodes.size()) +
1163 (detail.empty() ? std::string() :
", " + detail) +
").";
1164 if (msg == lastLoggedBackend_)
1166 lastLoggedBackend_ = msg;
1167 Logger::getInstance().addInfo(msg).print();
1172 template <
class SolverT>
1174 makeStencilSide(std::size_t nodeId,
const std::vector<SolverT> &previous,
1175 unsigned direction,
int offset,
T diffusion)
const {
1176 const IndexType &nodeIndex = nodes[nodeId].index;
1177 IndexType neighbor = nodeIndex;
1178 neighbor[direction] += offset;
1181 return zeroFluxSide();
1183 const std::size_t neighborId =
lookupNode(neighbor);
1184 if (neighborId !=
noNode)
1185 return {
gridDelta, 0., previous[neighborId]};
1187 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
1188 const std::size_t n = nodes.size();
1189 const Boundary faceType = faceBCTypes_[fi * n + nodeId];
1190 const T faceDist = faceBCDists_[fi * n + nodeId];
1191 if (faceType == Boundary::REACTION)
1192 return reactionBoundarySide(nodeIndex, faceDist, diffusion);
1193 if (faceType == Boundary::AMBIENT)
1194 return ambientBoundarySide(faceDist, diffusion);
1195 if (faceType == Boundary::MASK)
1196 return maskBoundarySide(faceDist, diffusion);
1197 return zeroFluxSide();
1200 void addAxisContribution(
T &rightHandSide,
T &diagonal,
1201 const StencilSide &negativeSide,
1202 const StencilSide &positiveSide,
T diffusion)
const {
1203 const T distanceSum = negativeSide.distance + positiveSide.distance;
1204 if (distanceSum <= std::numeric_limits<T>::epsilon())
1207 addSideContribution(rightHandSide, diagonal, negativeSide, distanceSum,
1209 addSideContribution(rightHandSide, diagonal, positiveSide, distanceSum,
1213 void addSideContribution(
T &rightHandSide,
T &diagonal,
1214 const StencilSide &side,
T distanceSum,
1215 T diffusion)
const {
1216 if (side.distance <= std::numeric_limits<T>::epsilon())
1219 const T coefficient =
T(2) *
diffusion / (side.distance * distanceSum);
1220 rightHandSide += coefficient * side.constant;
1221 diagonal += coefficient * (
T(1) - side.nodeCoefficient);
1224 StencilSide zeroFluxSide()
const {
return {
gridDelta, 1., 0.}; }
1226 StencilSide reactionBoundarySide(
const IndexType &nodeIndex,
T distance,
1227 T diffusion)
const {
1230 const T denominator = conductance + reactionRate;
1231 if (denominator <= std::numeric_limits<T>::epsilon())
1232 return zeroFluxSide();
1234 return {distance, conductance / denominator, 0.};
1238 ConstSparseIterator reactionIt(reactionInterface->getDomain());
1240 const std::size_t directId =
lookupNode(index);
1241 if (directId !=
noNode) {
1243 reactionBoundarySampleFromNode(reactionIt, nodes[directId]);
1253 std::size_t bestNode = std::numeric_limits<std::size_t>::max();
1254 T bestDistance2 = std::numeric_limits<T>::max();
1256 for (
int radius = 1; radius <= 4; ++radius) {
1258 offset.fill(-radius);
1260 IndexType candidate = index;
1262 for (
unsigned d = 0; d <
D; ++d) {
1263 candidate[d] += offset[d];
1264 distance2 +=
static_cast<T>(offset[d] * offset[d]);
1267 if (distance2 >
T(0) &&
inBounds(candidate)) {
1269 if (foundId !=
noNode && distance2 < bestDistance2) {
1271 reactionBoundarySampleFromNode(reactionIt, nodes[foundId]);
1273 bestDistance2 = distance2;
1280 for (; dim <
D; ++dim) {
1281 if (offset[dim] < radius) {
1285 offset[dim] = -radius;
1291 if (bestNode != std::numeric_limits<std::size_t>::max())
1295 if (bestNode == std::numeric_limits<std::size_t>::max())
1297 return reactionBoundarySampleFromNode(reactionIt, nodes[bestNode]);
1301 reactionBoundarySampleFromNode(ConstSparseIterator &reactionIt,
1302 const Node &node)
const {
1305 T bestDistance = std::numeric_limits<T>::max();
1306 const T insidePhi =
valueAt(reactionIt, node.index);
1308 for (
unsigned direction = 0; direction <
D; ++direction) {
1309 for (
const int offset : {-1, 1}) {
1310 IndexType neighbor = node.index;
1311 neighbor[direction] += offset;
1315 const T outsidePhi =
valueAt(reactionIt, neighbor);
1316 if (!
crosses(insidePhi, outsidePhi))
1319 const T distance = crossingDistance(insidePhi, outsidePhi);
1320 if (distance >= bestDistance)
1323 const T diffusion = getEffectiveDiffusionCoefficient(node.index);
1324 const auto side = reactionBoundarySide(node.index, distance, diffusion);
1326 best.distance = distance;
1327 best.concentration =
1328 side.nodeCoefficient * node.concentration + side.constant;
1329 best.crossingAxis = direction;
1330 best.crossingOffset = offset;
1331 bestDistance = distance;
1338 StencilSide ambientBoundarySide(
T distance,
T diffusion)
const {
1340 const T denominator = conductance + parameters.transferCoefficient;
1341 if (denominator <= std::numeric_limits<T>::epsilon())
1342 return zeroFluxSide();
1344 return {distance, conductance / denominator,
1345 parameters.transferCoefficient *
1346 parameters.equilibriumConcentration / denominator};
1349 StencilSide maskBoundarySide(
T distance,
T diffusion)
const {
1350 if (parameters.maskTransferCoefficient <= std::numeric_limits<T>::epsilon())
1351 return zeroFluxSide();
1354 const T denominator = conductance + parameters.maskTransferCoefficient;
1355 if (denominator <= std::numeric_limits<T>::epsilon())
1356 return zeroFluxSide();
1358 return {distance, conductance / denominator,
1359 parameters.maskTransferCoefficient * parameters.maskConcentration /
1364 T rate = parameters.reactionRate;
1366 if (parameters.reactionActivationVolume !=
T(0)) {
1367 T pressure = parameters.referencePressure;
1368 const auto foundPressure =
1370 if (foundPressure != pressureLookup.end())
1371 pressure = foundPressure->second;
1372 if (!std::isfinite(pressure))
1373 pressure = parameters.referencePressure;
1375 stressExponent(pressure, parameters.reactionActivationVolume);
1376 rate *= stressFactor(exponent);
1379 if (parameters.reactionRateRatio111 !=
T(1)) {
1380 const std::size_t nodeId =
lookupNode(index);
1382 const auto &normal = nodes[nodeId].siNormal;
1384 for (
unsigned d = 0; d <
D; ++d)
1385 dot += normal[d] * parameters.crystalAxis[d];
1387 (parameters.reactionRateRatio111 -
T(1)) * (
T(1) - dot * dot);
1394 T getEffectiveDiffusionCoefficient(
const IndexType &index)
const {
1395 if (parameters.diffusionActivationVolume ==
T(0))
1396 return parameters.diffusionCoefficient;
1398 T pressure = parameters.referencePressure;
1400 if (found != pressureLookup.end())
1401 pressure = found->second;
1402 if (!std::isfinite(pressure))
1403 pressure = parameters.referencePressure;
1406 stressExponent(pressure, parameters.diffusionActivationVolume);
1407 return parameters.diffusionCoefficient * stressFactor(exponent);
1410 T stressExponent(
T pressure,
T activationVolume)
const {
1411 const T thermalEnergy =
1412 boltzmannConstant * std::max(parameters.temperature,
T(1.));
1413 return -(pressure - parameters.referencePressure) * activationVolume /
1417 static T stressFactor(
T exponent) {
1418 if (!std::isfinite(exponent))
1420 if (exponent <= std::log(minStressFactor))
1421 return minStressFactor;
1422 if (exponent >= std::log(maxStressFactor))
1423 return maxStressFactor;
1424 return std::exp(exponent);
1427 Vec3D<T> computeSiNormal(
const IndexType &index,
1428 ConstSparseIterator &reactionIt)
const {
1432 auto reflectToGrid = [&](IndexType idx) {
1433 auto &g = reactionInterface->getGrid();
1434 for (
unsigned d2 = 0;
d2 <
D; ++
d2) {
1435 const auto lo = g.getMinGridPoint(d2);
1436 const auto hi = g.getMaxGridPoint(d2);
1438 idx[
d2] = 2 * lo - idx[
d2];
1440 idx[
d2] = 2 * hi - idx[
d2];
1444 Vec3D<T> gradient{0., 0., 0.};
1445 for (
unsigned d = 0; d <
D; ++d) {
1446 IndexType plus = index, minus = index;
1452 valueAt(reactionIt, reflectToGrid(minus)))) /
1456 for (
unsigned d = 0; d <
D; ++d)
1457 len += gradient[d] * gradient[d];
1458 len = std::sqrt(len);
1459 if (len > std::numeric_limits<T>::epsilon()) {
1460 for (
unsigned d = 0; d <
D; ++d)
1463 gradient = Vec3D<T>{0., 0., 0.};
1464 gradient[
D - 1] =
T(1);
1469 std::pair<Boundary, T> classifyBoundary(ConstSparseIterator &reactionIt,
1470 ConstSparseIterator &ambientIt,
1471 ConstSparseIterator &maskIt,
1472 const IndexType &inside,
1473 const IndexType &outside)
const {
1474 const T reactionInside =
valueAt(reactionIt, inside);
1475 const T reactionOutside =
valueAt(reactionIt, outside);
1476 const T ambientInside =
valueAt(ambientIt, inside);
1477 const T ambientOutside =
valueAt(ambientIt, outside);
1478 const T maskInside = valueAtMask(maskIt, inside);
1479 const T maskOutside = valueAtMask(maskIt, outside);
1481 const T reactionDistance =
1482 crosses(reactionInside, reactionOutside)
1483 ? crossingDistance(reactionInside, reactionOutside)
1484 : std::numeric_limits<
T>::max();
1485 const T ambientDistance =
1486 crosses(ambientInside, ambientOutside)
1487 ? crossingDistance(ambientInside, ambientOutside)
1488 : std::numeric_limits<
T>::max();
1489 const T maskDistance =
1490 maskInterface !=
nullptr &&
crosses(maskInside, maskOutside)
1491 ? crossingDistance(maskInside, maskOutside)
1492 : std::numeric_limits<
T>::max();
1494 if (reactionDistance == std::numeric_limits<T>::max() &&
1495 ambientDistance == std::numeric_limits<T>::max() &&
1496 maskDistance == std::numeric_limits<T>::max())
1499 if (reactionDistance <= ambientDistance && reactionDistance <= maskDistance)
1500 return {Boundary::REACTION, reactionDistance};
1508 if (ambientDistance != std::numeric_limits<T>::max() &&
1509 (isMaskAtCrossing(maskInside, maskOutside, ambientDistance) ||
1510 (maskInterface !=
nullptr &&
1511 static_cast<T>(maskSign) * valueAtMask(maskIt, outside) >=
T(0))))
1512 return {Boundary::MASK, ambientDistance};
1514 if (maskDistance <= ambientDistance)
1515 return {Boundary::MASK, maskDistance};
1516 return {Boundary::AMBIENT, ambientDistance};
1519 bool isInsideOxide(
T reactionPhi,
T ambientPhi)
const {
1525 constexpr T eps =
T(1e-9);
1526 return reactionSign * reactionPhi >= -eps &&
1527 ambientSign * ambientPhi >= -eps;
1530 ConstSparseIterator makeMaskIterator()
const {
1531 if (maskInterface ==
nullptr)
1536 bool isInsideMask(ConstSparseIterator &maskIt,
const IndexType &index)
const {
1537 if (maskInterface ==
nullptr)
1539 return maskSign *
valueAt(maskIt, index) >= 0.;
1542 T valueAtMask(ConstSparseIterator &maskIt,
const IndexType &index)
const {
1543 if (maskInterface ==
nullptr)
1544 return std::numeric_limits<T>::max();
1545 return valueAt(maskIt, index);
1548 bool isMaskAtCrossing(
T maskInside,
T maskOutside,
T distance)
const {
1549 if (maskInterface ==
nullptr)
1551 const T fraction = std::clamp(distance /
gridDelta,
T(0),
T(1));
1554 const T maskPhi = insidePhi + fraction * (outsidePhi - insidePhi);
1555 return static_cast<T>(maskSign) * maskPhi >=
T(0);
1558 T crossingDistance(
T insidePhi,
T outsidePhi)
const {
1560 insidePhi, outsidePhi, parameters.minBoundaryDistance,
gridDelta);