ViennaLS
Loading...
Searching...
No Matches
lsOxidationDiffusion.hpp
Go to the documentation of this file.
1#pragma once
2
4#include <lsVelocityField.hpp>
5
6#include <algorithm>
7#include <cmath>
8#include <stdexcept>
9#include <string>
10#include <unordered_map>
11#include <utility>
12#include <vector>
13
14#include <vcTimer.hpp>
15
16#include <omp.h>
17
18#ifdef VIENNALS_GPU_BICGSTAB
20#endif
21
22namespace viennals {
23
26enum class GpuMode {
29};
30
36
41 double reactionRate = 1.;
46 double velocitySign = 1.;
47
48 // Stress coupling for reaction rate:
49 // k_eff = k * exp(-(p - p_ref) * V_k / (k_B * T)).
50 // Activation volumes are in m^3, pressure is in Pa, temperature is in K.
51 double temperature = 1273.15;
53 double referencePressure = 0.;
54
55 // Stress coupling for diffusion coefficient:
56 // D_eff = D * exp(-(p - p_ref) * V_D / (k_B * T)).
58
59 // Crystal orientation factor on reaction rate.
60 // k(n) = k * [1 + (reactionRateRatio111 - 1) * (1 - (n . crystalAxis)^2)]
61 // reactionRateRatio111 = 1 disables the correction (isotropic).
63 Vec3D<double> crystalAxis = {0., 1., 0.};
64
66 double maskConcentration = 0.;
67 double minBoundaryDistance = 1e-6;
68 unsigned maxIterations = 10000;
69 double tolerance = 1e-8;
70 double relaxation = 1.;
71 std::size_t maxGridPoints = 5000000;
72 int material = -1;
73};
74
90template <class T, int D>
91class OxidationDiffusion final : public VelocityField<T>,
92 public OxidationSolverBase<T, D> {
93 using IndexType = viennahrle::Index<D>;
94 using ConstSparseIterator =
95 viennahrle::ConstSparseIterator<typename Domain<T, D>::DomainType>;
96
97private:
98 static constexpr T boltzmannConstant = T(1.380649e-23);
99 static constexpr T minStressFactor = T(1.e-6);
100 static constexpr T maxStressFactor = T(1.e6);
101
102 // bring base members into scope
119
120 enum class Boundary { NONE, REACTION, AMBIENT, MASK };
121
122 struct Node {
123 IndexType index;
124 T concentration = 0.;
125 Vec3D<T> siNormal = {0., 0.,
126 0.}; // unit outward normal of Si surface (into oxide)
127 };
128
129 struct StencilSide {
130 T distance = 1.;
131 T nodeCoefficient = 1.;
132 T constant = 0.;
133 };
134
135 SmartPointer<Domain<T, D>> reactionInterface = nullptr;
136 SmartPointer<Domain<T, D>> ambientInterface = nullptr;
137 SmartPointer<Domain<T, D>> maskInterface = nullptr;
138 OxidationParameters parameters;
139 int reactionSign = 1;
140 int ambientSign = -1;
141 int maskSign = 1;
142 IndexType requestedMinIndex{};
143 IndexType requestedMaxIndex{};
144 unsigned iterations = 0;
145 T residual = std::numeric_limits<T>::max();
146 T normalizedResidual_ = std::numeric_limits<T>::max();
147 bool lastSolveConverged_ = false;
148 T maxScalarVelocity_ = 0.;
149 bool solved = false;
150 bool nodesDirty_ = true; // true → rebuild grid/nodes on next apply()
151 mutable std::string
152 lastLoggedBackend_; // suppresses repeated "using X" messages
153 bool useRequestedBounds = false;
154 bool warmStartable_ =
155 false; // true when nodes[i].concentration holds a prior solution
156 std::unordered_map<std::size_t, T> pressureLookup;
157 std::unordered_map<std::size_t, T> concentrationCache_;
158 std::vector<Node> nodes;
159 // Face-major flat BC arrays: index = fi * n + nodeId, where fi in [0, 2*D).
160 // Face-major layout gives coalesced GPU reads when all warp threads access
161 // the same face of consecutive nodes.
162 std::vector<Boundary> faceBCTypes_;
163 std::vector<T> faceBCDists_;
164 // Neighbor node IDs for every face (geometry-only, rebuilt with nodes).
165 // Entry == noNode for boundary / out-of-bounds faces.
166 std::vector<std::size_t> neighborIds_;
167
168 GpuMode gpuMode_ = GpuMode::Cpu;
169 GpuPreconditioner gpuPreconditioner_ = GpuPreconditioner::Jacobi;
170
171#ifdef VIENNALS_GPU_BICGSTAB
172 // Opaque pointer to device-side buffers (GpuBiCGSTABBuffers is defined only
173 // in the CUDA translation unit; we hold a raw pointer here so g++ never sees
174 // the CUDA internals).
175 gpu::GpuBiCGSTABBuffers *gpuBufs_ = nullptr;
176#endif
177
178public:
184 bool found = false;
185 IndexType nodeIndex{};
188 unsigned crossingAxis = 0; // Cartesian axis of the crossing edge
189 int crossingOffset = 0; // +1 or -1: which neighbour the crossing faces
190 };
191
193
194 OxidationDiffusion(SmartPointer<Domain<T, D>> passedReactionInterface,
195 SmartPointer<Domain<T, D>> passedAmbientInterface,
196 OxidationParameters passedParameters = {})
197 : reactionInterface(passedReactionInterface),
198 ambientInterface(passedAmbientInterface), parameters(passedParameters) {
199 }
200
202#ifdef VIENNALS_GPU_BICGSTAB
203 gpu::freeGpuBuffers(gpuBufs_);
204 gpuBufs_ = nullptr;
205#endif
206 }
207
208 template <class... Args> static auto New(Args &&...args) {
209 return SmartPointer<OxidationDiffusion>::New(std::forward<Args>(args)...);
210 }
211
215 nodesDirty_ = true;
216 solved = false;
217 }
218
219 void setReactionInterface(SmartPointer<Domain<T, D>> passedInterface) {
220 reactionInterface = passedInterface;
221 nodesDirty_ = true;
222 solved = false;
223 }
224
225 void setAmbientInterface(SmartPointer<Domain<T, D>> passedInterface) {
226 ambientInterface = passedInterface;
227 nodesDirty_ = true;
228 solved = false;
229 }
230
231 void setMaskInterface(SmartPointer<Domain<T, D>> passedInterface,
232 int passedMaskSign = 1) {
233 maskInterface = passedInterface;
234 maskSign = (passedMaskSign < 0) ? -1 : 1;
235 nodesDirty_ = true;
236 solved = false;
237 }
238
240 maskInterface = nullptr;
241 nodesDirty_ = true;
242 solved = false;
243 }
244
245 void setParameters(OxidationParameters passedParameters) {
246 parameters = passedParameters;
247 solved = false;
248 }
249
250 OxidationParameters getParameters() const { return parameters; }
251
252 T getEffectiveReactionRate(const Vec3D<T> &coordinate) const {
253 IndexType index;
254 for (unsigned i = 0; i < D; ++i)
255 index[i] = std::llround(coordinate[i] / gridDelta);
256 return getEffectiveReactionRate(index);
257 }
258
260 pressureLookup.clear();
261 solved = false;
262 }
263
264 void setPressure(const IndexType &index, T pressure) {
265 const auto key = detail::gridIndexHash<D>(index);
266 if (std::isfinite(pressure))
267 pressureLookup[key] = pressure;
268 else
269 pressureLookup.erase(key);
270 solved = false;
271 }
272
273 void setPressure(const Vec3D<T> &coordinate, T pressure) {
274 IndexType index;
275 for (unsigned i = 0; i < D; ++i)
276 index[i] = std::llround(coordinate[i] / gridDelta);
277 setPressure(index, pressure);
278 }
279
282 void setOxideSigns(int passedReactionSign, int passedAmbientSign) {
283 reactionSign = (passedReactionSign < 0) ? -1 : 1;
284 ambientSign = (passedAmbientSign < 0) ? -1 : 1;
285 solved = false;
286 }
287
290 void setSolveBounds(const IndexType &passedMinIndex,
291 const IndexType &passedMaxIndex) {
292 requestedMinIndex = passedMinIndex;
293 requestedMaxIndex = passedMaxIndex;
294 useRequestedBounds = true;
295 nodesDirty_ = true;
296 solved = false;
297 }
298
300 useRequestedBounds = false;
301 nodesDirty_ = true;
302 solved = false;
303 }
304
305 void apply() {
306 if (reactionInterface == nullptr || ambientInterface == nullptr) {
307 Logger::getInstance()
308 .addError("OxidationDiffusion: Missing level-set "
309 "interface.")
310 .print();
311 return;
312 }
313
314 if (nodesDirty_) {
315 if (!initialiseGrid())
316 return; // base class already logged the error
317 buildNodes();
318 nodesDirty_ = false;
319 if (nodes.empty())
320 Logger::getInstance()
321 .addWarning("OxidationDiffusion: no oxide nodes found after "
322 "buildNodes(). Verify that the reaction and ambient "
323 "level sets enclose a non-empty oxide band.")
324 .print();
325 }
326 solveDiffusion();
327
328 concentrationCache_.clear();
329 for (const auto &node : nodes)
330 concentrationCache_[detail::gridIndexHash<D>(node.index)] =
331 node.concentration;
332
333 maxScalarVelocity_ = 0.;
334 ConstSparseIterator reactionIt(reactionInterface->getDomain());
335 for (const auto &node : nodes) {
336 const auto sample = reactionBoundarySampleFromNode(reactionIt, node);
337 if (!sample.found)
338 continue;
339 const T rate = getEffectiveReactionRate(node.index);
340 const T vel =
341 std::abs(parameters.velocitySign) * rate * sample.concentration /
342 (parameters.oxidantMoleculeDensity * parameters.expansionCoefficient);
343 maxScalarVelocity_ = std::max(maxScalarVelocity_, vel);
344 }
345
346 solved = true;
347 }
348
349 T getScalarVelocity(const Vec3D<T> &coordinate, int material,
350 const Vec3D<T> &normalVector,
351 unsigned long /*pointId*/) final {
352 if (!solved)
353 apply();
354
355 if (parameters.material >= 0 && material != parameters.material)
356 return 0.;
357
358 IndexType index;
359 for (unsigned i = 0; i < D; ++i)
360 index[i] = std::llround(coordinate[i] / gridDelta);
361
362 const auto boundarySample = reactionBoundarySample(index);
363 const IndexType rateIndex =
364 boundarySample.found ? boundarySample.nodeIndex : index;
365 const T concentration = boundarySample.found
366 ? boundarySample.concentration
368 return parameters.velocitySign * getEffectiveReactionRate(rateIndex) *
369 concentration /
370 (parameters.oxidantMoleculeDensity *
371 parameters.expansionCoefficient);
372 }
373
374 T getDissipationAlpha(int /*direction*/, int material,
375 const Vec3D<T> & /*centralDifferences*/) final {
376 if (parameters.material >= 0 && material != parameters.material)
377 return 0.;
378
379 T bulkVel =
380 std::abs(parameters.velocitySign) * parameters.reactionRate *
381 parameters.equilibriumConcentration /
382 (parameters.oxidantMoleculeDensity * parameters.expansionCoefficient);
383
384 return std::max(maxScalarVelocity_, bulkVel);
385 }
386
387 T getConcentration(const Vec3D<T> &coordinate) const {
388 IndexType index;
389 for (unsigned i = 0; i < D; ++i)
390 index[i] = std::llround(coordinate[i] / gridDelta);
391 return getConcentration(index);
392 }
393
394 T getConcentration(const IndexType &index) const {
395 const std::size_t nodeId = lookupNode(index);
396 if (nodeId == noNode) {
397 const auto nearby = findNearbyNode(index);
398 if (nearby == noNode)
399 return 0.;
400 return nodes[nearby].concentration;
401 }
402 return nodes[nodeId].concentration;
403 }
404
405 T getReactionBoundaryConcentration(const Vec3D<T> &coordinate) const {
406 IndexType index;
407 for (unsigned i = 0; i < D; ++i)
408 index[i] = std::llround(coordinate[i] / gridDelta);
410 }
411
412 T getReactionBoundaryConcentration(const IndexType &index) const {
413 const auto sample = reactionBoundarySample(index);
414 return sample.found ? sample.concentration : getConcentration(index);
415 }
416
417 unsigned getIterations() const { return iterations; }
418 T getResidual() const { return residual; }
419 T getNormalizedResidual() const { return normalizedResidual_; }
420 bool lastSolveConverged() const { return lastSolveConverged_; }
421 std::size_t getNumberOfSolutionNodes() const { return nodes.size(); }
423 for (const auto &node : nodes)
424 if (!std::isfinite(node.concentration))
425 return false;
426 return true;
427 }
428
429 const std::unordered_map<std::size_t, T> &getConcentrationCache() const {
430 return concentrationCache_;
431 }
432
433 void setConcentrationCache(std::unordered_map<std::size_t, T> cache) {
434 concentrationCache_ = std::move(cache);
435 }
436
439 void setGpuMode(GpuMode mode) { gpuMode_ = mode; }
442 gpuPreconditioner_ = preconditioner;
443 }
444
448 void markSolved() { solved = true; }
449
454 if (nodes.empty() || ambientInterface == nullptr)
455 return;
456
457 std::vector<T> concentrations;
458 ConstSparseIterator it(ambientInterface->getDomain());
459 for (; !it.isFinished(); ++it) {
460 if (!it.isDefined())
461 continue;
462 const IndexType idx = it.getStartIndices();
463 const std::size_t nodeId = lookupNode(idx);
464 // Non-solve-grid narrow-band points (mask interior, gas-phase boundary)
465 // get concentration 0. Using equilibriumConcentration here contaminates
466 // the output: mask-interior points are impermeable (C≈0), and newly
467 // appeared bird's-beak points inherit C=1 spuriously after advection.
468 // The in-memory concentrationCache_ is the warm-start source for the
469 // next substep; the level-set value is only the fallback when that cache
470 // is cold, and C=0 converges just as quickly as C=1 from a cold start.
471 const T value = nodeId != noNode ? nodes[nodeId].concentration : T(0);
472 concentrations.push_back(std::isfinite(value) ? value : T(0));
473 }
474 ambientInterface->getPointData().insertReplaceScalarData(
475 std::move(concentrations), "OxConcentration");
476 }
477
481 if (pressureLookup.empty() || ambientInterface == nullptr)
482 return;
483
484 std::vector<T> pressures;
485 ConstSparseIterator it(ambientInterface->getDomain());
486 for (; !it.isFinished(); ++it) {
487 if (!it.isDefined())
488 continue;
489 const IndexType idx = it.getStartIndices();
490 const auto pIt = pressureLookup.find(detail::gridIndexHash<D>(idx));
491 const T value = pIt != pressureLookup.end() ? pIt->second : T(0);
492 pressures.push_back(std::isfinite(value) ? value : T(0));
493 }
494 ambientInterface->getPointData().insertReplaceScalarData(
495 std::move(pressures), "OxPressure");
496 }
497
503
507 ReactionBoundarySample
508 getReactionBoundarySample(const Vec3D<T> &coordinate) const {
509 IndexType index;
510 for (unsigned i = 0; i < D; ++i)
511 index[i] = std::llround(coordinate[i] / gridDelta);
512 return reactionBoundarySample(index);
513 }
514
518 if (!sample.found)
519 return T(0);
520 return std::abs(
522 (parameters.oxidantMoleculeDensity * parameters.expansionCoefficient));
523 }
524
525private:
526 bool initialiseGrid() {
528 reactionInterface, ambientInterface, maskInterface, useRequestedBounds,
529 requestedMinIndex, requestedMaxIndex, parameters.maxGridPoints,
530 "OxidationDiffusion");
531 }
532
533 void buildNodes() {
534 nodes.clear();
536
537 // On the first substep after an outer-step boundary the in-memory cache is
538 // empty. Fall back to the concentration stored in the level set's pointData
539 // (written by writeConcentrationToLevelSet() before the previous advection
540 // and remapped by lsAdvect + lsInterior).
541 warmStartable_ = !concentrationCache_.empty();
542 if (!warmStartable_) {
543 // Single HRLE pass restores both concentration and pressure from the
544 // pointData written by writePersistentFields() before the last advection.
545 const int cIdx = ambientInterface->getPointData().getScalarDataIndex(
546 "OxConcentration");
547 const int pIdx =
548 ambientInterface->getPointData().getScalarDataIndex("OxPressure");
549 const auto *cd =
550 (cIdx != -1) ? ambientInterface->getPointData().getScalarData(cIdx)
551 : nullptr;
552 const auto *pd =
553 (pIdx != -1) ? ambientInterface->getPointData().getScalarData(pIdx)
554 : nullptr;
555 if (cd != nullptr || pd != nullptr) {
556 ConstSparseIterator it(ambientInterface->getDomain());
557 for (; !it.isFinished(); ++it) {
558 if (!it.isDefined())
559 continue;
560 const auto ptId = it.getPointId();
561 const std::size_t key =
562 detail::gridIndexHash<D>(it.getStartIndices());
563 if (cd != nullptr && ptId < static_cast<decltype(ptId)>(cd->size()) &&
564 std::isfinite((*cd)[ptId]))
565 concentrationCache_[key] = (*cd)[ptId];
566 if (pd != nullptr && ptId < static_cast<decltype(ptId)>(pd->size()) &&
567 std::isfinite((*pd)[ptId]))
568 pressureLookup[key] = (*pd)[ptId];
569 }
570 warmStartable_ = !concentrationCache_.empty();
571 }
572 }
573
574 ConstSparseIterator reactionIt(reactionInterface->getDomain());
575 ConstSparseIterator ambientIt(ambientInterface->getDomain());
576 auto maskIt = makeMaskIterator();
577
578 IndexType index = minIndex;
579 while (true) {
580 const T reactionPhi = valueAt(reactionIt, index);
581 const T ambientPhi = valueAt(ambientIt, index);
582 if (isInsideOxide(reactionPhi, ambientPhi) &&
583 !isInsideMask(maskIt, index)) {
584 const std::size_t id = nodes.size();
585 nodeLookupFlat[linearIndex(index)] = id;
586 T seedConc = parameters.equilibriumConcentration;
587 auto cacheIt =
588 concentrationCache_.find(detail::gridIndexHash<D>(index));
589 if (cacheIt != concentrationCache_.end())
590 seedConc = cacheIt->second;
591 if (!std::isfinite(seedConc))
592 seedConc = parameters.equilibriumConcentration;
593 Node newNode{index, seedConc};
594 if (parameters.reactionRateRatio111 != T(1))
595 newNode.siNormal = computeSiNormal(index, reactionIt);
596 nodes.push_back(newNode);
597 }
598
599 if (!increment(index))
600 break;
601 }
602
603 // Precompute per-face boundary intersections into flat face-major arrays.
604 // Level-set positions are fixed during the inner solve; one pass per
605 // apply().
606 const std::size_t n = nodes.size();
607 faceBCTypes_.assign(2 * D * n, Boundary::NONE);
608 faceBCDists_.assign(2 * D * n, T(1));
609 ConstSparseIterator faceReactionIt(reactionInterface->getDomain());
610 ConstSparseIterator faceAmbientIt(ambientInterface->getDomain());
611 auto faceMaskIt = makeMaskIterator();
612 for (std::size_t id = 0; id < n; ++id) {
613 const auto &node = nodes[id];
614 for (unsigned dir = 0; dir < D; ++dir) {
615 for (int off : {-1, 1}) {
616 const unsigned fi = dir * 2u + (off == 1 ? 1u : 0u);
617 IndexType nb = node.index;
618 nb[dir] += off;
619 if (!inBounds(nb) || lookupNode(nb) != noNode)
620 continue; // NONE/1.0 already set by assign()
621 const auto bc = classifyBoundary(faceReactionIt, faceAmbientIt,
622 faceMaskIt, node.index, nb);
623 faceBCTypes_[fi * n + id] = bc.first;
624 faceBCDists_[fi * n + id] = bc.second;
625 }
626 }
627 }
628
629 // Precompute per-face neighbor node IDs (geometry-only).
630 // Used by computeFaceCoeffs() and uploaded once to GPU.
631 neighborIds_.assign(2 * D * n, noNode);
632 for (std::size_t id = 0; id < n; ++id) {
633 for (unsigned dir = 0; dir < D; ++dir) {
634 for (int off : {-1, 1}) {
635 const unsigned fi = dir * 2u + (off == 1 ? 1u : 0u);
636 IndexType nb = nodes[id].index;
637 nb[dir] += off;
638 if (inBounds(nb))
639 neighborIds_[fi * n + id] = lookupNode(nb);
640 }
641 }
642 }
643
644 bool loggedBackend = false;
645#ifdef VIENNALS_GPU_BICGSTAB
646 // Free any stale allocation from a previous buildNodes() call.
647 gpu::freeGpuBuffers(gpuBufs_);
648 gpuBufs_ = nullptr;
649
650 const bool tryGpu = (gpuMode_ == GpuMode::Gpu);
651 if (tryGpu) {
652 const bool useIlu0 = gpuPreconditioner_ == GpuPreconditioner::ILU0;
653 gpuBufs_ = gpu::allocGpuBuffers(static_cast<uint32_t>(n), 2 * D, useIlu0);
654
655 if (gpuBufs_) {
656 // Convert std::size_t neighbor IDs to uint32_t for the device
657 const std::size_t nf = 2u * D * n;
658 std::vector<uint32_t> nb32(nf);
659 for (std::size_t k = 0; k < nf; ++k)
660 nb32[k] = (neighborIds_[k] == noNode)
661 ? gpu::kNoNode
662 : static_cast<uint32_t>(neighborIds_[k]);
663 if (!gpu::gpuUploadNeighborIds(gpuBufs_, nb32.data(), nf)) {
664 gpu::freeGpuBuffers(gpuBufs_);
665 gpuBufs_ = nullptr;
666 VIENNACORE_LOG_ERROR("OxidationDiffusion: GPU mode was selected, but "
667 "uploading GPU neighbor IDs failed." +
668 gpuErrorDetail());
669 }
670 if (useIlu0 && !gpu::gpuSetupCSR(gpuBufs_, nb32.data(),
671 static_cast<uint32_t>(n), 2 * D)) {
672 gpu::freeGpuBuffers(gpuBufs_);
673 gpuBufs_ = nullptr;
674 VIENNACORE_LOG_ERROR("OxidationDiffusion: GPU mode was selected, but "
675 "CUSPARSE setup for the GPU BiCGSTAB solver "
676 "failed." +
677 gpuErrorDetail());
678 }
679 logDiffusionBackend("GPU BiCGSTAB",
680 "preconditioner=" +
681 std::string(useIlu0 ? "ILU0" : "Jacobi"));
682 loggedBackend = true;
683 } else {
684 VIENNACORE_LOG_ERROR("OxidationDiffusion: GPU mode was selected, but "
685 "CUDA buffers could not be "
686 "allocated or the CUDA context could not be "
687 "initialized." +
688 gpuErrorDetail());
689 }
690 }
691#endif
692 if (!loggedBackend) {
693#ifdef VIENNALS_GPU_BICGSTAB
694 logDiffusionBackend("CPU BiCGSTAB", "GPU mode not selected");
695#else
696 logDiffusionBackend("CPU BiCGSTAB",
697 "ViennaLS was built without GPU BiCGSTAB support");
698#endif
699 }
700 }
701
702 // Precompute per-face coupling coefficients for the GPU SpMV.
703 //
704 // faceCoeffs[fi * n + id] = 2*D_eff / (dist_fi * distSum_axis)
705 //
706 // Only interior faces (neighbor != noNode) get a nonzero entry;
707 // boundary and out-of-bounds faces contribute to diag/b but not to
708 // the off-diagonal coupling stored here. Must be called after diag/b
709 // are computed (or at least after D_eff is known), because D_eff
710 // depends on pressure when diffusionActivationVolume != 0.
711 void computeFaceCoeffs(std::vector<T> &faceCoeffs) const {
712 const std::size_t n = nodes.size();
713 faceCoeffs.assign(2 * D * n, T(0));
714 const T eps = std::numeric_limits<T>::epsilon();
715 const std::vector<T> zeros(n, T(0));
716
717 for (std::size_t id = 0; id < n; ++id) {
718 const T D_eff = getEffectiveDiffusionCoefficient(nodes[id].index);
719 for (unsigned dir = 0; dir < D; ++dir) {
720 const unsigned fiNeg = dir * 2u;
721 const unsigned fiPos = dir * 2u + 1u;
722
723 // Use the exact side construction used by the CPU stencil so the GPU
724 // SpMV cannot drift from computeStencilAt() at cut-cell boundaries.
725 const auto neg = makeStencilSide(id, zeros, dir, -1, D_eff);
726 const auto pos = makeStencilSide(id, zeros, dir, 1, D_eff);
727 const T distNeg = neg.distance;
728 const T distPos = pos.distance;
729 const T distSum = distNeg + distPos;
730 if (distSum <= eps)
731 continue;
732
733 if (neighborIds_[fiNeg * n + id] != noNode && distNeg > eps)
734 faceCoeffs[fiNeg * n + id] = T(2) * D_eff / (distNeg * distSum);
735 if (neighborIds_[fiPos * n + id] != noNode && distPos > eps)
736 faceCoeffs[fiPos * n + id] = T(2) * D_eff / (distPos * distSum);
737 }
738 }
739 }
740
741 // Evaluates the stencil at one node: fills diag = A[i,i] and
742 // rhs = sum_j(A_off[i,j] * x[j]) + bc_constants[i].
743 // Called with x = zeros to precompute the geometry-fixed diagonal and b.
744 template <class SolverT>
745 void computeStencilAt(std::size_t nodeId, const std::vector<SolverT> &x,
746 T &diag, T &rhs) const {
747 diag = T(0);
748 rhs = T(0);
749 const T D_eff = getEffectiveDiffusionCoefficient(nodes[nodeId].index);
750 for (unsigned direction = 0; direction < D; ++direction) {
751 const auto neg = makeStencilSide(nodeId, x, direction, -1, D_eff);
752 const auto pos = makeStencilSide(nodeId, x, direction, 1, D_eff);
753 addAxisContribution(rhs, diag, neg, pos, D_eff);
754 }
755 }
756
757 // Computes Av = A * v using precomputed diagonal and b (RHS constants).
758 // (Av)[i] = precomputedDiag[i]*v[i] - rhs_at_v[i] + b[i]
759 // Stencil arithmetic stays in T; only storage uses SolverT.
760 template <class SolverT>
761 void matvec(const std::vector<SolverT> &v,
762 const std::vector<T> &precomputedDiag, const std::vector<T> &b,
763 std::vector<SolverT> &Av) const {
764#pragma omp parallel for schedule(static)
765 for (std::size_t i = 0; i < nodes.size(); ++i) {
766 T diag, rhs;
767 computeStencilAt(i, v, diag, rhs);
768 Av[i] = static_cast<SolverT>(precomputedDiag[i] * v[i] - rhs + b[i]);
769 }
770 }
771
772 void solveDiffusion() {
773 iterations = 0;
774 residual = 0.;
775 normalizedResidual_ = 0.;
776 lastSolveConverged_ = false;
777 if (nodes.empty()) {
778 lastSolveConverged_ = true;
779 return;
780 }
781
782 // Work vectors use float to halve memory bandwidth in the SpMV hot path.
783 // Stencil arithmetic and dot-product accumulation remain in T (double).
784 using SolverT = float;
785
786 const std::size_t n = nodes.size();
787 const T eps = std::numeric_limits<T>::epsilon();
788
789 // Geometry-fixed diagonal and BC source vector (kept in T for full
790 // precision).
791 Timer<> tDiag;
792 tDiag.start();
793 std::vector<T> diag(n), b(n);
794 {
795 const std::vector<SolverT> zeros(n, SolverT(0));
796#pragma omp parallel for schedule(static)
797 for (std::size_t i = 0; i < n; ++i)
798 computeStencilAt(i, zeros, diag[i], b[i]);
799 }
800 tDiag.finish();
801
802 T b_norm = T(0);
803 bool finiteSystem = true;
804 for (std::size_t i = 0; i < n; ++i) {
805 finiteSystem =
806 finiteSystem && std::isfinite(diag[i]) && std::isfinite(b[i]);
807 b_norm = std::max(b_norm, std::abs(b[i]));
808 }
809 if (b_norm < T(1e-100))
810 b_norm = T(1);
811 if (!finiteSystem) {
812 residual = std::numeric_limits<T>::infinity();
813 normalizedResidual_ = residual;
814 Logger::getInstance()
815 .addWarning("solveDiffusion: assembled non-finite matrix/RHS; "
816 "rejecting this coupled trial.")
817 .print();
818 return;
819 }
820
821 // Initial guess: warm-start or diagonal-preconditioned b. Sanitize the
822 // warm start so a previous failed solve cannot seed NaNs into the next one.
823 auto fallbackInitialGuess = [&](std::size_t i) -> T {
824 if (std::isfinite(diag[i]) && std::isfinite(b[i]) && diag[i] > eps) {
825 const T guess = b[i] / diag[i];
826 if (std::isfinite(guess))
827 return guess;
828 }
829 return T(0);
830 };
831
832 std::vector<SolverT> x(n);
833 for (std::size_t i = 0; i < n; ++i) {
834 T guess =
835 warmStartable_ ? nodes[i].concentration : fallbackInitialGuess(i);
836 if (!std::isfinite(guess))
837 guess = fallbackInitialGuess(i);
838 x[i] = static_cast<SolverT>(guess);
839 if (!std::isfinite(x[i]))
840 x[i] = SolverT(0);
841 }
842 const auto initialGuess = x;
843
844#ifdef VIENNALS_GPU_BICGSTAB
845 // ── GPU BiCGSTAB path ──────────────────────────────────────────────
846 // Engaged when GpuMode::Gpu is set and buildNodes() allocated GPU buffers.
847 // Uploads diag, b, and face coefficients fresh each call (they are
848 // pressure-dependent via D_eff), then runs the full BiCGSTAB loop on
849 // the device. Only dot-product scalars and the max-abs residual are
850 // downloaded; work vectors stay on the GPU between iterations.
851 if (gpu::gpuIsValid(gpuBufs_)) {
852 Timer<> tPrep, tUpload, tSolve;
853 tPrep.start();
854 std::vector<double> diagGpu(n), bGpu(n);
855 for (std::size_t i = 0; i < n; ++i) {
856 diagGpu[i] = static_cast<double>(diag[i]);
857 bGpu[i] = static_cast<double>(b[i]);
858 }
859
860 // Face coupling coefficients (pressure-dependent; recomputed each call)
861 std::vector<T> faceCoeffs;
862 computeFaceCoeffs(faceCoeffs);
863 std::vector<double> coeffGpu(faceCoeffs.size());
864 for (std::size_t k = 0; k < faceCoeffs.size(); ++k)
865 coeffGpu[k] = static_cast<double>(faceCoeffs[k]);
866
867 std::vector<double> xGpu(n);
868 for (std::size_t i = 0; i < n; ++i)
869 xGpu[i] = static_cast<double>(x[i]);
870 tPrep.finish();
871
872 tUpload.start();
873 const bool gpuUploadOk = gpu::gpuUploadSolverArrays(
874 gpuBufs_, diagGpu.data(), bGpu.data(), coeffGpu.data(),
875 static_cast<uint32_t>(n), faceCoeffs.size());
876 tUpload.finish();
877 if (!gpuUploadOk) {
878 if (Logger::hasDebug()) {
879 const std::string tag =
880 "diffusion n=" + std::to_string(n) + " [GPU upload failed]";
881 Logger::getInstance()
882 .addTiming(tag + " diag/b precompute", tDiag)
883 .addTiming(tag + " GPU prep+faceCoeffs", tPrep)
884 .addTiming(tag + " GPU upload", tUpload)
885 .print();
886 }
887 VIENNACORE_LOG_ERROR("OxidationDiffusion: GPU mode was selected, but "
888 "uploading GPU solver arrays or factorizing ILU "
889 "failed." +
890 gpuErrorDetail());
891 }
892
893 // Solve on GPU; xGpu holds the initial guess and receives the solution.
894 tSolve.start();
895 const bool gpuConverged = gpu::gpuSolveBiCGSTAB(
896 gpuBufs_, xGpu.data(), static_cast<double>(eps),
897 parameters.maxIterations, static_cast<double>(parameters.tolerance),
898 iterations, residual);
899 tSolve.finish();
900
901 const bool finiteGpuSolution =
902 gpuConverged &&
903 std::all_of(xGpu.begin(), xGpu.end(),
904 [](double value) { return std::isfinite(value); });
905 const unsigned gpuIterations = iterations;
906 const T gpuResidual = residual;
907
908 if (finiteGpuSolution) {
909 // Write solution back to nodes only after convergence and finite
910 // checks.
911 for (std::size_t i = 0; i < n; ++i)
912 nodes[i].concentration = static_cast<T>(xGpu[i]);
913 normalizedResidual_ = gpuResidual / b_norm;
914 lastSolveConverged_ = std::isfinite(normalizedResidual_) &&
915 normalizedResidual_ <= parameters.tolerance;
916
917 if (Logger::hasTiming()) {
918 const std::string tag = "diffusion n=" + std::to_string(n) +
919 " iters=" + std::to_string(iterations) +
920 " res=" + std::to_string(residual) + " [GPU]";
921 Logger::getInstance()
922 .addTiming(tag + " GPU BiCGSTAB", tSolve)
923 .print();
924 }
925 if (Logger::hasDebug()) {
926 const std::string tag = "diffusion n=" + std::to_string(n) + " [GPU]";
927 Logger::getInstance()
928 .addTiming(tag + " diag/b precompute", tDiag)
929 .addTiming(tag + " GPU prep+faceCoeffs", tPrep)
930 .addTiming(tag + " GPU upload", tUpload)
931 .print();
932 }
933 return;
934 }
935
936 if (Logger::hasDebug()) {
937 const std::string tag = "diffusion n=" + std::to_string(n) +
938 " iters=" + std::to_string(gpuIterations) +
939 " res=" + std::to_string(gpuResidual) +
940 " [GPU failed]";
941 Logger::getInstance()
942 .addTiming(tag + " diag/b precompute", tDiag)
943 .addTiming(tag + " GPU prep+faceCoeffs", tPrep)
944 .addTiming(tag + " GPU upload", tUpload)
945 .addTiming(tag + " GPU BiCGSTAB", tSolve)
946 .print();
947 }
948 VIENNACORE_LOG_ERROR(
949 "OxidationDiffusion: GPU mode was selected, but GPU BiCGSTAB "
950 "failed, did not converge, or produced non-finite concentrations "
951 "(iters=" +
952 std::to_string(gpuIterations) +
953 ", residual=" + std::to_string(gpuResidual) + ").");
954 }
955#else
956 if (gpuMode_ == GpuMode::Gpu) {
957 VIENNACORE_LOG_ERROR("OxidationDiffusion: explicit GPU mode was "
958 "requested, but ViennaLS was built without "
959 "VIENNALS_GPU_BICGSTAB.");
960 }
961#endif
962
963 // r = b - A*x
964 std::vector<SolverT> Ax(n);
965 matvec(x, diag, b, Ax);
966 std::vector<SolverT> r(n), r_hat(n);
967 for (std::size_t i = 0; i < n; ++i) {
968 r[i] = static_cast<SolverT>(b[i] - Ax[i]);
969 r_hat[i] = r[i];
970 }
971
972 // BiCGSTAB with diagonal (Jacobi) preconditioner.
973 // Scalars (rho, alpha, omega, beta) and dot products stay in T for
974 // stability.
975 Timer<> tCpuSolve;
976 tCpuSolve.start();
977 std::vector<SolverT> p(n, SolverT(0)), v(n, SolverT(0)), y(n), z(n), s(n),
978 t(n);
979 T rho = T(1), alpha = T(1), omega = T(1);
980 bool bicgstabBreakdown = false;
981 for (std::size_t i = 0; i < n; ++i) {
982 const T ri = static_cast<T>(r[i]);
983 if (!std::isfinite(ri)) {
984 bicgstabBreakdown = true;
985 break;
986 }
987 residual = std::max(residual, std::abs(ri));
988 }
989
990 for (; !bicgstabBreakdown && iterations < parameters.maxIterations;
991 ++iterations) {
992 T rho_new = T(0);
993 for (std::size_t i = 0; i < n; ++i)
994 rho_new += static_cast<T>(r_hat[i]) * static_cast<T>(r[i]);
995
996 if (!std::isfinite(rho_new)) {
997 bicgstabBreakdown = true;
998 break;
999 }
1000 if (std::abs(rho_new) < T(1e-100))
1001 break;
1002 if (!std::isfinite(rho) || !std::isfinite(alpha) ||
1003 !std::isfinite(omega) || std::abs(omega) < T(1e-100)) {
1004 bicgstabBreakdown = true;
1005 break;
1006 }
1007
1008 const T beta = (rho_new / rho) * (alpha / omega);
1009 if (!std::isfinite(beta)) {
1010 bicgstabBreakdown = true;
1011 break;
1012 }
1013 rho = rho_new;
1014
1015 for (std::size_t i = 0; i < n; ++i)
1016 p[i] = static_cast<SolverT>(r[i] + beta * (p[i] - omega * v[i]));
1017
1018 for (std::size_t i = 0; i < n; ++i) {
1019 const T pi = p[i];
1020 y[i] = static_cast<SolverT>((diag[i] > eps) ? pi / diag[i] : pi);
1021 }
1022
1023 matvec(y, diag, b, v);
1024
1025 T r_hat_v = T(0);
1026 for (std::size_t i = 0; i < n; ++i)
1027 r_hat_v += static_cast<T>(r_hat[i]) * static_cast<T>(v[i]);
1028 if (!std::isfinite(r_hat_v)) {
1029 bicgstabBreakdown = true;
1030 break;
1031 }
1032 if (std::abs(r_hat_v) < T(1e-100))
1033 break;
1034
1035 alpha = rho_new / r_hat_v;
1036 if (!std::isfinite(alpha)) {
1037 bicgstabBreakdown = true;
1038 break;
1039 }
1040
1041 for (std::size_t i = 0; i < n; ++i)
1042 s[i] = static_cast<SolverT>(r[i] - alpha * v[i]);
1043
1044 residual = T(0);
1045 for (std::size_t i = 0; i < n; ++i)
1046 residual = std::max(residual, std::abs(static_cast<T>(s[i])));
1047 if (!std::isfinite(residual)) {
1048 bicgstabBreakdown = true;
1049 break;
1050 }
1051 if (residual < parameters.tolerance * b_norm) {
1052 for (std::size_t i = 0; i < n; ++i)
1053 x[i] = static_cast<SolverT>(x[i] + alpha * y[i]);
1054 ++iterations;
1055 break;
1056 }
1057
1058 for (std::size_t i = 0; i < n; ++i) {
1059 const T si = s[i];
1060 z[i] = static_cast<SolverT>((diag[i] > eps) ? si / diag[i] : si);
1061 }
1062
1063 matvec(z, diag, b, t);
1064
1065 T t_s = T(0), t_t = T(0);
1066 for (std::size_t i = 0; i < n; ++i) {
1067 t_s += static_cast<T>(t[i]) * static_cast<T>(s[i]);
1068 t_t += static_cast<T>(t[i]) * static_cast<T>(t[i]);
1069 }
1070 if (!std::isfinite(t_s) || !std::isfinite(t_t)) {
1071 bicgstabBreakdown = true;
1072 break;
1073 }
1074 omega = (t_t > T(1e-100)) ? t_s / t_t : T(0);
1075 if (!std::isfinite(omega)) {
1076 bicgstabBreakdown = true;
1077 break;
1078 }
1079
1080 for (std::size_t i = 0; i < n; ++i) {
1081 x[i] = static_cast<SolverT>(x[i] + alpha * y[i] + omega * z[i]);
1082 r[i] = static_cast<SolverT>(s[i] - omega * t[i]);
1083 }
1084
1085 residual = T(0);
1086 for (std::size_t i = 0; i < n; ++i)
1087 residual = std::max(residual, std::abs(static_cast<T>(r[i])));
1088 if (!std::isfinite(residual)) {
1089 bicgstabBreakdown = true;
1090 break;
1091 }
1092 if (residual < parameters.tolerance * b_norm) {
1093 ++iterations;
1094 break;
1095 }
1096 }
1097
1098 if (bicgstabBreakdown)
1099 residual = std::numeric_limits<T>::infinity();
1100
1101 const bool finiteCpuSolution =
1102 !bicgstabBreakdown &&
1103 std::all_of(x.begin(), x.end(),
1104 [](SolverT value) { return std::isfinite(value); });
1105 if (finiteCpuSolution) {
1106 for (std::size_t i = 0; i < n; ++i)
1107 nodes[i].concentration = x[i];
1108 } else {
1109 for (std::size_t i = 0; i < n; ++i)
1110 nodes[i].concentration = initialGuess[i];
1111 residual = std::numeric_limits<T>::infinity();
1112 Logger::getInstance()
1113 .addWarning("solveDiffusion: CPU BiCGSTAB produced non-finite "
1114 "concentrations; keeping the sanitized initial guess.")
1115 .print();
1116 }
1117 normalizedResidual_ = residual / b_norm;
1118 lastSolveConverged_ = finiteCpuSolution &&
1119 std::isfinite(normalizedResidual_) &&
1120 normalizedResidual_ <= parameters.tolerance;
1121
1122 tCpuSolve.finish();
1123 if (Logger::hasTiming()) {
1124 const std::string path = " [CPU]";
1125 const std::string tag = "diffusion n=" + std::to_string(n) +
1126 " iters=" + std::to_string(iterations) +
1127 " res=" + std::to_string(residual) + path;
1128 Logger::getInstance().addTiming(tag + " CPU BiCGSTAB", tCpuSolve).print();
1129 }
1130 if (Logger::hasDebug()) {
1131 const std::string path = " [CPU]";
1132 Logger::getInstance()
1133 .addTiming("diffusion n=" + std::to_string(n) + path +
1134 " diag/b precompute",
1135 tDiag)
1136 .print();
1137 }
1138 if (residual > parameters.tolerance * b_norm)
1139 VIENNACORE_LOG_WARNING(
1140 "solveDiffusion: BiCGSTAB did not converge after " +
1141 std::to_string(iterations) + "/" +
1142 std::to_string(parameters.maxIterations) +
1143 " iterations (residual=" + std::to_string(residual / b_norm) +
1144 ", tolerance=" + std::to_string(parameters.tolerance) + ")");
1145 }
1146
1147#ifdef VIENNALS_GPU_BICGSTAB
1148 static std::string gpuErrorDetail() {
1149 const char *detail = gpu::gpuGetLastErrorMessage();
1150 if (detail && detail[0] != '\0')
1151 return std::string(" Detail: ") + detail;
1152 return {};
1153 }
1154#endif
1155
1156 void logDiffusionBackend(const std::string &backend,
1157 const std::string &detail) const {
1158 if (!Logger::hasInfo())
1159 return;
1160 const std::string msg =
1161 "OxidationDiffusion: using " + backend +
1162 " for diffusion solve (nodes=" + std::to_string(nodes.size()) +
1163 (detail.empty() ? std::string() : ", " + detail) + ").";
1164 if (msg == lastLoggedBackend_)
1165 return;
1166 lastLoggedBackend_ = msg;
1167 Logger::getInstance().addInfo(msg).print();
1168 }
1169
1170 // Uses precomputed flat faceBC arrays — no HRLE access, safe for parallel
1171 // execution.
1172 template <class SolverT>
1173 StencilSide
1174 makeStencilSide(std::size_t nodeId, const std::vector<SolverT> &previous,
1175 unsigned direction, int offset, T diffusion) const {
1176 const IndexType &nodeIndex = nodes[nodeId].index;
1177 IndexType neighbor = nodeIndex;
1178 neighbor[direction] += offset;
1179
1180 if (!inBounds(neighbor))
1181 return zeroFluxSide();
1182
1183 const std::size_t neighborId = lookupNode(neighbor);
1184 if (neighborId != noNode)
1185 return {gridDelta, 0., previous[neighborId]};
1186
1187 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
1188 const std::size_t n = nodes.size();
1189 const Boundary faceType = faceBCTypes_[fi * n + nodeId];
1190 const T faceDist = faceBCDists_[fi * n + nodeId];
1191 if (faceType == Boundary::REACTION)
1192 return reactionBoundarySide(nodeIndex, faceDist, diffusion);
1193 if (faceType == Boundary::AMBIENT)
1194 return ambientBoundarySide(faceDist, diffusion);
1195 if (faceType == Boundary::MASK)
1196 return maskBoundarySide(faceDist, diffusion);
1197 return zeroFluxSide();
1198 }
1199
1200 void addAxisContribution(T &rightHandSide, T &diagonal,
1201 const StencilSide &negativeSide,
1202 const StencilSide &positiveSide, T diffusion) const {
1203 const T distanceSum = negativeSide.distance + positiveSide.distance;
1204 if (distanceSum <= std::numeric_limits<T>::epsilon())
1205 return;
1206
1207 addSideContribution(rightHandSide, diagonal, negativeSide, distanceSum,
1208 diffusion);
1209 addSideContribution(rightHandSide, diagonal, positiveSide, distanceSum,
1210 diffusion);
1211 }
1212
1213 void addSideContribution(T &rightHandSide, T &diagonal,
1214 const StencilSide &side, T distanceSum,
1215 T diffusion) const {
1216 if (side.distance <= std::numeric_limits<T>::epsilon())
1217 return;
1218
1219 const T coefficient = T(2) * diffusion / (side.distance * distanceSum);
1220 rightHandSide += coefficient * side.constant;
1221 diagonal += coefficient * (T(1) - side.nodeCoefficient);
1222 }
1223
1224 StencilSide zeroFluxSide() const { return {gridDelta, 1., 0.}; }
1225
1226 StencilSide reactionBoundarySide(const IndexType &nodeIndex, T distance,
1227 T diffusion) const {
1228 const T conductance = diffusion / distance;
1229 const T reactionRate = getEffectiveReactionRate(nodeIndex);
1230 const T denominator = conductance + reactionRate;
1231 if (denominator <= std::numeric_limits<T>::epsilon())
1232 return zeroFluxSide();
1233
1234 return {distance, conductance / denominator, 0.};
1235 }
1236
1237 ReactionBoundarySample reactionBoundarySample(const IndexType &index) const {
1238 ConstSparseIterator reactionIt(reactionInterface->getDomain());
1239
1240 const std::size_t directId = lookupNode(index);
1241 if (directId != noNode) {
1242 const auto sample =
1243 reactionBoundarySampleFromNode(reactionIt, nodes[directId]);
1244 if (sample.found)
1245 return sample;
1246 }
1247
1248 // The reaction velocity is a local interface quantity. A global nearest
1249 // search can smear open-window concentrations deep under a mask if the
1250 // local oxide band is temporarily missing or clipped near contact. Keep
1251 // the fallback strictly local, consistent with the velocity-extension
1252 // radius used elsewhere in the oxidation fields.
1253 std::size_t bestNode = std::numeric_limits<std::size_t>::max();
1254 T bestDistance2 = std::numeric_limits<T>::max();
1255
1256 for (int radius = 1; radius <= 4; ++radius) {
1257 IndexType offset{};
1258 offset.fill(-radius);
1259 while (true) {
1260 IndexType candidate = index;
1261 T distance2 = 0.;
1262 for (unsigned d = 0; d < D; ++d) {
1263 candidate[d] += offset[d];
1264 distance2 += static_cast<T>(offset[d] * offset[d]);
1265 }
1266
1267 if (distance2 > T(0) && inBounds(candidate)) {
1268 const std::size_t foundId = nodeLookupFlat[linearIndex(candidate)];
1269 if (foundId != noNode && distance2 < bestDistance2) {
1270 const auto sample =
1271 reactionBoundarySampleFromNode(reactionIt, nodes[foundId]);
1272 if (sample.found) {
1273 bestDistance2 = distance2;
1274 bestNode = foundId;
1275 }
1276 }
1277 }
1278
1279 unsigned dim = 0;
1280 for (; dim < D; ++dim) {
1281 if (offset[dim] < radius) {
1282 ++offset[dim];
1283 break;
1284 }
1285 offset[dim] = -radius;
1286 }
1287 if (dim == D)
1288 break;
1289 }
1290
1291 if (bestNode != std::numeric_limits<std::size_t>::max())
1292 break;
1293 }
1294
1295 if (bestNode == std::numeric_limits<std::size_t>::max())
1296 return {};
1297 return reactionBoundarySampleFromNode(reactionIt, nodes[bestNode]);
1298 }
1299
1301 reactionBoundarySampleFromNode(ConstSparseIterator &reactionIt,
1302 const Node &node) const {
1304 best.nodeIndex = node.index;
1305 T bestDistance = std::numeric_limits<T>::max();
1306 const T insidePhi = valueAt(reactionIt, node.index);
1307
1308 for (unsigned direction = 0; direction < D; ++direction) {
1309 for (const int offset : {-1, 1}) {
1310 IndexType neighbor = node.index;
1311 neighbor[direction] += offset;
1312 if (!inBounds(neighbor))
1313 continue;
1314
1315 const T outsidePhi = valueAt(reactionIt, neighbor);
1316 if (!crosses(insidePhi, outsidePhi))
1317 continue;
1318
1319 const T distance = crossingDistance(insidePhi, outsidePhi);
1320 if (distance >= bestDistance)
1321 continue;
1322
1323 const T diffusion = getEffectiveDiffusionCoefficient(node.index);
1324 const auto side = reactionBoundarySide(node.index, distance, diffusion);
1325 best.found = true;
1326 best.distance = distance;
1327 best.concentration =
1328 side.nodeCoefficient * node.concentration + side.constant;
1329 best.crossingAxis = direction;
1330 best.crossingOffset = offset;
1331 bestDistance = distance;
1332 }
1333 }
1334
1335 return best;
1336 }
1337
1338 StencilSide ambientBoundarySide(T distance, T diffusion) const {
1339 const T conductance = diffusion / distance;
1340 const T denominator = conductance + parameters.transferCoefficient;
1341 if (denominator <= std::numeric_limits<T>::epsilon())
1342 return zeroFluxSide();
1343
1344 return {distance, conductance / denominator,
1345 parameters.transferCoefficient *
1346 parameters.equilibriumConcentration / denominator};
1347 }
1348
1349 StencilSide maskBoundarySide(T distance, T diffusion) const {
1350 if (parameters.maskTransferCoefficient <= std::numeric_limits<T>::epsilon())
1351 return zeroFluxSide();
1352
1353 const T conductance = diffusion / distance;
1354 const T denominator = conductance + parameters.maskTransferCoefficient;
1355 if (denominator <= std::numeric_limits<T>::epsilon())
1356 return zeroFluxSide();
1357
1358 return {distance, conductance / denominator,
1359 parameters.maskTransferCoefficient * parameters.maskConcentration /
1360 denominator};
1361 }
1362
1363 T getEffectiveReactionRate(const IndexType &index) const {
1364 T rate = parameters.reactionRate;
1365
1366 if (parameters.reactionActivationVolume != T(0)) {
1367 T pressure = parameters.referencePressure;
1368 const auto foundPressure =
1369 pressureLookup.find(detail::gridIndexHash<D>(index));
1370 if (foundPressure != pressureLookup.end())
1371 pressure = foundPressure->second;
1372 if (!std::isfinite(pressure))
1373 pressure = parameters.referencePressure;
1374 const T exponent =
1375 stressExponent(pressure, parameters.reactionActivationVolume);
1376 rate *= stressFactor(exponent);
1377 }
1378
1379 if (parameters.reactionRateRatio111 != T(1)) {
1380 const std::size_t nodeId = lookupNode(index);
1381 if (nodeId != noNode) {
1382 const auto &normal = nodes[nodeId].siNormal;
1383 T dot = T(0);
1384 for (unsigned d = 0; d < D; ++d)
1385 dot += normal[d] * parameters.crystalAxis[d];
1386 rate *= T(1) +
1387 (parameters.reactionRateRatio111 - T(1)) * (T(1) - dot * dot);
1388 }
1389 }
1390
1391 return rate;
1392 }
1393
1394 T getEffectiveDiffusionCoefficient(const IndexType &index) const {
1395 if (parameters.diffusionActivationVolume == T(0))
1396 return parameters.diffusionCoefficient;
1397
1398 T pressure = parameters.referencePressure;
1399 const auto found = pressureLookup.find(detail::gridIndexHash<D>(index));
1400 if (found != pressureLookup.end())
1401 pressure = found->second;
1402 if (!std::isfinite(pressure))
1403 pressure = parameters.referencePressure;
1404
1405 const T exponent =
1406 stressExponent(pressure, parameters.diffusionActivationVolume);
1407 return parameters.diffusionCoefficient * stressFactor(exponent);
1408 }
1409
1410 T stressExponent(T pressure, T activationVolume) const {
1411 const T thermalEnergy =
1412 boltzmannConstant * std::max(parameters.temperature, T(1.));
1413 return -(pressure - parameters.referencePressure) * activationVolume /
1414 thermalEnergy;
1415 }
1416
1417 static T stressFactor(T exponent) {
1418 if (!std::isfinite(exponent))
1419 return T(1);
1420 if (exponent <= std::log(minStressFactor))
1421 return minStressFactor;
1422 if (exponent >= std::log(maxStressFactor))
1423 return maxStressFactor;
1424 return std::exp(exponent);
1425 }
1426
1427 Vec3D<T> computeSiNormal(const IndexType &index,
1428 ConstSparseIterator &reactionIt) const {
1429 // Reflect neighbor indices that fall outside the HRLE grid. This handles
1430 // REFLECTIVE boundary conditions correctly: phi(b-k) = phi(b+k), so the
1431 // centered difference at b gives zero lateral gradient as expected.
1432 auto reflectToGrid = [&](IndexType idx) {
1433 auto &g = reactionInterface->getGrid();
1434 for (unsigned d2 = 0; d2 < D; ++d2) {
1435 const auto lo = g.getMinGridPoint(d2);
1436 const auto hi = g.getMaxGridPoint(d2);
1437 if (idx[d2] < lo)
1438 idx[d2] = 2 * lo - idx[d2];
1439 if (idx[d2] > hi)
1440 idx[d2] = 2 * hi - idx[d2];
1441 }
1442 return idx;
1443 };
1444 Vec3D<T> gradient{0., 0., 0.};
1445 for (unsigned d = 0; d < D; ++d) {
1446 IndexType plus = index, minus = index;
1447 plus[d] += 1;
1448 minus[d] -= 1;
1449 gradient[d] =
1450 (detail::clampLevelSetPhi(valueAt(reactionIt, reflectToGrid(plus))) -
1452 valueAt(reactionIt, reflectToGrid(minus)))) /
1453 (T(2) * gridDelta);
1454 }
1455 T len = T(0);
1456 for (unsigned d = 0; d < D; ++d)
1457 len += gradient[d] * gradient[d];
1458 len = std::sqrt(len);
1459 if (len > std::numeric_limits<T>::epsilon()) {
1460 for (unsigned d = 0; d < D; ++d)
1461 gradient[d] /= len;
1462 } else {
1463 gradient = Vec3D<T>{0., 0., 0.};
1464 gradient[D - 1] = T(1);
1465 }
1466 return gradient;
1467 }
1468
1469 std::pair<Boundary, T> classifyBoundary(ConstSparseIterator &reactionIt,
1470 ConstSparseIterator &ambientIt,
1471 ConstSparseIterator &maskIt,
1472 const IndexType &inside,
1473 const IndexType &outside) const {
1474 const T reactionInside = valueAt(reactionIt, inside);
1475 const T reactionOutside = valueAt(reactionIt, outside);
1476 const T ambientInside = valueAt(ambientIt, inside);
1477 const T ambientOutside = valueAt(ambientIt, outside);
1478 const T maskInside = valueAtMask(maskIt, inside);
1479 const T maskOutside = valueAtMask(maskIt, outside);
1480
1481 const T reactionDistance =
1482 crosses(reactionInside, reactionOutside)
1483 ? crossingDistance(reactionInside, reactionOutside)
1484 : std::numeric_limits<T>::max();
1485 const T ambientDistance =
1486 crosses(ambientInside, ambientOutside)
1487 ? crossingDistance(ambientInside, ambientOutside)
1488 : std::numeric_limits<T>::max();
1489 const T maskDistance =
1490 maskInterface != nullptr && crosses(maskInside, maskOutside)
1491 ? crossingDistance(maskInside, maskOutside)
1492 : std::numeric_limits<T>::max();
1493
1494 if (reactionDistance == std::numeric_limits<T>::max() &&
1495 ambientDistance == std::numeric_limits<T>::max() &&
1496 maskDistance == std::numeric_limits<T>::max())
1497 return {Boundary::NONE, gridDelta};
1498
1499 if (reactionDistance <= ambientDistance && reactionDistance <= maskDistance)
1500 return {Boundary::REACTION, reactionDistance};
1501
1502 // Ambient crossing heads into mask-occupied space: the oxide/gas surface
1503 // has drifted under the nitride. Classify as MASK regardless of whether
1504 // the crossings are coincident (isMaskAtCrossing) or the outer node is
1505 // wholly inside the mask body (valueAtMask check). This is equivalent to
1506 // sealing the diffusion domain with UNION(ambientInterface, maskInterface)
1507 // without mutating any level set.
1508 if (ambientDistance != std::numeric_limits<T>::max() &&
1509 (isMaskAtCrossing(maskInside, maskOutside, ambientDistance) ||
1510 (maskInterface != nullptr &&
1511 static_cast<T>(maskSign) * valueAtMask(maskIt, outside) >= T(0))))
1512 return {Boundary::MASK, ambientDistance};
1513
1514 if (maskDistance <= ambientDistance)
1515 return {Boundary::MASK, maskDistance};
1516 return {Boundary::AMBIENT, ambientDistance};
1517 }
1518
1519 bool isInsideOxide(T reactionPhi, T ambientPhi) const {
1520 // GeometricAdvect can leave a tiny positive residual (~4*epsilon) when the
1521 // interface lands exactly on a grid point at non-zero coordinates, because
1522 // k*gridDelta is not exactly representable in floating point. Allow a
1523 // tolerance of 1e-9 grid units so that grid points on the surface (phi≈0)
1524 // are correctly classified as inside the oxide.
1525 constexpr T eps = T(1e-9);
1526 return reactionSign * reactionPhi >= -eps &&
1527 ambientSign * ambientPhi >= -eps;
1528 }
1529
1530 ConstSparseIterator makeMaskIterator() const {
1531 if (maskInterface == nullptr)
1532 return ConstSparseIterator(reactionInterface->getDomain());
1533 return ConstSparseIterator(maskInterface->getDomain());
1534 }
1535
1536 bool isInsideMask(ConstSparseIterator &maskIt, const IndexType &index) const {
1537 if (maskInterface == nullptr)
1538 return false;
1539 return maskSign * valueAt(maskIt, index) >= 0.;
1540 }
1541
1542 T valueAtMask(ConstSparseIterator &maskIt, const IndexType &index) const {
1543 if (maskInterface == nullptr)
1544 return std::numeric_limits<T>::max();
1545 return valueAt(maskIt, index);
1546 }
1547
1548 bool isMaskAtCrossing(T maskInside, T maskOutside, T distance) const {
1549 if (maskInterface == nullptr)
1550 return false;
1551 const T fraction = std::clamp(distance / gridDelta, T(0), T(1));
1552 const T insidePhi = detail::clampLevelSetPhi(maskInside);
1553 const T outsidePhi = detail::clampLevelSetPhi(maskOutside);
1554 const T maskPhi = insidePhi + fraction * (outsidePhi - insidePhi);
1555 return static_cast<T>(maskSign) * maskPhi >= T(0);
1556 }
1557
1558 T crossingDistance(T insidePhi, T outsidePhi) const {
1560 insidePhi, outsidePhi, parameters.minBoundaryDistance, gridDelta);
1561 }
1562};
1563
1564} // namespace viennals
constexpr int D
Definition Epitaxy.cpp:12
double T
Definition Epitaxy.cpp:13
Class containing all information about the level set, including the dimensions of the domain,...
Definition lsDomain.hpp:27
void markGeometryChanged()
Call after any in-place modification of the level sets (e.g. after ls::Advect) so that the next apply...
Definition lsOxidationDiffusion.hpp:214
bool hasFiniteConcentrationField() const
Definition lsOxidationDiffusion.hpp:422
T getDissipationAlpha(int, int material, const Vec3D< T > &) final
If lsLocalLaxFriedrichsAnalytical is used as the spatial discretization scheme, this is called to pro...
Definition lsOxidationDiffusion.hpp:374
void setConcentrationCache(std::unordered_map< std::size_t, T > cache)
Definition lsOxidationDiffusion.hpp:433
void setMaskInterface(SmartPointer< Domain< T, D > > passedInterface, int passedMaskSign=1)
Definition lsOxidationDiffusion.hpp:231
T getEffectiveReactionRate(const Vec3D< T > &coordinate) const
Definition lsOxidationDiffusion.hpp:252
~OxidationDiffusion()
Definition lsOxidationDiffusion.hpp:201
void markSolved()
Mark the current solution as valid without re-solving. Call this before any parallel advection (lsAdv...
Definition lsOxidationDiffusion.hpp:448
const std::unordered_map< std::size_t, T > & getConcentrationCache() const
Definition lsOxidationDiffusion.hpp:429
OxidationDiffusion(SmartPointer< Domain< T, D > > passedReactionInterface, SmartPointer< Domain< T, D > > passedAmbientInterface, OxidationParameters passedParameters={})
Definition lsOxidationDiffusion.hpp:194
T getResidual() const
Definition lsOxidationDiffusion.hpp:418
T getNormalizedResidual() const
Definition lsOxidationDiffusion.hpp:419
void apply()
Definition lsOxidationDiffusion.hpp:305
std::size_t getNumberOfSolutionNodes() const
Definition lsOxidationDiffusion.hpp:421
void setParameters(OxidationParameters passedParameters)
Definition lsOxidationDiffusion.hpp:245
T getConcentration(const IndexType &index) const
Definition lsOxidationDiffusion.hpp:394
void setPressure(const Vec3D< T > &coordinate, T pressure)
Definition lsOxidationDiffusion.hpp:273
void clearSolveBounds()
Definition lsOxidationDiffusion.hpp:299
void clearPressureField()
Definition lsOxidationDiffusion.hpp:259
static auto New(Args &&...args)
Definition lsOxidationDiffusion.hpp:208
void writeConcentrationToLevelSet()
Write per-node concentration into ambientInterface->getPointData() so that lsInterior + lsAdvect can ...
Definition lsOxidationDiffusion.hpp:453
void writePressureToLevelSet()
Write per-node pressure into ambientInterface->getPointData() so that it survives advection and can w...
Definition lsOxidationDiffusion.hpp:480
void setSolveBounds(const IndexType &passedMinIndex, const IndexType &passedMaxIndex)
Restrict the dense Cartesian diffusion solve to a finite index box. This is useful for level sets wit...
Definition lsOxidationDiffusion.hpp:290
T getScalarVelocityFromSample(const ReactionBoundarySample &sample) const
Absolute scalar velocity derived from an already-computed boundary sample, avoiding a second call to ...
Definition lsOxidationDiffusion.hpp:517
void setPressure(const IndexType &index, T pressure)
Definition lsOxidationDiffusion.hpp:264
void setGpuPreconditioner(GpuPreconditioner preconditioner)
Set the GPU BiCGSTAB preconditioner. Jacobi matches the CPU solver.
Definition lsOxidationDiffusion.hpp:441
T getReactionBoundaryConcentration(const IndexType &index) const
Definition lsOxidationDiffusion.hpp:412
unsigned getIterations() const
Definition lsOxidationDiffusion.hpp:417
void setAmbientInterface(SmartPointer< Domain< T, D > > passedInterface)
Definition lsOxidationDiffusion.hpp:225
T getScalarVelocity(const Vec3D< T > &coordinate, int material, const Vec3D< T > &normalVector, unsigned long) final
Should return a scalar value for the velocity at coordinate for a point of material with the given no...
Definition lsOxidationDiffusion.hpp:349
T getReactionBoundaryConcentration(const Vec3D< T > &coordinate) const
Definition lsOxidationDiffusion.hpp:405
OxidationParameters getParameters() const
Definition lsOxidationDiffusion.hpp:250
void setReactionInterface(SmartPointer< Domain< T, D > > passedInterface)
Definition lsOxidationDiffusion.hpp:219
void setOxideSigns(int passedReactionSign, int passedAmbientSign)
Set signs defining the oxide band. A node is inside oxide if reactionSign * reactionPhi >= 0 and ambi...
Definition lsOxidationDiffusion.hpp:282
void setGpuMode(GpuMode mode)
Set the GPU solver selection mode. See GpuMode for the two options. On CPU-only builds (VIENNALS_GPU_...
Definition lsOxidationDiffusion.hpp:439
bool lastSolveConverged() const
Definition lsOxidationDiffusion.hpp:420
ReactionBoundarySample getReactionBoundarySample(const Vec3D< T > &coordinate) const
Return the reaction boundary sample for the grid node nearest to coordinate. Used by the deformation ...
Definition lsOxidationDiffusion.hpp:508
void clearMaskInterface()
Definition lsOxidationDiffusion.hpp:239
T getConcentration(const Vec3D< T > &coordinate) const
Definition lsOxidationDiffusion.hpp:387
void writePersistentFields()
Convenience wrapper: persist both concentration and pressure in one call.
Definition lsOxidationDiffusion.hpp:499
Common Cartesian-grid infrastructure shared by the three oxidation solver classes (diffusion,...
Definition lsOxidationSolverBase.hpp:90
static constexpr std::size_t noNode
Definition lsOxidationSolverBase.hpp:96
bool crosses(T a, T b) const
Definition lsOxidationSolverBase.hpp:110
std::size_t lookupNode(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:137
std::size_t linearIndex(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:143
void initNodeLookup()
Definition lsOxidationSolverBase.hpp:130
bool inBounds(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:123
std::array< std::size_t, D > strides
Definition lsOxidationSolverBase.hpp:101
T gridDelta
Definition lsOxidationSolverBase.hpp:102
std::vector< std::size_t > nodeLookupFlat
Definition lsOxidationSolverBase.hpp:97
bool initializeGridFromInterfaces(SmartPointer< Domain< T, D > > reactionInterface, SmartPointer< Domain< T, D > > ambientInterface, SmartPointer< Domain< T, D > > maskInterface, bool useRequestedBounds, const IndexType &requestedMinIndex, const IndexType &requestedMaxIndex, std::size_t maxGridPoints, const std::string &solverName)
Definition lsOxidationSolverBase.hpp:207
std::array< std::size_t, D > extents
Definition lsOxidationSolverBase.hpp:100
viennahrle::ConstSparseIterator< typename Domain< T, D >::DomainType > ConstSparseIterator
Definition lsOxidationSolverBase.hpp:93
T valueAt(ConstSparseIterator &it, const IndexType &index) const
Definition lsOxidationSolverBase.hpp:118
bool increment(IndexType &index) const
Definition lsOxidationSolverBase.hpp:152
IndexType minIndex
Definition lsOxidationSolverBase.hpp:98
std::size_t findNearbyNode(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:166
IndexType maxIndex
Definition lsOxidationSolverBase.hpp:99
dict v
Definition LOCOSOxidation.py:217
diffusion
Definition LOCOSOxidation.py:144
d2
Definition __init__.py:36
T levelSetCrossingDistance(T insidePhi, T outsidePhi, T minBoundaryFraction, T gridDelta)
Definition lsOxidationSolverBase.hpp:76
T clampLevelSetPhi(T v)
Clamp HRLE far-field sentinels (±DBL_MAX) to ±1 before differencing to prevent DBL_MAX² overflow that...
Definition lsOxidationSolverBase.hpp:71
std::size_t gridIndexHash(const viennahrle::Index< D > &index)
Definition lsOxidationSolverBase.hpp:23
Definition lsAdvect.hpp:41
GpuMode
Selects the BiCGSTAB back-end for the diffusion solve. GPU failures are reported and not silently fal...
Definition lsOxidationDiffusion.hpp:26
@ Gpu
Always use GPU; fail if unavailable or unsuccessful.
Definition lsOxidationDiffusion.hpp:28
@ Cpu
Always use CPU (default).
Definition lsOxidationDiffusion.hpp:27
GpuPreconditioner
Selects the preconditioner used by the GPU BiCGSTAB solver. Jacobi matches the CPU solver's precondit...
Definition lsOxidationDiffusion.hpp:35
@ ILU0
Definition lsOxidationDiffusion.hpp:35
@ Jacobi
Definition lsOxidationDiffusion.hpp:35
Sub-grid accurate sample of the reaction boundary crossing closest to a given grid node....
Definition lsOxidationDiffusion.hpp:183
T concentration
Definition lsOxidationDiffusion.hpp:187
bool found
Definition lsOxidationDiffusion.hpp:184
unsigned crossingAxis
Definition lsOxidationDiffusion.hpp:188
IndexType nodeIndex
Definition lsOxidationDiffusion.hpp:185
T distance
Definition lsOxidationDiffusion.hpp:186
int crossingOffset
Definition lsOxidationDiffusion.hpp:189
Parameters for the steady oxidant diffusion model used by OxidationDiffusion.
Definition lsOxidationDiffusion.hpp:39
double maskConcentration
Definition lsOxidationDiffusion.hpp:66
double maskTransferCoefficient
Definition lsOxidationDiffusion.hpp:65
double transferCoefficient
Definition lsOxidationDiffusion.hpp:42
double temperature
Definition lsOxidationDiffusion.hpp:51
double minBoundaryDistance
Definition lsOxidationDiffusion.hpp:67
double relaxation
Definition lsOxidationDiffusion.hpp:70
Vec3D< double > crystalAxis
Definition lsOxidationDiffusion.hpp:63
double reactionActivationVolume
Definition lsOxidationDiffusion.hpp:52
double diffusionActivationVolume
Definition lsOxidationDiffusion.hpp:57
double reactionRateRatio111
Definition lsOxidationDiffusion.hpp:62
double diffusionCoefficient
Definition lsOxidationDiffusion.hpp:40
int material
Definition lsOxidationDiffusion.hpp:72
double velocitySign
Definition lsOxidationDiffusion.hpp:46
double reactionRate
Definition lsOxidationDiffusion.hpp:41
double oxidantMoleculeDensity
Definition lsOxidationDiffusion.hpp:44
double equilibriumConcentration
Definition lsOxidationDiffusion.hpp:43
double referencePressure
Definition lsOxidationDiffusion.hpp:53
double tolerance
Definition lsOxidationDiffusion.hpp:69
std::size_t maxGridPoints
Definition lsOxidationDiffusion.hpp:71
double expansionCoefficient
Definition lsOxidationDiffusion.hpp:45
unsigned maxIterations
Definition lsOxidationDiffusion.hpp:68