ViennaLS
Loading...
Searching...
No Matches
lsOxidationDeformation.hpp
Go to the documentation of this file.
1#pragma once
2
5
6#include <algorithm>
7#include <functional>
8#include <stdexcept>
9#include <string>
10#include <unordered_map>
11
12#include <omp.h>
13#include <vcTimer.hpp>
14
15namespace viennals {
16
19 double viscosity = 1.;
20 double bulkModulus = 1.;
21 double ambientPressure = 0.;
22 double pressureTolerance = 1e-8;
24 double shearModulus = 0.;
26 double stressTimeStep = 1.;
27 unsigned harmonicIterations = 500;
28 unsigned mechanicsIterations = 5;
29 unsigned pressureIterations = 10000;
30 unsigned stokesIterations = 200;
31 double mechanicsTolerance = 1e-8;
32 double stokesTolerance = 1e-8;
33 double tolerance = 1e-8;
34 double relaxation = 0.7; // SIMPLE velocity under-relaxation (0 < α ≤ 1)
36 0.5; // SIMPLE pressure under-relaxation (0 < β ≤ 1)
37 std::size_t maxGridPoints = 5000000;
38 int material = -1;
39};
40
58template <class T, int D>
59class OxidationDeformation final : public VelocityField<T>,
60 public OxidationSolverBase<T, D> {
61 using IndexType = viennahrle::Index<D>;
62 using ConstSparseIterator =
63 viennahrle::ConstSparseIterator<typename Domain<T, D>::DomainType>;
64
65private:
66 // bring base members into scope
83
84 enum class Boundary { NONE, REACTION, AMBIENT, MASK };
85
86 struct BoundaryIntersection {
87 Boundary boundary = Boundary::NONE;
88 T distance = 0.;
89 };
90
91 template <class ValueType> struct StencilPoint {
92 ValueType value{};
93 T distance = 1.;
94 };
95
96 struct Node {
97 IndexType index;
98 Vec3D<T> velocity{0., 0., 0.};
99 T pressure = 0.;
100 T strainTrace = 0.;
101 std::array<T, 9> strainRateTensor{};
102 std::array<T, 9> stressTensor{};
103 T vonMisesStress = 0.;
104 };
105
106 SmartPointer<Domain<T, D>> reactionInterface = nullptr;
107 SmartPointer<Domain<T, D>> ambientInterface = nullptr;
108 SmartPointer<Domain<T, D>> maskInterface = nullptr;
109 SmartPointer<OxidationDiffusion<T, D>> diffusionField = nullptr;
110 SmartPointer<VelocityField<T>> maskVelocityField = nullptr;
111 OxidationDeformationParameters deformationParameters;
112 OxidationParameters oxidationParameters;
113 int reactionSign = 1;
114 int ambientSign = -1;
115 int maskSign = 1;
116
117 IndexType requestedMinIndex{};
118 IndexType requestedMaxIndex{};
119 unsigned iterations = 0;
120 T residual = std::numeric_limits<T>::max();
121 // Last achieved iteration counts and residuals for pressure and Stokes
122 // solves.
123 unsigned lastPressureIters_ = 0;
124 T lastPressureResidual_ = 0.;
125 unsigned lastStokesIters_ = 0;
126 T lastStokesResidual_ = 0.;
127 T avgExpansionSpeed_ = 0.;
128 bool avgExpansionSpeedComputed = false;
129 bool solved = false;
130 bool nodesDirty_ = true;
131 std::array<T, D> maxVelocity_{};
132 bool useRequestedBounds = false;
133 std::unordered_map<IndexType, std::array<T, 9>, detail::IndexTypeHasher<D>>
134 deviatoricStressHistory;
135
136 // Warm-start storage: solutions from previous time step used as initial guess
137 std::vector<Vec3D<T>> previousVelocity_;
138 std::vector<T> previousPressure_;
139 bool hasPreviousSolution_ = false;
140
141 static bool isFiniteVec(const Vec3D<T> &value) {
142 for (unsigned i = 0; i < 3; ++i)
143 if (!std::isfinite(value[i]))
144 return false;
145 return true;
146 }
147
148 static bool isFiniteTensor(const std::array<T, 9> &value) {
149 for (const auto component : value)
150 if (!std::isfinite(component))
151 return false;
152 return true;
153 }
154
155 // Face-major flat BC arrays: index = fi * n + nodeId, fi in [0, 2*D).
156 std::vector<Boundary> faceBCTypes_;
157 std::vector<T> faceBCDists_;
158 std::vector<uint8_t>
159 touchesAmbient_; // 1 if node touches the ambient (free) surface
160
161 // GPU solver selection. Semantics match OxidationDiffusion.
162 GpuMode gpuMode_ = GpuMode::Cpu;
163 GpuPreconditioner gpuPreconditioner_ = GpuPreconditioner::Jacobi;
164 mutable std::string
165 lastLoggedBackend_; // suppresses repeated "using X" messages
166
167#ifdef VIENNALS_GPU_BICGSTAB
168 // Geometry-fixed (per buildNodes()) arrays for GPU pressure solve.
169 // Layout matches pressCoeff/pressNeighId in solvePressure() so the
170 // same spmvKernel can be reused without any CPU-side reformatting.
171 std::vector<double> pressCoeffGpu_; // face-major [2D * n]
172 std::vector<uint32_t> pressNeighId32_; // face-major [2D * n]
173 std::vector<double> actualDiagGpu_; // [n], effective GPU matrix diagonal
174 gpu::GpuBiCGSTABBuffers *gpuPressBufs_ = nullptr;
175
176 // Geometry-fixed arrays for GPU Stokes velocity solve. The diagonal is
177 // component-major because mixed MASK contact is Dirichlet in the normal
178 // component and Neumann/self-canceling in tangential components.
179 std::vector<double> stokesCoeffGpu_; // face-major [2D * n]
180 std::vector<uint32_t> stokesNeighId32_; // face-major [2D * n]
181 std::vector<double> stokesDiagGpu_; // component-major [D * n]
182 gpu::GpuBiCGSTABBuffers *gpuStokesBufs_ = nullptr;
183
184 // Harmonic velocity solver GPU arrays. The neighbor IDs are identical to
185 // Stokes (stokesNeighId32_ is reused); only the coefficients and diagonal
186 // differ (all interior coefficients = 1.0, diagonal = interior-face count).
187 std::vector<double>
188 harmonicCoeffGpu_; // face-major [2D * n], 1.0 for interior
189 std::vector<double>
190 harmonicDiagGpu_; // [n], = count of interior faces per node
191 gpu::GpuBiCGSTABBuffers *gpuHarmonicBufs_ = nullptr;
192#endif
193
194public:
195 std::vector<Node> nodes;
196
198
200 SmartPointer<Domain<T, D>> passedReactionInterface,
201 SmartPointer<Domain<T, D>> passedAmbientInterface,
202 SmartPointer<OxidationDiffusion<T, D>> passedDiffusionField,
203 OxidationParameters passedOxidationParameters,
204 OxidationDeformationParameters passedDeformationParameters = {})
205 : reactionInterface(passedReactionInterface),
206 ambientInterface(passedAmbientInterface),
207 diffusionField(passedDiffusionField),
208 deformationParameters(passedDeformationParameters),
209 oxidationParameters(passedOxidationParameters) {}
210
211 template <class... Args> static auto New(Args &&...args) {
212 return SmartPointer<OxidationDeformation>::New(std::forward<Args>(args)...);
213 }
214
216#ifdef VIENNALS_GPU_BICGSTAB
217 gpu::freeGpuBuffers(gpuPressBufs_);
218 gpu::freeGpuBuffers(gpuStokesBufs_);
219 gpu::freeGpuBuffers(gpuHarmonicBufs_);
220#endif
221 }
222
223 void setGpuMode(GpuMode mode) { gpuMode_ = mode; }
225 gpuPreconditioner_ = prec;
226 }
227
228 void setReactionInterface(SmartPointer<Domain<T, D>> passedInterface) {
229 reactionInterface = passedInterface;
230 nodesDirty_ = true;
231 solved = false;
232 }
233
234 void setAmbientInterface(SmartPointer<Domain<T, D>> passedInterface) {
235 ambientInterface = passedInterface;
236 nodesDirty_ = true;
237 solved = false;
238 }
239
240 void setMaskInterface(SmartPointer<Domain<T, D>> passedInterface,
241 int passedMaskSign = 1) {
242 maskInterface = passedInterface;
243 maskSign = (passedMaskSign < 0) ? -1 : 1;
244 nodesDirty_ = true;
245 solved = false;
246 }
247
249 maskInterface = nullptr;
250 nodesDirty_ = true;
251 solved = false;
252 }
253
254 void
255 setMaskVelocityField(SmartPointer<VelocityField<T>> passedVelocityField) {
256 maskVelocityField = passedVelocityField;
257 solved = false;
258 }
259
261 maskVelocityField = nullptr;
262 solved = false;
263 }
264
266 SmartPointer<OxidationDiffusion<T, D>> passedDiffusionField) {
267 diffusionField = passedDiffusionField;
268 solved = false;
269 }
270
272 oxidationParameters = passedParameters;
273 solved = false;
274 }
275
276 void
278 deformationParameters = passedParameters;
279 solved = false;
280 }
281
282 void setOxideSigns(int passedReactionSign, int passedAmbientSign) {
283 reactionSign = (passedReactionSign < 0) ? -1 : 1;
284 ambientSign = (passedAmbientSign < 0) ? -1 : 1;
285 solved = false;
286 }
287
288 void setSolveBounds(const IndexType &passedMinIndex,
289 const IndexType &passedMaxIndex) {
290 requestedMinIndex = passedMinIndex;
291 requestedMaxIndex = passedMaxIndex;
292 useRequestedBounds = true;
293 nodesDirty_ = true;
294 solved = false;
295 }
296
298 useRequestedBounds = false;
299 nodesDirty_ = true;
300 solved = false;
301 }
302
304 nodesDirty_ = true;
305 solved = false;
306 }
307
308 void apply() {
309 if (reactionInterface == nullptr || ambientInterface == nullptr ||
310 diffusionField == nullptr) {
311 Logger::getInstance()
312 .addError("OxidationDeformation: Missing interface or "
313 "diffusion field.")
314 .print();
315 return;
316 }
317
318 if (nodesDirty_) {
319 if (!initialiseGrid())
320 return; // base class already logged the error
321 buildNodes();
322 nodesDirty_ = false;
323 if (nodes.empty())
324 Logger::getInstance()
325 .addWarning("OxidationDeformation: no oxide nodes found after "
326 "buildNodes(). Verify that the reaction and ambient "
327 "level sets enclose a non-empty oxide band.")
328 .print();
329 hasPreviousSolution_ =
330 false; // Geometry changed, invalidate in-memory warm-start
331 // Try restoring velocity, pressure, and stress history from level set
332 // pointData (written by writeFieldsToLevelSet() before the previous
333 // advection and remapped+filled by lsAdvect + lsInterior).
334 seedFromLevelSet();
335 } else if (hasPreviousSolution_ &&
336 previousVelocity_.size() == nodes.size() &&
337 previousPressure_.size() == nodes.size()) {
338 // Warm-start: restore previous solution as initial guess for solver.
339 // Geometry stability check: only warm-start if node count matches
340 // (prevents using stale solutions after grid refinement/coarsening). This
341 // typically reduces solver iterations by 30-50% since the previous step's
342 // solution is close to the new one.
343 for (std::size_t i = 0; i < nodes.size(); ++i) {
344 nodes[i].velocity = previousVelocity_[i];
345 nodes[i].pressure = previousPressure_[i];
346 }
347 }
348
349 const std::size_t nn = nodes.size();
350 Timer<> tHarmonic, tMechanics;
351 tHarmonic.start();
353 tHarmonic.finish();
354 tMechanics.start();
356 tMechanics.finish();
357 Logger::getInstance()
358 .addTiming(" deformation n=" + std::to_string(nn) + " harmonic",
359 tHarmonic)
360 .addTiming(" deformation n=" + std::to_string(nn) +
361 " mechanics-total",
362 tMechanics)
363 .print();
364 avgExpansionSpeedComputed = false;
365
366 maxVelocity_.fill(T(0));
367 for (const auto &node : nodes) {
368 for (unsigned d = 0; d < D; ++d)
369 maxVelocity_[d] = std::max(maxVelocity_[d], std::abs(node.velocity[d]));
370 }
371 const auto unresolvedMax = estimateMaxUnresolvedAmbientVelocity();
372 for (unsigned d = 0; d < D; ++d)
373 maxVelocity_[d] = std::max(maxVelocity_[d], unresolvedMax[d]);
374
375 // Save current solution for warm-start on next apply()
376 previousVelocity_.resize(nodes.size());
377 previousPressure_.resize(nodes.size());
378 for (std::size_t i = 0; i < nodes.size(); ++i) {
379 previousVelocity_[i] = nodes[i].velocity;
380 previousPressure_[i] = nodes[i].pressure;
381 }
382 hasPreviousSolution_ = true;
383
384 solved = true;
385 }
386
387 Vec3D<T> getVectorVelocity(const Vec3D<T> &coordinate, int material,
388 const Vec3D<T> & /*normalVector*/,
389 unsigned long /*pointId*/) final {
390 if (!solved)
391 apply();
392
393 if (deformationParameters.material >= 0 &&
394 material != deformationParameters.material)
395 return {0., 0., 0.};
396
397 const auto velocity = getVelocity(coordinate);
398 T norm2 = 0.;
399 for (unsigned d = 0; d < D; ++d)
400 norm2 += velocity[d] * velocity[d];
401 if (norm2 > std::numeric_limits<T>::epsilon())
402 return velocity;
403
404 return unresolvedAmbientVelocity(coordinate);
405 }
406
407 T getScalarVelocity(const Vec3D<T> &coordinate, int material,
408 const Vec3D<T> &normalVector,
409 unsigned long /*pointId*/) final {
410 return 0.;
411 }
412
413 T getDissipationAlpha(int direction, int material,
414 const Vec3D<T> & /*centralDifferences*/) final {
415 if (deformationParameters.material >= 0 &&
416 material != deformationParameters.material)
417 return 0.;
418 return maxVelocity_[direction];
419 }
420
421private:
422 template <class ValueType, class NodeAccessor>
423 ValueType getField(const IndexType &index, ValueType fallback,
424 NodeAccessor accessor) const {
425 const std::size_t nodeId = lookupNode(index);
426 if (nodeId != noNode)
427 return accessor(nodes[nodeId]);
428
429 const auto nearby = findNearbyNode(index);
430 if (nearby == noNode)
431 return fallback;
432 return accessor(nodes[nearby]);
433 }
434
435 template <class ValueType, class NodeAccessor>
436 ValueType getField(const Vec3D<T> &coordinate, ValueType fallback,
437 NodeAccessor accessor) const {
438 // D-linear interpolation for Vec3D<T> (velocity) and scalar T (pressure).
439 // These are the types queried during level-set advection; interpolating at
440 // the exact interface coordinate eliminates the O(gridDelta) nearest-node
441 // error that caused the SiO2/mask interface to shift with grid delta.
442 // Other types (e.g. stress tensor std::array<T,9>) use nearest-node because
443 // they lack scalar multiply and are not used for advection.
444 if constexpr (!std::is_same_v<ValueType, Vec3D<T>> &&
445 !std::is_same_v<ValueType, T>) {
446 IndexType index;
447 for (unsigned i = 0; i < D; ++i)
448 index[i] = std::llround(coordinate[i] / gridDelta);
449 return getField(index, fallback, accessor);
450 } else {
451 using IdxScalar = std::decay_t<decltype(std::declval<IndexType>()[0])>;
452 IndexType lo;
453 T frac[D];
454 for (unsigned d = 0; d < D; ++d) {
455 const T c = coordinate[d] / gridDelta;
456 const T c_flo = std::floor(c);
457 lo[d] = static_cast<IdxScalar>(c_flo);
458 frac[d] = c - c_flo;
459 }
460
461 ValueType result{};
462 T totalWeight = T(0);
463 for (int corner = 0; corner < (1 << D); ++corner) {
464 IndexType idx = lo;
465 T w = T(1);
466 for (unsigned d = 0; d < D; ++d) {
467 if ((corner >> d) & 1) {
468 idx[d]++;
469 w *= frac[d];
470 } else {
471 w *= T(1) - frac[d];
472 }
473 }
474 if (w < T(1e-14))
475 continue;
476 const std::size_t nodeId = lookupNode(idx);
477 if (nodeId == noNode)
478 continue;
479 result = result + accessor(nodes[nodeId]) * w;
480 totalWeight += w;
481 }
482
483 if (totalWeight < T(1e-14))
484 return fallback;
485 if (totalWeight < T(1) - T(1e-6))
486 result = result * (T(1) / totalWeight);
487 return result;
488 }
489 }
490
491public:
492 Vec3D<T> getVelocity(const Vec3D<T> &coordinate) const {
493 return getField(coordinate, Vec3D<T>{0., 0., 0.},
494 [](const Node &n) { return n.velocity; });
495 }
496
497 Vec3D<T> getVelocity(const IndexType &index) const {
498 return getField(index, Vec3D<T>{0., 0., 0.},
499 [](const Node &n) { return n.velocity; });
500 }
501
502 T getPressure(const Vec3D<T> &coordinate) const {
503 return getField(coordinate, T(0), [](const Node &n) { return n.pressure; });
504 }
505
506 T getPressure(const IndexType &index) const {
507 return getField(index, T(0), [](const Node &n) { return n.pressure; });
508 }
509
510 T getStrainTrace(const Vec3D<T> &coordinate) const {
511 return getField(coordinate, T(0),
512 [](const Node &n) { return n.strainTrace; });
513 }
514
515 T getStrainTrace(const IndexType &index) const {
516 return getField(index, T(0), [](const Node &n) { return n.strainTrace; });
517 }
518
519 std::array<T, 9> getStrainRateTensor(const Vec3D<T> &coordinate) const {
520 return getField(coordinate, std::array<T, 9>{},
521 [](const Node &n) { return n.strainRateTensor; });
522 }
523
524 std::array<T, 9> getStrainRateTensor(const IndexType &index) const {
525 return getField(index, std::array<T, 9>{},
526 [](const Node &n) { return n.strainRateTensor; });
527 }
528
529 std::array<T, 9> getStressTensor(const Vec3D<T> &coordinate) const {
530 return getField(coordinate, std::array<T, 9>{},
531 [](const Node &n) { return n.stressTensor; });
532 }
533
534 std::array<T, 9> getStressTensor(const IndexType &index) const {
535 return getField(index, std::array<T, 9>{},
536 [](const Node &n) { return n.stressTensor; });
537 }
538
539 T getVonMisesStress(const Vec3D<T> &coordinate) const {
540 return getField(coordinate, T(0),
541 [](const Node &n) { return n.vonMisesStress; });
542 }
543
544 T getVonMisesStress(const IndexType &index) const {
545 return getField(index, T(0),
546 [](const Node &n) { return n.vonMisesStress; });
547 }
548
549 unsigned getIterations() const { return iterations; }
550 T getResidual() const { return residual; }
551 T getLastPressureResidual() const { return lastPressureResidual_; }
552 T getLastStokesResidual() const { return lastStokesResidual_; }
553 bool lastSolveConverged() const {
554 return std::isfinite(residual) &&
555 residual <= deformationParameters.mechanicsTolerance &&
556 std::isfinite(lastPressureResidual_) &&
557 lastPressureResidual_ <= deformationParameters.pressureTolerance &&
558 std::isfinite(lastStokesResidual_) &&
559 lastStokesResidual_ <= deformationParameters.stokesTolerance &&
561 }
562 bool hasFiniteSolution() const {
563 for (const auto &node : nodes) {
564 if (!isFiniteVec(node.velocity) || !std::isfinite(node.pressure) ||
565 !std::isfinite(node.strainTrace) ||
566 !isFiniteTensor(node.strainRateTensor) ||
567 !isFiniteTensor(node.stressTensor) ||
568 !std::isfinite(node.vonMisesStress))
569 return false;
570 }
571 return true;
572 }
573 std::size_t getNumberOfSolutionNodes() const { return nodes.size(); }
575 if (!avgExpansionSpeedComputed) {
577 avgExpansionSpeedComputed = true;
578 }
579 return avgExpansionSpeed_;
580 }
581 template <class Callback> void forEachSolutionNode(Callback callback) const {
582 for (const auto &node : nodes)
583 callback(node.index, node.pressure);
584 }
585
591 if (nodes.empty() || ambientInterface == nullptr)
592 return;
593
594 using VD = typename PointData<T>::VectorDataType;
595 VD velocity, stressR0, stressR1, stressR2;
596
597 ConstSparseIterator it(ambientInterface->getDomain());
598 for (; !it.isFinished(); ++it) {
599 if (!it.isDefined())
600 continue;
601 const IndexType idx = it.getStartIndices();
602 const std::size_t nId = lookupNode(idx);
603
604 if (nId != noNode) {
605 const auto &n = nodes[nId];
606 velocity.push_back(
607 isFiniteVec(n.velocity) ? n.velocity : Vec3D<T>{T(0), T(0), T(0)});
608 const auto sIt = deviatoricStressHistory.find(idx);
609 if (sIt != deviatoricStressHistory.end() &&
610 isFiniteTensor(sIt->second)) {
611 const auto &s = sIt->second;
612 stressR0.push_back({s[0], s[1], s[2]});
613 stressR1.push_back({s[3], s[4], s[5]});
614 stressR2.push_back({s[6], s[7], s[8]});
615 } else {
616 stressR0.push_back({T(0), T(0), T(0)});
617 stressR1.push_back({T(0), T(0), T(0)});
618 stressR2.push_back({T(0), T(0), T(0)});
619 }
620 } else {
621 velocity.push_back({T(0), T(0), T(0)});
622 stressR0.push_back({T(0), T(0), T(0)});
623 stressR1.push_back({T(0), T(0), T(0)});
624 stressR2.push_back({T(0), T(0), T(0)});
625 }
626 }
627
628 auto &pd = ambientInterface->getPointData();
629 pd.insertReplaceVectorData(std::move(velocity), "OxVelocity");
630 pd.insertReplaceVectorData(std::move(stressR0), "OxStressR0");
631 pd.insertReplaceVectorData(std::move(stressR1), "OxStressR1");
632 pd.insertReplaceVectorData(std::move(stressR2), "OxStressR2");
633 }
634
635private:
639 void seedFromLevelSet() {
640 if (ambientInterface == nullptr || nodes.empty())
641 return;
642
643 auto &pd = ambientInterface->getPointData();
644 const int vIdx = pd.getVectorDataIndex("OxVelocity");
645 const int r0Idx = pd.getVectorDataIndex("OxStressR0");
646 const int r1Idx = pd.getVectorDataIndex("OxStressR1");
647 const int r2Idx = pd.getVectorDataIndex("OxStressR2");
648 const int pIdx = pd.getScalarDataIndex("OxPressure");
649
650 const bool hasVelocity = (vIdx != -1);
651 const bool hasStress = (r0Idx != -1 && r1Idx != -1 && r2Idx != -1);
652 const bool hasPressure = (pIdx != -1);
653
654 if (!hasVelocity && !hasStress && !hasPressure)
655 return;
656
657 const auto *vd = hasVelocity ? pd.getVectorData(vIdx) : nullptr;
658 const auto *r0d = hasStress ? pd.getVectorData(r0Idx) : nullptr;
659 const auto *r1d = hasStress ? pd.getVectorData(r1Idx) : nullptr;
660 const auto *r2d = hasStress ? pd.getVectorData(r2Idx) : nullptr;
661 const auto *ppd = hasPressure ? pd.getScalarData(pIdx) : nullptr;
662
663 previousVelocity_.assign(nodes.size(), Vec3D<T>{});
664 previousPressure_.assign(nodes.size(), T(0));
665
666 ConstSparseIterator it(ambientInterface->getDomain());
667 for (; !it.isFinished(); ++it) {
668 if (!it.isDefined())
669 continue;
670 const auto ptId = it.getPointId();
671 const IndexType idx = it.getStartIndices();
672 const std::size_t ni = lookupNode(idx);
673
674 if (ni != noNode) {
675 if (vd && ptId < static_cast<decltype(ptId)>(vd->size()) &&
676 isFiniteVec((*vd)[ptId]))
677 previousVelocity_[ni] = (*vd)[ptId];
678 if (ppd && ptId < static_cast<decltype(ptId)>(ppd->size()) &&
679 std::isfinite((*ppd)[ptId]))
680 previousPressure_[ni] = (*ppd)[ptId];
681 }
682
683 if (hasStress && ptId < static_cast<decltype(ptId)>(r0d->size()) &&
684 ptId < static_cast<decltype(ptId)>(r1d->size()) &&
685 ptId < static_cast<decltype(ptId)>(r2d->size())) {
686 const auto &row0 = (*r0d)[ptId];
687 const auto &row1 = (*r1d)[ptId];
688 const auto &row2 = (*r2d)[ptId];
689 std::array<T, 9> s{row0[0], row0[1], row0[2], row1[0], row1[1],
690 row1[2], row2[0], row2[1], row2[2]};
691 if (isFiniteTensor(s))
692 deviatoricStressHistory[idx] = s;
693 }
694 }
695
696 if (hasVelocity || hasPressure) {
697 hasPreviousSolution_ = true;
698 // Apply the restored state immediately to the current nodes so
699 // the Stokes solve warm-starts on this (nodesDirty_=true) apply() call.
700 for (std::size_t i = 0; i < nodes.size(); ++i) {
701 nodes[i].velocity = previousVelocity_[i];
702 nodes[i].pressure = previousPressure_[i];
703 }
704 }
705 }
706
707#ifdef VIENNALS_GPU_BICGSTAB
708 // Builds the face-major pressure matrix geometry for GPU reuse.
709 // The logic mirrors the explicit precomputation in solvePressure() so the
710 // same actualDiagGpu_ / pressCoeffGpu_ arrays can feed both the CPU ILU(0)
711 // factorization and the GPU SpMV without recomputation.
712 void buildPressureGpuGeometry() {
713 const std::size_t n = nodes.size();
714 const T eps = std::numeric_limits<T>::epsilon();
715 pressCoeffGpu_.assign(2 * D * n, 0.0);
716 pressNeighId32_.assign(2 * D * n, gpu::kNoNode);
717 actualDiagGpu_.assign(n, 0.0);
718
719 for (std::size_t id = 0; id < n; ++id) {
720 if (touchesAmbient_[id]) {
721 actualDiagGpu_[id] = 1.0;
722 continue;
723 }
724 for (unsigned dir = 0; dir < D; ++dir) {
725 const unsigned fiNeg = dir * 2u;
726 const unsigned fiPos = dir * 2u + 1u;
727 IndexType nbNeg = nodes[id].index;
728 nbNeg[dir] -= 1;
729 IndexType nbPos = nodes[id].index;
730 nbPos[dir] += 1;
731 const std::size_t jNeg =
732 inBounds(nbNeg) ? nodeLookupFlat[linearIndex(nbNeg)] : noNode;
733 const std::size_t jPos =
734 inBounds(nbPos) ? nodeLookupFlat[linearIndex(nbPos)] : noNode;
735
736 auto effDist = [&](unsigned fi, std::size_t j) -> T {
737 if (j != noNode)
738 return gridDelta;
739 const Boundary bt = faceBCTypes_[fi * n + id];
740 return (bt != Boundary::NONE) ? faceBCDists_[fi * n + id] : gridDelta;
741 };
742
743 const T dNeg = effDist(fiNeg, jNeg);
744 const T dPos = effDist(fiPos, jPos);
745 const T dSum = dNeg + dPos;
746 if (dSum <= eps)
747 continue;
748
749 auto processFace = [&](unsigned fi, std::size_t j, T d) {
750 const T c = T(2) / (d * dSum);
751 if (j != noNode && !touchesAmbient_[j]) {
752 pressCoeffGpu_[fi * n + id] = static_cast<double>(c);
753 pressNeighId32_[fi * n + id] = static_cast<uint32_t>(j);
754 actualDiagGpu_[id] += c;
755 } else if (j != noNode ||
756 faceBCTypes_[fi * n + id] == Boundary::AMBIENT) {
757 // j is an ambient-only neighbour (identity-row Dirichlet p=0), OR
758 // this face crosses the free surface directly (AMBIENT Dirichlet
759 // p=0 at the sub-grid crossing distance). REACTION faces are
760 // solid-wall Neumann ∂p/∂n=0: no contribution.
761 actualDiagGpu_[id] += c;
762 }
763 };
764 processFace(fiNeg, jNeg, dNeg);
765 processFace(fiPos, jPos, dPos);
766 }
767 if (actualDiagGpu_[id] <= eps)
768 actualDiagGpu_[id] = 1.0;
769 }
770 }
771
772 // Builds the face-major Stokes velocity matrix geometry for GPU reuse.
773 // The effective diagonal excludes OOB, AMBIENT, and traction-coupled MASK
774 // face self-coupling because those cancel exactly with the vBC correction in
775 // the Stokes matvec. Kinematic MASK faces remain Dirichlet-like and
776 // contribute to the diagonal.
777 void buildStokesGpuGeometry() {
778 const std::size_t n = nodes.size();
779 const T eps = std::numeric_limits<T>::epsilon();
780 stokesCoeffGpu_.assign(2 * D * n, 0.0);
781 stokesNeighId32_.assign(2 * D * n, gpu::kNoNode);
782 stokesDiagGpu_.assign(D * n, 0.0);
783
784 auto addDiag = [&](std::size_t id, T c) {
785 for (unsigned comp = 0; comp < D; ++comp)
786 stokesDiagGpu_[comp * n + id] += static_cast<double>(c);
787 };
788
789 for (std::size_t id = 0; id < n; ++id) {
790 for (unsigned dir = 0; dir < D; ++dir) {
791 const unsigned fiNeg = dir * 2u;
792 const unsigned fiPos = dir * 2u + 1u;
793 IndexType nbNeg = nodes[id].index;
794 nbNeg[dir] -= 1;
795 IndexType nbPos = nodes[id].index;
796 nbPos[dir] += 1;
797 const bool negInBounds = inBounds(nbNeg);
798 const bool posInBounds = inBounds(nbPos);
799 const std::size_t jNeg =
800 negInBounds ? nodeLookupFlat[linearIndex(nbNeg)] : noNode;
801 const std::size_t jPos =
802 posInBounds ? nodeLookupFlat[linearIndex(nbPos)] : noNode;
803
804 // Distance matching velocityStencilPoint: gridDelta for OOB/interior,
805 // faceBCDists_ for boundary-crossing faces.
806 auto stokesFaceDist = [&](unsigned fi, bool inb, std::size_t j) -> T {
807 if (!inb || j != noNode)
808 return gridDelta;
809 const Boundary bt = faceBCTypes_[fi * n + id];
810 return (bt != Boundary::NONE) ? faceBCDists_[fi * n + id] : gridDelta;
811 };
812
813 const T dNeg = stokesFaceDist(fiNeg, negInBounds, jNeg);
814 const T dPos = stokesFaceDist(fiPos, posInBounds, jPos);
815 const T dSum = dNeg + dPos;
816 if (dSum <= eps)
817 continue;
818
819 auto processFace = [&](unsigned fi, bool inb, std::size_t j, T d) {
820 const T c = T(2) / (d * dSum);
821 if (inb && j != noNode) {
822 // Interior neighbor: off-diagonal and diagonal contribution.
823 stokesCoeffGpu_[fi * n + id] = static_cast<double>(c);
824 stokesNeighId32_[fi * n + id] = static_cast<uint32_t>(j);
825 addDiag(id, c);
826 } else if (inb) {
827 // Boundary-crossing face.
828 // REACTION / MASK: Dirichlet → adds to diagonal.
829 // AMBIENT / OOB: excluded (self-coupling cancels with vBC
830 // correction).
831 const Boundary bt = faceBCTypes_[fi * n + id];
832 if (bt == Boundary::REACTION || bt == Boundary::MASK)
833 addDiag(id, c);
834 }
835 // !inb (OOB): self-coupling, excluded.
836 };
837 processFace(fiNeg, negInBounds, jNeg, dNeg);
838 processFace(fiPos, posInBounds, jPos, dPos);
839 }
840 for (unsigned comp = 0; comp < D; ++comp)
841 if (stokesDiagGpu_[comp * n + id] <= static_cast<double>(eps))
842 stokesDiagGpu_[comp * n + id] = 1.0;
843 }
844 }
845
846 // Builds face-major harmonic velocity geometry for GPU reuse.
847 // The neighbor IDs are identical to Stokes (stokesNeighId32_ is shared).
848 //
849 // The CPU harmonicMatvec is Av[i][c] = 2*D*v[i] - stencil_sum(v)[i] + b[i]
850 // where stencil_sum includes self-coupling (v[nodeId]) for OOB/AMBIENT/NONE
851 // faces and constants for REACTION/MASK faces. After the b/constant terms
852 // cancel, the effective matrix diagonal is:
853 // 2*D - n_OOB - n_AMBIENT - n_NONE = n_interior + n_REACTION + n_MASK
854 //
855 // REACTION and MASK faces have no off-diagonal entry (their velocity is a
856 // constant in b) but they DO count toward the diagonal. Excluding them
857 // gives a matrix that is not diagonally dominant near the Si/SiO2 boundary,
858 // which causes Jacobi-preconditioned BiCGSTAB to diverge.
859 void buildHarmonicGpuGeometry() {
860 const std::size_t n = nodes.size();
861 harmonicCoeffGpu_.assign(2 * D * n, 0.0);
862 harmonicDiagGpu_.assign(n, 0.0);
863 for (std::size_t id = 0; id < n; ++id) {
864 for (unsigned fi = 0; fi < 2 * D; ++fi) {
865 if (stokesNeighId32_[fi * n + id] != gpu::kNoNode) {
866 // Interior neighbor: off-diagonal coefficient 1.0 + diagonal 1.0.
867 harmonicCoeffGpu_[fi * n + id] = 1.0;
868 harmonicDiagGpu_[id] += 1.0;
869 } else {
870 // Non-interior face. REACTION/MASK contribute a Dirichlet constant
871 // to b but NOT self-coupling, so they add 1.0 to the diagonal just
872 // like an interior neighbor would (matching the CPU matrix row).
873 // OOB/AMBIENT/NONE contribute v[nodeId] (self-coupling), which
874 // cancels with the 2*D diagonal term and must be excluded here.
875 const Boundary bt = faceBCTypes_[fi * n + id];
876 if (bt == Boundary::REACTION || bt == Boundary::MASK)
877 harmonicDiagGpu_[id] += 1.0;
878 }
879 }
880 if (harmonicDiagGpu_[id] < 1e-10)
881 harmonicDiagGpu_[id] = 1.0;
882 }
883 }
884
885 // Allocates GPU buffers for pressure and Stokes solves and uploads the
886 // geometry-fixed CSR pattern. Called from buildNodes() after the face BC
887 // arrays and the two geometry helper arrays are populated.
888 void setupDeformationGpuBuffers() {
889 const std::size_t n = nodes.size();
890 if (n == 0) {
891 gpu::freeGpuBuffers(gpuPressBufs_);
892 gpuPressBufs_ = nullptr;
893 gpu::freeGpuBuffers(gpuStokesBufs_);
894 gpuStokesBufs_ = nullptr;
895 return;
896 }
897 const bool tryGpu = (gpuMode_ == GpuMode::Gpu);
898 const bool useIlu0 = (gpuPreconditioner_ == GpuPreconditioner::ILU0);
899
900 gpu::freeGpuBuffers(gpuPressBufs_);
901 gpuPressBufs_ = nullptr;
902 if (tryGpu) {
903 gpuPressBufs_ =
904 gpu::allocGpuBuffers(static_cast<uint32_t>(n), 2 * D, useIlu0);
905 if (!gpuPressBufs_) {
906 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, but "
907 "pressure solver CUDA buffers could not be "
908 "allocated or the CUDA context could not be "
909 "initialized." +
910 gpuErrorDetail());
911 }
912 if (!gpu::gpuUploadNeighborIds(gpuPressBufs_, pressNeighId32_.data(),
913 2u * D * n)) {
914 gpu::freeGpuBuffers(gpuPressBufs_);
915 gpuPressBufs_ = nullptr;
916 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, but "
917 "uploading pressure GPU neighbor IDs failed." +
918 gpuErrorDetail());
919 }
920 if (useIlu0 && !gpu::gpuSetupCSR(gpuPressBufs_, pressNeighId32_.data(),
921 static_cast<uint32_t>(n), 2 * D)) {
922 gpu::freeGpuBuffers(gpuPressBufs_);
923 gpuPressBufs_ = nullptr;
924 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, but "
925 "CUSPARSE setup for the pressure GPU BiCGSTAB "
926 "solver failed." +
927 gpuErrorDetail());
928 }
929 }
930
931 gpu::freeGpuBuffers(gpuStokesBufs_);
932 gpuStokesBufs_ = nullptr;
933 if (tryGpu) {
934 gpuStokesBufs_ =
935 gpu::allocGpuBuffers(static_cast<uint32_t>(n), 2 * D, useIlu0);
936 if (!gpuStokesBufs_) {
937 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, but "
938 "Stokes solver CUDA buffers could not be "
939 "allocated or the CUDA context could not be "
940 "initialized." +
941 gpuErrorDetail());
942 }
943 if (!gpu::gpuUploadNeighborIds(gpuStokesBufs_, stokesNeighId32_.data(),
944 2u * D * n)) {
945 gpu::freeGpuBuffers(gpuStokesBufs_);
946 gpuStokesBufs_ = nullptr;
947 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, but "
948 "uploading Stokes GPU neighbor IDs failed." +
949 gpuErrorDetail());
950 }
951 if (useIlu0 && !gpu::gpuSetupCSR(gpuStokesBufs_, stokesNeighId32_.data(),
952 static_cast<uint32_t>(n), 2 * D)) {
953 gpu::freeGpuBuffers(gpuStokesBufs_);
954 gpuStokesBufs_ = nullptr;
955 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, but "
956 "CUSPARSE setup for the Stokes GPU BiCGSTAB "
957 "solver failed." +
958 gpuErrorDetail());
959 }
960 }
961
962 // Harmonic velocity: same neighbor IDs as Stokes, different coefficients.
963 gpu::freeGpuBuffers(gpuHarmonicBufs_);
964 gpuHarmonicBufs_ = nullptr;
965 if (tryGpu) {
966 gpuHarmonicBufs_ =
967 gpu::allocGpuBuffers(static_cast<uint32_t>(n), 2 * D, useIlu0);
968 if (!gpuHarmonicBufs_) {
969 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, but "
970 "harmonic solver CUDA buffers could not be "
971 "allocated." +
972 gpuErrorDetail());
973 }
974 if (!gpu::gpuUploadNeighborIds(gpuHarmonicBufs_, stokesNeighId32_.data(),
975 2u * D * n)) {
976 gpu::freeGpuBuffers(gpuHarmonicBufs_);
977 gpuHarmonicBufs_ = nullptr;
978 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, but "
979 "uploading harmonic GPU neighbor IDs failed." +
980 gpuErrorDetail());
981 }
982 if (useIlu0 &&
983 !gpu::gpuSetupCSR(gpuHarmonicBufs_, stokesNeighId32_.data(),
984 static_cast<uint32_t>(n), 2 * D)) {
985 gpu::freeGpuBuffers(gpuHarmonicBufs_);
986 gpuHarmonicBufs_ = nullptr;
987 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, but "
988 "CUSPARSE setup for the harmonic GPU BiCGSTAB "
989 "solver failed." +
990 gpuErrorDetail());
991 }
992 }
993
994 if (tryGpu)
995 logDeformationBackend("GPU BiCGSTAB",
996 "pressure/Stokes/harmonic, preconditioner=" +
997 std::string(useIlu0 ? "ILU0" : "Jacobi"));
998 }
999#endif // VIENNALS_GPU_BICGSTAB
1000
1001#ifdef VIENNALS_GPU_BICGSTAB
1002 static std::string gpuErrorDetail() {
1003 const char *detail = gpu::gpuGetLastErrorMessage();
1004 if (detail && detail[0] != '\0')
1005 return std::string(" Detail: ") + detail;
1006 return {};
1007 }
1008#endif
1009
1010 void logDeformationBackend(const std::string &backend,
1011 const std::string &detail) const {
1012 if (!Logger::hasInfo())
1013 return;
1014 const std::string msg = "OxidationDeformation: using " + backend +
1015 " for mechanics pressure/Stokes solves (nodes=" +
1016 std::to_string(nodes.size()) +
1017 (detail.empty() ? std::string() : ", " + detail) +
1018 ").";
1019 if (msg == lastLoggedBackend_)
1020 return;
1021 lastLoggedBackend_ = msg;
1022 Logger::getInstance().addInfo(msg).print();
1023 }
1024
1025public:
1028 reactionInterface, ambientInterface, maskInterface, useRequestedBounds,
1029 requestedMinIndex, requestedMaxIndex,
1030 deformationParameters.maxGridPoints, "OxidationDeformation");
1031 }
1032
1033 void buildNodes() {
1034 nodes.clear();
1036
1037 ConstSparseIterator reactionIt(reactionInterface->getDomain());
1038 ConstSparseIterator ambientIt(ambientInterface->getDomain());
1039 auto maskIt = makeMaskIterator();
1040
1041 IndexType index = minIndex;
1042 while (true) {
1043 const T reactionPhi = valueAt(reactionIt, index);
1044 const T ambientPhi = valueAt(ambientIt, index);
1045 if (isInsideOxide(reactionPhi, ambientPhi) &&
1046 !isInsideMask(maskIt, index)) {
1047 const std::size_t id = nodes.size();
1048 nodeLookupFlat[linearIndex(index)] = id;
1049 nodes.push_back({index});
1050 }
1051
1052 if (!increment(index))
1053 break;
1054 }
1055
1056 // Precompute per-face boundary intersections into flat face-major arrays.
1057 const std::size_t n = nodes.size();
1058 faceBCTypes_.assign(2 * D * n, Boundary::NONE);
1059 faceBCDists_.assign(2 * D * n, T(1));
1060 touchesAmbient_.assign(n, uint8_t(0));
1061 for (std::size_t id = 0; id < n; ++id) {
1062 const auto &node = nodes[id];
1063 bool touchesAmbient = false;
1064 bool touchesSolidBoundary = false;
1065 for (unsigned dir = 0; dir < D; ++dir) {
1066 for (int off : {-1, 1}) {
1067 const unsigned fi = dir * 2u + (off == 1 ? 1u : 0u);
1068 IndexType nb = node.index;
1069 nb[dir] += off;
1070 if (!inBounds(nb) || lookupNode(nb) != noNode)
1071 continue; // NONE/1.0 already set
1072 const auto bi = boundaryIntersection(reactionIt, ambientIt, maskIt,
1073 node.index, nb);
1074 faceBCTypes_[fi * n + id] = bi.boundary;
1075 faceBCDists_[fi * n + id] = bi.distance;
1076 if (bi.boundary == Boundary::AMBIENT)
1077 touchesAmbient = true;
1078 else if (bi.boundary == Boundary::MASK ||
1079 bi.boundary == Boundary::REACTION)
1080 touchesSolidBoundary = true;
1081 }
1082 }
1083 // Do not collapse mixed mask/reaction/ambient corner nodes to a single
1084 // ambient pressure identity row. Those nodes need their per-face
1085 // boundary conditions; otherwise the mask-edge triple point injects an
1086 // artificial free-surface pressure singularity.
1087 touchesAmbient_[id] =
1088 (touchesAmbient && !touchesSolidBoundary) ? uint8_t(1) : uint8_t(0);
1089 }
1090
1091#ifdef VIENNALS_GPU_BICGSTAB
1092 buildPressureGpuGeometry();
1093 buildStokesGpuGeometry();
1094 buildHarmonicGpuGeometry();
1095 setupDeformationGpuBuffers();
1096#else
1097 if (gpuMode_ == GpuMode::Gpu) {
1098 VIENNACORE_LOG_ERROR("OxidationDeformation: explicit GPU mode was "
1099 "requested, but ViennaLS was built without "
1100 "VIENNALS_GPU_BICGSTAB.");
1101 }
1102#endif
1103 }
1104
1105 // Evaluates the harmonic stencil at one node.
1106 // sum = sum of neighbor/BC contributions (interior neighbors, reaction/mask
1107 // BCs, and self-coupling for OOB/NONE/AMBIENT faces). count is always 2*D
1108 // (every face is counted regardless of type).
1109 template <class SolverT>
1110 void computeHarmonicStencilAt(std::size_t nodeId,
1111 const std::vector<Vec3D<SolverT>> &v,
1112 Vec3D<T> &sum) const {
1113 const auto &node = nodes[nodeId];
1114 sum = {T(0), T(0), T(0)};
1115
1116 const auto toT = [](const Vec3D<SolverT> &w) -> Vec3D<T> {
1117 return {static_cast<T>(w[0]), static_cast<T>(w[1]), static_cast<T>(w[2])};
1118 };
1119
1120 for (unsigned direction = 0; direction < D; ++direction) {
1121 for (int offset : {-1, 1}) {
1122 IndexType neighbor = node.index;
1123 neighbor[direction] += offset;
1124
1125 if (!inBounds(neighbor)) {
1126 detail::vecAddTo(sum, toT(v[nodeId])); // zero-flux: ghost = self
1127 continue;
1128 }
1129
1130 const std::size_t neighborId = lookupNode(neighbor);
1131 if (neighborId != noNode) {
1132 detail::vecAddTo(sum, toT(v[neighborId]));
1133 continue;
1134 }
1135
1136 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
1137 const Boundary boundary = faceBCTypes_[fi * nodes.size() + nodeId];
1138 if (boundary == Boundary::REACTION) {
1140 } else if (boundary == Boundary::MASK) {
1141 detail::vecAddTo(sum,
1142 maskVelocityBoundary(node.index, toT(v[nodeId])));
1143 } else {
1144 detail::vecAddTo(sum, toT(v[nodeId])); // AMBIENT/NONE: zero-flux
1145 }
1146 }
1147 }
1148 }
1149
1150 // (Av)[i] = (2*D) * v[i] - sum_at_v[i] + b[i]
1151 template <class SolverT>
1152 void harmonicMatvec(const std::vector<Vec3D<SolverT>> &v,
1153 const std::vector<Vec3D<T>> &b,
1154 std::vector<Vec3D<SolverT>> &Av) const {
1155 const T diagVal = static_cast<T>(2 * D);
1156#pragma omp parallel for schedule(static)
1157 for (std::size_t i = 0; i < nodes.size(); ++i) {
1158 Vec3D<T> sum;
1159 computeHarmonicStencilAt(i, v, sum);
1160 for (unsigned c = 0; c < D; ++c)
1161 Av[i][c] = static_cast<SolverT>(diagVal * v[i][c] - sum[c] + b[i][c]);
1162 }
1163 }
1164
1166 iterations = 0;
1167 residual = 0.;
1168 if (nodes.empty())
1169 return;
1170
1171 using SolverT = T;
1172
1173 const std::size_t n = nodes.size();
1174 const T diagVal = static_cast<T>(2 * D); // constant for all nodes
1175
1176 // b[i] = BC constants (reaction + mask velocities), computed at v = zeros.
1177 // OOB/NONE/AMBIENT faces contribute v[i] = 0 at zeros, so only Dirichlet
1178 // BCs survive — correctly isolating the RHS constant vector.
1179 std::vector<Vec3D<T>> b(n);
1180 {
1181 const std::vector<Vec3D<SolverT>> zeros(
1182 n, Vec3D<SolverT>{SolverT(0), SolverT(0), SolverT(0)});
1183#pragma omp parallel for schedule(static)
1184 for (std::size_t i = 0; i < n; ++i)
1185 computeHarmonicStencilAt(i, zeros, b[i]);
1186 }
1187
1188 // Warm-start from previous substep's velocity field.
1189 std::vector<Vec3D<SolverT>> x(n);
1190 for (std::size_t i = 0; i < n; ++i)
1191 for (unsigned c = 0; c < D; ++c) {
1192 const T value = nodes[i].velocity[c];
1193 x[i][c] = static_cast<SolverT>(std::isfinite(value) ? value : T(0));
1194 }
1195
1196#ifdef VIENNALS_GPU_BICGSTAB
1197 if (gpu::gpuIsValid(gpuHarmonicBufs_)) {
1198 const std::size_t nf = 2u * D * n;
1199 if (harmonicDiagGpu_.size() != n || harmonicCoeffGpu_.size() != nf) {
1200 VIENNACORE_LOG_ERROR("OxidationDeformation: harmonic GPU geometry has "
1201 "the wrong size for the current node set.");
1202 }
1203
1204 Timer<> tUpload, tSolve;
1205 std::vector<Vec3D<SolverT>> xSolved(n);
1206 unsigned maxGpuIterations = 0;
1207 double maxGpuResidual = 0.0;
1208
1209 for (unsigned c = 0; c < D; ++c) {
1210 std::vector<double> bGpu(n), xGpu(n);
1211 for (std::size_t i = 0; i < n; ++i) {
1212 bGpu[i] = static_cast<double>(b[i][c]);
1213 xGpu[i] = static_cast<double>(x[i][c]);
1214 }
1215
1216 tUpload.start();
1217 const bool gpuUploadOk =
1218 (c == 0) ? gpu::gpuUploadSolverArrays(
1219 gpuHarmonicBufs_, harmonicDiagGpu_.data(),
1220 bGpu.data(), harmonicCoeffGpu_.data(),
1221 static_cast<uint32_t>(n), harmonicCoeffGpu_.size())
1222 : gpu::gpuUploadRhs(gpuHarmonicBufs_, bGpu.data(),
1223 static_cast<uint32_t>(n));
1224 tUpload.finish();
1225 if (!gpuUploadOk) {
1226 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, "
1227 "but uploading harmonic solver arrays failed." +
1228 gpuErrorDetail());
1229 }
1230
1231 unsigned gpuIterations = 0;
1232 double gpuResidual = 0.0;
1233 tSolve.start();
1234 const bool gpuConverged = gpu::gpuSolveBiCGSTAB(
1235 gpuHarmonicBufs_, xGpu.data(),
1236 static_cast<double>(std::numeric_limits<SolverT>::epsilon()),
1237 deformationParameters.harmonicIterations,
1238 static_cast<double>(deformationParameters.tolerance), gpuIterations,
1239 gpuResidual);
1240 tSolve.finish();
1241
1242 if (!gpuConverged || !std::isfinite(gpuResidual)) {
1243 VIENNACORE_LOG_ERROR(
1244 "OxidationDeformation: harmonic GPU BiCGSTAB failed or produced "
1245 "a non-finite residual for component " +
1246 std::to_string(c) + " (iters=" + std::to_string(gpuIterations) +
1247 ", residual=" + std::to_string(gpuResidual) + ").");
1248 }
1249
1250 maxGpuIterations = std::max(maxGpuIterations, gpuIterations);
1251 maxGpuResidual = std::max(maxGpuResidual, gpuResidual);
1252 for (std::size_t i = 0; i < n; ++i)
1253 xSolved[i][c] = static_cast<SolverT>(xGpu[i]);
1254 }
1255
1256 for (std::size_t i = 0; i < n; ++i)
1257 for (unsigned c = 0; c < D; ++c)
1258 nodes[i].velocity[c] = static_cast<T>(xSolved[i][c]);
1259 iterations = maxGpuIterations;
1260 residual = maxGpuResidual;
1261
1262 if (Logger::hasTiming()) {
1263 Logger::getInstance()
1264 .addTiming("harmonic n=" + std::to_string(n) +
1265 " iters=" + std::to_string(iterations) + " res=" +
1266 std::to_string(residual) + " [GPU] GPU BiCGSTAB",
1267 tSolve)
1268 .print();
1269 }
1270 if (Logger::hasDebug()) {
1271 Logger::getInstance()
1272 .addTiming("harmonic n=" + std::to_string(n) + " [GPU] GPU upload",
1273 tUpload)
1274 .print();
1275 }
1276 return;
1277 }
1278#endif
1279
1280 // r = b - A*x
1281 const Vec3D<SolverT> zero3{SolverT(0), SolverT(0), SolverT(0)};
1282 std::vector<Vec3D<SolverT>> Ax(n);
1283 harmonicMatvec(x, b, Ax);
1284 std::vector<Vec3D<SolverT>> r(n), r_hat(n);
1285 for (std::size_t i = 0; i < n; ++i)
1286 for (unsigned c = 0; c < D; ++c) {
1287 r[i][c] = static_cast<SolverT>(b[i][c] - Ax[i][c]);
1288 r_hat[i][c] = r[i][c];
1289 }
1290
1291 // BiCGSTAB with diagonal preconditioner (diag = 2*D, constant).
1292 std::vector<Vec3D<SolverT>> pv(n, zero3), sv(n, zero3), y(n), z(n), s(n),
1293 t(n);
1294 T rho = T(1), alpha = T(1), omega = T(1);
1295
1296 auto vecDot = [&](const std::vector<Vec3D<SolverT>> &a,
1297 const std::vector<Vec3D<SolverT>> &bv) {
1298 T sum = T(0);
1299 for (std::size_t i = 0; i < n; ++i)
1300 for (unsigned c = 0; c < D; ++c)
1301 sum += static_cast<T>(a[i][c]) * static_cast<T>(bv[i][c]);
1302 return sum;
1303 };
1304
1305 auto vecMaxAbs = [&](const std::vector<Vec3D<SolverT>> &vin) {
1306 T m = T(0);
1307 for (std::size_t i = 0; i < n; ++i)
1308 for (unsigned c = 0; c < D; ++c)
1309 m = std::max(m, std::abs(static_cast<T>(vin[i][c])));
1310 return m;
1311 };
1312
1313 const T b_norm = [&] {
1314 T m = T(0);
1315 for (std::size_t i = 0; i < n; ++i)
1316 for (unsigned c = 0; c < D; ++c)
1317 m = std::max(m, std::abs(b[i][c]));
1318 return (m < T(1e-100)) ? T(1) : m;
1319 }();
1320
1321 for (; iterations < deformationParameters.harmonicIterations;
1322 ++iterations) {
1323 const T rho_new = vecDot(r_hat, r);
1324 if (!std::isfinite(rho_new) || std::abs(rho_new) < T(1e-100))
1325 break;
1326 if (!std::isfinite(rho) || !std::isfinite(alpha) ||
1327 !std::isfinite(omega) || std::abs(omega) < T(1e-100))
1328 break;
1329
1330 const T beta = (rho_new / rho) * (alpha / omega);
1331 if (!std::isfinite(beta))
1332 break;
1333 rho = rho_new;
1334
1335 for (std::size_t i = 0; i < n; ++i)
1336 for (unsigned c = 0; c < D; ++c)
1337 pv[i][c] = static_cast<SolverT>(r[i][c] +
1338 beta * (pv[i][c] - omega * sv[i][c]));
1339
1340 // y = M^{-1} p = p / (2*D)
1341 for (std::size_t i = 0; i < n; ++i)
1342 for (unsigned c = 0; c < D; ++c)
1343 y[i][c] = static_cast<SolverT>(static_cast<T>(pv[i][c]) / diagVal);
1344
1345 harmonicMatvec(y, b, sv);
1346
1347 const T r_hat_v = vecDot(r_hat, sv);
1348 if (!std::isfinite(r_hat_v) || std::abs(r_hat_v) < T(1e-100))
1349 break;
1350
1351 alpha = rho_new / r_hat_v;
1352 if (!std::isfinite(alpha))
1353 break;
1354
1355 for (std::size_t i = 0; i < n; ++i)
1356 for (unsigned c = 0; c < D; ++c)
1357 s[i][c] = static_cast<SolverT>(r[i][c] - alpha * sv[i][c]);
1358
1359 residual = vecMaxAbs(s);
1360 if (!std::isfinite(residual))
1361 break;
1362 if (residual < deformationParameters.tolerance * b_norm) {
1363 for (std::size_t i = 0; i < n; ++i)
1364 for (unsigned c = 0; c < D; ++c)
1365 x[i][c] = static_cast<SolverT>(x[i][c] + alpha * y[i][c]);
1366 ++iterations;
1367 break;
1368 }
1369
1370 // z = M^{-1} s
1371 for (std::size_t i = 0; i < n; ++i)
1372 for (unsigned c = 0; c < D; ++c)
1373 z[i][c] = static_cast<SolverT>(static_cast<T>(s[i][c]) / diagVal);
1374
1375 harmonicMatvec(z, b, t);
1376
1377 const T t_s = vecDot(t, s);
1378 const T t_t = vecDot(t, t);
1379 if (!std::isfinite(t_s) || !std::isfinite(t_t))
1380 break;
1381 omega = (t_t > T(1e-100)) ? t_s / t_t : T(0);
1382 if (!std::isfinite(omega))
1383 break;
1384
1385 for (std::size_t i = 0; i < n; ++i)
1386 for (unsigned c = 0; c < D; ++c) {
1387 x[i][c] =
1388 static_cast<SolverT>(x[i][c] + alpha * y[i][c] + omega * z[i][c]);
1389 r[i][c] = static_cast<SolverT>(s[i][c] - omega * t[i][c]);
1390 }
1391
1392 residual = vecMaxAbs(r);
1393 if (!std::isfinite(residual))
1394 break;
1395 if (residual < deformationParameters.tolerance * b_norm) {
1396 ++iterations;
1397 break;
1398 }
1399 }
1400
1401 bool finiteSolution = true;
1402 for (std::size_t i = 0; i < n; ++i)
1403 for (unsigned c = 0; c < D; ++c)
1404 if (!std::isfinite(static_cast<T>(x[i][c])))
1405 finiteSolution = false;
1406
1407 if (finiteSolution) {
1408 for (std::size_t i = 0; i < n; ++i)
1409 for (unsigned c = 0; c < D; ++c)
1410 nodes[i].velocity[c] = static_cast<T>(x[i][c]);
1411 } else {
1412 residual = std::numeric_limits<T>::infinity();
1413 }
1414 if (residual > deformationParameters.tolerance * b_norm)
1415 VIENNACORE_LOG_WARNING(
1416 "solveVelocity (harmonic): BiCGSTAB did not converge after " +
1417 std::to_string(iterations) + "/" +
1418 std::to_string(deformationParameters.harmonicIterations) +
1419 " iterations (residual=" + std::to_string(residual / b_norm) +
1420 ", tolerance=" + std::to_string(deformationParameters.tolerance) +
1421 ")");
1422 }
1423
1424 // Returns component-wise diagonal entries of the Stokes operator A_v.
1425 // Geometry-fixed within a mechanics solve; computed once and reused by the
1426 // SIMPLE velocity-correction step: v_c^{k+1}=v_c* - grad_c(dp)/(eta*a_ic).
1427 //
1428 // With traction-coupled MASK contact, ghost=v_node+const for every component,
1429 // so the MASK face self-coupling cancels and the face coefficient is removed.
1430 std::vector<Vec3D<T>> computeVelocityDiagonals() const {
1431 const std::size_t n = nodes.size();
1432 std::vector<Vec3D<T>> diagV(n, Vec3D<T>{T(0), T(0), T(0)});
1433 if (n == 0)
1434 return diagV;
1435 const std::vector<Vec3D<T>> zeros(n, Vec3D<T>{T(0), T(0), T(0)});
1436 std::vector<Vec3D<T>> tmp(n);
1437#pragma omp parallel for schedule(static)
1438 for (std::size_t i = 0; i < n; ++i) {
1439 T diag{};
1440 computeVelocityStencilAt(i, zeros, diag, tmp[i]);
1441 for (unsigned comp = 0; comp < D; ++comp)
1442 diagV[i][comp] = diag;
1443 }
1444
1445 return diagV;
1446 }
1447
1449 T mechanicsResidual = 0.;
1450
1451 // SIMPLE (Semi-Implicit Method for Pressure-Linked Equations) coupling.
1452 // The Gauss-Seidel p→v→p loop has spectral radius > 1 on thin geometries,
1453 // causing divergence that worsens with more iterations. SIMPLE adds a
1454 // velocity-correction step after the pressure update that provably
1455 // eliminates the oscillation mode:
1456 //
1457 // 1. Momentum predictor: A_v * v* = vBC - (∇p^k - ∇·σ'(v^k)) / η
1458 // 2. Pressure update: A_p * p^{k+1} = pBC + K · div(v*)
1459 // 3. Velocity correction: v^{k+1} = v* - ∇δp / (η · a_i)
1460 // where δp = p^{k+1} - p^k, a_i = diag(A_v)[i]
1461 //
1462 // Step 3 ensures the corrected velocity is consistent with the new
1463 // pressure without re-solving the full momentum equation. Unlike the
1464 // Aitken clamp (which can only damp, not stabilise, ρ > 1 iterations),
1465 // this correction is unconditionally convergent for steady Stokes.
1466
1467 const std::vector<Vec3D<T>> diagV =
1468 computeVelocityDiagonals(); // geometry-fixed within this call
1469
1470 for (unsigned iteration = 0;
1471 iteration < deformationParameters.mechanicsIterations; ++iteration) {
1472 const auto previousVelocity = collectVelocities(); // v^k
1473 const auto previousPressure = collectPressures(); // p^k
1474
1477
1478 // Step 1: momentum predictor uses current p^k (in nodes[i].pressure).
1479 Timer<> tStokes, tPressure;
1480 tStokes.start();
1481 solveStokesVelocity(); // produces v* in nodes[i].velocity
1482 tStokes.finish();
1483 if (!std::isfinite(lastStokesResidual_)) {
1484 mechanicsResidual = std::numeric_limits<T>::infinity();
1485 break;
1486 }
1487
1488 // Step 2: pressure solve uses divergence of v*.
1489 tPressure.start();
1490 solvePressure(); // produces p^{k+1} in nodes[i].pressure
1491 tPressure.finish();
1492 if (!std::isfinite(lastPressureResidual_)) {
1493 mechanicsResidual = std::numeric_limits<T>::infinity();
1494 break;
1495 }
1496
1497 // Step 3: SIMPLE velocity correction: v^{k+1} = v* - ∇δp / (η · a_i).
1498 applySimpleVelocityCorrection(previousPressure, diagV);
1499
1500 mechanicsResidual = std::max(maxVelocityChange(previousVelocity),
1501 maxPressureChange(previousPressure));
1502 if (!std::isfinite(mechanicsResidual)) {
1503 mechanicsResidual = std::numeric_limits<T>::infinity();
1504 break;
1505 }
1506
1507 if (Logger::hasDebug())
1508 Logger::getInstance()
1509 .addTiming(
1510 " mechanics[" + std::to_string(iteration) +
1511 "] stokes iters=" + std::to_string(lastStokesIters_) +
1512 "/" +
1513 std::to_string(deformationParameters.stokesIterations) +
1514 " res=" + std::to_string(lastStokesResidual_),
1515 tStokes)
1516 .addTiming(
1517 " mechanics[" + std::to_string(iteration) +
1518 "] pressure iters=" + std::to_string(lastPressureIters_) +
1519 "/" +
1520 std::to_string(deformationParameters.pressureIterations) +
1521 " res=" + std::to_string(lastPressureResidual_) +
1522 " coupling=" + std::to_string(mechanicsResidual),
1523 tPressure)
1524 .print();
1525
1526 if (mechanicsResidual < deformationParameters.mechanicsTolerance)
1527 break;
1528 }
1529
1532 residual = mechanicsResidual;
1533 if (residual > deformationParameters.mechanicsTolerance)
1534 VIENNACORE_LOG_WARNING(
1535 "solveMechanics: did not converge after " +
1536 std::to_string(deformationParameters.mechanicsIterations) +
1537 " iterations (residual=" + std::to_string(residual) + ", tolerance=" +
1538 std::to_string(deformationParameters.mechanicsTolerance) + ")");
1539 }
1540
1541 // SIMPLE velocity correction: v^{k+1} = v* - ∇(p^{k+1} - p^k) / (η · a_i)
1542 //
1543 // δp gradient uses homogeneous Neumann at all boundary faces (δp ghost = 0).
1544 // The boundary pressure correction is re-enforced by the next pressure solve,
1545 // so this approximation only affects the current-iteration correction, not
1546 // the converged solution.
1547 void applySimpleVelocityCorrection(const std::vector<T> &pressureOld,
1548 const std::vector<Vec3D<T>> &diagV) {
1549 if (nodes.empty())
1550 return;
1551 if (deformationParameters.viscosity <= std::numeric_limits<T>::epsilon())
1552 return;
1553
1554 const std::size_t n = nodes.size();
1555 const T invEta = T(1) / deformationParameters.viscosity;
1556
1557#pragma omp parallel for schedule(static)
1558 for (std::size_t i = 0; i < n; ++i) {
1559 for (unsigned dir = 0; dir < D; ++dir) {
1560 const T ai = diagV[i][dir];
1561 if (ai <= std::numeric_limits<T>::epsilon())
1562 continue;
1563
1564 // Negative-offset face (fi = dir*2).
1565 T dpMinus, dMinus;
1566 {
1567 const unsigned fi = dir * 2u;
1568 IndexType nb = nodes[i].index;
1569 nb[dir] -= 1;
1570 if (inBounds(nb)) {
1571 const std::size_t j = nodeLookupFlat[linearIndex(nb)];
1572 if (j != noNode) {
1573 dpMinus = nodes[j].pressure - pressureOld[j];
1574 dMinus = gridDelta;
1575 } else {
1576 dpMinus = T(0);
1577 dMinus = faceBCDists_[fi * n + i];
1578 }
1579 } else {
1580 dpMinus = T(0);
1581 dMinus = gridDelta;
1582 }
1583 }
1584
1585 // Positive-offset face (fi = dir*2+1).
1586 T dpPlus, dPlus;
1587 {
1588 const unsigned fi = dir * 2u + 1u;
1589 IndexType nb = nodes[i].index;
1590 nb[dir] += 1;
1591 if (inBounds(nb)) {
1592 const std::size_t j = nodeLookupFlat[linearIndex(nb)];
1593 if (j != noNode) {
1594 dpPlus = nodes[j].pressure - pressureOld[j];
1595 dPlus = gridDelta;
1596 } else {
1597 dpPlus = T(0);
1598 dPlus = faceBCDists_[fi * n + i];
1599 }
1600 } else {
1601 dpPlus = T(0);
1602 dPlus = gridDelta;
1603 }
1604 }
1605
1606 const T dpCenter = nodes[i].pressure - pressureOld[i];
1607 const T gradDP =
1608 firstDerivative(dpMinus, dpCenter, dpPlus, dMinus, dPlus);
1609 const T correction =
1610 gradDP * invEta / ai * deformationParameters.relaxation;
1611 if (std::isfinite(correction) && std::isfinite(nodes[i].velocity[dir]))
1612 nodes[i].velocity[dir] -= correction;
1613 }
1614 }
1615 }
1616
1617 // Fills diag = centerCoefficient and rhs = pressureSum for one node.
1618 // Dirichlet (ambient) nodes are encoded as identity rows: diag=1,
1619 // rhs=ambientBP.
1620 template <class SolverT>
1621 void computePressureStencilAt(std::size_t nodeId,
1622 const std::vector<SolverT> &p,
1623 const std::vector<T> &ambientBP, T &diag,
1624 T &rhs) const {
1625 if (touchesAmbient_[nodeId]) {
1626 diag = T(1);
1627 rhs = ambientBP[nodeId];
1628 return;
1629 }
1630 diag = T(0);
1631 rhs = T(0);
1632 for (unsigned direction = 0; direction < D; ++direction) {
1633 const auto plus =
1634 pressureStencilPoint(p, ambientBP, nodeId, direction, 1);
1635 const auto minus =
1636 pressureStencilPoint(p, ambientBP, nodeId, direction, -1);
1637 const T dSum = plus.distance + minus.distance;
1638 const T plusCoeff = T(2) / (plus.distance * dSum);
1639 const T minusCoeff = T(2) / (minus.distance * dSum);
1640 rhs += plusCoeff * plus.value + minusCoeff * minus.value;
1641 diag += plusCoeff + minusCoeff;
1642 }
1643 }
1644
1645 // (Av)[i] = precomputedDiag[i]*v[i] - rhs_at_v[i] + pBC[i]
1646 template <class SolverT>
1647 void
1648 pressureMatvec(const std::vector<SolverT> &v, const std::vector<T> &ambientBP,
1649 const std::vector<T> &precomputedDiag,
1650 const std::vector<T> &pBC, std::vector<SolverT> &Av) const {
1651#pragma omp parallel for schedule(static)
1652 for (std::size_t i = 0; i < nodes.size(); ++i) {
1653 T diag, rhs;
1654 computePressureStencilAt(i, v, ambientBP, diag, rhs);
1655 Av[i] = static_cast<SolverT>(precomputedDiag[i] * v[i] - rhs + pBC[i]);
1656 }
1657 }
1658
1660 if (nodes.empty())
1661 return;
1662
1663 using SolverT = T;
1664
1665 const std::size_t n = nodes.size();
1666 const T eps = std::numeric_limits<T>::epsilon();
1667
1668 std::vector<T> divergence(n), ambientBP(n);
1669#pragma omp parallel for schedule(static)
1670 for (std::size_t i = 0; i < n; ++i) {
1671 divergence[i] = divergenceAt(nodes[i].index);
1672 ambientBP[i] = freeSurfacePressureBoundary(nodes[i].index);
1673 }
1674
1675 auto warnBadPressureAssembly = [](const std::string &stage,
1676 std::size_t nodeId, const IndexType &idx,
1677 T value) {
1678 VIENNACORE_LOG_WARNING(
1679 "solvePressure: non-finite/overflow " + stage +
1680 " at node=" + std::to_string(nodeId) + " index=(" +
1681 std::to_string(idx[0]) + "," + std::to_string(idx[1]) +
1682 (D == 3 ? "," + std::to_string(idx[2]) : std::string()) +
1683 ") value=" + std::to_string(value));
1684 };
1685
1686 const T solverMax = static_cast<T>(std::numeric_limits<SolverT>::max());
1687 for (std::size_t i = 0; i < n; ++i) {
1688 if (!std::isfinite(divergence[i]) ||
1689 std::abs(divergence[i]) > solverMax) {
1690 warnBadPressureAssembly("divergence", i, nodes[i].index, divergence[i]);
1691 break;
1692 }
1693 if (!std::isfinite(ambientBP[i]) || std::abs(ambientBP[i]) > solverMax) {
1694 warnBadPressureAssembly("ambient pressure boundary", i, nodes[i].index,
1695 ambientBP[i]);
1696 break;
1697 }
1698 }
1699
1700 // Geometry-fixed diagonal and BC constants (kept in T for full precision).
1701 std::vector<T> diag(n), pBC(n);
1702 {
1703 const std::vector<SolverT> zeros(n, SolverT(0));
1704#pragma omp parallel for schedule(static)
1705 for (std::size_t i = 0; i < n; ++i)
1706 computePressureStencilAt(i, zeros, ambientBP, diag[i], pBC[i]);
1707 }
1708
1709 for (std::size_t i = 0; i < n; ++i) {
1710 if (!std::isfinite(diag[i]) || std::abs(diag[i]) > solverMax) {
1711 warnBadPressureAssembly("pressure diagonal", i, nodes[i].index,
1712 diag[i]);
1713 break;
1714 }
1715 if (!std::isfinite(pBC[i]) || std::abs(pBC[i]) > solverMax) {
1716 warnBadPressureAssembly("pressure boundary rhs", i, nodes[i].index,
1717 pBC[i]);
1718 break;
1719 }
1720 }
1721
1722 std::vector<T> b(n);
1723 T b_norm = T(0);
1724 for (std::size_t i = 0; i < n; ++i) {
1725 b[i] = pBC[i] + deformationParameters.bulkModulus * divergence[i];
1726 b_norm = std::max(b_norm, std::abs(b[i]));
1727 }
1728 for (std::size_t i = 0; i < n; ++i) {
1729 if (!std::isfinite(b[i]) || std::abs(b[i]) > solverMax) {
1730 warnBadPressureAssembly("pressure rhs", i, nodes[i].index, b[i]);
1731 break;
1732 }
1733 }
1734 if (b_norm < T(1e-100))
1735 b_norm = T(1);
1736
1737 std::vector<SolverT> x(n);
1738 for (std::size_t i = 0; i < n; ++i) {
1739 T guess = touchesAmbient_[i] ? ambientBP[i] : nodes[i].pressure;
1740 if (!std::isfinite(guess))
1741 guess = deformationParameters.ambientPressure;
1742 x[i] = static_cast<SolverT>(guess);
1743 }
1744
1745#ifdef VIENNALS_GPU_BICGSTAB
1746 if (gpu::gpuIsValid(gpuPressBufs_)) {
1747 const std::size_t nf = 2u * D * n;
1748 if (actualDiagGpu_.size() != n || pressCoeffGpu_.size() != nf) {
1749 VIENNACORE_LOG_ERROR("OxidationDeformation: pressure GPU geometry has "
1750 "the wrong size for the current node set.");
1751 }
1752
1753 Timer<> tUpload, tSolve;
1754 std::vector<double> bGpu(n), xGpu(n);
1755 for (std::size_t i = 0; i < n; ++i) {
1756 bGpu[i] = static_cast<double>(b[i]);
1757 xGpu[i] = static_cast<double>(x[i]);
1758 }
1759
1760 tUpload.start();
1761 const bool gpuUploadOk = gpu::gpuUploadSolverArrays(
1762 gpuPressBufs_, actualDiagGpu_.data(), bGpu.data(),
1763 pressCoeffGpu_.data(), static_cast<uint32_t>(n),
1764 pressCoeffGpu_.size());
1765 tUpload.finish();
1766 if (!gpuUploadOk) {
1767 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, but "
1768 "uploading pressure solver arrays or factorizing "
1769 "ILU failed." +
1770 gpuErrorDetail());
1771 }
1772
1773 unsigned gpuIterations = 0;
1774 double gpuResidual = 0.0;
1775 tSolve.start();
1776 const bool gpuConverged = gpu::gpuSolveBiCGSTAB(
1777 gpuPressBufs_, xGpu.data(), static_cast<double>(eps),
1778 deformationParameters.pressureIterations,
1779 static_cast<double>(deformationParameters.pressureTolerance),
1780 gpuIterations, gpuResidual);
1781 tSolve.finish();
1782
1783 // gpuResidual is the GPU true residual ||b - A*x||_inf recomputed at
1784 // convergence (not the recursive BiCGSTAB residual), so no separate CPU
1785 // stencil evaluation is needed.
1786 if (!gpuConverged || !std::isfinite(gpuResidual)) {
1787 VIENNACORE_LOG_ERROR(
1788 "OxidationDeformation: pressure GPU BiCGSTAB failed or produced "
1789 "a non-finite residual (iters=" +
1790 std::to_string(gpuIterations) +
1791 ", residual=" + std::to_string(gpuResidual) + ").");
1792 }
1793
1794 {
1795 const T beta = deformationParameters.pressureRelaxation;
1796 const T oneMinB = T(1) - beta;
1797 for (std::size_t i = 0; i < n; ++i)
1798 nodes[i].pressure =
1799 oneMinB * nodes[i].pressure + beta * static_cast<T>(xGpu[i]);
1800 }
1801 lastPressureIters_ = gpuIterations;
1802 lastPressureResidual_ = gpuResidual / b_norm;
1803
1804 if (Logger::hasDebug()) {
1805 const std::string tag =
1806 "pressure n=" + std::to_string(n) +
1807 " iters=" + std::to_string(lastPressureIters_) +
1808 " res=" + std::to_string(lastPressureResidual_) + " [GPU]";
1809 Logger::getInstance()
1810 .addTiming(tag + " GPU upload", tUpload)
1811 .addTiming(tag + " GPU BiCGSTAB", tSolve)
1812 .print();
1813 }
1814 return;
1815 }
1816#endif
1817
1818 // Precompute off-diagonal structure and the CORRECT matrix diagonal for
1819 // SSOR.
1820 //
1821 // Key insight: diag[i] from computePressureStencilAt includes self-coupling
1822 // contributions from REACTION/MASK/OOB faces (those return v[nodeId]
1823 // itself). The ACTUAL matrix diagonal A[i,i] = sum of off-diagonal
1824 // (interior-neighbor) coefficients only. Using the wrong diagonal in the
1825 // SSOR sweeps makes the preconditioner invalid near boundaries.
1826 //
1827 // Also: NONE-type non-interior faces (OOB or no crossing) use gridDelta in
1828 // pressureStencilPoint, NOT faceBCDists_ (which defaults to T(1)).
1829 //
1830 // Face-major layout: fi = dir*2 + (offset==+1 ? 1 : 0)
1831 // Even fi (offset=-1): lower-index neighbor → forward sweep
1832 // Odd fi (offset=+1): higher-index neighbor → backward sweep
1833 std::vector<T> pressCoeff(2 * D * n, T(0));
1834 std::vector<std::size_t> pressNeighId(2 * D * n, noNode);
1835 std::vector<T> actualDiag(n, T(0)); // A[i,i] = sum of interior coefficients
1836
1837 for (std::size_t id = 0; id < n; ++id) {
1838 if (touchesAmbient_[id]) {
1839 actualDiag[id] = T(1);
1840 continue;
1841 } // identity row
1842 for (unsigned dir = 0; dir < D; ++dir) {
1843 const unsigned fiNeg = dir * 2u;
1844 const unsigned fiPos = dir * 2u + 1u;
1845 IndexType nbNeg = nodes[id].index;
1846 nbNeg[dir] -= 1;
1847 IndexType nbPos = nodes[id].index;
1848 nbPos[dir] += 1;
1849 const std::size_t jNeg =
1850 inBounds(nbNeg) ? nodeLookupFlat[linearIndex(nbNeg)] : noNode;
1851 const std::size_t jPos =
1852 inBounds(nbPos) ? nodeLookupFlat[linearIndex(nbPos)] : noNode;
1853
1854 // Effective distance matching pressureStencilPoint:
1855 // interior neighbour → gridDelta
1856 // AMBIENT/REACTION/MASK crossing → faceBCDists_ (actual sub-grid
1857 // distance) NONE (OOB or no crossing) → gridDelta
1858 // (pressureStencilPoint fallthrough)
1859 auto effectiveDist = [&](unsigned fi, std::size_t j) -> T {
1860 if (j != noNode)
1861 return gridDelta;
1862 const Boundary bt = faceBCTypes_[fi * n + id];
1863 if (bt != Boundary::NONE)
1864 return faceBCDists_[fi * n + id];
1865 return gridDelta;
1866 };
1867
1868 const T dNeg = effectiveDist(fiNeg, jNeg);
1869 const T dPos = effectiveDist(fiPos, jPos);
1870 const T dSum = dNeg + dPos;
1871 if (dSum <= eps)
1872 continue;
1873
1874 if (jNeg != noNode && !touchesAmbient_[jNeg]) {
1875 const T c = T(2) / (dNeg * dSum);
1876 pressCoeff[fiNeg * n + id] = c;
1877 pressNeighId[fiNeg * n + id] = jNeg;
1878 actualDiag[id] += c; // A[i,i] += interior off-diagonal coefficient
1879 } else if (jNeg != noNode ||
1880 faceBCTypes_[fiNeg * n + id] == Boundary::AMBIENT) {
1881 // j is an ambient-only neighbour (identity-row Dirichlet p=0), OR
1882 // this face directly crosses the free surface (AMBIENT Dirichlet p=0
1883 // at the sub-grid crossing distance). RHS contribution is c·0=0.
1884 // REACTION faces are solid-wall Neumann ∂p/∂n=0: no contribution.
1885 actualDiag[id] += T(2) / (dNeg * dSum);
1886 }
1887 if (jPos != noNode && !touchesAmbient_[jPos]) {
1888 const T c = T(2) / (dPos * dSum);
1889 pressCoeff[fiPos * n + id] = c;
1890 pressNeighId[fiPos * n + id] = jPos;
1891 actualDiag[id] += c;
1892 } else if (jPos != noNode ||
1893 faceBCTypes_[fiPos * n + id] == Boundary::AMBIENT) {
1894 actualDiag[id] += T(2) / (dPos * dSum);
1895 }
1896 }
1897 // Guard against fully-isolated nodes (surrounded by boundaries on every
1898 // face)
1899 if (actualDiag[id] <= eps)
1900 actualDiag[id] = T(1);
1901 }
1902
1903 // ILU(0) preconditioner for the (non-symmetric) pressure matrix.
1904 //
1905 // The sub-grid interface distances make A[i,j] ≠ A[j,i] in general, so
1906 // SSOR is not guaranteed to converge. ILU(0) handles non-symmetric
1907 // matrices robustly.
1908 //
1909 // Factorisation A ≈ L * U (zero fill-in, natural node ordering):
1910 // L – unit lower triangular: L[i,j] = A[i,j] / U[j,j] for j < i
1911 // U – upper triangular: U[i,j] = A[i,j] for j > i
1912 // U[i,i] = A[i,i] - Σ_{k<i} L[i,k] * A[k,i]
1913 //
1914 // With A[i,j] = -pressCoeff[fi*n+i] and A[j,i] = -pressCoeff[(fi^1)*n+j]:
1915 // U[i,i] = actualDiag[i] - Σ_{lower j} pressCoeff[fi_L*n+i]
1916 // * pressCoeff[fi_U*n+j]
1917 // / ilu_diag[j]
1918 //
1919 // Preconditioner application M_ILU^{-1} r = z:
1920 // Forward (L y = r, unit lower triangular, no diagonal divide):
1921 // y[i] = r[i] + Σ_{j<i} (pressCoeff[fi_L*n+i] / ilu_diag[j]) * y[j]
1922 // Backward (U z = y):
1923 // z[i] = (y[i] + Σ_{j>i} pressCoeff[fi_U*n+i] * z[j]) / ilu_diag[i]
1924 std::vector<T> ilu_diag(n);
1925 for (std::size_t id = 0; id < n; ++id) {
1926 if (touchesAmbient_[id]) {
1927 ilu_diag[id] = T(1);
1928 continue;
1929 }
1930 ilu_diag[id] = actualDiag[id];
1931 for (unsigned dir = 0; dir < D; ++dir) {
1932 const unsigned fi_L = dir * 2u; // lower face (offset=-1)
1933 const unsigned fi_U =
1934 fi_L + 1u; // upper face (offset=+1, j's face toward i)
1935 const std::size_t j = pressNeighId[fi_L * n + id];
1936 if (j == noNode || ilu_diag[j] <= eps)
1937 continue;
1938 // L[id,j] = A[id,j] / U[j,j] = (-pressCoeff_L) / ilu_diag[j]
1939 // A[j,id] = -pressCoeff[fi_U * n + j] (j's upper-face coefficient
1940 // toward id) ilu_diag[id] -= L[id,j] * A[j,id]
1941 // = (-pressCoeff_L / ilu_diag[j]) * (-pressCoeff_fi_U[j])
1942 // = pressCoeff_L * pressCoeff_fi_U[j] / ilu_diag[j]
1943 // (positive drop)
1944 ilu_diag[id] -=
1945 pressCoeff[fi_L * n + id] * pressCoeff[fi_U * n + j] / ilu_diag[j];
1946 }
1947 if (ilu_diag[id] <= eps)
1948 ilu_diag[id] = actualDiag[id]; // guard non-positive pivot
1949 }
1950
1951 auto applyIlu = [&](const std::vector<SolverT> &in,
1952 std::vector<SolverT> &out) {
1953 std::vector<T> y(n);
1954 // Forward solve: L * y = in (L is unit lower triangular)
1955 for (std::size_t i = 0; i < n; ++i) {
1956 T val = static_cast<T>(in[i]);
1957 for (unsigned dir = 0; dir < D; ++dir) {
1958 const unsigned fi_L = dir * 2u;
1959 const std::size_t j = pressNeighId[fi_L * n + i];
1960 if (j != noNode)
1961 // L[i,j] = -pressCoeff[fi_L*n+i] / ilu_diag[j], subtract
1962 // A[i,j]*y[j]: y[i] -= L[i,j] * y[j] = -(-pressCoeff/ilu_diag[j]) *
1963 // y[j] = +(coeff/ilu) * y[j]
1964 val += (pressCoeff[fi_L * n + i] / ilu_diag[j]) * y[j];
1965 }
1966 y[i] = val; // no diagonal divide (unit lower triangular)
1967 }
1968 // Backward solve: U * z = y
1969 for (std::size_t i = n; i-- > 0;) {
1970 T val = y[i];
1971 for (unsigned dir = 0; dir < D; ++dir) {
1972 const unsigned fi_U = dir * 2u + 1u;
1973 const std::size_t j = pressNeighId[fi_U * n + i];
1974 if (j != noNode)
1975 // U[i,j] = -pressCoeff[fi_U*n+i], subtract U[i,j]*z[j]:
1976 // val -= U[i,j] * z[j] = -(-pressCoeff) * z[j] = +(pressCoeff) *
1977 // z[j]
1978 val += pressCoeff[fi_U * n + i] * static_cast<T>(out[j]);
1979 }
1980 out[i] = static_cast<SolverT>(val / ilu_diag[i]);
1981 }
1982 };
1983
1984 std::vector<SolverT> Ax(n);
1985 pressureMatvec(x, ambientBP, diag, pBC, Ax);
1986 for (std::size_t i = 0; i < n; ++i) {
1987 if (!std::isfinite(static_cast<T>(Ax[i])) ||
1988 std::abs(static_cast<T>(Ax[i])) > solverMax) {
1989 warnBadPressureAssembly("initial pressure matvec", i, nodes[i].index,
1990 static_cast<T>(Ax[i]));
1991 break;
1992 }
1993 }
1994 std::vector<SolverT> r(n), r_hat(n), p(n, SolverT(0)), v(n, SolverT(0)),
1995 y(n), z(n), s(n), t(n);
1996 for (std::size_t i = 0; i < n; ++i) {
1997 r[i] = static_cast<SolverT>(b[i] - Ax[i]);
1998 r_hat[i] = r[i];
1999 }
2000
2001 T rho = T(1), alpha = T(1), omega = T(1);
2002 T pressureResidual = T(0);
2003 unsigned pressureIter = 0;
2004 bool pressureBreakdown = false;
2005 for (std::size_t i = 0; i < n; ++i) {
2006 const T ri = static_cast<T>(r[i]);
2007 if (!std::isfinite(ri)) {
2008 pressureBreakdown = true;
2009 break;
2010 }
2011 pressureResidual = std::max(pressureResidual, std::abs(ri));
2012 }
2013
2014 for (; !pressureBreakdown &&
2015 pressureIter < deformationParameters.pressureIterations;
2016 ++pressureIter) {
2017 T rho_new = T(0);
2018 for (std::size_t i = 0; i < n; ++i)
2019 rho_new += static_cast<T>(r_hat[i]) * static_cast<T>(r[i]);
2020
2021 if (!std::isfinite(rho_new)) {
2022 pressureBreakdown = true;
2023 break;
2024 }
2025 if (std::abs(rho_new) < T(1e-100))
2026 break;
2027 if (!std::isfinite(rho) || !std::isfinite(alpha) ||
2028 !std::isfinite(omega) || std::abs(omega) < T(1e-100)) {
2029 pressureBreakdown = true;
2030 break;
2031 }
2032
2033 const T beta = (rho_new / rho) * (alpha / omega);
2034 if (!std::isfinite(beta)) {
2035 pressureBreakdown = true;
2036 break;
2037 }
2038 rho = rho_new;
2039
2040 for (std::size_t i = 0; i < n; ++i)
2041 p[i] = static_cast<SolverT>(r[i] + beta * (p[i] - omega * v[i]));
2042
2043 applyIlu(p, y);
2044
2045 pressureMatvec(y, ambientBP, diag, pBC, v);
2046
2047 T r_hat_v = T(0);
2048 for (std::size_t i = 0; i < n; ++i)
2049 r_hat_v += static_cast<T>(r_hat[i]) * static_cast<T>(v[i]);
2050 if (!std::isfinite(r_hat_v)) {
2051 pressureBreakdown = true;
2052 break;
2053 }
2054 if (std::abs(r_hat_v) < T(1e-100))
2055 break;
2056
2057 alpha = rho_new / r_hat_v;
2058 if (!std::isfinite(alpha)) {
2059 pressureBreakdown = true;
2060 break;
2061 }
2062
2063 for (std::size_t i = 0; i < n; ++i)
2064 s[i] = static_cast<SolverT>(r[i] - alpha * v[i]);
2065
2066 pressureResidual = T(0);
2067 for (std::size_t i = 0; i < n; ++i)
2068 pressureResidual =
2069 std::max(pressureResidual, std::abs(static_cast<T>(s[i])));
2070 if (!std::isfinite(pressureResidual)) {
2071 pressureBreakdown = true;
2072 break;
2073 }
2074 if (pressureResidual < deformationParameters.pressureTolerance * b_norm) {
2075 for (std::size_t i = 0; i < n; ++i)
2076 x[i] = static_cast<SolverT>(x[i] + alpha * y[i]);
2077 break;
2078 }
2079
2080 applyIlu(s, z);
2081
2082 pressureMatvec(z, ambientBP, diag, pBC, t);
2083
2084 T t_s = T(0), t_t = T(0);
2085 for (std::size_t i = 0; i < n; ++i) {
2086 t_s += static_cast<T>(t[i]) * static_cast<T>(s[i]);
2087 t_t += static_cast<T>(t[i]) * static_cast<T>(t[i]);
2088 }
2089 if (!std::isfinite(t_s) || !std::isfinite(t_t)) {
2090 pressureBreakdown = true;
2091 break;
2092 }
2093 omega = (t_t > T(1e-100)) ? t_s / t_t : T(0);
2094 if (!std::isfinite(omega)) {
2095 pressureBreakdown = true;
2096 break;
2097 }
2098
2099 for (std::size_t i = 0; i < n; ++i) {
2100 x[i] = static_cast<SolverT>(x[i] + alpha * y[i] + omega * z[i]);
2101 r[i] = static_cast<SolverT>(s[i] - omega * t[i]);
2102 }
2103
2104 pressureResidual = T(0);
2105 for (std::size_t i = 0; i < n; ++i)
2106 pressureResidual =
2107 std::max(pressureResidual, std::abs(static_cast<T>(r[i])));
2108 if (!std::isfinite(pressureResidual)) {
2109 pressureBreakdown = true;
2110 break;
2111 }
2112 if (pressureResidual < deformationParameters.pressureTolerance * b_norm)
2113 break;
2114 }
2115
2116 if (pressureBreakdown)
2117 pressureResidual = std::numeric_limits<T>::infinity();
2118
2119 bool finiteSolution = !pressureBreakdown;
2120 for (std::size_t i = 0; i < n; ++i)
2121 if (!std::isfinite(static_cast<T>(x[i])))
2122 finiteSolution = false;
2123
2124 lastPressureIters_ = pressureIter;
2125 lastPressureResidual_ = pressureResidual / b_norm;
2126 if (finiteSolution) {
2127 const T beta = deformationParameters.pressureRelaxation;
2128 const T oneMinB = T(1) - beta;
2129 for (std::size_t i = 0; i < n; ++i)
2130 nodes[i].pressure =
2131 oneMinB * nodes[i].pressure + beta * static_cast<T>(x[i]);
2132 } else {
2133 lastPressureResidual_ = std::numeric_limits<T>::infinity();
2134 }
2135 if (lastPressureResidual_ > deformationParameters.pressureTolerance)
2136 VIENNACORE_LOG_WARNING(
2137 "solvePressure: BiCGSTAB did not converge after " +
2138 std::to_string(lastPressureIters_) + "/" +
2139 std::to_string(deformationParameters.pressureIterations) +
2140 " iterations (residual=" + std::to_string(lastPressureResidual_) +
2141 ", tolerance=" +
2142 std::to_string(deformationParameters.pressureTolerance) + ")");
2143 }
2144
2145 // Fills scalar diag = centerCoefficient and Vec3D rhs = velocitySum for one
2146 // node.
2147 template <class SolverT>
2148 void computeVelocityStencilAt(std::size_t nodeId,
2149 const std::vector<Vec3D<SolverT>> &v, T &diag,
2150 Vec3D<T> &rhs) const {
2151 diag = T(0);
2152 rhs = {T(0), T(0), T(0)};
2153 for (unsigned direction = 0; direction < D; ++direction) {
2154 const auto plus = velocityStencilPoint(v, nodeId, direction, 1);
2155 const auto minus = velocityStencilPoint(v, nodeId, direction, -1);
2156 const T dSum = plus.distance + minus.distance;
2157 const T plusCoeff = T(2) / (plus.distance * dSum);
2158 const T minusCoeff = T(2) / (minus.distance * dSum);
2159 detail::vecAddTo(rhs, detail::vecScaled(plus.value, plusCoeff));
2160 detail::vecAddTo(rhs, detail::vecScaled(minus.value, minusCoeff));
2161 diag += plusCoeff + minusCoeff;
2162 }
2163 }
2164
2166 if (deformationParameters.viscosity <= std::numeric_limits<T>::epsilon())
2167 return;
2168 if (nodes.empty())
2169 return;
2170
2171 using SolverT = T;
2172
2173 const std::size_t n = nodes.size();
2174 const T eps = std::numeric_limits<T>::epsilon();
2175
2176 // Geometry-fixed diagonal, BC constants, and forcing (all in T).
2177 std::vector<T> diag(n);
2178 std::vector<Vec3D<T>> vBC(n), forcing(n);
2179 {
2180 const std::vector<Vec3D<SolverT>> zeros(
2181 n, Vec3D<SolverT>{SolverT(0), SolverT(0), SolverT(0)});
2182#pragma omp parallel for schedule(static)
2183 for (std::size_t i = 0; i < n; ++i) {
2184 computeVelocityStencilAt(i, zeros, diag[i], vBC[i]);
2185 forcing[i] = momentumForcing(nodes[i].index);
2186 }
2187 }
2188 const auto precondDiag = computeVelocityDiagonals();
2189
2190 std::vector<Vec3D<T>> b(n);
2191 T b_norm = T(0);
2192 for (std::size_t i = 0; i < n; ++i) {
2193 for (unsigned c = 0; c < D; ++c) {
2194 b[i][c] = vBC[i][c] - forcing[i][c] / deformationParameters.viscosity;
2195 b_norm = std::max(b_norm, std::abs(b[i][c]));
2196 }
2197 }
2198 if (b_norm < T(1e-100))
2199 b_norm = T(1);
2200
2201 // Initial guess from current node velocities (warm-start), converted to
2202 // SolverT.
2203 std::vector<Vec3D<SolverT>> x(n);
2204 {
2205 const auto vel = collectVelocities();
2206 for (std::size_t i = 0; i < n; ++i)
2207 for (unsigned c = 0; c < D; ++c) {
2208 const T value = vel[i][c];
2209 x[i][c] = static_cast<SolverT>(std::isfinite(value) ? value : T(0));
2210 }
2211 }
2212
2213#ifdef VIENNALS_GPU_BICGSTAB
2214 if (gpu::gpuIsValid(gpuStokesBufs_)) {
2215 const std::size_t nf = 2u * D * n;
2216 if (stokesDiagGpu_.size() != D * n || stokesCoeffGpu_.size() != nf) {
2217 VIENNACORE_LOG_ERROR("OxidationDeformation: Stokes GPU geometry has "
2218 "the wrong size for the current node set.");
2219 }
2220
2221 Timer<> tUpload, tSolve;
2222 std::vector<Vec3D<SolverT>> xSolved(n);
2223 unsigned maxGpuIterations = 0;
2224 double maxGpuResidual = 0.0;
2225
2226 for (unsigned c = 0; c < D; ++c) {
2227 std::vector<double> bGpu(n), xGpu(n);
2228 for (std::size_t i = 0; i < n; ++i) {
2229 bGpu[i] = static_cast<double>(b[i][c]);
2230 xGpu[i] = static_cast<double>(x[i][c]);
2231 }
2232
2233 tUpload.start();
2234 const bool gpuUploadOk = gpu::gpuUploadSolverArrays(
2235 gpuStokesBufs_, stokesDiagGpu_.data() + c * n, bGpu.data(),
2236 stokesCoeffGpu_.data(), static_cast<uint32_t>(n),
2237 stokesCoeffGpu_.size());
2238 tUpload.finish();
2239 if (!gpuUploadOk) {
2240 VIENNACORE_LOG_ERROR("OxidationDeformation: GPU mode was selected, "
2241 "but uploading Stokes solver arrays failed." +
2242 gpuErrorDetail());
2243 }
2244
2245 unsigned gpuIterations = 0;
2246 double gpuResidual = 0.0;
2247 tSolve.start();
2248 const bool gpuConverged = gpu::gpuSolveBiCGSTAB(
2249 gpuStokesBufs_, xGpu.data(), static_cast<double>(eps),
2250 deformationParameters.stokesIterations,
2251 static_cast<double>(deformationParameters.stokesTolerance),
2252 gpuIterations, gpuResidual);
2253 tSolve.finish();
2254
2255 if (!gpuConverged || !std::isfinite(gpuResidual)) {
2256 VIENNACORE_LOG_ERROR(
2257 "OxidationDeformation: Stokes GPU BiCGSTAB failed or produced "
2258 "a non-finite residual for component " +
2259 std::to_string(c) + " (iters=" + std::to_string(gpuIterations) +
2260 ", residual=" + std::to_string(gpuResidual) + ").");
2261 }
2262
2263 maxGpuIterations = std::max(maxGpuIterations, gpuIterations);
2264 maxGpuResidual = std::max(maxGpuResidual, gpuResidual);
2265 for (std::size_t i = 0; i < n; ++i)
2266 xSolved[i][c] = static_cast<SolverT>(xGpu[i]);
2267 }
2268
2269 for (std::size_t i = 0; i < n; ++i)
2270 for (unsigned c = 0; c < D; ++c)
2271 nodes[i].velocity[c] = static_cast<T>(xSolved[i][c]);
2272
2273 lastStokesIters_ = maxGpuIterations;
2274 lastStokesResidual_ = maxGpuResidual / b_norm;
2275
2276 if (Logger::hasDebug()) {
2277 const std::string tag = "stokes n=" + std::to_string(n) +
2278 " iters=" + std::to_string(lastStokesIters_) +
2279 " res=" + std::to_string(lastStokesResidual_) +
2280 " [GPU]";
2281 Logger::getInstance()
2282 .addTiming(tag + " GPU upload", tUpload)
2283 .addTiming(tag + " GPU BiCGSTAB", tSolve)
2284 .print();
2285 }
2286 return;
2287 }
2288#endif
2289
2290 // Stokes SpMV: (Av)[i] = diag[i]*vin[i] - rhs_at_vin[i] + vBC[i], stored as
2291 // SolverT.
2292 auto stokesMatvec = [&](const std::vector<Vec3D<SolverT>> &vin,
2293 std::vector<Vec3D<SolverT>> &Av) {
2294#pragma omp parallel for schedule(static)
2295 for (std::size_t i = 0; i < n; ++i) {
2296 T d;
2297 Vec3D<T> rhs;
2298 computeVelocityStencilAt(i, vin, d, rhs);
2299 for (unsigned c = 0; c < D; ++c)
2300 Av[i][c] =
2301 static_cast<SolverT>(diag[i] * vin[i][c] - rhs[c] + vBC[i][c]);
2302 }
2303 };
2304
2305 // Dot product accumulated in T for numerical stability.
2306 auto vecDot = [&](const std::vector<Vec3D<SolverT>> &a,
2307 const std::vector<Vec3D<SolverT>> &bv) {
2308 T sum = T(0);
2309 for (std::size_t i = 0; i < n; ++i)
2310 for (unsigned c = 0; c < D; ++c) {
2311 const T av = static_cast<T>(a[i][c]);
2312 const T bvVal = static_cast<T>(bv[i][c]);
2313 if (!std::isfinite(av) || !std::isfinite(bvVal))
2314 return std::numeric_limits<T>::quiet_NaN();
2315 sum += av * bvVal;
2316 }
2317 return sum;
2318 };
2319
2320 auto vecMaxAbs = [&](const std::vector<Vec3D<SolverT>> &vin) {
2321 T m = T(0);
2322 for (std::size_t i = 0; i < n; ++i)
2323 for (unsigned c = 0; c < D; ++c) {
2324 const T value = static_cast<T>(vin[i][c]);
2325 if (!std::isfinite(value))
2326 return std::numeric_limits<T>::infinity();
2327 m = std::max(m, std::abs(value));
2328 }
2329 return m;
2330 };
2331
2332 // r = b - A*x
2333 const Vec3D<SolverT> zero3{SolverT(0), SolverT(0), SolverT(0)};
2334 std::vector<Vec3D<SolverT>> Ax(n), r(n), r_hat(n);
2335 std::vector<Vec3D<SolverT>> pv(n, zero3), sv(n, zero3), y(n), z(n), s(n),
2336 t(n);
2337 stokesMatvec(x, Ax);
2338 for (std::size_t i = 0; i < n; ++i)
2339 for (unsigned c = 0; c < D; ++c) {
2340 r[i][c] = static_cast<SolverT>(b[i][c] - Ax[i][c]);
2341 r_hat[i][c] = r[i][c];
2342 }
2343
2344 T rho = T(1), alpha = T(1), omega = T(1);
2345 T velocityResidual = T(0);
2346 unsigned stokesIter = 0;
2347 bool stokesBreakdown = false;
2348 velocityResidual = vecMaxAbs(r);
2349
2350 for (; stokesIter < deformationParameters.stokesIterations; ++stokesIter) {
2351 const T rho_new = vecDot(r_hat, r);
2352 if (!std::isfinite(rho_new)) {
2353 stokesBreakdown = true;
2354 break;
2355 }
2356 if (std::abs(rho_new) < T(1e-100))
2357 break;
2358 if (!std::isfinite(rho) || !std::isfinite(alpha) ||
2359 !std::isfinite(omega) || std::abs(omega) < T(1e-100)) {
2360 stokesBreakdown = true;
2361 break;
2362 }
2363
2364 const T beta = (rho_new / rho) * (alpha / omega);
2365 if (!std::isfinite(beta)) {
2366 stokesBreakdown = true;
2367 break;
2368 }
2369 rho = rho_new;
2370
2371 for (std::size_t i = 0; i < n; ++i)
2372 for (unsigned c = 0; c < D; ++c)
2373 pv[i][c] = static_cast<SolverT>(r[i][c] +
2374 beta * (pv[i][c] - omega * sv[i][c]));
2375
2376 for (std::size_t i = 0; i < n; ++i)
2377 for (unsigned c = 0; c < D; ++c) {
2378 const T pvc = pv[i][c];
2379 const T pcDiag = precondDiag[i][c];
2380 y[i][c] = static_cast<SolverT>((pcDiag > eps) ? pvc / pcDiag : pvc);
2381 }
2382
2383 stokesMatvec(y, sv);
2384
2385 const T r_hat_v = vecDot(r_hat, sv);
2386 if (!std::isfinite(r_hat_v)) {
2387 stokesBreakdown = true;
2388 break;
2389 }
2390 if (std::abs(r_hat_v) < T(1e-100))
2391 break;
2392
2393 alpha = rho_new / r_hat_v;
2394 if (!std::isfinite(alpha)) {
2395 stokesBreakdown = true;
2396 break;
2397 }
2398
2399 for (std::size_t i = 0; i < n; ++i)
2400 for (unsigned c = 0; c < D; ++c)
2401 s[i][c] = static_cast<SolverT>(r[i][c] - alpha * sv[i][c]);
2402
2403 velocityResidual = vecMaxAbs(s);
2404 if (!std::isfinite(velocityResidual)) {
2405 stokesBreakdown = true;
2406 break;
2407 }
2408 if (velocityResidual < deformationParameters.stokesTolerance * b_norm) {
2409 for (std::size_t i = 0; i < n; ++i)
2410 for (unsigned c = 0; c < D; ++c)
2411 x[i][c] = static_cast<SolverT>(x[i][c] + alpha * y[i][c]);
2412 break;
2413 }
2414
2415 for (std::size_t i = 0; i < n; ++i)
2416 for (unsigned c = 0; c < D; ++c) {
2417 const T sc = s[i][c];
2418 const T pcDiag = precondDiag[i][c];
2419 z[i][c] = static_cast<SolverT>((pcDiag > eps) ? sc / pcDiag : sc);
2420 }
2421
2422 stokesMatvec(z, t);
2423
2424 const T t_s = vecDot(t, s);
2425 const T t_t = vecDot(t, t);
2426 if (!std::isfinite(t_s) || !std::isfinite(t_t)) {
2427 stokesBreakdown = true;
2428 break;
2429 }
2430 omega = (t_t > T(1e-100)) ? t_s / t_t : T(0);
2431 if (!std::isfinite(omega)) {
2432 stokesBreakdown = true;
2433 break;
2434 }
2435
2436 for (std::size_t i = 0; i < n; ++i)
2437 for (unsigned c = 0; c < D; ++c) {
2438 x[i][c] =
2439 static_cast<SolverT>(x[i][c] + alpha * y[i][c] + omega * z[i][c]);
2440 r[i][c] = static_cast<SolverT>(s[i][c] - omega * t[i][c]);
2441 }
2442
2443 velocityResidual = vecMaxAbs(r);
2444 if (!std::isfinite(velocityResidual)) {
2445 stokesBreakdown = true;
2446 break;
2447 }
2448 if (velocityResidual < deformationParameters.stokesTolerance * b_norm)
2449 break;
2450 }
2451
2452 if (stokesBreakdown)
2453 velocityResidual = std::numeric_limits<T>::infinity();
2454
2455 bool finiteSolution = !stokesBreakdown;
2456 for (std::size_t i = 0; i < n; ++i)
2457 for (unsigned c = 0; c < D; ++c)
2458 if (!std::isfinite(static_cast<T>(x[i][c])))
2459 finiteSolution = false;
2460
2461 if (finiteSolution) {
2462 for (std::size_t i = 0; i < n; ++i)
2463 for (unsigned c = 0; c < D; ++c)
2464 nodes[i].velocity[c] = static_cast<T>(x[i][c]);
2465 }
2466
2467 lastStokesIters_ = stokesIter;
2468 lastStokesResidual_ = finiteSolution ? velocityResidual / b_norm
2469 : std::numeric_limits<T>::infinity();
2470 if (lastStokesResidual_ > deformationParameters.stokesTolerance)
2471 VIENNACORE_LOG_WARNING(
2472 "solveStokesVelocity: BiCGSTAB did not converge after " +
2473 std::to_string(lastStokesIters_) + "/" +
2474 std::to_string(deformationParameters.stokesIterations) +
2475 " iterations (residual=" + std::to_string(lastStokesResidual_) +
2476 ", tolerance=" +
2477 std::to_string(deformationParameters.stokesTolerance) + ")");
2478 }
2479
2480 std::vector<Vec3D<T>> collectVelocities() const {
2481 std::vector<Vec3D<T>> velocities;
2482 velocities.reserve(nodes.size());
2483 for (const auto &node : nodes)
2484 velocities.push_back(node.velocity);
2485 return velocities;
2486 }
2487
2488 std::vector<T> collectPressures() const {
2489 std::vector<T> pressures;
2490 pressures.reserve(nodes.size());
2491 for (const auto &node : nodes)
2492 pressures.push_back(node.pressure);
2493 return pressures;
2494 }
2495
2496 template <class SolverT>
2497 StencilPoint<T>
2498 pressureStencilPoint(const std::vector<SolverT> &pressure,
2499 const std::vector<T> &ambientBoundaryPressure,
2500 std::size_t nodeId, unsigned direction,
2501 int offset) const {
2502 const auto &node = nodes[nodeId];
2503 IndexType neighbor = node.index;
2504 neighbor[direction] += offset;
2505
2506 if (!inBounds(neighbor))
2507 return {static_cast<T>(pressure[nodeId]), gridDelta};
2508
2509 const std::size_t neighborId = nodeLookupFlat[linearIndex(neighbor)];
2510 if (neighborId != noNode) {
2511 if (touchesAmbient_[neighborId])
2512 return {ambientBoundaryPressure[neighborId], gridDelta};
2513 return {static_cast<T>(pressure[neighborId]), gridDelta};
2514 }
2515
2516 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
2517 const std::size_t nn = nodes.size();
2518 const Boundary faceType = faceBCTypes_[fi * nn + nodeId];
2519 const T faceDist = faceBCDists_[fi * nn + nodeId];
2520 if (faceType == Boundary::AMBIENT)
2521 return {ambientBoundaryPressure[nodeId], faceDist};
2522 // Reaction interface: Neumann ∂p/∂n=0 (solid-wall BC for pressure).
2523 // The ghost node takes the same value as the interior node, giving zero
2524 // contribution to the Laplacian stencil. The pressure is anchored only by
2525 // the AMBIENT Dirichlet (p=0 at the free surface), which is always present
2526 // for any connected oxide region.
2527 if (faceType == Boundary::REACTION)
2528 return {static_cast<T>(pressure[nodeId]), faceDist};
2529 if (faceType == Boundary::MASK)
2530 return {maskPressureBoundary(node.index, direction, offset,
2531 static_cast<T>(pressure[nodeId])),
2532 faceDist};
2533
2534 return {static_cast<T>(pressure[nodeId]), gridDelta};
2535 }
2536
2537 template <class SolverT>
2538 StencilPoint<Vec3D<T>>
2539 velocityStencilPoint(const std::vector<Vec3D<SolverT>> &velocity,
2540 std::size_t nodeId, unsigned direction,
2541 int offset) const {
2542 const auto &node = nodes[nodeId];
2543 IndexType neighbor = node.index;
2544 neighbor[direction] += offset;
2545
2546 const auto toT = [](const Vec3D<SolverT> &v) -> Vec3D<T> {
2547 return {static_cast<T>(v[0]), static_cast<T>(v[1]), static_cast<T>(v[2])};
2548 };
2549
2550 if (!inBounds(neighbor))
2551 return {toT(velocity[nodeId]), gridDelta};
2552
2553 const std::size_t neighborId = nodeLookupFlat[linearIndex(neighbor)];
2554 if (neighborId != noNode)
2555 return {toT(velocity[neighborId]), gridDelta};
2556
2557 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
2558 const std::size_t nn = nodes.size();
2559 const Boundary faceType = faceBCTypes_[fi * nn + nodeId];
2560 const T faceDist = faceBCDists_[fi * nn + nodeId];
2561 if (faceType == Boundary::REACTION)
2562 return {reactionBoundaryVelocity(node.index), faceDist};
2563 if (faceType == Boundary::AMBIENT)
2564 return {freeSurfaceVelocityBoundary(node.index, direction, offset,
2565 faceDist, toT(velocity[nodeId])),
2566 faceDist};
2567 if (faceType == Boundary::MASK)
2568 return {maskVelocityBoundary(node.index, toT(velocity[nodeId])),
2569 faceDist};
2570
2571 return {toT(velocity[nodeId]), gridDelta};
2572 }
2573
2574 StencilPoint<T> currentPressureStencilPoint(std::size_t nodeId,
2575 unsigned direction,
2576 int offset) const {
2577 const auto &node = nodes[nodeId];
2578 IndexType neighbor = node.index;
2579 neighbor[direction] += offset;
2580
2581 if (!inBounds(neighbor))
2582 return {node.pressure, gridDelta};
2583
2584 const std::size_t neighborId = nodeLookupFlat[linearIndex(neighbor)];
2585 if (neighborId != noNode)
2586 return {nodes[neighborId].pressure, gridDelta};
2587
2588 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
2589 const std::size_t nn = nodes.size();
2590 const Boundary faceType = faceBCTypes_[fi * nn + nodeId];
2591 const T faceDist = faceBCDists_[fi * nn + nodeId];
2592 if (faceType == Boundary::AMBIENT)
2593 return {freeSurfacePressureBoundary(node.index), faceDist};
2594 if (faceType == Boundary::REACTION)
2595 return {node.pressure, faceDist}; // Neumann ∂p/∂n=0
2596 if (faceType == Boundary::MASK)
2597 return {
2598 maskPressureBoundary(node.index, direction, offset, node.pressure),
2599 faceDist};
2600
2601 return {node.pressure, gridDelta};
2602 }
2603
2604 StencilPoint<Vec3D<T>> currentVelocityStencilPoint(std::size_t nodeId,
2605 unsigned direction,
2606 int offset) const {
2607 const auto &node = nodes[nodeId];
2608 IndexType neighbor = node.index;
2609 neighbor[direction] += offset;
2610
2611 if (!inBounds(neighbor))
2612 return {node.velocity, gridDelta};
2613
2614 const std::size_t neighborId = nodeLookupFlat[linearIndex(neighbor)];
2615 if (neighborId != noNode)
2616 return {nodes[neighborId].velocity, gridDelta};
2617
2618 const unsigned fi = direction * 2u + (offset == 1 ? 1u : 0u);
2619 const std::size_t nn = nodes.size();
2620 const Boundary faceType = faceBCTypes_[fi * nn + nodeId];
2621 const T faceDist = faceBCDists_[fi * nn + nodeId];
2622 if (faceType == Boundary::REACTION)
2623 return {reactionBoundaryVelocity(node.index), faceDist};
2624 if (faceType == Boundary::AMBIENT)
2625 return {freeSurfaceVelocityBoundary(node.index, direction, offset,
2626 faceDist, node.velocity),
2627 faceDist};
2628 if (faceType == Boundary::MASK)
2629 return {maskVelocityBoundary(node.index, node.velocity), faceDist};
2630
2631 return {node.velocity, gridDelta};
2632 }
2633
2634 T maxVelocityChange(const std::vector<Vec3D<T>> &previous) const {
2635 T maxChange = 0.;
2636 T maxVelocity = 0.;
2637 const auto count = std::min(previous.size(), nodes.size());
2638 for (std::size_t i = 0; i < count; ++i) {
2639 for (unsigned j = 0; j < D; ++j) {
2640 maxChange = std::max(maxChange,
2641 std::abs(nodes[i].velocity[j] - previous[i][j]));
2642 maxVelocity = std::max(maxVelocity, std::abs(nodes[i].velocity[j]));
2643 }
2644 }
2645
2646 if (maxVelocity <= std::numeric_limits<T>::epsilon())
2647 return maxChange;
2648 return maxChange / maxVelocity;
2649 }
2650
2651 T maxPressureChange(const std::vector<T> &previous) const {
2652 T maxChange = 0.;
2653 T maxPressure = 0.;
2654 const auto count = std::min(previous.size(), nodes.size());
2655 for (std::size_t i = 0; i < count; ++i) {
2656 maxChange =
2657 std::max(maxChange, std::abs(nodes[i].pressure - previous[i]));
2658 maxPressure = std::max(maxPressure, std::abs(nodes[i].pressure));
2659 }
2660
2661 if (maxPressure <= std::numeric_limits<T>::epsilon())
2662 return maxChange;
2663 return maxChange / maxPressure;
2664 }
2665
2666 std::array<T, 9>
2667 currentBoundaryDeviatoricStress(const IndexType &index) const {
2668 const auto strainRate = strainRateTensorAt(index);
2669 const auto deviatoricRate =
2670 deviatoricTensor(strainRate, divergenceAt(index));
2671 const auto previousStress = previousDeviatoricStress(index);
2672 const T relaxationTime = effectiveStressRelaxationTime();
2673 const T decay =
2674 (relaxationTime <= std::numeric_limits<T>::epsilon())
2675 ? T(0)
2676 : std::exp(-deformationParameters.stressTimeStep / relaxationTime);
2677
2678 std::array<T, 9> deviatoricStress{};
2679 for (unsigned i = 0; i < 9; ++i) {
2680 const T viscousStress =
2681 T(2) * deformationParameters.viscosity * deviatoricRate[i];
2682 deviatoricStress[i] =
2683 decay * previousStress[i] + (T(1) - decay) * viscousStress;
2684 }
2685
2686 return deviatoricStress;
2687 }
2688
2689 T freeSurfacePressureBoundary(const IndexType &index) const {
2690 const auto normal = interfaceNormal(index, Boundary::AMBIENT);
2691 const auto deviatoricStress = currentBoundaryDeviatoricStress(index);
2692
2693 return deformationParameters.ambientPressure +
2694 normalStress(deviatoricStress, normal);
2695 }
2696
2697 T maskPressureBoundary(const IndexType & /*index*/, unsigned /*direction*/,
2698 int /*offset*/, T fallbackPressure) const {
2699 return fallbackPressure;
2700 }
2701
2702 Vec3D<T> freeSurfaceVelocityBoundary(const IndexType &index,
2703 unsigned direction, int offset,
2704 T distance,
2705 const Vec3D<T> &interiorVelocity) const {
2706 Vec3D<T> boundaryVelocity = interiorVelocity;
2707 const auto normal = interfaceNormal(index, Boundary::AMBIENT);
2708 const auto deviatoricStress = deviatoricStressAt(index);
2709 const T pressure = pressureAt(index);
2710
2711 Vec3D<T> deviatoricTraction{0., 0., 0.};
2712 for (unsigned component = 0; component < D; ++component) {
2713 for (unsigned j = 0; j < D; ++j)
2714 deviatoricTraction[component] +=
2715 deviatoricStress[tensorIndex(component, j)] * normal[j];
2716 }
2717
2718 for (unsigned component = 0; component < D; ++component) {
2719 const T normalTraction =
2720 pressure * normal[component] - deviatoricTraction[component];
2721 const T faceDerivative = normalTraction * normal[direction] /
2722 std::max(deformationParameters.viscosity,
2723 std::numeric_limits<T>::epsilon());
2724 boundaryVelocity[component] +=
2725 static_cast<T>(offset) * distance * faceDerivative;
2726 }
2727
2728 return boundaryVelocity;
2729 }
2730
2731 Vec3D<T> maskVelocityBoundary(const IndexType &index,
2732 const Vec3D<T> &interiorVelocity) const {
2733 if (maskVelocityField != nullptr) {
2734 Vec3D<T> coordinate{0., 0., 0.};
2735 for (unsigned i = 0; i < D; ++i)
2736 coordinate[i] = index[i] * gridDelta;
2737 return maskVelocityField->getVectorVelocity(
2738 coordinate, deformationParameters.material, {0., 0., 0.}, 0);
2739 }
2740 return {0., 0., 0.};
2741 }
2742
2744 avgExpansionSpeed_ = 0.;
2745 if (nodes.empty())
2746 return;
2747
2748 ConstSparseIterator reactionIt(reactionInterface->getDomain());
2749 ConstSparseIterator ambientIt(ambientInterface->getDomain());
2750 auto maskIt = makeMaskIterator();
2751 std::size_t count = 0;
2752
2753 for (const auto &node : nodes) {
2754 bool touchesReactionBoundary = false;
2755 for (unsigned direction = 0; direction < D; ++direction) {
2756 for (int offset : {-1, 1}) {
2757 IndexType neighbor = node.index;
2758 neighbor[direction] += offset;
2759 if (!inBounds(neighbor))
2760 continue;
2761
2762 if (lookupNode(neighbor) != noNode)
2763 continue;
2764
2765 if (classifyBoundary(reactionIt, ambientIt, maskIt, node.index,
2766 neighbor) == Boundary::REACTION) {
2767 touchesReactionBoundary = true;
2768 break;
2769 }
2770 }
2771 if (touchesReactionBoundary)
2772 break;
2773 }
2774
2775 if (touchesReactionBoundary) {
2776 Vec3D<T> coordinate{0., 0., 0.};
2777 for (unsigned i = 0; i < D; ++i)
2778 coordinate[i] = node.index[i] * gridDelta;
2779 avgExpansionSpeed_ +=
2780 (oxidationParameters.expansionCoefficient - T(1)) *
2781 std::abs(diffusionField->getScalarVelocity(coordinate, 0,
2782 {0., 0., 0.}, 0));
2783 ++count;
2784 }
2785 }
2786
2787 if (count > 0)
2788 avgExpansionSpeed_ /= static_cast<T>(count);
2789 }
2790
2791 Vec3D<T> reactionBoundaryVelocity(const IndexType &index) const {
2792 Vec3D<T> coordinate{0., 0., 0.};
2793 for (unsigned i = 0; i < D; ++i)
2794 coordinate[i] = index[i] * gridDelta;
2795 const T expansionVelocity = localExpansionSpeed(coordinate);
2796 return detail::vecScaled(reactionNormal(index),
2797 reactionSign * expansionVelocity);
2798 }
2799
2800 Vec3D<T> unresolvedAmbientVelocity(const Vec3D<T> &coordinate) const {
2801 if (diffusionField == nullptr || ambientInterface == nullptr)
2802 return {0., 0., 0.};
2803
2804 IndexType index;
2805 for (unsigned i = 0; i < D; ++i)
2806 index[i] = std::llround(coordinate[i] / gridDelta);
2807
2808 ConstSparseIterator ambientIt(ambientInterface->getDomain());
2809 const auto normal = levelSetNormal(ambientIt, index);
2810 return detail::vecScaled(normal, localExpansionSpeed(coordinate));
2811 }
2812
2814 Vec3D<T> maxVelocity{0., 0., 0.};
2815 if (ambientInterface == nullptr || diffusionField == nullptr)
2816 return maxVelocity;
2817
2818 ConstSparseIterator ambientIt(ambientInterface->getDomain());
2819 for (; !ambientIt.isFinished(); ++ambientIt) {
2820 if (!ambientIt.isDefined())
2821 continue;
2822
2823 Vec3D<T> coordinate{0., 0., 0.};
2824 const auto &index = ambientIt.getStartIndices();
2825 for (unsigned d = 0; d < D; ++d)
2826 coordinate[d] = index[d] * gridDelta;
2827
2828 const auto velocity = unresolvedAmbientVelocity(coordinate);
2829 for (unsigned d = 0; d < D; ++d)
2830 maxVelocity[d] = std::max(maxVelocity[d], std::abs(velocity[d]));
2831 }
2832 return maxVelocity;
2833 }
2834
2835 T divergenceAt(const IndexType &index) const {
2836 T divergence = 0.;
2837 for (unsigned i = 0; i < D; ++i) {
2838 divergence += velocityDerivative(index, i, i);
2839 }
2840 return divergence;
2841 }
2842
2843 Vec3D<T> pressureGradient(const IndexType &index) const {
2844 Vec3D<T> gradient{0., 0., 0.};
2845 for (unsigned i = 0; i < D; ++i)
2846 gradient[i] = pressureDerivative(index, i);
2847 return gradient;
2848 }
2849
2850 Vec3D<T> momentumForcing(const IndexType &index) const {
2851 Vec3D<T> forcing = pressureGradient(index);
2852 const auto stressDivergence = deviatoricStressDivergence(index);
2853 for (unsigned i = 0; i < D; ++i)
2854 forcing[i] -= stressDivergence[i];
2855 return forcing;
2856 }
2857
2858 Vec3D<T> deviatoricStressDivergence(const IndexType &index) const {
2859 Vec3D<T> divergence{0., 0., 0.};
2860 for (unsigned component = 0; component < D; ++component) {
2861 for (unsigned direction = 0; direction < D; ++direction) {
2862 IndexType pos = index;
2863 IndexType neg = index;
2864 pos[direction] += 1;
2865 neg[direction] -= 1;
2866 const auto posStress = deviatoricStressAt(pos);
2867 const auto negStress = deviatoricStressAt(neg);
2868 divergence[component] +=
2869 (posStress[tensorIndex(component, direction)] -
2870 negStress[tensorIndex(component, direction)]) /
2871 (T(2) * gridDelta);
2872 }
2873 }
2874 return divergence;
2875 }
2876
2877 std::array<T, 9> deviatoricStressAt(const IndexType &index) const {
2878 if (!inBounds(index))
2879 return {};
2880
2881 const std::size_t nodeId = lookupNode(index);
2882 if (nodeId == noNode)
2883 return {};
2884
2885 std::array<T, 9> deviatoric = nodes[nodeId].stressTensor;
2886 for (unsigned i = 0; i < 3; ++i)
2887 deviatoric[tensorIndex(i, i)] += nodes[nodeId].pressure;
2888 return deviatoric;
2889 }
2890
2891 T pressureAt(const IndexType &index) const {
2892 if (!inBounds(index))
2893 return deformationParameters.ambientPressure;
2894
2895 const std::size_t nodeId = lookupNode(index);
2896 if (nodeId == noNode)
2897 return deformationParameters.ambientPressure;
2898 return nodes[nodeId].pressure;
2899 }
2900
2901 T localExpansionSpeed(const Vec3D<T> &coordinate) const {
2902 return (oxidationParameters.expansionCoefficient - T(1)) *
2903 std::abs(diffusionField->getScalarVelocity(coordinate, 0,
2904 {0., 0., 0.}, 0));
2905 }
2906
2907 Vec3D<T> reactionNormal(const IndexType &index) const {
2908 ConstSparseIterator reactionIt(reactionInterface->getDomain());
2909 return levelSetNormal(reactionIt, index);
2910 }
2911
2912 Vec3D<T> interfaceNormal(const IndexType &index, Boundary boundary) const {
2913 if (boundary == Boundary::AMBIENT) {
2914 ConstSparseIterator ambientIt(ambientInterface->getDomain());
2915 return levelSetNormal(ambientIt, index);
2916 }
2917 if (boundary == Boundary::MASK && maskInterface != nullptr) {
2918 ConstSparseIterator maskIt(maskInterface->getDomain());
2919 return levelSetNormal(maskIt, index);
2920 }
2921
2922 ConstSparseIterator reactionIt(reactionInterface->getDomain());
2923 return levelSetNormal(reactionIt, index);
2924 }
2925
2926 Vec3D<T> levelSetNormal(ConstSparseIterator &levelSetIt,
2927 const IndexType &index) const {
2928 Vec3D<T> normal{0., 0., 0.};
2929 T norm = 0.;
2930
2931 for (unsigned i = 0; i < D; ++i) {
2932 IndexType pos = index;
2933 IndexType neg = index;
2934 pos[i] += 1;
2935 neg[i] -= 1;
2936 if (!inBounds(pos))
2937 pos = index;
2938 if (!inBounds(neg))
2939 neg = index;
2940 normal[i] = detail::clampLevelSetPhi(valueAt(levelSetIt, pos)) -
2941 detail::clampLevelSetPhi(valueAt(levelSetIt, neg));
2942 norm += normal[i] * normal[i];
2943 }
2944
2945 if (norm <= std::numeric_limits<T>::epsilon()) {
2946 normal = Vec3D<T>{0., 0., 0.};
2947 normal[D - 1] = 1.;
2948 return normal;
2949 }
2950
2951 norm = std::sqrt(norm);
2952 for (unsigned i = 0; i < D; ++i)
2953 normal[i] /= norm;
2954 return normal;
2955 }
2956
2958#pragma omp parallel for schedule(static)
2959 for (std::size_t i = 0; i < nodes.size(); ++i)
2960 nodes[i].strainTrace = divergenceAt(nodes[i].index);
2961 }
2962
2964 const T relaxationTime = effectiveStressRelaxationTime();
2965 const T decay =
2966 (relaxationTime <= std::numeric_limits<T>::epsilon())
2967 ? T(0)
2968 : std::exp(-deformationParameters.stressTimeStep / relaxationTime);
2969
2970 // Per-node computation is independent; collect history keys into a vector
2971 // to avoid concurrent map writes, then build the map sequentially below.
2972 std::vector<std::pair<IndexType, std::array<T, 9>>> historyEntries(
2973 nodes.size());
2974#pragma omp parallel for schedule(static)
2975 for (std::size_t i = 0; i < nodes.size(); ++i) {
2976 auto &node = nodes[i];
2977 node.strainRateTensor = strainRateTensorAt(node.index);
2978 const auto deviatoricRate =
2979 deviatoricTensor(node.strainRateTensor, node.strainTrace);
2980 const auto previousStress = previousDeviatoricStress(node.index);
2981
2982 std::array<T, 9> deviatoricStress{};
2983 for (unsigned j = 0; j < 9; ++j) {
2984 const T viscousStress =
2985 T(2) * deformationParameters.viscosity * deviatoricRate[j];
2986 deviatoricStress[j] =
2987 decay * previousStress[j] + (T(1) - decay) * viscousStress;
2988 }
2989
2990 node.stressTensor = deviatoricStress;
2991 for (unsigned j = 0; j < 3; ++j)
2992 node.stressTensor[tensorIndex(j, j)] -= node.pressure;
2993
2994 node.vonMisesStress = vonMisesFromDeviatoric(deviatoricStress);
2995 historyEntries[i] = {node.index, deviatoricStress};
2996 }
2997
2998 std::unordered_map<IndexType, std::array<T, 9>, detail::IndexTypeHasher<D>>
2999 nextHistory;
3000 nextHistory.reserve(nodes.size());
3001 for (const auto &entry : historyEntries)
3002 nextHistory[entry.first] = entry.second;
3003 deviatoricStressHistory.swap(nextHistory);
3004 }
3005
3006 std::array<T, 9> strainRateTensorAt(const IndexType &index) const {
3007 std::array<T, 9> tensor{};
3008 for (unsigned i = 0; i < D; ++i) {
3009 for (unsigned j = 0; j < D; ++j) {
3010 tensor[tensorIndex(i, j)] = T(0.5) * (velocityDerivative(index, i, j) +
3011 velocityDerivative(index, j, i));
3012 }
3013 }
3014 return tensor;
3015 }
3016
3017 T velocityDerivative(const IndexType &index, unsigned component,
3018 unsigned direction) const {
3019 const std::size_t nodeId = lookupNode(index);
3020 if (nodeId == noNode)
3021 return 0.;
3022
3023 const auto plus = currentVelocityStencilPoint(nodeId, direction, 1);
3024 const auto minus = currentVelocityStencilPoint(nodeId, direction, -1);
3025 return firstDerivative(
3026 minus.value[component], nodes[nodeId].velocity[component],
3027 plus.value[component], minus.distance, plus.distance);
3028 }
3029
3030 T pressureDerivative(const IndexType &index, unsigned direction) const {
3031 const std::size_t nodeId = lookupNode(index);
3032 if (nodeId == noNode)
3033 return 0.;
3034
3035 const auto plus = currentPressureStencilPoint(nodeId, direction, 1);
3036 const auto minus = currentPressureStencilPoint(nodeId, direction, -1);
3037 return firstDerivative(minus.value, nodes[nodeId].pressure, plus.value,
3038 minus.distance, plus.distance);
3039 }
3040
3041 std::array<T, 9> deviatoricTensor(const std::array<T, 9> &tensor,
3042 T trace) const {
3043 std::array<T, 9> result = tensor;
3044 const T mean = trace / T(3);
3045 for (unsigned i = 0; i < 3; ++i)
3046 result[tensorIndex(i, i)] -= mean;
3047 return result;
3048 }
3049
3050 std::array<T, 9> previousDeviatoricStress(const IndexType &index) const {
3051 const auto found = deviatoricStressHistory.find(index);
3052 if (found == deviatoricStressHistory.end())
3053 return {};
3054 return found->second;
3055 }
3056
3058 if (deformationParameters.stressRelaxationTime > T(0))
3059 return deformationParameters.stressRelaxationTime;
3060 if (deformationParameters.shearModulus > std::numeric_limits<T>::epsilon())
3061 return deformationParameters.viscosity /
3062 deformationParameters.shearModulus;
3063 return T(0);
3064 }
3065
3066 T vonMisesFromDeviatoric(const std::array<T, 9> &deviatoricStress) const {
3067 T doubleContraction = 0.;
3068 for (unsigned i = 0; i < 3; ++i) {
3069 for (unsigned j = 0; j < 3; ++j) {
3070 const T value = T(0.5) * (deviatoricStress[tensorIndex(i, j)] +
3071 deviatoricStress[tensorIndex(j, i)]);
3072 doubleContraction += value * value;
3073 }
3074 }
3075 return std::sqrt(T(1.5) * doubleContraction);
3076 }
3077
3078 T normalStress(const std::array<T, 9> &tensor, const Vec3D<T> &normal) const {
3079 T result = 0.;
3080 for (unsigned i = 0; i < 3; ++i) {
3081 for (unsigned j = 0; j < 3; ++j)
3082 result += normal[i] * tensor[tensorIndex(i, j)] * normal[j];
3083 }
3084 return result;
3085 }
3086
3087 Boundary classifyBoundary(ConstSparseIterator &reactionIt,
3088 ConstSparseIterator &ambientIt,
3089 ConstSparseIterator &maskIt,
3090 const IndexType &inside,
3091 const IndexType &outside) const {
3092 return boundaryIntersection(reactionIt, ambientIt, maskIt, inside, outside)
3093 .boundary;
3094 }
3095
3096 BoundaryIntersection boundaryIntersection(ConstSparseIterator &reactionIt,
3097 ConstSparseIterator &ambientIt,
3098 ConstSparseIterator &maskIt,
3099 const IndexType &inside,
3100 const IndexType &outside) const {
3101 const T reactionInside = valueAt(reactionIt, inside);
3102 const T reactionOutside = valueAt(reactionIt, outside);
3103 const T ambientInside = valueAt(ambientIt, inside);
3104 const T ambientOutside = valueAt(ambientIt, outside);
3105 const T maskInside = valueAtMask(maskIt, inside);
3106 const T maskOutside = valueAtMask(maskIt, outside);
3107
3108 const bool reactionCrosses = crosses(reactionInside, reactionOutside);
3109 const bool ambientCrosses = crosses(ambientInside, ambientOutside);
3110 const bool maskCrosses =
3111 maskInterface != nullptr && crosses(maskInside, maskOutside);
3112
3113 if (!reactionCrosses && !ambientCrosses && !maskCrosses)
3114 return {Boundary::NONE, gridDelta};
3115 if (reactionCrosses && !ambientCrosses && !maskCrosses)
3116 return {Boundary::REACTION,
3117 crossingDistance(reactionInside, reactionOutside)};
3118 if (!reactionCrosses && ambientCrosses && !maskCrosses)
3120 maskInside, maskOutside,
3121 crossingDistance(ambientInside, ambientOutside));
3122 if (!reactionCrosses && !ambientCrosses && maskCrosses)
3123 return {Boundary::MASK, crossingDistance(maskInside, maskOutside)};
3124
3125 const T reactionDistance =
3126 reactionCrosses ? crossingDistance(reactionInside, reactionOutside)
3127 : std::numeric_limits<T>::max();
3128 const T ambientDistance =
3129 ambientCrosses ? crossingDistance(ambientInside, ambientOutside)
3130 : std::numeric_limits<T>::max();
3131 const T maskDistance = maskCrosses
3132 ? crossingDistance(maskInside, maskOutside)
3133 : std::numeric_limits<T>::max();
3134 if (reactionDistance <= ambientDistance && reactionDistance <= maskDistance)
3135 return {Boundary::REACTION, reactionDistance};
3136 if (ambientDistance != std::numeric_limits<T>::max()) {
3137 const auto maskedAmbient =
3138 ambientCrossingInsideMask(maskInside, maskOutside, ambientDistance);
3139 if (maskedAmbient.boundary == Boundary::MASK)
3140 return maskedAmbient;
3141 }
3142 if (maskDistance <= ambientDistance)
3143 return {Boundary::MASK, maskDistance};
3144 return {Boundary::AMBIENT, ambientDistance};
3145 }
3146
3147 bool touchesBoundary(ConstSparseIterator &reactionIt,
3148 ConstSparseIterator &ambientIt,
3149 ConstSparseIterator &maskIt, const IndexType &index,
3150 Boundary requestedBoundary) const {
3151 for (unsigned direction = 0; direction < D; ++direction) {
3152 for (int offset : {-1, 1}) {
3153 IndexType neighbor = index;
3154 neighbor[direction] += offset;
3155 if (!inBounds(neighbor))
3156 continue;
3157
3158 if (lookupNode(neighbor) != noNode)
3159 continue;
3160
3161 if (classifyBoundary(reactionIt, ambientIt, maskIt, index, neighbor) ==
3162 requestedBoundary)
3163 return true;
3164 }
3165 }
3166 return false;
3167 }
3168
3169 bool isInsideOxide(T reactionPhi, T ambientPhi) const {
3170 // GeometricAdvect can leave a tiny positive residual (~4*epsilon) when the
3171 // interface lands exactly on a grid point at non-zero coordinates, because
3172 // k*gridDelta is not exactly representable in floating point. Allow a
3173 // tolerance of 1e-9 grid units so that grid points on the surface (phi≈0)
3174 // are correctly classified as inside the oxide.
3175 constexpr T eps = T(1e-9);
3176 return reactionSign * reactionPhi >= -eps &&
3177 ambientSign * ambientPhi >= -eps;
3178 }
3179
3180 ConstSparseIterator makeMaskIterator() const {
3181 if (maskInterface == nullptr)
3182 return ConstSparseIterator(reactionInterface->getDomain());
3183 return ConstSparseIterator(maskInterface->getDomain());
3184 }
3185
3186 bool isInsideMask(ConstSparseIterator &maskIt, const IndexType &index) const {
3187 if (maskInterface == nullptr)
3188 return false;
3189 return maskSign * valueAt(maskIt, index) >= 0.;
3190 }
3191
3192 T valueAtMask(ConstSparseIterator &maskIt, const IndexType &index) const {
3193 if (maskInterface == nullptr)
3194 return std::numeric_limits<T>::max();
3195 return valueAt(maskIt, index);
3196 }
3197
3198 BoundaryIntersection ambientCrossingInsideMask(T maskInside, T maskOutside,
3199 T distance) const {
3200 if (isMaskAtCrossing(maskInside, maskOutside, distance))
3201 return {Boundary::MASK, distance};
3202 // Outer node is wholly inside the mask body: the oxide/gas surface has
3203 // drifted into the nitride. Apply mask Dirichlet BC (not traction-free)
3204 // so the deformation solver does not advance the surface further in.
3205 // Mirrors the equivalent check in lsOxidationDiffusion::classifyBoundary.
3206 if (maskInterface != nullptr &&
3207 static_cast<T>(maskSign) * maskOutside >= T(0))
3208 return {Boundary::MASK, distance};
3209 return {Boundary::AMBIENT, distance};
3210 }
3211
3212 bool isMaskAtCrossing(T maskInside, T maskOutside, T distance) const {
3213 if (maskInterface == nullptr)
3214 return false;
3215 const T fraction = std::clamp(distance / gridDelta, T(0), T(1));
3216 const T insidePhi = detail::clampLevelSetPhi(maskInside);
3217 const T outsidePhi = detail::clampLevelSetPhi(maskOutside);
3218 const T maskPhi = insidePhi + fraction * (outsidePhi - insidePhi);
3219 return static_cast<T>(maskSign) * maskPhi >= T(0);
3220 }
3221
3222 T crossingDistance(T insidePhi, T outsidePhi) const {
3224 insidePhi, outsidePhi,
3225 deformationParameters.minMechanicsBoundaryDistance, gridDelta);
3226 }
3227
3228 static T firstDerivative(T minusValue, T centerValue, T plusValue,
3229 T minusDistance, T plusDistance) {
3230 const T denominator =
3231 minusDistance * plusDistance * (minusDistance + plusDistance);
3232 if (denominator <= std::numeric_limits<T>::epsilon())
3233 return 0.;
3234
3235 return (-plusDistance * plusDistance * minusValue +
3236 (plusDistance * plusDistance - minusDistance * minusDistance) *
3237 centerValue +
3238 minusDistance * minusDistance * plusValue) /
3239 denominator;
3240 }
3241
3242 static constexpr unsigned tensorIndex(unsigned row, unsigned column) {
3243 return 3 * row + column;
3244 }
3245};
3246
3247} // namespace viennals
constexpr int D
Definition Epitaxy.cpp:12
double T
Definition Epitaxy.cpp:13
Class containing all information about the level set, including the dimensions of the domain,...
Definition lsDomain.hpp:27
void setGpuMode(GpuMode mode)
Definition lsOxidationDeformation.hpp:223
T normalStress(const std::array< T, 9 > &tensor, const Vec3D< T > &normal) const
Definition lsOxidationDeformation.hpp:3078
T pressureAt(const IndexType &index) const
Definition lsOxidationDeformation.hpp:2891
bool initialiseGrid()
Definition lsOxidationDeformation.hpp:1026
std::array< T, 9 > getStressTensor(const Vec3D< T > &coordinate) const
Definition lsOxidationDeformation.hpp:529
std::vector< Vec3D< T > > collectVelocities() const
Definition lsOxidationDeformation.hpp:2480
std::size_t getNumberOfSolutionNodes() const
Definition lsOxidationDeformation.hpp:573
void setMaskInterface(SmartPointer< Domain< T, D > > passedInterface, int passedMaskSign=1)
Definition lsOxidationDeformation.hpp:240
StencilPoint< T > currentPressureStencilPoint(std::size_t nodeId, unsigned direction, int offset) const
Definition lsOxidationDeformation.hpp:2574
std::array< T, 9 > getStrainRateTensor(const IndexType &index) const
Definition lsOxidationDeformation.hpp:524
T getVonMisesStress(const IndexType &index) const
Definition lsOxidationDeformation.hpp:544
T velocityDerivative(const IndexType &index, unsigned component, unsigned direction) const
Definition lsOxidationDeformation.hpp:3017
void computeDiagnostics()
Definition lsOxidationDeformation.hpp:2957
T getResidual() const
Definition lsOxidationDeformation.hpp:550
T localExpansionSpeed(const Vec3D< T > &coordinate) const
Definition lsOxidationDeformation.hpp:2901
static T firstDerivative(T minusValue, T centerValue, T plusValue, T minusDistance, T plusDistance)
Definition lsOxidationDeformation.hpp:3228
T valueAtMask(ConstSparseIterator &maskIt, const IndexType &index) const
Definition lsOxidationDeformation.hpp:3192
void setReactionInterface(SmartPointer< Domain< T, D > > passedInterface)
Definition lsOxidationDeformation.hpp:228
Vec3D< T > estimateMaxUnresolvedAmbientVelocity() const
Definition lsOxidationDeformation.hpp:2813
void computePressureStencilAt(std::size_t nodeId, const std::vector< SolverT > &p, const std::vector< T > &ambientBP, T &diag, T &rhs) const
Definition lsOxidationDeformation.hpp:1621
Vec3D< T > levelSetNormal(ConstSparseIterator &levelSetIt, const IndexType &index) const
Definition lsOxidationDeformation.hpp:2926
void computeAvgExpansionSpeed()
Definition lsOxidationDeformation.hpp:2743
void buildNodes()
Definition lsOxidationDeformation.hpp:1033
unsigned getIterations() const
Definition lsOxidationDeformation.hpp:549
T maskPressureBoundary(const IndexType &, unsigned, int, T fallbackPressure) const
Definition lsOxidationDeformation.hpp:2697
Vec3D< T > freeSurfaceVelocityBoundary(const IndexType &index, unsigned direction, int offset, T distance, const Vec3D< T > &interiorVelocity) const
Definition lsOxidationDeformation.hpp:2702
StencilPoint< Vec3D< T > > currentVelocityStencilPoint(std::size_t nodeId, unsigned direction, int offset) const
Definition lsOxidationDeformation.hpp:2604
Vec3D< T > getVectorVelocity(const Vec3D< T > &coordinate, int material, const Vec3D< T > &, unsigned long) final
Like getScalarVelocity, but returns a velocity value for each cartesian direction.
Definition lsOxidationDeformation.hpp:387
void computeVelocityStencilAt(std::size_t nodeId, const std::vector< Vec3D< SolverT > > &v, T &diag, Vec3D< T > &rhs) const
Definition lsOxidationDeformation.hpp:2148
T getScalarVelocity(const Vec3D< T > &coordinate, int material, const Vec3D< T > &normalVector, unsigned long) final
Should return a scalar value for the velocity at coordinate for a point of material with the given no...
Definition lsOxidationDeformation.hpp:407
T maxVelocityChange(const std::vector< Vec3D< T > > &previous) const
Definition lsOxidationDeformation.hpp:2634
StencilPoint< T > pressureStencilPoint(const std::vector< SolverT > &pressure, const std::vector< T > &ambientBoundaryPressure, std::size_t nodeId, unsigned direction, int offset) const
Definition lsOxidationDeformation.hpp:2498
void markGeometryChanged()
Definition lsOxidationDeformation.hpp:303
Vec3D< T > reactionNormal(const IndexType &index) const
Definition lsOxidationDeformation.hpp:2907
std::array< T, 9 > currentBoundaryDeviatoricStress(const IndexType &index) const
Definition lsOxidationDeformation.hpp:2667
void setDeformationParameters(OxidationDeformationParameters passedParameters)
Definition lsOxidationDeformation.hpp:277
T pressureDerivative(const IndexType &index, unsigned direction) const
Definition lsOxidationDeformation.hpp:3030
bool lastSolveConverged() const
Definition lsOxidationDeformation.hpp:553
static auto New(Args &&...args)
Definition lsOxidationDeformation.hpp:211
Vec3D< T > deviatoricStressDivergence(const IndexType &index) const
Definition lsOxidationDeformation.hpp:2858
T getStrainTrace(const IndexType &index) const
Definition lsOxidationDeformation.hpp:515
OxidationDeformation(SmartPointer< Domain< T, D > > passedReactionInterface, SmartPointer< Domain< T, D > > passedAmbientInterface, SmartPointer< OxidationDiffusion< T, D > > passedDiffusionField, OxidationParameters passedOxidationParameters, OxidationDeformationParameters passedDeformationParameters={})
Definition lsOxidationDeformation.hpp:199
T getVonMisesStress(const Vec3D< T > &coordinate) const
Definition lsOxidationDeformation.hpp:539
std::array< T, 9 > strainRateTensorAt(const IndexType &index) const
Definition lsOxidationDeformation.hpp:3006
ConstSparseIterator makeMaskIterator() const
Definition lsOxidationDeformation.hpp:3180
Vec3D< T > momentumForcing(const IndexType &index) const
Definition lsOxidationDeformation.hpp:2850
Vec3D< T > reactionBoundaryVelocity(const IndexType &index) const
Definition lsOxidationDeformation.hpp:2791
std::array< T, 9 > previousDeviatoricStress(const IndexType &index) const
Definition lsOxidationDeformation.hpp:3050
BoundaryIntersection ambientCrossingInsideMask(T maskInside, T maskOutside, T distance) const
Definition lsOxidationDeformation.hpp:3198
StencilPoint< Vec3D< T > > velocityStencilPoint(const std::vector< Vec3D< SolverT > > &velocity, std::size_t nodeId, unsigned direction, int offset) const
Definition lsOxidationDeformation.hpp:2539
Boundary classifyBoundary(ConstSparseIterator &reactionIt, ConstSparseIterator &ambientIt, ConstSparseIterator &maskIt, const IndexType &inside, const IndexType &outside) const
Definition lsOxidationDeformation.hpp:3087
std::vector< T > collectPressures() const
Definition lsOxidationDeformation.hpp:2488
void apply()
Definition lsOxidationDeformation.hpp:308
std::array< T, 9 > deviatoricStressAt(const IndexType &index) const
Definition lsOxidationDeformation.hpp:2877
T getLastStokesResidual() const
Definition lsOxidationDeformation.hpp:552
Vec3D< T > maskVelocityBoundary(const IndexType &index, const Vec3D< T > &interiorVelocity) const
Definition lsOxidationDeformation.hpp:2731
void clearSolveBounds()
Definition lsOxidationDeformation.hpp:297
T maxPressureChange(const std::vector< T > &previous) const
Definition lsOxidationDeformation.hpp:2651
std::array< T, 9 > deviatoricTensor(const std::array< T, 9 > &tensor, T trace) const
Definition lsOxidationDeformation.hpp:3041
std::array< T, 9 > getStressTensor(const IndexType &index) const
Definition lsOxidationDeformation.hpp:534
void forEachSolutionNode(Callback callback) const
Definition lsOxidationDeformation.hpp:581
Vec3D< T > pressureGradient(const IndexType &index) const
Definition lsOxidationDeformation.hpp:2843
BoundaryIntersection boundaryIntersection(ConstSparseIterator &reactionIt, ConstSparseIterator &ambientIt, ConstSparseIterator &maskIt, const IndexType &inside, const IndexType &outside) const
Definition lsOxidationDeformation.hpp:3096
void setOxidationParameters(OxidationParameters passedParameters)
Definition lsOxidationDeformation.hpp:271
bool isInsideMask(ConstSparseIterator &maskIt, const IndexType &index) const
Definition lsOxidationDeformation.hpp:3186
void setGpuPreconditioner(GpuPreconditioner prec)
Definition lsOxidationDeformation.hpp:224
void solvePressure()
Definition lsOxidationDeformation.hpp:1659
std::vector< Node > nodes
Definition lsOxidationDeformation.hpp:195
T effectiveStressRelaxationTime() const
Definition lsOxidationDeformation.hpp:3057
T divergenceAt(const IndexType &index) const
Definition lsOxidationDeformation.hpp:2835
bool hasFiniteSolution() const
Definition lsOxidationDeformation.hpp:562
T crossingDistance(T insidePhi, T outsidePhi) const
Definition lsOxidationDeformation.hpp:3222
T getDissipationAlpha(int direction, int material, const Vec3D< T > &) final
If lsLocalLaxFriedrichsAnalytical is used as the spatial discretization scheme, this is called to pro...
Definition lsOxidationDeformation.hpp:413
void writeFieldsToLevelSet()
Write velocity (Vec3D) and viscoelastic stress history (3 tensor-row vectors) into ambientInterface->...
Definition lsOxidationDeformation.hpp:590
bool isMaskAtCrossing(T maskInside, T maskOutside, T distance) const
Definition lsOxidationDeformation.hpp:3212
void setMaskVelocityField(SmartPointer< VelocityField< T > > passedVelocityField)
Definition lsOxidationDeformation.hpp:255
void clearMaskVelocityField()
Definition lsOxidationDeformation.hpp:260
Vec3D< T > interfaceNormal(const IndexType &index, Boundary boundary) const
Definition lsOxidationDeformation.hpp:2912
std::array< T, 9 > getStrainRateTensor(const Vec3D< T > &coordinate) const
Definition lsOxidationDeformation.hpp:519
void solveStokesVelocity()
Definition lsOxidationDeformation.hpp:2165
T vonMisesFromDeviatoric(const std::array< T, 9 > &deviatoricStress) const
Definition lsOxidationDeformation.hpp:3066
void setSolveBounds(const IndexType &passedMinIndex, const IndexType &passedMaxIndex)
Definition lsOxidationDeformation.hpp:288
void solveVelocity()
Definition lsOxidationDeformation.hpp:1165
void computeHarmonicStencilAt(std::size_t nodeId, const std::vector< Vec3D< SolverT > > &v, Vec3D< T > &sum) const
Definition lsOxidationDeformation.hpp:1110
Vec3D< T > getVelocity(const IndexType &index) const
Definition lsOxidationDeformation.hpp:497
void solveMechanics()
Definition lsOxidationDeformation.hpp:1448
void pressureMatvec(const std::vector< SolverT > &v, const std::vector< T > &ambientBP, const std::vector< T > &precomputedDiag, const std::vector< T > &pBC, std::vector< SolverT > &Av) const
Definition lsOxidationDeformation.hpp:1648
void harmonicMatvec(const std::vector< Vec3D< SolverT > > &v, const std::vector< Vec3D< T > > &b, std::vector< Vec3D< SolverT > > &Av) const
Definition lsOxidationDeformation.hpp:1152
T freeSurfacePressureBoundary(const IndexType &index) const
Definition lsOxidationDeformation.hpp:2689
void setOxideSigns(int passedReactionSign, int passedAmbientSign)
Definition lsOxidationDeformation.hpp:282
void setDiffusionField(SmartPointer< OxidationDiffusion< T, D > > passedDiffusionField)
Definition lsOxidationDeformation.hpp:265
~OxidationDeformation()
Definition lsOxidationDeformation.hpp:215
T getPressure(const Vec3D< T > &coordinate) const
Definition lsOxidationDeformation.hpp:502
void setAmbientInterface(SmartPointer< Domain< T, D > > passedInterface)
Definition lsOxidationDeformation.hpp:234
static constexpr unsigned tensorIndex(unsigned row, unsigned column)
Definition lsOxidationDeformation.hpp:3242
void clearMaskInterface()
Definition lsOxidationDeformation.hpp:248
bool touchesBoundary(ConstSparseIterator &reactionIt, ConstSparseIterator &ambientIt, ConstSparseIterator &maskIt, const IndexType &index, Boundary requestedBoundary) const
Definition lsOxidationDeformation.hpp:3147
void applySimpleVelocityCorrection(const std::vector< T > &pressureOld, const std::vector< Vec3D< T > > &diagV)
Definition lsOxidationDeformation.hpp:1547
T avgExpansionSpeed()
Definition lsOxidationDeformation.hpp:574
T getPressure(const IndexType &index) const
Definition lsOxidationDeformation.hpp:506
bool isInsideOxide(T reactionPhi, T ambientPhi) const
Definition lsOxidationDeformation.hpp:3169
T getLastPressureResidual() const
Definition lsOxidationDeformation.hpp:551
Vec3D< T > unresolvedAmbientVelocity(const Vec3D< T > &coordinate) const
Definition lsOxidationDeformation.hpp:2800
void computeStressTensors()
Definition lsOxidationDeformation.hpp:2963
T getStrainTrace(const Vec3D< T > &coordinate) const
Definition lsOxidationDeformation.hpp:510
Vec3D< T > getVelocity(const Vec3D< T > &coordinate) const
Definition lsOxidationDeformation.hpp:492
std::vector< Vec3D< T > > computeVelocityDiagonals() const
Definition lsOxidationDeformation.hpp:1430
Solves the oxidant diffusion step of the Suvorov et al. (10.1007/s10825-006-0003-z) oxidation model o...
Definition lsOxidationDiffusion.hpp:92
Common Cartesian-grid infrastructure shared by the three oxidation solver classes (diffusion,...
Definition lsOxidationSolverBase.hpp:90
static constexpr std::size_t noNode
Definition lsOxidationSolverBase.hpp:96
bool crosses(T a, T b) const
Definition lsOxidationSolverBase.hpp:110
std::size_t lookupNode(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:137
std::size_t linearIndex(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:143
void initNodeLookup()
Definition lsOxidationSolverBase.hpp:130
bool inBounds(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:123
std::array< std::size_t, D > strides
Definition lsOxidationSolverBase.hpp:101
T gridDelta
Definition lsOxidationSolverBase.hpp:102
std::vector< std::size_t > nodeLookupFlat
Definition lsOxidationSolverBase.hpp:97
bool initializeGridFromInterfaces(SmartPointer< Domain< T, D > > reactionInterface, SmartPointer< Domain< T, D > > ambientInterface, SmartPointer< Domain< T, D > > maskInterface, bool useRequestedBounds, const IndexType &requestedMinIndex, const IndexType &requestedMaxIndex, std::size_t maxGridPoints, const std::string &solverName)
Definition lsOxidationSolverBase.hpp:207
std::array< std::size_t, D > extents
Definition lsOxidationSolverBase.hpp:100
viennahrle::ConstSparseIterator< typename Domain< T, D >::DomainType > ConstSparseIterator
Definition lsOxidationSolverBase.hpp:93
T valueAt(ConstSparseIterator &it, const IndexType &index) const
Definition lsOxidationSolverBase.hpp:118
bool increment(IndexType &index) const
Definition lsOxidationSolverBase.hpp:152
IndexType minIndex
Definition lsOxidationSolverBase.hpp:98
viennahrle::Index< D > IndexType
Definition lsOxidationSolverBase.hpp:92
std::size_t findNearbyNode(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:166
IndexType maxIndex
Definition lsOxidationSolverBase.hpp:99
T levelSetCrossingDistance(T insidePhi, T outsidePhi, T minBoundaryFraction, T gridDelta)
Definition lsOxidationSolverBase.hpp:76
T clampLevelSetPhi(T v)
Clamp HRLE far-field sentinels (±DBL_MAX) to ±1 before differencing to prevent DBL_MAX² overflow that...
Definition lsOxidationSolverBase.hpp:71
Vec3D< T > vecScaled(const Vec3D< T > &source, T factor)
Definition lsOxidationSolverBase.hpp:40
void vecAddTo(Vec3D< T > &target, const Vec3D< T > &source)
Definition lsOxidationSolverBase.hpp:64
Definition lsAdvect.hpp:41
GpuMode
Selects the BiCGSTAB back-end for the diffusion solve. GPU failures are reported and not silently fal...
Definition lsOxidationDiffusion.hpp:26
@ Gpu
Always use GPU; fail if unavailable or unsuccessful.
Definition lsOxidationDiffusion.hpp:28
@ Cpu
Always use CPU (default).
Definition lsOxidationDiffusion.hpp:27
GpuPreconditioner
Selects the preconditioner used by the GPU BiCGSTAB solver. Jacobi matches the CPU solver's precondit...
Definition lsOxidationDiffusion.hpp:35
@ ILU0
Definition lsOxidationDiffusion.hpp:35
@ Jacobi
Definition lsOxidationDiffusion.hpp:35
Parameters for the Cartesian-grid oxide deformation model.
Definition lsOxidationDeformation.hpp:18
double viscosity
Definition lsOxidationDeformation.hpp:19
unsigned stokesIterations
Definition lsOxidationDeformation.hpp:30
double tolerance
Definition lsOxidationDeformation.hpp:33
unsigned harmonicIterations
Definition lsOxidationDeformation.hpp:27
double shearModulus
Definition lsOxidationDeformation.hpp:24
double stressRelaxationTime
Definition lsOxidationDeformation.hpp:25
double relaxation
Definition lsOxidationDeformation.hpp:34
double minMechanicsBoundaryDistance
Definition lsOxidationDeformation.hpp:23
double bulkModulus
Definition lsOxidationDeformation.hpp:20
unsigned mechanicsIterations
Definition lsOxidationDeformation.hpp:28
unsigned pressureIterations
Definition lsOxidationDeformation.hpp:29
double pressureTolerance
Definition lsOxidationDeformation.hpp:22
double mechanicsTolerance
Definition lsOxidationDeformation.hpp:31
double stressTimeStep
Definition lsOxidationDeformation.hpp:26
double stokesTolerance
Definition lsOxidationDeformation.hpp:32
double pressureRelaxation
Definition lsOxidationDeformation.hpp:35
int material
Definition lsOxidationDeformation.hpp:38
double ambientPressure
Definition lsOxidationDeformation.hpp:21
std::size_t maxGridPoints
Definition lsOxidationDeformation.hpp:37
Parameters for the steady oxidant diffusion model used by OxidationDiffusion.
Definition lsOxidationDiffusion.hpp:39
Definition lsOxidationSolverBase.hpp:34