ViennaLS
Loading...
Searching...
No Matches
lsOxidationMask.hpp
Go to the documentation of this file.
1#pragma once
2
5
6#include <hrleSparseStarIterator.hpp>
7
8#include <algorithm>
9#include <array>
10#include <cmath>
11#include <omp.h>
12#include <stdexcept>
13#include <unordered_map>
14
15namespace viennals {
16
18 // Contact mode:
19 // 0 = kinematic: mask velocity at oxide-contact faces equals the solved
20 // oxide velocity (legacy Dirichlet BC). No traction computation.
21 // 1 = oneway: mask solved with oxide stress traction as Neumann BC
22 // (multigrid GMRES); oxide uses kinematic Dirichlet at mask faces.
23 // Stable; captures mask bending driven by oxide force. (Config
24 // aliases: "oneway", "traction", "1", "2".)
25 // 2 = elastic: multigrid-GMRES elastic solve with youngModulus and
26 // poissonRatio. Oxide contact prescribes elastic displacement
27 // (v_oxide × dt). The outer coupling loop in lsOxidation feeds the
28 // solved mask displacement back into the next oxide solve. (Config
29 // aliases: "elastic", "twoway" and variants, "3", "4".)
30 int contactMode = 1;
31 // Arrhenius creep viscosity law:
32 // eta(T) = eta_ref * exp(E/R * (1/T - 1/T_ref)).
33 // Viscosity is in Pa·hr, activation energy in J/mol, temperature in K.
34 double temperature = 1273.15;
35 double referenceTemperature = 1273.15;
36 double referenceViscosity = 5e8;
38 // Young's modulus for elastic contact mode (2), in Pa.
39 // Si₃N₄ at 1000 °C: ~250–270 GPa.
40 double youngModulus = 270e9;
41 // Current solve timestep in hours; set by lsOxidation before each apply().
42 // Elastic modes use it to scale oxide velocity to contact displacement
43 // (v * dt) and to convert the solved displacement back to advection velocity.
44 double stressTimeStep = 1;
45 double poissonRatio = 0.27;
46 // false = bonded mask/oxide interface (traction continuity in compression
47 // and tension); true = optional unilateral contact/release model.
48 bool unilateralContact = true;
49 // Outer Aitken relaxation factor applied to the mask/oxide coupling residual.
50 // Independent of the multigrid smoother below.
51 double relaxation = 1.;
52 // Under-relaxation for the unilateral contact load active set. A hard
53 // compressive/tensile switch makes the mask/oxide fixed point oscillate when
54 // contact faces release; relaxing the load keeps the complementarity limit
55 // but makes the iteration continuous.
56 double contactLoadRelaxation = 0.25;
57 // Relative pressure floor for releasing a relaxed contact face. Machine
58 // epsilon is far too small for Pa-scale contact stresses: a tensile face with
59 // a decaying old compressive load would otherwise remain "active" for many
60 // nonlinear iterations. The scale is the previous/current normal traction.
62 // SOR omega for the multigrid V-cycle smoother (both forward and backward
63 // sweeps). 1.0 is standard Gauss-Seidel; values in (1, 1.4] add over-
64 // relaxation. Do not share this with the Aitken relaxation above.
66 double tolerance = 1e-8;
67 // Minimum sub-grid boundary distance as a fraction of gridDelta. This is a
68 // mechanical derivative length scale, so keep it comparable to the oxide
69 // deformation solver; near-zero distances make elastic contact stresses blow
70 // up as E * u / d.
71 double minBoundaryDistance = 0.05;
72 unsigned maxIterations = 10000;
73 std::size_t maxGridPoints = 5000000;
74 int material = -1;
75 // Optional far-field clamp used to remove rigid-body mask drift and provide
76 // the support that makes mask thickness mechanically meaningful. A side of
77 // -1 clamps the lower index side, +1 clamps the upper side, and 0 disables
78 // the clamp. Direction defaults to x in a LOCOS cross-section.
82};
83
89template <class T, int D>
90class OxidationMaskBending final : public VelocityField<T>,
91 public OxidationSolverBase<T, D> {
92 using IndexType = viennahrle::Index<D>;
93 using ConstSparseIterator =
94 viennahrle::ConstSparseIterator<typename Domain<T, D>::DomainType>;
95
96private:
97 // bring base members into scope
114
115 struct Node {
116 IndexType index;
117 Vec3D<T> velocity{0., 0., 0.};
118 bool contact = false;
119 bool fixed = false;
120 };
121
122 struct SparseMatrix {
123 std::size_t nodeCount = 0;
124 std::vector<std::size_t> rowPtr;
125 std::vector<std::size_t> colIndex;
126 std::vector<T> values;
127 std::vector<T> invDiagonal;
128 };
129
130 struct MultigridLevel {
131 std::vector<IndexType> indices;
132 // For level l > 0, children maps each coarse node to level l-1 nodes.
133 std::vector<std::vector<std::size_t>> children;
134 std::vector<std::size_t> fineToCoarse;
135 SparseMatrix matrix;
136 };
137
138 SmartPointer<OxidationDeformation<T, D>> deformationField = nullptr;
139 SmartPointer<Domain<T, D>> maskInterface = nullptr;
140 SmartPointer<Domain<T, D>> ambientInterface = nullptr;
141 OxidationMaskParameters parameters;
142 int maskSign = 1;
143 int ambientSign = -1;
144
145 bool solved = false;
146 bool useRequestedBounds = false;
147 T residual = std::numeric_limits<T>::max();
148 unsigned iterations = 0;
149 std::array<T, D> maxVelocity_{};
150 std::size_t contactNodes = 0;
151 std::size_t fixedNodes = 0;
152 T lastApplyVelocityChange = std::numeric_limits<T>::max();
153 T lastApplyAbsoluteVelocityChange = std::numeric_limits<T>::max();
154 T aitkenOmega = 1.;
155 std::size_t candidateContactFaces_ = 0;
156 std::size_t activeContactFaces_ = 0;
157 std::size_t tensileContactFaces_ = 0;
158 T minContactNormalTraction_ = std::numeric_limits<T>::max();
159 T maxContactNormalTraction_ = std::numeric_limits<T>::lowest();
160 T contactReleaseThreshold_ = 0.;
161 std::unordered_map<std::size_t, Vec3D<T>> previousContactTraction_;
162 std::unordered_map<std::size_t, T> previousContactReleaseScale_;
163 std::vector<T> previousAitkenResidual;
164 // Elastic mode (contactMode==2): snapshot of u_new (elastic equilibrium
165 // displacement in µm, stored as µm/hr with implicit dt_ref=1 hr) taken by
166 // finalizeElasticAdvectionVelocity(). Used by writeFieldsToLevelSet() to
167 // write the "MaskVelocity" warm-start for the next substep's solver.
168 std::vector<Vec3D<T>> elasticU_;
169 IndexType requestedMinIndex{};
170 IndexType requestedMaxIndex{};
171 std::vector<Node> nodes;
172 // Face-major flat contact BC arrays: index = faceIdx * n + nodeId.
173 std::vector<uint8_t> contactFaceActive_; // 1 if contact BC applies
174 std::vector<Vec3D<T>> contactFaceVelocity_; // Dirichlet velocity at contact
175 std::vector<Vec3D<T>> contactFaceTraction_; // Oxide traction on mask face
176 std::vector<T> contactFaceDistance_; // Node-to-interface distance
177 std::unordered_map<std::size_t, T> ambientPhiCache_;
178
179 // Cached multigrid hierarchy. The stiffness matrix depends on the node
180 // geometry and the contact face classification (active/inactive) but NOT on
181 // the traction magnitudes, which only affect the load vector b. When the
182 // contact pattern is unchanged from the previous apply() call, the hierarchy
183 // can be reused and the O(n) matrix build and Galerkin coarsening skipped.
184 std::vector<MultigridLevel> cachedMultigridLevels_;
185 std::vector<uint8_t> cachedContactFaceActive_; // snapshot at last build
186 std::size_t cachedNodeCount_ = 0;
187
188public:
190
192 SmartPointer<OxidationDeformation<T, D>> passedDeformation,
193 OxidationMaskParameters passedParameters = {})
194 : deformationField(passedDeformation), parameters(passedParameters) {}
195
197 SmartPointer<OxidationDeformation<T, D>> passedDeformation,
198 SmartPointer<Domain<T, D>> passedMaskInterface,
199 OxidationMaskParameters passedParameters = {}, int passedMaskSign = 1)
200 : deformationField(passedDeformation), maskInterface(passedMaskInterface),
201 parameters(passedParameters), maskSign((passedMaskSign < 0) ? -1 : 1) {}
202
204
205 static SmartPointer<OxidationMaskBending>
206 New(SmartPointer<OxidationDeformation<T, D>> passedDeformation,
207 OxidationMaskParameters passedParameters = {}) {
208 return SmartPointer<OxidationMaskBending>::New(passedDeformation,
209 passedParameters);
210 }
211
212 static SmartPointer<OxidationMaskBending>
213 New(SmartPointer<OxidationDeformation<T, D>> passedDeformation,
214 SmartPointer<Domain<T, D>> passedMaskInterface,
215 OxidationMaskParameters passedParameters = {}, int passedMaskSign = 1) {
216 return SmartPointer<OxidationMaskBending>::New(
217 passedDeformation, passedMaskInterface, passedParameters,
218 passedMaskSign);
219 }
220
221 void setMaskInterface(SmartPointer<Domain<T, D>> passedMaskInterface,
222 int passedMaskSign = 1) {
223 maskInterface = passedMaskInterface;
224 maskSign = (passedMaskSign < 0) ? -1 : 1;
225 solved = false;
226 }
227
232 void setAmbientInterface(SmartPointer<Domain<T, D>> passedAmbientInterface,
233 int passedAmbientSign = -1) {
234 ambientInterface = passedAmbientInterface;
235 ambientSign = (passedAmbientSign < 0) ? -1 : 1;
236 solved = false;
237 }
238
239 void setParameters(OxidationMaskParameters passedParameters) {
240 parameters = passedParameters;
241 solved = false;
242 }
243
244 void setSolveBounds(const IndexType &passedMinIndex,
245 const IndexType &passedMaxIndex) {
246 requestedMinIndex = passedMinIndex;
247 requestedMaxIndex = passedMaxIndex;
248 useRequestedBounds = true;
249 solved = false;
250 }
251
253 useRequestedBounds = false;
254 solved = false;
255 }
256
257 OxidationMaskParameters getParameters() const { return parameters; }
258 unsigned getIterations() const { return iterations; }
259 T getResidual() const { return residual; }
260 std::size_t getNumberOfSolutionNodes() const { return nodes.size(); }
261 std::size_t getNumberOfContactNodes() const { return contactNodes; }
262 std::size_t getNumberOfFixedNodes() const { return fixedNodes; }
263 T getLastApplyVelocityChange() const { return lastApplyVelocityChange; }
265 return lastApplyAbsoluteVelocityChange;
266 }
267
268 void apply() {
269 if (maskInterface == nullptr) {
270 solved = true;
271 return;
272 }
273 if (deformationField == nullptr) {
274 Logger::getInstance()
275 .addWarning("OxidationMaskBending: deformation field is null; "
276 "mask bending will produce zero velocities.")
277 .print();
278 solved = true;
279 return;
280 }
281
282 const auto previousVelocities = collectVelocitiesByGridPoint();
283 if (!initialiseGrid())
284 return; // base class already logged the error
285 elasticU_
286 .clear(); // reset before coupling so getVectorVelocity divides by dt
287 buildNodes();
288 seedFromLevelSet(); // warm-start velocity from previous substep's pointData
289 seedFromPrevious(previousVelocities);
290 if (nodes.empty()) {
291 Logger::getInstance()
292 .addWarning("OxidationMaskBending: no mask nodes found after "
293 "buildNodes(). Verify that the mask level set defines "
294 "a non-empty interior region within the solve bounds.")
295 .print();
296 solved = true;
297 return;
298 }
299 if (contactNodes == 0)
300 VIENNACORE_LOG_DEBUG(
301 "OxidationMaskBending: mask has no oxide-contact nodes; "
302 "no traction will be applied this step.");
303 solveElasticVelocity();
304 validateNodeVelocities("solveElasticVelocity");
305 smoothVelocityField();
306
307 const auto fixedPointResidual = contactResidualVector(previousVelocities);
308 const T omega = aitkenRelaxation(fixedPointResidual);
309 relaxVelocities(previousVelocities, omega);
310 validateNodeVelocities("Aitken relaxation");
311 lastApplyVelocityChange = maxVelocityChange(previousVelocities);
312 lastApplyAbsoluteVelocityChange =
313 absoluteVelocityChange(previousVelocities);
314
315 maxVelocity_.fill(T(0));
316 for (const auto &node : nodes) {
317 for (unsigned d = 0; d < D; ++d)
318 maxVelocity_[d] = std::max(maxVelocity_[d], std::abs(node.velocity[d]));
319 }
320 VIENNACORE_LOG_DEBUG(
321 "OxidationMaskBending: contact faces active=" +
322 std::to_string(activeContactFaces_) + "/" +
323 std::to_string(candidateContactFaces_) + ", tensileSkipped=" +
324 std::to_string(tensileContactFaces_) + ", normalTraction=[" +
325 std::to_string(
326 candidateContactFaces_ == 0 ? T(0) : minContactNormalTraction_) +
327 ", " +
328 std::to_string(
329 candidateContactFaces_ == 0 ? T(0) : maxContactNormalTraction_) +
330 "], aitkenOmega=" + std::to_string(omega) + ", contactLoadRelaxation=" +
331 std::to_string(
332 std::clamp(parameters.contactLoadRelaxation, T(0.02), T(1))) +
333 ", contactReleaseThreshold=" +
334 std::to_string(contactReleaseThreshold_) + ", residual=" +
335 std::to_string(lastApplyVelocityChange) + ", absVelocityChange=" +
336 std::to_string(lastApplyAbsoluteVelocityChange));
337
338 solved = true;
339 }
340
341 // Called from lsOxidation after writeFieldsToLevelSet() and before advect().
342 //
343 // Elastic mode (2): the contact faces use kinematic (Dirichlet) BC so the
344 // mask stays bonded to the oxide surface. The solver output u_new is the
345 // elastic bending displacement (in µm, stored as µm/hr with implicit
346 // dt_ref=1 hr). Convert to advection velocity u_new/dt and update
347 // maxVelocity_ for CFL subcycling. No-op for viscous modes.
349 if (!isElasticContactMode() || nodes.empty())
350 return;
351 const T dt = parameters.stressTimeStep;
352 if (dt <= T(0)) {
353 for (auto &node : nodes)
354 node.velocity = {T(0), T(0), T(0)};
355 return;
356 }
357
358 const std::size_t n = nodes.size();
359 elasticU_.resize(n);
360 for (std::size_t i = 0; i < n; ++i)
361 elasticU_[i] = nodes[i].velocity;
362
363 T maxUNew = T(0);
364 for (std::size_t i = 0; i < n; ++i)
365 for (unsigned d = 0; d < D; ++d)
366 maxUNew = std::max(maxUNew, std::abs(elasticU_[i][d]));
367
368 const T invDt = T(1) / dt;
369 for (std::size_t i = 0; i < n; ++i)
370 for (unsigned d = 0; d < D; ++d)
371 nodes[i].velocity[d] = elasticU_[i][d] * invDt;
372
373 VIENNACORE_LOG_INFO("ElasticFinalize: dt=" + std::to_string(dt) +
374 " max|u_new|=" + std::to_string(maxUNew) +
375 " max|u_new/dt|=" + std::to_string(maxUNew * invDt));
376
377 maxVelocity_.fill(T(0));
378 for (const auto &node : nodes)
379 for (unsigned d = 0; d < D; ++d)
380 maxVelocity_[d] = std::max(maxVelocity_[d], std::abs(node.velocity[d]));
381 }
382
383 Vec3D<T> getVectorVelocity(const Vec3D<T> &coordinate, int material,
384 const Vec3D<T> & /*normalVector*/,
385 unsigned long /*pointId*/) final {
386 if (deformationField == nullptr)
387 return {0., 0., 0.};
388
389 if (parameters.material >= 0 && material != parameters.material)
390 return {0., 0., 0.};
391
392 if (!solved)
393 apply();
394
395 if (maskInterface == nullptr || nodes.empty())
396 return {0., 0., 0.};
397
398 IndexType index;
399 for (unsigned i = 0; i < D; ++i)
400 index[i] = std::llround(coordinate[i] / gridDelta);
401
402 // Elastic mode: the solver stores u_new (displacement in µm) as if it were
403 // a velocity (µm/hr, with implicit dt_ref=1hr). The oxide needs the actual
404 // advection velocity u_new/dt. Before finalization elasticU_ is empty and
405 // nodes[i].velocity = u_new; we divide by dt here. After finalization
406 // nodes[i].velocity = u_new/dt already, so we return it directly.
407 if (isElasticContactMode() && elasticU_.empty()) {
408 const T dt = parameters.stressTimeStep;
409 if (dt <= T(0))
410 return {T(0), T(0), T(0)};
411 return detail::vecScaled(getVelocity(index), T(1) / dt);
412 }
413
414 return getVelocity(index);
415 }
416
417 T getDissipationAlpha(int direction, int /*material*/,
418 const Vec3D<T> & /*centralDifferences*/) final {
419 return maxVelocity_[direction];
420 }
421
425 if (nodes.empty() || maskInterface == nullptr)
426 return;
427
428 using VD = typename PointData<T>::VectorDataType;
429
430 // Elastic mode: after finalization, elasticU_ holds u_new (the displacement
431 // in µm, stored as µm/hr). Use it as the "MaskVelocity" warm-start so the
432 // next substep's solver begins near its expected solution.
433 // Before finalization (elasticU_ empty), fall back to nodes[nId].velocity
434 // which still equals u_new from the current solve.
435 const bool useElasticU = isElasticContactMode() && !elasticU_.empty();
436
437 VD velocity;
438 ConstSparseIterator it(maskInterface->getDomain());
439 for (; !it.isFinished(); ++it) {
440 if (!it.isDefined())
441 continue;
442 const std::size_t nId = lookupNode(it.getStartIndices());
443 Vec3D<T> v{T(0), T(0), T(0)};
444 if (nId != noNode)
445 v = (useElasticU && nId < elasticU_.size()) ? elasticU_[nId]
446 : nodes[nId].velocity;
447 velocity.push_back(v);
448 }
449 maskInterface->getPointData().insertReplaceVectorData(std::move(velocity),
450 "MaskVelocity");
451
452 // Cauchy stress tensor in the mask: σ = λ(∇·u)I + 2μ·sym(∇u)
453 // In elastic mode use the raw displacement u_new (elasticU_); in viscous
454 // mode use the velocity field — both give the correct stress via the same
455 // Lamé constants (which already encode the physical units).
456 const T lambda = lameLambda();
457 const T mu = lameMu();
458
459 std::vector<Vec3D<T>> velForStress(nodes.size());
460 for (std::size_t i = 0; i < nodes.size(); ++i)
461 velForStress[i] = (useElasticU && i < elasticU_.size())
462 ? elasticU_[i]
463 : nodes[i].velocity;
464
465 VD stressR0, stressR1, stressR2;
466 for (ConstSparseIterator sit(maskInterface->getDomain()); !sit.isFinished();
467 ++sit) {
468 if (!sit.isDefined())
469 continue;
470 const std::size_t nId = lookupNode(sit.getStartIndices());
471 Vec3D<T> row0{T(0), T(0), T(0)};
472 Vec3D<T> row1{T(0), T(0), T(0)};
473 Vec3D<T> row2{T(0), T(0), T(0)};
474 if (nId != noNode) {
475 // grad[j][i] = ∂v_i/∂x_j via central differences
476 std::array<Vec3D<T>, 3> grad{};
477 for (unsigned j = 0; j < static_cast<unsigned>(D); ++j) {
478 const auto vp = simpleNeighborVelocity(velForStress, nId, j, +1);
479 const auto vm = simpleNeighborVelocity(velForStress, nId, j, -1);
480 for (unsigned i = 0; i < 3; ++i)
481 grad[j][i] = (vp[i] - vm[i]) / (T(2) * gridDelta);
482 }
483 T div = T(0);
484 for (unsigned i = 0; i < static_cast<unsigned>(D); ++i)
485 div += grad[i][i];
486 // σ_ij = λ·div·δ_ij + μ·(∂v_i/∂x_j + ∂v_j/∂x_i)
487 row0[0] = lambda * div + T(2) * mu * grad[0][0];
488 row0[1] = mu * (grad[1][0] + grad[0][1]);
489 row0[2] = (D > 2) ? mu * (grad[2][0] + grad[0][2]) : T(0);
490 row1[0] = row0[1];
491 row1[1] = lambda * div + T(2) * mu * grad[1][1];
492 row1[2] = (D > 2) ? mu * (grad[2][1] + grad[1][2]) : T(0);
493 row2[0] = row0[2];
494 row2[1] = row1[2];
495 row2[2] = (D > 2) ? lambda * div + T(2) * mu * grad[2][2] : T(0);
496 }
497 stressR0.push_back(row0);
498 stressR1.push_back(row1);
499 stressR2.push_back(row2);
500 }
501 maskInterface->getPointData().insertReplaceVectorData(std::move(stressR0),
502 "MaskStressR0");
503 maskInterface->getPointData().insertReplaceVectorData(std::move(stressR1),
504 "MaskStressR1");
505 maskInterface->getPointData().insertReplaceVectorData(std::move(stressR2),
506 "MaskStressR2");
507 }
508
509private:
510 bool initialiseGrid() {
512 maskInterface, useRequestedBounds, requestedMinIndex, requestedMaxIndex,
513 parameters.maxGridPoints, "OxidationMaskBending");
514 }
515
518 void seedFromLevelSet() {
519 if (maskInterface == nullptr || nodes.empty())
520 return;
521 const int vIdx =
522 maskInterface->getPointData().getVectorDataIndex("MaskVelocity");
523 if (vIdx == -1)
524 return;
525 const auto *vd = maskInterface->getPointData().getVectorData(vIdx);
526 if (vd == nullptr)
527 return;
528
529 ConstSparseIterator it(maskInterface->getDomain());
530 for (; !it.isFinished(); ++it) {
531 if (!it.isDefined())
532 continue;
533 const auto ptId = it.getPointId();
534 if (ptId >= static_cast<decltype(ptId)>(vd->size()))
535 continue;
536 const std::size_t nId = lookupNode(it.getStartIndices());
537 if (nId == noNode)
538 continue;
539 if (!isFinite((*vd)[ptId]))
540 throwNonFinite("stored mask velocity point data");
541 else
542 nodes[nId].velocity = (*vd)[ptId];
543 }
544 }
545
546 void
547 seedFromPrevious(const std::unordered_map<std::size_t, Vec3D<T>> &previous) {
548 if (previous.empty())
549 return;
550 for (auto &node : nodes) {
551 const auto found = previous.find(linearIndex(node.index));
552 if (found != previous.end() && isFinite(found->second))
553 node.velocity = found->second;
554 if (node.fixed)
555 node.velocity = {T(0), T(0), T(0)};
556 }
557 }
558
559 std::unordered_map<std::size_t, Vec3D<T>>
560 collectVelocitiesByGridPoint() const {
561 std::unordered_map<std::size_t, Vec3D<T>> result;
562 result.reserve(nodes.size());
563 for (const auto &node : nodes)
564 if (inBounds(node.index) && isFinite(node.velocity))
565 result.emplace(linearIndex(node.index), node.velocity);
566 return result;
567 }
568
569 static bool isFinite(const Vec3D<T> &v) {
570 for (unsigned i = 0; i < 3; ++i)
571 if (!std::isfinite(v[i]))
572 return false;
573 return true;
574 }
575
576 static bool isFiniteTensor(const std::array<T, 9> &tensor) {
577 for (T value : tensor)
578 if (!std::isfinite(value))
579 return false;
580 return true;
581 }
582
583 [[noreturn]] void throwNonFinite(const std::string &stage) const {
584 const std::string message =
585 "OxidationMaskBending: " + stage + " produced non-finite values.";
586 Logger::getInstance().addError(message).print();
587 throw std::runtime_error(message);
588 }
589
590 void validateNodeVelocities(const std::string &stage) const {
591 for (const auto &node : nodes)
592 for (unsigned i = 0; i < D; ++i)
593 if (!std::isfinite(node.velocity[i]))
594 throwNonFinite(stage);
595 }
596
597 T maxVelocityChange(
598 const std::unordered_map<std::size_t, Vec3D<T>> &previous) const {
599 if (previous.empty())
600 return std::numeric_limits<T>::max();
601
602 T changeSquaredSum = 0.;
603 T magnitudeSquaredSum = std::numeric_limits<T>::epsilon();
604 for (const auto &node : nodes) {
605 if (!node.contact)
606 continue;
607 const auto found = previous.find(linearIndex(node.index));
608 if (found == previous.end())
609 continue;
610
611 for (unsigned i = 0; i < D; ++i) {
612 const T delta = node.velocity[i] - found->second[i];
613 changeSquaredSum += delta * delta;
614 magnitudeSquaredSum += node.velocity[i] * node.velocity[i];
615 magnitudeSquaredSum += found->second[i] * found->second[i];
616 }
617 }
618
619 const T change = std::sqrt(changeSquaredSum / magnitudeSquaredSum);
620 if (!std::isfinite(change))
621 throwNonFinite("mask velocity coupling residual");
622 return change;
623 }
624
625 T absoluteVelocityChange(
626 const std::unordered_map<std::size_t, Vec3D<T>> &previous) const {
627 if (previous.empty())
628 return std::numeric_limits<T>::max();
629
630 T changeSquaredSum = 0.;
631 std::size_t components = 0;
632 for (const auto &node : nodes) {
633 if (!node.contact)
634 continue;
635 const auto found = previous.find(linearIndex(node.index));
636 if (found == previous.end())
637 continue;
638
639 for (unsigned i = 0; i < D; ++i) {
640 const T delta = node.velocity[i] - found->second[i];
641 if (!std::isfinite(delta))
642 throwNonFinite("mask absolute velocity coupling residual");
643 changeSquaredSum += delta * delta;
644 ++components;
645 }
646 }
647
648 if (components == 0)
649 return std::numeric_limits<T>::max();
650 const T change = std::sqrt(changeSquaredSum / static_cast<T>(components));
651 if (!std::isfinite(change))
652 throwNonFinite("mask absolute velocity coupling residual");
653 return change;
654 }
655
656 std::vector<T> contactResidualVector(
657 const std::unordered_map<std::size_t, Vec3D<T>> &previous) const {
658 std::vector<T> residualVector;
659 residualVector.reserve(contactNodes * D);
660 if (previous.empty())
661 return residualVector;
662
663 for (const auto &node : nodes) {
664 if (!node.contact)
665 continue;
666 const auto found = previous.find(linearIndex(node.index));
667 if (found == previous.end())
668 continue;
669 for (unsigned i = 0; i < D; ++i) {
670 const T residualComponent = node.velocity[i] - found->second[i];
671 if (!std::isfinite(residualComponent))
672 throwNonFinite("mask contact residual");
673 residualVector.push_back(residualComponent);
674 }
675 }
676 return residualVector;
677 }
678
679 T aitkenRelaxation(const std::vector<T> &residualVector) {
680 const T baseOmega = std::clamp(parameters.relaxation, T(0.01), T(1));
681 if (residualVector.empty()) {
682 previousAitkenResidual.clear();
683 aitkenOmega = baseOmega;
684 return 1.;
685 }
686
687 T omega = std::clamp(aitkenOmega, T(0.01), baseOmega);
688 if (previousAitkenResidual.size() != residualVector.size())
689 omega = baseOmega;
690 if (previousAitkenResidual.size() == residualVector.size()) {
691 T numerator = 0.;
692 T denominator = 0.;
693 T residualNorm2 = 0.;
694 T previousNorm2 = 0.;
695 for (std::size_t i = 0; i < residualVector.size(); ++i) {
696 if (!std::isfinite(residualVector[i]) ||
697 !std::isfinite(previousAitkenResidual[i]))
698 throwNonFinite("mask Aitken residual");
699 const T delta = residualVector[i] - previousAitkenResidual[i];
700 numerator += previousAitkenResidual[i] * delta;
701 denominator += delta * delta;
702 residualNorm2 += residualVector[i] * residualVector[i];
703 previousNorm2 += previousAitkenResidual[i] * previousAitkenResidual[i];
704 }
705
706 if (!std::isfinite(numerator) || !std::isfinite(denominator))
707 throwNonFinite("mask Aitken coefficient");
708
709 if (denominator > std::numeric_limits<T>::epsilon()) {
710 omega = -aitkenOmega * numerator / denominator;
711 if (std::isfinite(omega)) {
712 // Traction contact mode: cap at 1.0 (no extrapolation). The
713 // compressive/tensile contact state can alternate across coupling
714 // iterations, and omega > 1 extrapolates through the fixed point
715 // for those nodes, producing sign-alternating velocities that
716 // appear as zigzag kinks after level-set advection.
717 const T omegaMax = (parameters.contactMode > 0) ? baseOmega : T(1.5);
718 omega = std::clamp(omega, T(0.05), omegaMax);
719 } else {
720 throwNonFinite("mask Aitken coefficient");
721 }
722 }
723
724 if (std::isfinite(residualNorm2) && std::isfinite(previousNorm2) &&
725 residualNorm2 > previousNorm2 * T(1.05)) {
726 omega = std::min(omega, std::max(T(0.05), aitkenOmega * T(0.5)));
727 }
728 }
729
730 previousAitkenResidual = residualVector;
731 aitkenOmega = omega;
732 return omega;
733 }
734
735 // One pass of Laplacian (neighbour-average) smoothing on the mask velocity
736 // field. Damps grid-scale oscillations that arise near the contact-to-free
737 // boundary transition without materially changing the bulk velocity.
738 // Fixed (anchor) nodes are skipped — they must stay at zero.
739 void smoothVelocityField() {
740 const std::size_t n = nodes.size();
741 if (n == 0)
742 return;
743
744 std::vector<Vec3D<T>> smoothed(n);
745 for (std::size_t id = 0; id < n; ++id) {
746 if (nodes[id].fixed) {
747 smoothed[id] = {T(0), T(0), T(0)};
748 continue;
749 }
750 Vec3D<T> sum = nodes[id].velocity;
751 int count = 1;
752 for (unsigned dir = 0; dir < D; ++dir) {
753 for (int off : {-1, 1}) {
754 IndexType nb = nodes[id].index;
755 nb[dir] += off;
756 if (!inBounds(nb))
757 continue;
758 const std::size_t nbId = nodeLookupFlat[linearIndex(nb)];
759 if (nbId == noNode)
760 continue;
761 for (unsigned c = 0; c < D; ++c)
762 sum[c] += nodes[nbId].velocity[c];
763 ++count;
764 }
765 }
766 const T w = T(1) / static_cast<T>(count);
767 for (unsigned c = 0; c < D; ++c)
768 smoothed[id][c] = sum[c] * w;
769 }
770
771 for (std::size_t id = 0; id < n; ++id)
772 nodes[id].velocity = smoothed[id];
773 }
774
775 void
776 relaxVelocities(const std::unordered_map<std::size_t, Vec3D<T>> &previous,
777 T omega) {
778 if (!std::isfinite(omega))
779 throwNonFinite("mask Aitken coefficient");
780 if (previous.empty() || omega == T(1))
781 return;
782
783 for (auto &node : nodes) {
784 const auto found = previous.find(linearIndex(node.index));
785 if (found == previous.end())
786 continue;
787 for (unsigned i = 0; i < D; ++i) {
788 const T relaxed =
789 found->second[i] + omega * (node.velocity[i] - found->second[i]);
790 if (!std::isfinite(relaxed))
791 throwNonFinite("mask velocity relaxation");
792 node.velocity[i] = relaxed;
793 }
794 }
795 }
796
797 void buildAmbientPhiCache() {
798 ambientPhiCache_.clear();
799 if (ambientInterface == nullptr)
800 return;
801 for (ConstSparseIterator it(ambientInterface->getDomain());
802 !it.isFinished(); ++it) {
803 if (!it.isDefined())
804 continue;
805 ambientPhiCache_[detail::gridIndexHash<D>(it.getStartIndices())] =
806 static_cast<T>(ambientSign) * it.getValue();
807 }
808 }
809
810 void buildNodes() {
811 auto oldContactTraction = std::move(previousContactTraction_);
812 auto oldContactReleaseScale = std::move(previousContactReleaseScale_);
813 previousContactTraction_.clear();
814 previousContactReleaseScale_.clear();
815 contactReleaseThreshold_ = T(0);
816 nodes.clear();
818 buildAmbientPhiCache();
819
820 ConstSparseIterator maskIt(maskInterface->getDomain());
821 IndexType index = minIndex;
822 while (true) {
823 if (isInsideMask(maskIt, index)) {
824 const std::size_t id = nodes.size();
825 nodeLookupFlat[linearIndex(index)] = id;
826 nodes.push_back({index});
827 }
828
829 if (!increment(index))
830 break;
831 }
832
833 const std::size_t n = nodes.size();
834 contactFaceActive_.assign(2 * D * n, uint8_t(0));
835 contactFaceVelocity_.assign(2 * D * n, Vec3D<T>{T(0), T(0), T(0)});
836 contactFaceTraction_.assign(2 * D * n, Vec3D<T>{T(0), T(0), T(0)});
837 contactFaceDistance_.assign(2 * D * n, gridDelta);
838
839 contactNodes = 0;
840 candidateContactFaces_ = 0;
841 activeContactFaces_ = 0;
842 tensileContactFaces_ = 0;
843 minContactNormalTraction_ = std::numeric_limits<T>::max();
844 maxContactNormalTraction_ = std::numeric_limits<T>::lowest();
845 for (std::size_t id = 0; id < n; ++id) {
846 auto &node = nodes[id];
847 node.contact = touchesContactBoundary(maskIt, node.index);
848 if (node.contact)
849 ++contactNodes;
850
851 for (unsigned dir = 0; dir < D; ++dir) {
852 for (int offset : {-1, 1}) {
853 const unsigned faceIdx = dir * 2u + (offset == 1 ? 1u : 0u);
854 IndexType neighbor = node.index;
855 neighbor[dir] += offset;
856 const bool neighborIsNode =
857 inBounds(neighbor) && lookupNode(neighbor) != noNode;
858 if (neighborIsNode ||
859 !isContactBoundary(node.index, dir, offset, maskIt))
860 continue; // inactive already set by assign()
861
862 const T faceDistance = maskFaceDistance(maskIt, node.index, neighbor);
863 ++candidateContactFaces_;
864 Vec3D<T> faceNormal{0., 0., 0.};
865 faceNormal[dir] = static_cast<T>(offset);
866 Vec3D<T> oxidePt{0., 0., 0.};
867 for (unsigned i = 0; i < D; ++i)
868 oxidePt[i] = node.index[i] * gridDelta;
869 oxidePt[dir] += offset * gridDelta;
870
871 const auto sigma = deformationField->getStressTensor(oxidePt);
872 if (!isFiniteTensor(sigma))
873 throwNonFinite("oxide stress at mask contact");
874 Vec3D<T> t{0., 0., 0.};
875 for (unsigned i = 0; i < D; ++i)
876 for (unsigned j = 0; j < D; ++j)
877 t[i] += sigma[3 * i + j] * faceNormal[j];
878
879 T tn = 0.;
880 for (unsigned i = 0; i < D; ++i)
881 tn += t[i] * faceNormal[i];
882
883 if (!isFinite(t) || !std::isfinite(tn))
884 throwNonFinite("oxide traction at mask contact");
885 minContactNormalTraction_ = std::min(minContactNormalTraction_, tn);
886 maxContactNormalTraction_ = std::max(maxContactNormalTraction_, tn);
887
888 Vec3D<T> contactLoad = t;
889 if (parameters.unilateralContact && tn >= T(0)) {
890 ++tensileContactFaces_;
891 contactLoad = {T(0), T(0), T(0)};
892 }
893
894 if (parameters.contactMode > 0) {
895 const auto key = contactFaceKey(node.index, dir, offset);
896 const auto prevIt = oldContactTraction.find(key);
897 if (prevIt != oldContactTraction.end()) {
898 const T alpha =
899 std::clamp(parameters.contactLoadRelaxation, T(0.02), T(1));
900 for (unsigned c = 0; c < D; ++c)
901 contactLoad[c] = prevIt->second[c] +
902 alpha * (contactLoad[c] - prevIt->second[c]);
903 }
904 }
905
906 T loadNormal = T(0);
907 for (unsigned i = 0; i < D; ++i)
908 loadNormal += contactLoad[i] * faceNormal[i];
909
910 T releaseThreshold = std::numeric_limits<T>::epsilon();
911 T releaseScale = std::max(std::abs(tn), T(1));
912 if (parameters.contactMode > 0 && parameters.unilateralContact) {
913 const auto scaleIt = oldContactReleaseScale.find(
914 contactFaceKey(node.index, dir, offset));
915 if (scaleIt != oldContactReleaseScale.end())
916 releaseScale = std::max(releaseScale, scaleIt->second);
917 releaseThreshold =
918 std::clamp(parameters.contactReleaseFraction, T(0), T(0.25)) *
919 releaseScale;
920 contactReleaseThreshold_ =
921 std::max(contactReleaseThreshold_, releaseThreshold);
922 }
923
924 if (parameters.unilateralContact && loadNormal >= -releaseThreshold)
925 continue;
926
927 if (!isFinite(contactLoad))
928 throwNonFinite("relaxed oxide contact load");
929
930 if (parameters.contactMode > 0) {
931 const auto key = contactFaceKey(node.index, dir, offset);
932 previousContactTraction_[key] = contactLoad;
933 if (parameters.unilateralContact)
934 previousContactReleaseScale_[key] =
935 std::max(releaseScale, std::abs(loadNormal));
936 }
937
938 contactFaceActive_[faceIdx * n + id] = uint8_t(1);
939 ++activeContactFaces_;
940 contactFaceTraction_[faceIdx * n + id] = contactLoad;
941 contactFaceDistance_[faceIdx * n + id] = faceDistance;
942 if (usesKinematicContactBoundary()) {
943 auto oxVel = deformationField->getVectorVelocity(
944 oxidePt, parameters.material, faceNormal, 0);
945 if (isElasticContactMode()) {
946 // Scale by dt: elastic solver uses dt_ref=1hr so the Dirichlet BC
947 // must be the physical displacement (v_oxide × dt), not the
948 // velocity.
949 oxVel = detail::vecScaled(oxVel, parameters.stressTimeStep);
950 }
951 contactFaceVelocity_[faceIdx * n + id] = oxVel;
952 }
953 }
954 }
955 }
956 markFixedNodes();
957 }
958
959 std::size_t contactFaceKey(const IndexType &index, unsigned direction,
960 int offset) const {
961 std::size_t seed = detail::gridIndexHash<D>(index);
962 seed ^= std::hash<unsigned>{}(direction) +
963 std::size_t(0x9e3779b97f4a7c15ULL) + (seed << 6) + (seed >> 2);
964 seed ^= std::hash<int>{}(offset) + std::size_t(0x9e3779b97f4a7c15ULL) +
965 (seed << 6) + (seed >> 2);
966 return seed;
967 }
968
969 // Returns neighbor velocity without ghost extrapolation.
970 // All arithmetic is in T; reads from the SolverT work vector are widened.
971 template <class SolverT>
972 Vec3D<T> simpleNeighborVelocity(const std::vector<Vec3D<SolverT>> &velocity,
973 std::size_t nodeId, unsigned direction,
974 int offset) const {
975 IndexType neighbor = nodes[nodeId].index;
976 neighbor[direction] += offset;
977 const auto toT = [](const Vec3D<SolverT> &v) -> Vec3D<T> {
978 return {static_cast<T>(v[0]), static_cast<T>(v[1]), static_cast<T>(v[2])};
979 };
980 if (!inBounds(neighbor))
981 return toT(velocity[nodeId]);
982 const std::size_t foundId = nodeLookupFlat[linearIndex(neighbor)];
983 return (foundId != noNode) ? toT(velocity[foundId]) : toT(velocity[nodeId]);
984 }
985
986 // Ghost velocity enforcing sigma_mask * n = traction at a mask boundary face.
987 template <class SolverT>
988 Vec3D<T> stressBoundaryGhost(const std::vector<Vec3D<SolverT>> &velocity,
989 std::size_t nodeId, unsigned normalDir,
990 int offset, T distance,
991 const Vec3D<T> &traction) const {
992 const T lambda = lameLambda();
993 const T mu = lameMu();
994 const T denom =
995 std::max(lambda + T(2) * mu, std::numeric_limits<T>::epsilon());
996 const T signedOffset = static_cast<T>(offset);
997 Vec3D<T> ghost{static_cast<T>(velocity[nodeId][0]),
998 static_cast<T>(velocity[nodeId][1]),
999 static_cast<T>(velocity[nodeId][2])};
1000 T tangentialDivergence = T(0);
1001 std::array<T, D> normalTangentialDerivative{};
1002 for (unsigned tanDir = 0; tanDir < D; ++tanDir) {
1003 if (tanDir == normalDir)
1004 continue;
1005 const Vec3D<T> vPlus =
1006 simpleNeighborVelocity(velocity, nodeId, tanDir, +1);
1007 const Vec3D<T> vMinus =
1008 simpleNeighborVelocity(velocity, nodeId, tanDir, -1);
1009 tangentialDivergence +=
1010 (vPlus[tanDir] - vMinus[tanDir]) / (T(2) * gridDelta);
1011 normalTangentialDerivative[tanDir] =
1012 (vPlus[normalDir] - vMinus[normalDir]) / (T(2) * gridDelta);
1013 }
1014
1015 const T normalTraction = traction[normalDir] * signedOffset;
1016 const T normalDerivative =
1017 (normalTraction - lambda * tangentialDivergence) / denom;
1018 ghost[normalDir] += signedOffset * distance * normalDerivative;
1019
1020 for (unsigned tanDir = 0; tanDir < D; ++tanDir) {
1021 if (tanDir == normalDir)
1022 continue;
1023 const T normalDerivativeTangential =
1024 signedOffset * traction[tanDir] /
1025 std::max(mu, std::numeric_limits<T>::epsilon()) -
1026 normalTangentialDerivative[tanDir];
1027 ghost[tanDir] += signedOffset * distance * normalDerivativeTangential;
1028 }
1029 return ghost;
1030 }
1031
1032 template <class SolverT>
1033 Vec3D<T> tractionFreeGhost(const std::vector<Vec3D<SolverT>> &velocity,
1034 std::size_t nodeId, unsigned normalDir,
1035 int offset) const {
1036 return stressBoundaryGhost(velocity, nodeId, normalDir, offset, gridDelta,
1037 Vec3D<T>{T(0), T(0), T(0)});
1038 }
1039
1040 template <class SolverT>
1041 Vec3D<T> neighborVelocity(const std::vector<Vec3D<SolverT>> &velocity,
1042 std::size_t nodeId, unsigned direction,
1043 int offset) const {
1044 IndexType neighbor = nodes[nodeId].index;
1045 neighbor[direction] += offset;
1046
1047 if (inBounds(neighbor)) {
1048 const std::size_t foundId = nodeLookupFlat[linearIndex(neighbor)];
1049 if (foundId != noNode)
1050 return {static_cast<T>(velocity[foundId][0]),
1051 static_cast<T>(velocity[foundId][1]),
1052 static_cast<T>(velocity[foundId][2])};
1053 }
1054
1055 const unsigned faceIdx = direction * 2u + (offset == 1 ? 1u : 0u);
1056 const std::size_t nn = nodes.size();
1057 if (contactFaceActive_[faceIdx * nn + nodeId]) {
1058 if (usesKinematicContactBoundary())
1059 return detail::vecSubtract(
1060 detail::vecScaled(contactFaceVelocity_[faceIdx * nn + nodeId],
1061 T(2)),
1062 Vec3D<T>{static_cast<T>(velocity[nodeId][0]),
1063 static_cast<T>(velocity[nodeId][1]),
1064 static_cast<T>(velocity[nodeId][2])});
1065
1066 return stressBoundaryGhost(velocity, nodeId, direction, offset,
1067 contactFaceDistance_[faceIdx * nn + nodeId],
1068 contactFaceTraction_[faceIdx * nn + nodeId]);
1069 }
1070
1071 return tractionFreeGhost(velocity, nodeId, direction, offset);
1072 }
1073
1074 template <class SolverT>
1075 Vec3D<T> divergenceGradient(const std::vector<Vec3D<SolverT>> &velocity,
1076 const IndexType &index) const {
1077 Vec3D<T> gradient{T(0), T(0), T(0)};
1078 for (unsigned component = 0; component < D; ++component) {
1079 IndexType plus = index;
1080 IndexType minus = index;
1081 plus[component] += 1;
1082 minus[component] -= 1;
1083 const T plusDiv = divergence(velocity, plus);
1084 const T minusDiv = divergence(velocity, minus);
1085 gradient[component] = (plusDiv - minusDiv) / (T(2) * gridDelta);
1086 }
1087 return gradient;
1088 }
1089
1090 template <class SolverT>
1091 T divergence(const std::vector<Vec3D<SolverT>> &velocity,
1092 const IndexType &index) const {
1093 if (!inBounds(index))
1094 return T(0);
1095 const std::size_t nodeId = nodeLookupFlat[linearIndex(index)];
1096 if (nodeId == noNode)
1097 return T(0);
1098
1099 T result = T(0);
1100 for (unsigned direction = 0; direction < D; ++direction) {
1101 const Vec3D<T> plus = neighborVelocity(velocity, nodeId, direction, 1);
1102 const Vec3D<T> minus = neighborVelocity(velocity, nodeId, direction, -1);
1103 result += (plus[direction] - minus[direction]) / (T(2) * gridDelta);
1104 }
1105 return result;
1106 }
1107
1108 // Evaluates the elastic stencil F(v)[i] = laplaceAverage + gradDivCorrection.
1109 // (A·v)[i] = v[i] - F(v)[i] + b[i], where b[i] = F(0)[i] (contact BC
1110 // constants).
1111 template <class SolverT>
1112 Vec3D<T> computeElasticStencilAt(std::size_t nodeId,
1113 const std::vector<Vec3D<SolverT>> &v,
1114 T gradDivWeight) const {
1115 Vec3D<T> laplaceAverage{T(0), T(0), T(0)};
1116 for (unsigned direction = 0; direction < D; ++direction)
1117 for (int offset : {-1, 1})
1118 detail::vecAddTo(laplaceAverage,
1119 neighborVelocity(v, nodeId, direction, offset));
1120
1121 const T count = static_cast<T>(2 * D);
1122 const Vec3D<T> lapAvg = detail::vecScaled(laplaceAverage, T(1) / count);
1123 const Vec3D<T> gradDivCorr = detail::vecScaled(
1124 divergenceGradient(v, nodes[nodeId].index),
1125 gradDivWeight * gridDelta * gridDelta / (T(2) * static_cast<T>(D)));
1126
1127 return detail::vecAdd(lapAvg, gradDivCorr);
1128 }
1129
1130 // (Av)[i] = v[i] - F(v)[i] + b[i], stored as SolverT.
1131 template <class SolverT>
1132 void elasticMatvec(const std::vector<Vec3D<SolverT>> &v,
1133 const std::vector<Vec3D<T>> &b, T gradDivWeight,
1134 std::vector<Vec3D<SolverT>> &Av) const {
1135#pragma omp parallel for schedule(static)
1136 for (std::size_t i = 0; i < nodes.size(); ++i) {
1137 if (nodes[i].fixed) {
1138 for (unsigned c = 0; c < D; ++c)
1139 Av[i][c] = v[i][c];
1140 continue;
1141 }
1142 const Vec3D<T> Fv = computeElasticStencilAt(i, v, gradDivWeight);
1143 for (unsigned c = 0; c < D; ++c)
1144 Av[i][c] =
1145 static_cast<SolverT>(static_cast<T>(v[i][c]) - Fv[c] + b[i][c]);
1146 }
1147 }
1148
1149 static constexpr std::size_t mgNoNode =
1150 std::numeric_limits<std::size_t>::max();
1151
1152 static Vec3D<T> zeroVec() { return Vec3D<T>{T(0), T(0), T(0)}; }
1153
1154 static IndexType coarsenIndex(const IndexType &index) {
1155 IndexType coarse{};
1156 for (unsigned d = 0; d < D; ++d) {
1157 const auto value = index[d];
1158 coarse[d] = (value >= 0) ? value / 2 : -((-value + 1) / 2);
1159 }
1160 return coarse;
1161 }
1162
1163 std::vector<MultigridLevel> buildMultigridHierarchy() const {
1164 std::vector<MultigridLevel> levels;
1165 levels.emplace_back();
1166 auto &fine = levels.back();
1167 fine.indices.reserve(nodes.size());
1168 for (const auto &node : nodes)
1169 fine.indices.push_back(node.index);
1170
1171 constexpr unsigned maxLevels = 12;
1172 constexpr std::size_t minCoarsenNodes = 48;
1173 while (levels.size() < maxLevels &&
1174 levels.back().indices.size() > minCoarsenNodes) {
1175 const auto &previous = levels.back();
1176 MultigridLevel coarse;
1177 coarse.indices.reserve(previous.indices.size() / 2 + 1);
1178 coarse.children.reserve(previous.indices.size() / 2 + 1);
1179 coarse.fineToCoarse.assign(previous.indices.size(), mgNoNode);
1180
1181 std::unordered_map<std::size_t, std::size_t> coarseLookup;
1182 coarseLookup.reserve(previous.indices.size());
1183 for (std::size_t fineId = 0; fineId < previous.indices.size(); ++fineId) {
1184 const IndexType coarseIndex = coarsenIndex(previous.indices[fineId]);
1185 const std::size_t key = detail::gridIndexHash<D>(coarseIndex);
1186 auto found = coarseLookup.find(key);
1187 if (found == coarseLookup.end()) {
1188 const std::size_t coarseId = coarse.indices.size();
1189 coarseLookup.emplace(key, coarseId);
1190 coarse.indices.push_back(coarseIndex);
1191 coarse.children.emplace_back();
1192 found = coarseLookup.find(key);
1193 }
1194
1195 const std::size_t coarseId = found->second;
1196 coarse.children[coarseId].push_back(fineId);
1197 coarse.fineToCoarse[fineId] = coarseId;
1198 }
1199
1200 if (coarse.indices.empty() ||
1201 coarse.indices.size() >= previous.indices.size())
1202 break;
1203
1204 // Stop if any grid dimension collapses to a single cell on the coarse
1205 // level. For thin masks this happens quickly in the thickness direction,
1206 // and a 1-cell-thick level loses all stress-gradient information across
1207 // that dimension, making the smoother degenerate.
1208 {
1209 IndexType lo = coarse.indices.front(), hi = coarse.indices.front();
1210 for (const auto &idx : coarse.indices)
1211 for (unsigned d = 0; d < D; ++d) {
1212 lo[d] = std::min(lo[d], idx[d]);
1213 hi[d] = std::max(hi[d], idx[d]);
1214 }
1215 bool degenerate = false;
1216 for (unsigned d = 0; d < D; ++d)
1217 if (hi[d] == lo[d])
1218 degenerate = true;
1219 if (degenerate)
1220 break;
1221 }
1222
1223 levels.push_back(std::move(coarse));
1224 }
1225 return levels;
1226 }
1227
1228 static T vectorDot(const std::vector<Vec3D<T>> &a,
1229 const std::vector<Vec3D<T>> &b) {
1230 T sum = T(0);
1231 for (std::size_t i = 0; i < a.size(); ++i)
1232 for (unsigned c = 0; c < D; ++c)
1233 sum += a[i][c] * b[i][c];
1234 return sum;
1235 }
1236
1237 static T vectorNorm(const std::vector<Vec3D<T>> &v) {
1238 return std::sqrt(std::max(vectorDot(v, v), T(0)));
1239 }
1240
1241 static void vectorScale(std::vector<Vec3D<T>> &v, T scale) {
1242 for (auto &entry : v)
1243 for (unsigned c = 0; c < D; ++c)
1244 entry[c] *= scale;
1245 }
1246
1247 static void vectorAxpy(std::vector<Vec3D<T>> &y, T alpha,
1248 const std::vector<Vec3D<T>> &x) {
1249 for (std::size_t i = 0; i < y.size(); ++i)
1250 for (unsigned c = 0; c < D; ++c)
1251 y[i][c] += alpha * x[i][c];
1252 }
1253
1254 static void vectorSubtractInPlace(std::vector<Vec3D<T>> &y, T alpha,
1255 const std::vector<Vec3D<T>> &x) {
1256 for (std::size_t i = 0; i < y.size(); ++i)
1257 for (unsigned c = 0; c < D; ++c)
1258 y[i][c] -= alpha * x[i][c];
1259 }
1260
1261 static T flatValue(const std::vector<Vec3D<T>> &v, std::size_t row) {
1262 return v[row / D][row % D];
1263 }
1264
1265 static void flatSet(std::vector<Vec3D<T>> &v, std::size_t row, T value) {
1266 v[row / D][row % D] = value;
1267 }
1268
1269 void collectLocalCandidatesRecursive(unsigned dim, const IndexType &center,
1270 IndexType &candidate,
1271 std::vector<std::size_t> &out) const {
1272 if (dim == D) {
1273 if (!inBounds(candidate))
1274 return;
1275 const std::size_t nodeId = lookupNode(candidate);
1276 if (nodeId != noNode)
1277 out.push_back(nodeId);
1278 return;
1279 }
1280
1281 for (int offset = -2; offset <= 2; ++offset) {
1282 candidate[dim] = center[dim] + offset;
1283 collectLocalCandidatesRecursive(dim + 1, center, candidate, out);
1284 }
1285 }
1286
1287 void collectLocalCandidates(std::size_t rowNode,
1288 std::vector<std::size_t> &out) const {
1289 out.clear();
1290 IndexType candidate = nodes[rowNode].index;
1291 collectLocalCandidatesRecursive(0, nodes[rowNode].index, candidate, out);
1292 out.push_back(rowNode);
1293 std::sort(out.begin(), out.end());
1294 out.erase(std::unique(out.begin(), out.end()), out.end());
1295 }
1296
1297 SparseMatrix
1298 compressSparseRows(std::vector<std::unordered_map<std::size_t, T>> &rows,
1299 std::size_t nodeCount) const {
1300 SparseMatrix matrix;
1301 matrix.nodeCount = nodeCount;
1302 matrix.rowPtr.assign(rows.size() + 1, 0);
1303 matrix.invDiagonal.assign(rows.size(), T(1));
1304
1305 for (std::size_t row = 0; row < rows.size(); ++row) {
1306 std::vector<std::pair<std::size_t, T>> entries(rows[row].begin(),
1307 rows[row].end());
1308 std::sort(entries.begin(), entries.end(),
1309 [](const auto &a, const auto &b) { return a.first < b.first; });
1310
1311 matrix.rowPtr[row] = matrix.values.size();
1312 T diagonal = T(0);
1313 for (const auto &[col, value] : entries) {
1314 if (std::abs(value) <= std::numeric_limits<T>::epsilon() * T(100))
1315 continue;
1316 if (!std::isfinite(value))
1317 throwNonFinite("traction sparse matrix assembly");
1318 matrix.colIndex.push_back(col);
1319 matrix.values.push_back(value);
1320 if (col == row)
1321 diagonal += value;
1322 }
1323
1324 if (std::abs(diagonal) > std::numeric_limits<T>::epsilon())
1325 matrix.invDiagonal[row] = T(1) / diagonal;
1326 else
1327 matrix.invDiagonal[row] = T(1);
1328 }
1329 matrix.rowPtr[rows.size()] = matrix.values.size();
1330 return matrix;
1331 }
1332
1333 SparseMatrix buildFineElasticMatrix(const std::vector<Vec3D<T>> &b,
1334 T gradDivWeight) const {
1335 const std::size_t n = nodes.size();
1336 std::vector<std::unordered_map<std::size_t, T>> rows(n * D);
1337 std::vector<Vec3D<T>> basis(n, zeroVec());
1338 std::vector<std::size_t> candidates;
1339
1340 for (std::size_t rowNode = 0; rowNode < n; ++rowNode) {
1341 if (nodes[rowNode].fixed) {
1342 for (unsigned rowComponent = 0; rowComponent < D; ++rowComponent) {
1343 const std::size_t row = rowNode * D + rowComponent;
1344 rows[row][row] = T(1);
1345 }
1346 continue;
1347 }
1348
1349 // Build A column-by-column via probing. The operator is (A·v)[i][c] =
1350 // v[i][c] - F(v)[i][c], where F is affine: F(v) = F_linear(v) + F(0).
1351 // b[rowNode] = F(0)[rowNode] (the traction load vector). The matrix
1352 // entry is the linear part only:
1353 // A[row, col] = δ(row,col) - (F(e_col)[rowComp] - F(0)[rowComp])
1354 // = δ(row,col) - (Fbasis[rowComp] - b[rowNode][rowComp])
1355 // b is constant with respect to col so it cancels between probes; it
1356 // is included here to isolate the linear part of F from its affine
1357 // offset.
1358 collectLocalCandidates(rowNode, candidates);
1359 for (std::size_t colNode : candidates) {
1360 for (unsigned colComponent = 0; colComponent < D; ++colComponent) {
1361 basis[colNode][colComponent] = T(1);
1362 const Vec3D<T> Fbasis =
1363 computeElasticStencilAt(rowNode, basis, gradDivWeight);
1364 basis[colNode][colComponent] = T(0);
1365
1366 for (unsigned rowComponent = 0; rowComponent < D; ++rowComponent) {
1367 const std::size_t row = rowNode * D + rowComponent;
1368 const std::size_t col = colNode * D + colComponent;
1369 const T identity =
1370 (rowNode == colNode && rowComponent == colComponent) ? T(1)
1371 : T(0);
1372 const T value =
1373 identity - (Fbasis[rowComponent] - b[rowNode][rowComponent]);
1374 rows[row][col] += value;
1375 }
1376 }
1377 }
1378 }
1379
1380 return compressSparseRows(rows, n);
1381 }
1382
1383 SparseMatrix buildGalerkinMatrix(const SparseMatrix &fine,
1384 const MultigridLevel &coarseLevel) const {
1385 const std::size_t coarseNodes = coarseLevel.indices.size();
1386 std::vector<std::unordered_map<std::size_t, T>> rows(coarseNodes * D);
1387
1388 for (std::size_t fineRow = 0; fineRow < fine.nodeCount * D; ++fineRow) {
1389 const std::size_t fineRowNode = fineRow / D;
1390 if (fineRowNode >= coarseLevel.fineToCoarse.size())
1391 continue;
1392 const std::size_t coarseRowNode = coarseLevel.fineToCoarse[fineRowNode];
1393 if (coarseRowNode == mgNoNode)
1394 continue;
1395
1396 const T restrictionWeight =
1397 T(1) / static_cast<T>(std::max<std::size_t>(
1398 coarseLevel.children[coarseRowNode].size(), 1));
1399 const std::size_t coarseRow = coarseRowNode * D + fineRow % D;
1400
1401 for (std::size_t nz = fine.rowPtr[fineRow]; nz < fine.rowPtr[fineRow + 1];
1402 ++nz) {
1403 const std::size_t fineCol = fine.colIndex[nz];
1404 const std::size_t fineColNode = fineCol / D;
1405 if (fineColNode >= coarseLevel.fineToCoarse.size())
1406 continue;
1407 const std::size_t coarseColNode = coarseLevel.fineToCoarse[fineColNode];
1408 if (coarseColNode == mgNoNode)
1409 continue;
1410 const std::size_t coarseCol = coarseColNode * D + fineCol % D;
1411 rows[coarseRow][coarseCol] += restrictionWeight * fine.values[nz];
1412 }
1413 }
1414
1415 return compressSparseRows(rows, coarseNodes);
1416 }
1417
1418 void sparseMatvec(const SparseMatrix &matrix, const std::vector<Vec3D<T>> &x,
1419 std::vector<Vec3D<T>> &Ax) const {
1420 Ax.assign(matrix.nodeCount, zeroVec());
1421#pragma omp parallel for schedule(static)
1422 for (std::size_t row = 0; row < matrix.nodeCount * D; ++row) {
1423 T sum = T(0);
1424 for (std::size_t nz = matrix.rowPtr[row]; nz < matrix.rowPtr[row + 1];
1425 ++nz)
1426 sum += matrix.values[nz] * flatValue(x, matrix.colIndex[nz]);
1427 flatSet(Ax, row, sum);
1428 }
1429 }
1430
1431 void multigridProlong(const std::vector<MultigridLevel> &levels,
1432 std::size_t coarseLevelId,
1433 const std::vector<Vec3D<T>> &coarse,
1434 std::vector<Vec3D<T>> &fine) const {
1435 const auto &fineLevel = levels[coarseLevelId - 1];
1436 const auto &coarseLevel = levels[coarseLevelId];
1437 fine.assign(fineLevel.indices.size(), zeroVec());
1438 for (std::size_t coarseId = 0; coarseId < coarseLevel.children.size();
1439 ++coarseId)
1440 for (std::size_t fineId : coarseLevel.children[coarseId])
1441 for (unsigned c = 0; c < D; ++c)
1442 fine[fineId][c] = coarse[coarseId][c];
1443 }
1444
1445 void multigridSmooth(const SparseMatrix &matrix,
1446 const std::vector<Vec3D<T>> &rhs,
1447 std::vector<Vec3D<T>> &x, unsigned sweeps,
1448 T omega) const {
1449 const std::size_t rows = matrix.nodeCount * D;
1450 auto relaxRow = [&](std::size_t row) {
1451 T offDiagonal = T(0);
1452 for (std::size_t nz = matrix.rowPtr[row]; nz < matrix.rowPtr[row + 1];
1453 ++nz) {
1454 const std::size_t col = matrix.colIndex[nz];
1455 if (col == row)
1456 continue;
1457 offDiagonal += matrix.values[nz] * flatValue(x, col);
1458 }
1459 const T updated =
1460 (flatValue(rhs, row) - offDiagonal) * matrix.invDiagonal[row];
1461 flatSet(x, row,
1462 flatValue(x, row) + omega * (updated - flatValue(x, row)));
1463 };
1464
1465 for (unsigned sweep = 0; sweep < sweeps; ++sweep) {
1466 for (std::size_t row = 0; row < rows; ++row)
1467 relaxRow(row);
1468 for (std::size_t row = rows; row-- > 0;)
1469 relaxRow(row);
1470 }
1471 }
1472
1473 void multigridResidual(const SparseMatrix &matrix,
1474 const std::vector<Vec3D<T>> &rhs,
1475 const std::vector<Vec3D<T>> &x,
1476 std::vector<Vec3D<T>> &residualOut) const {
1477 sparseMatvec(matrix, x, residualOut);
1478 for (std::size_t i = 0; i < rhs.size(); ++i)
1479 for (unsigned c = 0; c < D; ++c)
1480 residualOut[i][c] = rhs[i][c] - residualOut[i][c];
1481 }
1482
1483 void multigridRestrict(const std::vector<Vec3D<T>> &fineResidual,
1484 const MultigridLevel &coarseLevel,
1485 std::vector<Vec3D<T>> &coarseRhs) const {
1486 coarseRhs.assign(coarseLevel.indices.size(), zeroVec());
1487 for (std::size_t coarseId = 0; coarseId < coarseLevel.children.size();
1488 ++coarseId) {
1489 const auto &children = coarseLevel.children[coarseId];
1490 if (children.empty())
1491 continue;
1492 for (std::size_t fineId : children)
1493 for (unsigned c = 0; c < D; ++c)
1494 coarseRhs[coarseId][c] += fineResidual[fineId][c];
1495 const T scale = T(1) / static_cast<T>(children.size());
1496 for (unsigned c = 0; c < D; ++c)
1497 coarseRhs[coarseId][c] *= scale;
1498 }
1499 }
1500
1501 void multigridProlongAdd(const std::vector<Vec3D<T>> &coarseCorrection,
1502 const MultigridLevel &coarseLevel,
1503 std::vector<Vec3D<T>> &fineCorrection) const {
1504 for (std::size_t coarseId = 0; coarseId < coarseLevel.children.size();
1505 ++coarseId) {
1506 for (std::size_t fineId : coarseLevel.children[coarseId])
1507 for (unsigned c = 0; c < D; ++c)
1508 fineCorrection[fineId][c] += coarseCorrection[coarseId][c];
1509 }
1510 }
1511
1512 void multigridVCycle(const std::vector<MultigridLevel> &levels,
1513 std::size_t levelId, const std::vector<Vec3D<T>> &rhs,
1514 std::vector<Vec3D<T>> &x, T smootherOmega) const {
1515 const auto &level = levels[levelId];
1516 if (levelId + 1 >= levels.size() || level.indices.size() <= 32) {
1517 multigridSmooth(level.matrix, rhs, x, 40, smootherOmega);
1518 return;
1519 }
1520
1521 multigridSmooth(level.matrix, rhs, x, 2, smootherOmega);
1522
1523 std::vector<Vec3D<T>> fineResidual;
1524 multigridResidual(level.matrix, rhs, x, fineResidual);
1525
1526 std::vector<Vec3D<T>> coarseRhs;
1527 const auto &coarseLevel = levels[levelId + 1];
1528 multigridRestrict(fineResidual, coarseLevel, coarseRhs);
1529
1530 std::vector<Vec3D<T>> coarseCorrection(coarseRhs.size(), zeroVec());
1531 multigridVCycle(levels, levelId + 1, coarseRhs, coarseCorrection,
1532 smootherOmega);
1533 multigridProlongAdd(coarseCorrection, coarseLevel, x);
1534
1535 multigridSmooth(level.matrix, rhs, x, 2, smootherOmega);
1536 }
1537
1538 std::vector<Vec3D<T>>
1539 multigridPrecondition(const std::vector<MultigridLevel> &levels,
1540 const std::vector<Vec3D<T>> &rhs,
1541 T smootherOmega) const {
1542 std::vector<Vec3D<T>> correction(rhs.size(), zeroVec());
1543 if (levels.empty())
1544 return correction;
1545 multigridVCycle(levels, 0, rhs, correction, smootherOmega);
1546 return correction;
1547 }
1548
1549 void exactElasticResidual(const SparseMatrix &matrix,
1550 const std::vector<Vec3D<T>> &x,
1551 const std::vector<Vec3D<T>> &b,
1552 std::vector<Vec3D<T>> &r) const {
1553 std::vector<Vec3D<T>> Ax(x.size(), zeroVec());
1554 sparseMatvec(matrix, x, Ax);
1555 r.assign(x.size(), zeroVec());
1556 for (std::size_t i = 0; i < x.size(); ++i)
1557 for (unsigned c = 0; c < D; ++c)
1558 r[i][c] = b[i][c] - Ax[i][c];
1559 }
1560
1561 static std::vector<T>
1562 solveUpperTriangular(const std::vector<std::vector<T>> &h,
1563 const std::vector<T> &g, unsigned usedColumns) {
1564 std::vector<T> y(usedColumns, T(0));
1565 for (int row = static_cast<int>(usedColumns) - 1; row >= 0; --row) {
1566 T sum = g[static_cast<std::size_t>(row)];
1567 for (unsigned col = static_cast<unsigned>(row) + 1; col < usedColumns;
1568 ++col)
1569 sum -= h[static_cast<std::size_t>(row)][col] * y[col];
1570 const T diag =
1571 h[static_cast<std::size_t>(row)][static_cast<std::size_t>(row)];
1572 if (std::abs(diag) > std::numeric_limits<T>::epsilon())
1573 y[static_cast<std::size_t>(row)] = sum / diag;
1574 }
1575 return y;
1576 }
1577
1578 void solveElasticVelocity() {
1579 if (parameters.contactMode > 0) {
1580 solveElasticVelocityMultigridGMRES();
1581 return;
1582 }
1583 solveElasticVelocityBiCGSTAB();
1584 }
1585
1586 void solveElasticVelocityMultigridGMRES() {
1587 iterations = 0;
1588 residual = 0.;
1589 if (nodes.empty())
1590 return;
1591
1592 const T lambda = lameLambda();
1593 const T mu = lameMu();
1594 const T gradDivWeight =
1595 (lambda + mu) /
1596 std::max(lambda + T(2) * mu, std::numeric_limits<T>::epsilon());
1597 const T smootherOmega =
1598 std::clamp(parameters.multigridSmootherOmega, T(0.2), T(1.4));
1599
1600 const std::size_t n = nodes.size();
1601 const std::vector<Vec3D<T>> zeros(n, zeroVec());
1602 std::vector<Vec3D<T>> b(n, zeroVec());
1603#pragma omp parallel for schedule(static)
1604 for (std::size_t i = 0; i < n; ++i) {
1605 if (nodes[i].fixed)
1606 b[i] = zeroVec();
1607 else
1608 b[i] = computeElasticStencilAt(i, zeros, gradDivWeight);
1609 }
1610
1611 std::vector<Vec3D<T>> x(n, zeroVec());
1612 for (std::size_t i = 0; i < n; ++i)
1613 x[i] = nodes[i].fixed ? zeroVec() : nodes[i].velocity;
1614
1615 // Rebuild the multigrid hierarchy only when the node count or the contact
1616 // face classification changes. The stiffness matrix depends only on node
1617 // geometry and which faces are active (compressive), not on the traction
1618 // magnitudes — those only enter the load vector b computed above. Within
1619 // a single time step the coupling iterations run without advection, so the
1620 // mask geometry and contact pattern are typically stable after the first
1621 // call and the matrix can be reused for all subsequent iterations.
1622 const bool hierarchyDirty =
1623 cachedNodeCount_ != n || cachedContactFaceActive_ != contactFaceActive_;
1624
1625 if (hierarchyDirty) {
1626 cachedMultigridLevels_ = buildMultigridHierarchy();
1627 if (cachedMultigridLevels_.empty())
1628 return;
1629 cachedMultigridLevels_[0].matrix =
1630 buildFineElasticMatrix(b, gradDivWeight);
1631 for (std::size_t level = 1; level < cachedMultigridLevels_.size();
1632 ++level)
1633 cachedMultigridLevels_[level].matrix =
1634 buildGalerkinMatrix(cachedMultigridLevels_[level - 1].matrix,
1635 cachedMultigridLevels_[level]);
1636 cachedNodeCount_ = n;
1637 cachedContactFaceActive_ = contactFaceActive_;
1638 }
1639
1640 if (cachedMultigridLevels_.empty())
1641 return;
1642 const auto &multigridLevels = cachedMultigridLevels_;
1643
1644 std::vector<Vec3D<T>> r;
1645 exactElasticResidual(multigridLevels[0].matrix, x, b, r);
1646
1647 const T rhsNorm = vectorNorm(b);
1648 const T absTolerance =
1649 std::max(parameters.tolerance * rhsNorm,
1650 std::numeric_limits<T>::epsilon() * T(100) *
1651 std::sqrt(static_cast<T>(
1652 std::max<std::size_t>(std::size_t(1), n * D))));
1653 const T residualNormDenom = std::max(rhsNorm, absTolerance);
1654 auto updateGmresResidual = [&](T absResidual) {
1655 residual = (absResidual <= absTolerance)
1656 ? T(0)
1657 : absResidual / residualNormDenom;
1658 };
1659
1660 T absResidual = vectorNorm(r);
1661 updateGmresResidual(absResidual);
1662 if (absResidual <= absTolerance || residual < parameters.tolerance) {
1663 for (std::size_t i = 0; i < n; ++i)
1664 nodes[i].velocity = x[i];
1665 return;
1666 }
1667
1668 constexpr unsigned restart = 32;
1669 while (iterations < parameters.maxIterations &&
1670 residual > parameters.tolerance) {
1671 const T beta = vectorNorm(r);
1672 if (!std::isfinite(beta))
1673 throwNonFinite("traction multigrid GMRES residual");
1674 if (beta <= absTolerance) {
1675 residual = T(0);
1676 break;
1677 }
1678
1679 const unsigned innerLimit =
1680 std::min<unsigned>(restart, parameters.maxIterations - iterations);
1681 std::vector<std::vector<Vec3D<T>>> v(innerLimit + 1,
1682 std::vector<Vec3D<T>>(n, zeroVec()));
1683 std::vector<std::vector<Vec3D<T>>> z(innerLimit,
1684 std::vector<Vec3D<T>>(n, zeroVec()));
1685 v[0] = r;
1686 vectorScale(v[0], T(1) / beta);
1687
1688 std::vector<std::vector<T>> h(innerLimit + 1,
1689 std::vector<T>(innerLimit, T(0)));
1690 std::vector<T> cs(innerLimit, T(0));
1691 std::vector<T> sn(innerLimit, T(0));
1692 std::vector<T> g(innerLimit + 1, T(0));
1693 g[0] = beta;
1694
1695 unsigned usedColumns = 0;
1696 for (unsigned j = 0; j < innerLimit; ++j) {
1697 z[j] = multigridPrecondition(multigridLevels, v[j], smootherOmega);
1698
1699 std::vector<Vec3D<T>> w(n, zeroVec());
1700 sparseMatvec(multigridLevels[0].matrix, z[j], w);
1701
1702 for (unsigned i = 0; i <= j; ++i) {
1703 h[i][j] = vectorDot(w, v[i]);
1704 vectorSubtractInPlace(w, h[i][j], v[i]);
1705 }
1706
1707 h[j + 1][j] = vectorNorm(w);
1708 if (h[j + 1][j] > std::numeric_limits<T>::epsilon()) {
1709 v[j + 1] = w;
1710 vectorScale(v[j + 1], T(1) / h[j + 1][j]);
1711 }
1712
1713 for (unsigned i = 0; i < j; ++i) {
1714 const T h0 = h[i][j];
1715 const T h1 = h[i + 1][j];
1716 h[i][j] = cs[i] * h0 + sn[i] * h1;
1717 h[i + 1][j] = -sn[i] * h0 + cs[i] * h1;
1718 }
1719
1720 const T h0 = h[j][j];
1721 const T h1 = h[j + 1][j];
1722 const T denom = std::hypot(h0, h1);
1723 if (denom <= std::numeric_limits<T>::epsilon()) {
1724 cs[j] = T(1);
1725 sn[j] = T(0);
1726 } else {
1727 cs[j] = h0 / denom;
1728 sn[j] = h1 / denom;
1729 }
1730 h[j][j] = cs[j] * h0 + sn[j] * h1;
1731 h[j + 1][j] = T(0);
1732
1733 const T g0 = g[j];
1734 g[j] = cs[j] * g0;
1735 g[j + 1] = -sn[j] * g0;
1736
1737 ++iterations;
1738 usedColumns = j + 1;
1739 const T projectedAbsResidual = std::abs(g[j + 1]);
1740 updateGmresResidual(projectedAbsResidual);
1741 if (!std::isfinite(residual))
1742 throwNonFinite("traction multigrid GMRES residual");
1743 if (projectedAbsResidual <= absTolerance ||
1744 residual < parameters.tolerance)
1745 break;
1746 }
1747
1748 if (usedColumns == 0)
1749 break;
1750
1751 const auto y = solveUpperTriangular(h, g, usedColumns);
1752 for (unsigned col = 0; col < usedColumns; ++col)
1753 vectorAxpy(x, y[col], z[col]);
1754
1755 exactElasticResidual(multigridLevels[0].matrix, x, b, r);
1756 absResidual = vectorNorm(r);
1757 updateGmresResidual(absResidual);
1758 if (!std::isfinite(residual))
1759 throwNonFinite("traction multigrid GMRES residual");
1760 }
1761
1762 for (std::size_t i = 0; i < n; ++i)
1763 nodes[i].velocity = x[i];
1764
1765 if (residual > parameters.tolerance)
1766 Logger::getInstance()
1767 .addWarning("solveElasticVelocity: traction multigrid GMRES did not "
1768 "converge after " +
1769 std::to_string(iterations) + "/" +
1770 std::to_string(parameters.maxIterations) +
1771 " iterations (residual=" + std::to_string(residual) +
1772 ", tolerance=" + std::to_string(parameters.tolerance) +
1773 ")")
1774 .print();
1775 }
1776
1777 void solveElasticVelocityRelaxation() {
1778 iterations = 0;
1779 residual = 0.;
1780 if (nodes.empty())
1781 return;
1782
1783 const T lambda = lameLambda();
1784 const T mu = lameMu();
1785 const T gradDivWeight =
1786 (lambda + mu) /
1787 std::max(lambda + T(2) * mu, std::numeric_limits<T>::epsilon());
1788 const T relaxation = std::clamp(parameters.relaxation, T(0.01), T(1));
1789
1790 std::vector<Vec3D<T>> current(nodes.size());
1791 std::vector<Vec3D<T>> next(nodes.size());
1792 for (std::size_t i = 0; i < nodes.size(); ++i)
1793 current[i] =
1794 nodes[i].fixed ? Vec3D<T>{T(0), T(0), T(0)} : nodes[i].velocity;
1795
1796 for (; iterations < parameters.maxIterations; ++iterations) {
1797 T maxDelta = T(0);
1798 T maxMagnitude = std::numeric_limits<T>::epsilon();
1799 int finiteFlag = 1;
1800
1801#pragma omp parallel for schedule(static) \
1802 reduction(max : maxDelta, maxMagnitude) reduction(min : finiteFlag)
1803 for (std::size_t i = 0; i < nodes.size(); ++i) {
1804 Vec3D<T> candidate =
1805 nodes[i].fixed ? Vec3D<T>{T(0), T(0), T(0)}
1806 : computeElasticStencilAt(i, current, gradDivWeight);
1807
1808 for (unsigned c = 0; c < D; ++c) {
1809 if (!std::isfinite(candidate[c]) || !std::isfinite(current[i][c])) {
1810 finiteFlag = 0;
1811 next[i][c] = current[i][c];
1812 continue;
1813 }
1814 next[i][c] =
1815 current[i][c] + relaxation * (candidate[c] - current[i][c]);
1816 maxDelta = std::max(maxDelta, std::abs(next[i][c] - current[i][c]));
1817 maxMagnitude = std::max(maxMagnitude, std::abs(next[i][c]));
1818 maxMagnitude = std::max(maxMagnitude, std::abs(current[i][c]));
1819 }
1820 }
1821
1822 if (!finiteFlag) {
1823 residual = std::numeric_limits<T>::infinity();
1824 throwNonFinite("traction mask solve");
1825 }
1826
1827 residual = maxDelta / maxMagnitude;
1828 current.swap(next);
1829 if (residual < parameters.tolerance) {
1830 ++iterations;
1831 break;
1832 }
1833 }
1834
1835 for (std::size_t i = 0; i < nodes.size(); ++i)
1836 nodes[i].velocity = current[i];
1837
1838 if (residual > parameters.tolerance)
1839 Logger::getInstance()
1840 .addWarning("solveElasticVelocity: traction relaxation did not "
1841 "converge after " +
1842 std::to_string(iterations) + "/" +
1843 std::to_string(parameters.maxIterations) +
1844 " iterations (residual=" + std::to_string(residual) +
1845 ", tolerance=" + std::to_string(parameters.tolerance) +
1846 ")")
1847 .print();
1848 }
1849
1850 void solveElasticVelocityBiCGSTAB() {
1851 iterations = 0;
1852 residual = 0.;
1853 if (nodes.empty())
1854 return;
1855
1856 using SolverT = float;
1857
1858 const T lambda = lameLambda();
1859 const T mu = lameMu();
1860 const T gradDivWeight =
1861 (lambda + mu) /
1862 std::max(lambda + T(2) * mu, std::numeric_limits<T>::epsilon());
1863
1864 const std::size_t n = nodes.size();
1865 const Vec3D<SolverT> zero3{SolverT(0), SolverT(0), SolverT(0)};
1866
1867 // b[i] = F(0)[i]: contact BC constants (reaction/contact velocities).
1868 // OOB and traction-free faces contribute v[i]=0 at zeros.
1869 std::vector<Vec3D<T>> b(n);
1870 {
1871 const std::vector<Vec3D<SolverT>> zeros(n, zero3);
1872#pragma omp parallel for schedule(static)
1873 for (std::size_t i = 0; i < n; ++i) {
1874 if (nodes[i].fixed)
1875 b[i] = Vec3D<T>{T(0), T(0), T(0)};
1876 else
1877 b[i] = computeElasticStencilAt(i, zeros, gradDivWeight);
1878 }
1879 }
1880
1881 // Cold start from zeros (matching original Jacobi behavior).
1882 std::vector<Vec3D<SolverT>> x(n, zero3);
1883
1884 // r = b - A*x. With x=0: A*0 = 0 - F(0) + b = 0, so r = b.
1885 std::vector<Vec3D<SolverT>> r(n), r_hat(n);
1886 for (std::size_t i = 0; i < n; ++i)
1887 for (unsigned c = 0; c < D; ++c) {
1888 r[i][c] = static_cast<SolverT>(b[i][c]);
1889 r_hat[i][c] = r[i][c];
1890 }
1891
1892 // BiCGSTAB with identity preconditioner.
1893 // For most interior nodes the diagonal of A is ≈ 1; the identity
1894 // preconditioner is exact there and a reasonable approximation at boundary
1895 // nodes.
1896 std::vector<Vec3D<SolverT>> pv(n, zero3), sv(n, zero3), y(n), z(n), s(n),
1897 t(n);
1898 T rho = T(1), alpha = T(1), omega = T(1);
1899
1900 auto vecDot = [&](const std::vector<Vec3D<SolverT>> &a,
1901 const std::vector<Vec3D<SolverT>> &bv) {
1902 T sum = T(0);
1903 for (std::size_t i = 0; i < n; ++i)
1904 for (unsigned c = 0; c < D; ++c) {
1905 const T av = static_cast<T>(a[i][c]);
1906 const T bvVal = static_cast<T>(bv[i][c]);
1907 if (!std::isfinite(av) || !std::isfinite(bvVal))
1908 return std::numeric_limits<T>::quiet_NaN();
1909 sum += av * bvVal;
1910 }
1911 return sum;
1912 };
1913
1914 auto vecMaxAbs = [&](const std::vector<Vec3D<SolverT>> &vin) {
1915 T m = T(0);
1916 for (std::size_t i = 0; i < n; ++i)
1917 for (unsigned c = 0; c < D; ++c) {
1918 const T value = static_cast<T>(vin[i][c]);
1919 if (!std::isfinite(value))
1920 return std::numeric_limits<T>::infinity();
1921 m = std::max(m, std::abs(value));
1922 }
1923 return m;
1924 };
1925
1926 const T b_norm = [&] {
1927 T m = T(0);
1928 for (std::size_t i = 0; i < n; ++i)
1929 for (unsigned c = 0; c < D; ++c)
1930 m = std::max(m, std::abs(b[i][c]));
1931 return (m < T(1e-100)) ? T(1) : m;
1932 }();
1933
1934 for (; iterations < parameters.maxIterations; ++iterations) {
1935 const T rho_new = vecDot(r_hat, r);
1936 if (!std::isfinite(rho_new) || std::abs(rho_new) < T(1e-100))
1937 break;
1938 if (!std::isfinite(rho) || !std::isfinite(alpha) ||
1939 !std::isfinite(omega) || std::abs(omega) < T(1e-100))
1940 break;
1941
1942 const T beta = (rho_new / rho) * (alpha / omega);
1943 if (!std::isfinite(beta))
1944 break;
1945 rho = rho_new;
1946
1947 for (std::size_t i = 0; i < n; ++i)
1948 for (unsigned c = 0; c < D; ++c)
1949 pv[i][c] = static_cast<SolverT>(r[i][c] +
1950 beta * (pv[i][c] - omega * sv[i][c]));
1951
1952 // Identity preconditioner: y = p
1953 y = pv;
1954 elasticMatvec(y, b, gradDivWeight, sv);
1955
1956 const T r_hat_v = vecDot(r_hat, sv);
1957 if (!std::isfinite(r_hat_v) || std::abs(r_hat_v) < T(1e-100))
1958 break;
1959
1960 alpha = rho_new / r_hat_v;
1961 if (!std::isfinite(alpha))
1962 break;
1963
1964 for (std::size_t i = 0; i < n; ++i)
1965 for (unsigned c = 0; c < D; ++c)
1966 s[i][c] = static_cast<SolverT>(r[i][c] - alpha * sv[i][c]);
1967
1968 residual = vecMaxAbs(s);
1969 if (!std::isfinite(residual))
1970 break;
1971 if (residual < parameters.tolerance * b_norm) {
1972 for (std::size_t i = 0; i < n; ++i)
1973 for (unsigned c = 0; c < D; ++c)
1974 x[i][c] = static_cast<SolverT>(x[i][c] + alpha * y[i][c]);
1975 ++iterations;
1976 break;
1977 }
1978
1979 // Identity preconditioner: z = s
1980 z = s;
1981 elasticMatvec(z, b, gradDivWeight, t);
1982
1983 const T t_s = vecDot(t, s);
1984 const T t_t = vecDot(t, t);
1985 if (!std::isfinite(t_s) || !std::isfinite(t_t) || t_t <= T(1e-100))
1986 break;
1987 omega = t_s / t_t;
1988 if (!std::isfinite(omega))
1989 break;
1990
1991 for (std::size_t i = 0; i < n; ++i)
1992 for (unsigned c = 0; c < D; ++c) {
1993 x[i][c] =
1994 static_cast<SolverT>(x[i][c] + alpha * y[i][c] + omega * z[i][c]);
1995 r[i][c] = static_cast<SolverT>(s[i][c] - omega * t[i][c]);
1996 }
1997
1998 residual = vecMaxAbs(r);
1999 if (!std::isfinite(residual))
2000 break;
2001 if (residual < parameters.tolerance * b_norm) {
2002 ++iterations;
2003 break;
2004 }
2005 }
2006
2007 bool finiteSolution = true;
2008 for (std::size_t i = 0; i < n; ++i)
2009 for (unsigned c = 0; c < D; ++c)
2010 if (!std::isfinite(static_cast<T>(x[i][c])))
2011 finiteSolution = false;
2012
2013 if (finiteSolution) {
2014 for (std::size_t i = 0; i < n; ++i)
2015 for (unsigned c = 0; c < D; ++c)
2016 nodes[i].velocity[c] = static_cast<T>(x[i][c]);
2017 } else {
2018 residual = std::numeric_limits<T>::infinity();
2019 throwNonFinite("legacy kinematic mask solve");
2020 }
2021 if (residual > parameters.tolerance * b_norm)
2022 Logger::getInstance()
2023 .addWarning(
2024 "solveElasticVelocity: BiCGSTAB did not converge after " +
2025 std::to_string(iterations) + "/" +
2026 std::to_string(parameters.maxIterations) +
2027 " iterations (residual=" + std::to_string(residual / b_norm) +
2028 ", tolerance=" + std::to_string(parameters.tolerance) + ")")
2029 .print();
2030 }
2031
2032 bool touchesContactBoundary(ConstSparseIterator &maskIt,
2033 const IndexType &index) const {
2034 for (unsigned direction = 0; direction < D; ++direction) {
2035 for (int offset : {-1, 1}) {
2036 IndexType neighbor = index;
2037 neighbor[direction] += offset;
2038 if (!inBounds(neighbor)) {
2039 // Solve region is clipped to the mask/oxide interface: an
2040 // out-of-bounds neighbor is by definition outside the mask.
2041 if (isContactBoundary(index, direction, offset, maskIt))
2042 return true;
2043 continue;
2044 }
2045 if (lookupNode(neighbor) != noNode)
2046 continue;
2047 if (!crosses(valueAt(maskIt, index), valueAt(maskIt, neighbor)))
2048 continue;
2049 if (isContactBoundary(index, direction, offset, maskIt))
2050 return true;
2051 }
2052 }
2053 return false;
2054 }
2055
2056 bool isContactBoundary(const IndexType &index, unsigned direction, int offset,
2057 ConstSparseIterator &maskIt) const {
2058 const T grad = maskGradientComponent(index, direction, maskIt);
2059
2060 // The face exits the selected bending domain only if it points from an
2061 // inside node toward the outside of that domain. Use the signed level-set
2062 // gradient instead of a normalized normal so HRLE far-field sentinels
2063 // cannot produce a spurious zero normal.
2064 if (static_cast<T>(maskSign) * static_cast<T>(offset) * grad >= T(0))
2065 return false;
2066
2067 if (ambientInterface == nullptr)
2068 return direction == D - 1; // no ambient LS: fall back to bottom-face only
2069
2070 IndexType ghostIndex = index;
2071 ghostIndex[direction] += offset;
2072 return isInsideOxide(ghostIndex);
2073 }
2074
2075 bool isInsideOxide(const IndexType &index) const {
2076 if (ambientInterface == nullptr)
2077 return false;
2078 const auto it = ambientPhiCache_.find(detail::gridIndexHash<D>(index));
2079 return it != ambientPhiCache_.end() && it->second >= T(0);
2080 }
2081
2082 T maskFaceDistance(ConstSparseIterator &maskIt, const IndexType &inside,
2083 const IndexType &outside) const {
2084 if (!inBounds(outside))
2085 return gridDelta;
2086 return crossingDistance(valueAt(maskIt, inside), valueAt(maskIt, outside));
2087 }
2088
2089 void markFixedNodes() {
2090 fixedNodes = 0;
2091 if (nodes.empty() || parameters.anchorBoundarySide == 0)
2092 return;
2093 if (parameters.anchorBoundaryDirection < 0 ||
2094 parameters.anchorBoundaryDirection >= D)
2095 return;
2096
2097 const unsigned dir =
2098 static_cast<unsigned>(parameters.anchorBoundaryDirection);
2099
2100 // Warn if the anchor is placed along the oxide-growth direction (D-1 in a
2101 // standard LOCOS cross-section is the vertical/y axis). Clamping the top
2102 // or bottom of the mask instead of a far lateral edge removes the degrees
2103 // of freedom the bending solve needs and will suppress the bird's-beak
2104 // deflection entirely.
2105 if (dir == static_cast<unsigned>(D - 1))
2106 Logger::getInstance()
2107 .addWarning("OxidationMaskBending: anchorBoundaryDirection=" +
2108 std::to_string(parameters.anchorBoundaryDirection) +
2109 " points along the oxide-growth axis (direction D-1=" +
2110 std::to_string(D - 1) +
2111 "). The anchor is normally "
2112 "placed at a lateral edge (direction 0) so bending "
2113 "degrees of freedom are preserved. Set "
2114 "anchorBoundaryDirection=0 for a LOCOS cross-section.")
2115 .print();
2116 IndexType nodeMin = nodes.front().index;
2117 IndexType nodeMax = nodes.front().index;
2118 for (const auto &node : nodes) {
2119 for (unsigned d = 0; d < D; ++d) {
2120 nodeMin[d] = std::min(nodeMin[d], node.index[d]);
2121 nodeMax[d] = std::max(nodeMax[d], node.index[d]);
2122 }
2123 }
2124
2125 const auto layers = static_cast<viennahrle::IndexType>(
2126 std::max(1u, parameters.anchorBoundaryLayers));
2127 for (auto &node : nodes) {
2128 const bool onLower = parameters.anchorBoundarySide < 0 &&
2129 node.index[dir] <= nodeMin[dir] + layers - 1;
2130 const bool onUpper = parameters.anchorBoundarySide > 0 &&
2131 node.index[dir] >= nodeMax[dir] - layers + 1;
2132 if (onLower || onUpper) {
2133 node.fixed = true;
2134 node.velocity = {T(0), T(0), T(0)};
2135 ++fixedNodes;
2136 }
2137 }
2138 }
2139
2140 T maskGradientComponent(const IndexType &index, unsigned direction,
2141 ConstSparseIterator &maskIt) const {
2142 IndexType pos = index;
2143 IndexType neg = index;
2144 pos[direction] += 1;
2145 neg[direction] -= 1;
2146 if (!inBounds(pos))
2147 pos = index;
2148 if (!inBounds(neg))
2149 neg = index;
2150 return detail::clampLevelSetPhi(valueAt(maskIt, pos)) -
2151 detail::clampLevelSetPhi(valueAt(maskIt, neg));
2152 }
2153
2154 T clampedPoissonRatio() const {
2155 return std::clamp(parameters.poissonRatio, T(-0.95), T(0.49));
2156 }
2157
2158 static constexpr T gasConstant = T(8.31446261815324);
2159
2160 bool isElasticContactMode() const { return parameters.contactMode == 2; }
2161
2162 bool usesKinematicContactBoundary() const {
2163 return parameters.contactMode == 0 || isElasticContactMode();
2164 }
2165
2166 T effectiveMaskViscosity() const {
2167 // Elastic mode: η_eff = E (Pa·hr, with implicit dt_ref = 1 hr).
2168 // lameMu/lameLambda equal the standard elastic Lamé constants G and λ.
2169 // Contact faces use kinematic (Dirichlet) BC: v_contact = v_oxide × dt,
2170 // so the solver gives u_new ≈ v_oxide × dt (displacement in µm stored as
2171 // µm/hr). finalizeElasticAdvectionVelocity() converts u_new → u_new/dt
2172 // (the actual advection velocity µm/hr). Displacement feedback into the
2173 // oxide comes through the outer coupling loop in lsOxidation.
2174 if (isElasticContactMode())
2175 return parameters.youngModulus; // Pa·hr with implicit dt_ref = 1 hr
2176 const T temperature = std::max(parameters.temperature, T(1.));
2177 const T referenceTemperature =
2178 std::max(parameters.referenceTemperature, T(1.));
2179 return parameters.referenceViscosity *
2180 std::exp(parameters.creepActivationEnergy / gasConstant *
2181 (T(1) / temperature - T(1) / referenceTemperature));
2182 }
2183
2184 // Lamé viscosity parameters: same structure as elastic Lamé constants but
2185 // with eta(T) in place of Young's modulus, making the governing equation a
2186 // viscous Stokes flow rather than elastic equilibrium.
2187 T lameMu() const {
2188 const T nu = clampedPoissonRatio();
2189 return effectiveMaskViscosity() / (T(2) * (T(1) + nu));
2190 }
2191
2192 T lameLambda() const {
2193 const T nu = clampedPoissonRatio();
2194 return effectiveMaskViscosity() * nu / ((T(1) + nu) * (T(1) - T(2) * nu));
2195 }
2196
2197 Vec3D<T> getVelocity(const IndexType &index) const {
2198 const std::size_t nodeId = lookupNode(index);
2199 if (nodeId != noNode)
2200 return nodes[nodeId].velocity;
2201
2202 const auto nearby = findNearbyNode(index);
2203 if (nearby == noNode)
2204 return {0., 0., 0.};
2205 return nodes[nearby].velocity;
2206 }
2207
2208 bool isInsideMask(ConstSparseIterator &maskIt, const IndexType &index) const {
2209 return maskSign * valueAt(maskIt, index) >= 0.;
2210 }
2211
2212 T crossingDistance(T insidePhi, T outsidePhi) const {
2214 insidePhi, outsidePhi, parameters.minBoundaryDistance, gridDelta);
2215 }
2216};
2217
2222template <class T, int D>
2224 using IndexType = viennahrle::Index<D>;
2225 using ConstSparseIterator =
2226 viennahrle::ConstSparseIterator<typename Domain<T, D>::DomainType>;
2227
2228 SmartPointer<OxidationDeformation<T, D>> deformationField = nullptr;
2229 SmartPointer<OxidationMaskBending<T, D>> maskVelocityField = nullptr;
2230 SmartPointer<Domain<T, D>> maskInterface = nullptr;
2231 SmartPointer<Domain<T, D>> ambientInterface = nullptr;
2232 int maskSign = 1;
2233 std::unordered_map<std::size_t, T> maskPhiCache_;
2234 T maskGridDelta_ = 1.;
2235 std::array<T, D> maxVelocity_{};
2236
2237public:
2239
2241 SmartPointer<OxidationDeformation<T, D>> passedDeformation,
2242 SmartPointer<OxidationMaskBending<T, D>> passedMaskVelocity,
2243 SmartPointer<Domain<T, D>> passedMaskInterface, int passedMaskSign = 1)
2244 : deformationField(passedDeformation),
2245 maskVelocityField(passedMaskVelocity),
2246 maskInterface(passedMaskInterface),
2247 maskSign((passedMaskSign < 0) ? -1 : 1) {
2248 buildMaskPhiCache();
2249 buildEffectiveVelocityCache();
2250 }
2251
2253 SmartPointer<OxidationDeformation<T, D>> passedDeformation,
2254 SmartPointer<OxidationMaskBending<T, D>> passedMaskVelocity,
2255 SmartPointer<Domain<T, D>> passedMaskInterface,
2256 SmartPointer<Domain<T, D>> passedAmbientInterface, int passedMaskSign = 1)
2257 : deformationField(passedDeformation),
2258 maskVelocityField(passedMaskVelocity),
2259 maskInterface(passedMaskInterface),
2260 ambientInterface(passedAmbientInterface),
2261 maskSign((passedMaskSign < 0) ? -1 : 1) {
2262 buildMaskPhiCache();
2263 buildEffectiveVelocityCache();
2264 }
2265
2266 template <class... Args> static auto New(Args &&...args) {
2267 return SmartPointer<OxidationConstrainedAmbient>::New(
2268 std::forward<Args>(args)...);
2269 }
2270
2271 Vec3D<T> getVectorVelocity(const Vec3D<T> &coordinate, int material,
2272 const Vec3D<T> &normalVector,
2273 unsigned long pointId) final {
2274 const T signedPhi = getSignedMaskPhi(coordinate);
2275
2276 // At/inside mask surface: kinematic constraint — oxide tracks mask exactly.
2277 if (signedPhi >= T(0) && maskVelocityField != nullptr)
2278 return maskVelocityField->getVectorVelocity(coordinate, material,
2279 normalVector, pointId);
2280 if (deformationField == nullptr)
2281 return {0., 0., 0.};
2282
2283 auto v_def = deformationField->getVectorVelocity(coordinate, material,
2284 normalVector, pointId);
2285
2286 // Near-contact gap zone: smoothly approach the mask velocity instead of
2287 // letting stress spikes just outside the mask edge advect the ambient level
2288 // set with the full oxide deformation velocity.
2289 if (maskVelocityField != nullptr) {
2290 if (signedPhi > -T(3) * maskGridDelta_) {
2291 auto v_mask = maskVelocityField->getVectorVelocity(
2292 coordinate, material, normalVector, pointId);
2293 const T blend =
2294 std::max(T(0), std::min(T(1), (signedPhi + T(3) * maskGridDelta_) /
2295 (T(3) * maskGridDelta_)));
2296 for (int k = 0; k < D; ++k)
2297 v_def[k] = (T(1) - blend) * v_def[k] + blend * v_mask[k];
2298
2299 T def_n = T(0), mask_n = T(0);
2300 for (int k = 0; k < D; ++k) {
2301 def_n += v_def[k] * normalVector[k];
2302 mask_n += v_mask[k] * normalVector[k];
2303 }
2304 if (mask_n > def_n) {
2305 const T boost = mask_n - def_n;
2306 Vec3D<T> v_out = v_def;
2307 for (int k = 0; k < D; ++k)
2308 v_out[k] += boost * normalVector[k];
2309 return v_out;
2310 }
2311 }
2312 }
2313 return v_def;
2314 }
2315
2316 T getScalarVelocity(const Vec3D<T> &coordinate, int material,
2317 const Vec3D<T> &normalVector,
2318 unsigned long pointId) final {
2319 if (getSignedMaskPhi(coordinate) >= T(0))
2320 return 0.;
2321 if (deformationField == nullptr)
2322 return 0.;
2323 return deformationField->getScalarVelocity(coordinate, material,
2324 normalVector, pointId);
2325 }
2326
2327 T getDissipationAlpha(int direction, int material,
2328 const Vec3D<T> &centralDifferences) final {
2329 if (direction < 0 || direction >= static_cast<int>(D))
2330 return T(0);
2331 (void)material;
2332 (void)centralDifferences;
2333 return maxVelocity_[direction];
2334 }
2335
2336private:
2337 // Returns maskSign * maskPhi at the grid node nearest to coordinate.
2338 // Positive → inside mask (contact), negative → outside mask (gap or free).
2339 // Uses a pre-built cache so no HRLE iterator is constructed in the hot path.
2340 // Nodes outside the narrow band return lowest() (unambiguously outside mask).
2341 T getSignedMaskPhi(const Vec3D<T> &coordinate) const {
2342 if (maskInterface == nullptr)
2343 return std::numeric_limits<T>::lowest();
2344 IndexType index;
2345 for (unsigned i = 0; i < D; ++i)
2346 index[i] = std::llround(coordinate[i] / maskGridDelta_);
2347 const auto it = maskPhiCache_.find(detail::gridIndexHash<D>(index));
2348 return (it != maskPhiCache_.end()) ? it->second
2349 : std::numeric_limits<T>::lowest();
2350 }
2351
2352 void buildMaskPhiCache() {
2353 maskPhiCache_.clear();
2354 if (maskInterface == nullptr)
2355 return;
2356 maskGridDelta_ = maskInterface->getGrid().getGridDelta();
2357 for (ConstSparseIterator it(maskInterface->getDomain()); !it.isFinished();
2358 ++it) {
2359 if (!it.isDefined())
2360 continue;
2361 const auto key = detail::gridIndexHash<D>(it.getStartIndices());
2362 maskPhiCache_[key] = static_cast<T>(maskSign) * it.getValue();
2363 }
2364 }
2365
2366 void buildEffectiveVelocityCache() {
2367 maxVelocity_.fill(T(0));
2368 if (ambientInterface == nullptr) {
2369 for (unsigned d = 0; d < D; ++d) {
2370 if (deformationField != nullptr)
2371 maxVelocity_[d] =
2372 std::max(maxVelocity_[d],
2373 deformationField->getDissipationAlpha(d, -1, {}));
2374 if (maskVelocityField != nullptr)
2375 maxVelocity_[d] =
2376 std::max(maxVelocity_[d],
2377 maskVelocityField->getDissipationAlpha(d, -1, {}));
2378 }
2379 return;
2380 }
2381
2382 const T gridDelta = ambientInterface->getGrid().getGridDelta();
2383 bool foundInterfacePoint = false;
2384 viennahrle::ConstSparseStarIterator<typename Domain<T, D>::DomainType, 1>
2385 neighborIterator(ambientInterface->getDomain());
2386 for (ConstSparseIterator it(ambientInterface->getDomain());
2387 !it.isFinished(); ++it) {
2388 if (!it.isDefined() || std::abs(it.getValue()) > T(1))
2389 continue;
2390 foundInterfacePoint = true;
2391
2392 const auto index = it.getStartIndices();
2393 neighborIterator.goToIndicesSequential(index);
2394
2395 Vec3D<T> coordinate{0., 0., 0.};
2396 Vec3D<T> normal{0., 0., 0.};
2397 T normalNorm2 = T(0);
2398 for (unsigned d = 0; d < D; ++d) {
2399 coordinate[d] = static_cast<T>(index[d]) * gridDelta;
2400 normal[d] = neighborIterator.getNeighbor(d).getValue() -
2401 neighborIterator.getNeighbor(d + D).getValue();
2402 normalNorm2 += normal[d] * normal[d];
2403 }
2404 if (normalNorm2 > std::numeric_limits<T>::epsilon()) {
2405 const T invNorm = T(1) / std::sqrt(normalNorm2);
2406 for (unsigned d = 0; d < D; ++d)
2407 normal[d] *= invNorm;
2408 }
2409
2410 const auto vectorVelocity = getVectorVelocity(coordinate, -1, normal, 0);
2411 const T scalarVelocity = getScalarVelocity(coordinate, -1, normal, 0);
2412 for (unsigned d = 0; d < D; ++d) {
2413 maxVelocity_[d] =
2414 std::max(maxVelocity_[d],
2415 std::abs(vectorVelocity[d] + scalarVelocity * normal[d]));
2416 }
2417 }
2418
2419 if (!foundInterfacePoint) {
2420 for (unsigned d = 0; d < D; ++d) {
2421 if (deformationField != nullptr)
2422 maxVelocity_[d] =
2423 std::max(maxVelocity_[d],
2424 deformationField->getDissipationAlpha(d, -1, {}));
2425 if (maskVelocityField != nullptr)
2426 maxVelocity_[d] =
2427 std::max(maxVelocity_[d],
2428 maskVelocityField->getDissipationAlpha(d, -1, {}));
2429 }
2430 }
2431 }
2432};
2433
2434} // namespace viennals
constexpr int D
Definition Epitaxy.cpp:12
double T
Definition Epitaxy.cpp:13
Class containing all information about the level set, including the dimensions of the domain,...
Definition lsDomain.hpp:27
OxidationConstrainedAmbient(SmartPointer< OxidationDeformation< T, D > > passedDeformation, SmartPointer< OxidationMaskBending< T, D > > passedMaskVelocity, SmartPointer< Domain< T, D > > passedMaskInterface, SmartPointer< Domain< T, D > > passedAmbientInterface, int passedMaskSign=1)
Definition lsOxidationMask.hpp:2252
T getDissipationAlpha(int direction, int material, const Vec3D< T > &centralDifferences) final
If lsLocalLaxFriedrichsAnalytical is used as the spatial discretization scheme, this is called to pro...
Definition lsOxidationMask.hpp:2327
T getScalarVelocity(const Vec3D< T > &coordinate, int material, const Vec3D< T > &normalVector, unsigned long pointId) final
Should return a scalar value for the velocity at coordinate for a point of material with the given no...
Definition lsOxidationMask.hpp:2316
static auto New(Args &&...args)
Definition lsOxidationMask.hpp:2266
OxidationConstrainedAmbient(SmartPointer< OxidationDeformation< T, D > > passedDeformation, SmartPointer< OxidationMaskBending< T, D > > passedMaskVelocity, SmartPointer< Domain< T, D > > passedMaskInterface, int passedMaskSign=1)
Definition lsOxidationMask.hpp:2240
Vec3D< T > getVectorVelocity(const Vec3D< T > &coordinate, int material, const Vec3D< T > &normalVector, unsigned long pointId) final
Like getScalarVelocity, but returns a velocity value for each cartesian direction.
Definition lsOxidationMask.hpp:2271
Propagates the volume expansion generated at the Si/SiO2 interface through the oxide as a Cartesian-g...
Definition lsOxidationDeformation.hpp:60
Vector velocity field for a compliant oxidation mask driven by solved oxide traction....
Definition lsOxidationMask.hpp:91
void apply()
Definition lsOxidationMask.hpp:268
void setSolveBounds(const IndexType &passedMinIndex, const IndexType &passedMaxIndex)
Definition lsOxidationMask.hpp:244
void setMaskInterface(SmartPointer< Domain< T, D > > passedMaskInterface, int passedMaskSign=1)
Definition lsOxidationMask.hpp:221
std::size_t getNumberOfSolutionNodes() const
Definition lsOxidationMask.hpp:260
void setAmbientInterface(SmartPointer< Domain< T, D > > passedAmbientInterface, int passedAmbientSign=-1)
Provide the SiO₂/ambient interface so that contact faces can be detected on any mask face that border...
Definition lsOxidationMask.hpp:232
std::size_t getNumberOfFixedNodes() const
Definition lsOxidationMask.hpp:262
void finalizeElasticAdvectionVelocity()
Definition lsOxidationMask.hpp:348
OxidationMaskBending(SmartPointer< OxidationDeformation< T, D > > passedDeformation, OxidationMaskParameters passedParameters={})
Definition lsOxidationMask.hpp:191
static SmartPointer< OxidationMaskBending > New(SmartPointer< OxidationDeformation< T, D > > passedDeformation, OxidationMaskParameters passedParameters={})
Definition lsOxidationMask.hpp:206
T getDissipationAlpha(int direction, int, const Vec3D< T > &) final
If lsLocalLaxFriedrichsAnalytical is used as the spatial discretization scheme, this is called to pro...
Definition lsOxidationMask.hpp:417
T getLastApplyVelocityChange() const
Definition lsOxidationMask.hpp:263
static SmartPointer< OxidationMaskBending > New(SmartPointer< OxidationDeformation< T, D > > passedDeformation, SmartPointer< Domain< T, D > > passedMaskInterface, OxidationMaskParameters passedParameters={}, int passedMaskSign=1)
Definition lsOxidationMask.hpp:213
unsigned getIterations() const
Definition lsOxidationMask.hpp:258
std::size_t getNumberOfContactNodes() const
Definition lsOxidationMask.hpp:261
void writeFieldsToLevelSet()
Write mask bending velocity into maskInterface->getPointData() so that lsInterior + lsAdvect carry it...
Definition lsOxidationMask.hpp:424
OxidationMaskBending(SmartPointer< OxidationDeformation< T, D > > passedDeformation, SmartPointer< Domain< T, D > > passedMaskInterface, OxidationMaskParameters passedParameters={}, int passedMaskSign=1)
Definition lsOxidationMask.hpp:196
OxidationMaskParameters getParameters() const
Definition lsOxidationMask.hpp:257
void clearSolveBounds()
Definition lsOxidationMask.hpp:252
Vec3D< T > getVectorVelocity(const Vec3D< T > &coordinate, int material, const Vec3D< T > &, unsigned long) final
Like getScalarVelocity, but returns a velocity value for each cartesian direction.
Definition lsOxidationMask.hpp:383
void setParameters(OxidationMaskParameters passedParameters)
Definition lsOxidationMask.hpp:239
T getResidual() const
Definition lsOxidationMask.hpp:259
T getLastApplyAbsoluteVelocityChange() const
Definition lsOxidationMask.hpp:264
Common Cartesian-grid infrastructure shared by the three oxidation solver classes (diffusion,...
Definition lsOxidationSolverBase.hpp:90
static constexpr std::size_t noNode
Definition lsOxidationSolverBase.hpp:96
bool crosses(T a, T b) const
Definition lsOxidationSolverBase.hpp:110
std::size_t lookupNode(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:137
std::size_t linearIndex(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:143
void initNodeLookup()
Definition lsOxidationSolverBase.hpp:130
bool inBounds(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:123
std::array< std::size_t, D > strides
Definition lsOxidationSolverBase.hpp:101
T gridDelta
Definition lsOxidationSolverBase.hpp:102
bool initializeGridFromMask(SmartPointer< Domain< T, D > > maskInterface, bool useRequestedBounds, const IndexType &requestedMinIndex, const IndexType &requestedMaxIndex, std::size_t maxGridPoints, const std::string &solverName)
Definition lsOxidationSolverBase.hpp:277
std::vector< std::size_t > nodeLookupFlat
Definition lsOxidationSolverBase.hpp:97
std::array< std::size_t, D > extents
Definition lsOxidationSolverBase.hpp:100
viennahrle::ConstSparseIterator< typename Domain< T, D >::DomainType > ConstSparseIterator
Definition lsOxidationSolverBase.hpp:93
T valueAt(ConstSparseIterator &it, const IndexType &index) const
Definition lsOxidationSolverBase.hpp:118
bool increment(IndexType &index) const
Definition lsOxidationSolverBase.hpp:152
IndexType minIndex
Definition lsOxidationSolverBase.hpp:98
std::size_t findNearbyNode(const IndexType &index) const
Definition lsOxidationSolverBase.hpp:166
IndexType maxIndex
Definition lsOxidationSolverBase.hpp:99
float gridDelta
Definition AirGapDeposition.py:61
relaxation
Definition LOCOSOxidation.py:111
dict v
Definition LOCOSOxidation.py:217
Vec3D< T > vecAdd(const Vec3D< T > &a, const Vec3D< T > &b)
Definition lsOxidationSolverBase.hpp:48
T levelSetCrossingDistance(T insidePhi, T outsidePhi, T minBoundaryFraction, T gridDelta)
Definition lsOxidationSolverBase.hpp:76
Vec3D< T > vecSubtract(const Vec3D< T > &a, const Vec3D< T > &b)
Definition lsOxidationSolverBase.hpp:56
T clampLevelSetPhi(T v)
Clamp HRLE far-field sentinels (±DBL_MAX) to ±1 before differencing to prevent DBL_MAX² overflow that...
Definition lsOxidationSolverBase.hpp:71
Vec3D< T > vecScaled(const Vec3D< T > &source, T factor)
Definition lsOxidationSolverBase.hpp:40
std::size_t gridIndexHash(const viennahrle::Index< D > &index)
Definition lsOxidationSolverBase.hpp:23
void vecAddTo(Vec3D< T > &target, const Vec3D< T > &source)
Definition lsOxidationSolverBase.hpp:64
Definition lsAdvect.hpp:41
Definition lsOxidationMask.hpp:17
double creepActivationEnergy
Definition lsOxidationMask.hpp:37
int anchorBoundarySide
Definition lsOxidationMask.hpp:80
int contactMode
Definition lsOxidationMask.hpp:30
std::size_t maxGridPoints
Definition lsOxidationMask.hpp:73
unsigned anchorBoundaryLayers
Definition lsOxidationMask.hpp:81
double minBoundaryDistance
Definition lsOxidationMask.hpp:71
double stressTimeStep
Definition lsOxidationMask.hpp:44
double referenceViscosity
Definition lsOxidationMask.hpp:36
double contactReleaseFraction
Definition lsOxidationMask.hpp:61
double multigridSmootherOmega
Definition lsOxidationMask.hpp:65
int material
Definition lsOxidationMask.hpp:74
double poissonRatio
Definition lsOxidationMask.hpp:45
double referenceTemperature
Definition lsOxidationMask.hpp:35
int anchorBoundaryDirection
Definition lsOxidationMask.hpp:79
double tolerance
Definition lsOxidationMask.hpp:66
unsigned maxIterations
Definition lsOxidationMask.hpp:72
double youngModulus
Definition lsOxidationMask.hpp:40
bool unilateralContact
Definition lsOxidationMask.hpp:48
double relaxation
Definition lsOxidationMask.hpp:51
double temperature
Definition lsOxidationMask.hpp:34
double contactLoadRelaxation
Definition lsOxidationMask.hpp:56