92 using IndexType = viennahrle::Index<D>;
93 using ConstSparseIterator =
94 viennahrle::ConstSparseIterator<typename Domain<T, D>::DomainType>;
117 Vec3D<T> velocity{0., 0., 0.};
118 bool contact =
false;
122 struct SparseMatrix {
123 std::size_t nodeCount = 0;
124 std::vector<std::size_t> rowPtr;
125 std::vector<std::size_t> colIndex;
126 std::vector<T> values;
127 std::vector<T> invDiagonal;
130 struct MultigridLevel {
131 std::vector<IndexType> indices;
133 std::vector<std::vector<std::size_t>> children;
134 std::vector<std::size_t> fineToCoarse;
138 SmartPointer<OxidationDeformation<T, D>> deformationField =
nullptr;
139 SmartPointer<Domain<T, D>> maskInterface =
nullptr;
140 SmartPointer<Domain<T, D>> ambientInterface =
nullptr;
143 int ambientSign = -1;
146 bool useRequestedBounds =
false;
147 T residual = std::numeric_limits<T>::max();
148 unsigned iterations = 0;
149 std::array<T, D> maxVelocity_{};
150 std::size_t contactNodes = 0;
151 std::size_t fixedNodes = 0;
152 T lastApplyVelocityChange = std::numeric_limits<T>::max();
153 T lastApplyAbsoluteVelocityChange = std::numeric_limits<T>::max();
155 std::size_t candidateContactFaces_ = 0;
156 std::size_t activeContactFaces_ = 0;
157 std::size_t tensileContactFaces_ = 0;
158 T minContactNormalTraction_ = std::numeric_limits<T>::max();
159 T maxContactNormalTraction_ = std::numeric_limits<T>::lowest();
160 T contactReleaseThreshold_ = 0.;
161 std::unordered_map<std::size_t, Vec3D<T>> previousContactTraction_;
162 std::unordered_map<std::size_t, T> previousContactReleaseScale_;
163 std::vector<T> previousAitkenResidual;
168 std::vector<Vec3D<T>> elasticU_;
169 IndexType requestedMinIndex{};
170 IndexType requestedMaxIndex{};
171 std::vector<Node> nodes;
173 std::vector<uint8_t> contactFaceActive_;
174 std::vector<Vec3D<T>> contactFaceVelocity_;
175 std::vector<Vec3D<T>> contactFaceTraction_;
176 std::vector<T> contactFaceDistance_;
177 std::unordered_map<std::size_t, T> ambientPhiCache_;
184 std::vector<MultigridLevel> cachedMultigridLevels_;
185 std::vector<uint8_t> cachedContactFaceActive_;
186 std::size_t cachedNodeCount_ = 0;
194 : deformationField(passedDeformation), parameters(passedParameters) {}
200 : deformationField(passedDeformation), maskInterface(passedMaskInterface),
201 parameters(passedParameters), maskSign((passedMaskSign < 0) ? -1 : 1) {}
205 static SmartPointer<OxidationMaskBending>
208 return SmartPointer<OxidationMaskBending>::New(passedDeformation,
212 static SmartPointer<OxidationMaskBending>
216 return SmartPointer<OxidationMaskBending>::New(
217 passedDeformation, passedMaskInterface, passedParameters,
222 int passedMaskSign = 1) {
223 maskInterface = passedMaskInterface;
224 maskSign = (passedMaskSign < 0) ? -1 : 1;
233 int passedAmbientSign = -1) {
234 ambientInterface = passedAmbientInterface;
235 ambientSign = (passedAmbientSign < 0) ? -1 : 1;
240 parameters = passedParameters;
245 const IndexType &passedMaxIndex) {
246 requestedMinIndex = passedMinIndex;
247 requestedMaxIndex = passedMaxIndex;
248 useRequestedBounds =
true;
253 useRequestedBounds =
false;
265 return lastApplyAbsoluteVelocityChange;
269 if (maskInterface ==
nullptr) {
273 if (deformationField ==
nullptr) {
274 Logger::getInstance()
275 .addWarning(
"OxidationMaskBending: deformation field is null; "
276 "mask bending will produce zero velocities.")
282 const auto previousVelocities = collectVelocitiesByGridPoint();
283 if (!initialiseGrid())
289 seedFromPrevious(previousVelocities);
291 Logger::getInstance()
292 .addWarning(
"OxidationMaskBending: no mask nodes found after "
293 "buildNodes(). Verify that the mask level set defines "
294 "a non-empty interior region within the solve bounds.")
299 if (contactNodes == 0)
300 VIENNACORE_LOG_DEBUG(
301 "OxidationMaskBending: mask has no oxide-contact nodes; "
302 "no traction will be applied this step.");
303 solveElasticVelocity();
304 validateNodeVelocities(
"solveElasticVelocity");
305 smoothVelocityField();
307 const auto fixedPointResidual = contactResidualVector(previousVelocities);
308 const T omega = aitkenRelaxation(fixedPointResidual);
309 relaxVelocities(previousVelocities, omega);
310 validateNodeVelocities(
"Aitken relaxation");
311 lastApplyVelocityChange = maxVelocityChange(previousVelocities);
312 lastApplyAbsoluteVelocityChange =
313 absoluteVelocityChange(previousVelocities);
315 maxVelocity_.fill(
T(0));
316 for (
const auto &node : nodes) {
317 for (
unsigned d = 0; d <
D; ++d)
318 maxVelocity_[d] = std::max(maxVelocity_[d], std::abs(node.velocity[d]));
320 VIENNACORE_LOG_DEBUG(
321 "OxidationMaskBending: contact faces active=" +
322 std::to_string(activeContactFaces_) +
"/" +
323 std::to_string(candidateContactFaces_) +
", tensileSkipped=" +
324 std::to_string(tensileContactFaces_) +
", normalTraction=[" +
326 candidateContactFaces_ == 0 ?
T(0) : minContactNormalTraction_) +
329 candidateContactFaces_ == 0 ?
T(0) : maxContactNormalTraction_) +
330 "], aitkenOmega=" + std::to_string(omega) +
", contactLoadRelaxation=" +
332 std::clamp(parameters.contactLoadRelaxation,
T(0.02),
T(1))) +
333 ", contactReleaseThreshold=" +
334 std::to_string(contactReleaseThreshold_) +
", residual=" +
335 std::to_string(lastApplyVelocityChange) +
", absVelocityChange=" +
336 std::to_string(lastApplyAbsoluteVelocityChange));
349 if (!isElasticContactMode() || nodes.empty())
351 const T dt = parameters.stressTimeStep;
353 for (
auto &node : nodes)
354 node.velocity = {
T(0),
T(0),
T(0)};
358 const std::size_t n = nodes.size();
360 for (std::size_t i = 0; i < n; ++i)
361 elasticU_[i] = nodes[i].velocity;
364 for (std::size_t i = 0; i < n; ++i)
365 for (
unsigned d = 0; d <
D; ++d)
366 maxUNew = std::max(maxUNew, std::abs(elasticU_[i][d]));
368 const T invDt =
T(1) / dt;
369 for (std::size_t i = 0; i < n; ++i)
370 for (
unsigned d = 0; d <
D; ++d)
371 nodes[i].velocity[d] = elasticU_[i][d] * invDt;
373 VIENNACORE_LOG_INFO(
"ElasticFinalize: dt=" + std::to_string(dt) +
374 " max|u_new|=" + std::to_string(maxUNew) +
375 " max|u_new/dt|=" + std::to_string(maxUNew * invDt));
377 maxVelocity_.fill(
T(0));
378 for (
const auto &node : nodes)
379 for (
unsigned d = 0; d <
D; ++d)
380 maxVelocity_[d] = std::max(maxVelocity_[d], std::abs(node.velocity[d]));
385 unsigned long )
final {
386 if (deformationField ==
nullptr)
389 if (parameters.material >= 0 && material != parameters.material)
395 if (maskInterface ==
nullptr || nodes.empty())
399 for (
unsigned i = 0; i <
D; ++i)
400 index[i] = std::llround(coordinate[i] /
gridDelta);
407 if (isElasticContactMode() && elasticU_.empty()) {
408 const T dt = parameters.stressTimeStep;
410 return {
T(0),
T(0),
T(0)};
414 return getVelocity(index);
418 const Vec3D<T> & )
final {
419 return maxVelocity_[direction];
425 if (nodes.empty() || maskInterface ==
nullptr)
428 using VD =
typename PointData<T>::VectorDataType;
435 const bool useElasticU = isElasticContactMode() && !elasticU_.empty();
438 ConstSparseIterator it(maskInterface->getDomain());
439 for (; !it.isFinished(); ++it) {
442 const std::size_t nId =
lookupNode(it.getStartIndices());
443 Vec3D<T> v{
T(0),
T(0),
T(0)};
445 v = (useElasticU && nId < elasticU_.size()) ? elasticU_[nId]
446 : nodes[nId].velocity;
447 velocity.push_back(v);
449 maskInterface->getPointData().insertReplaceVectorData(std::move(velocity),
456 const T lambda = lameLambda();
457 const T mu = lameMu();
459 std::vector<Vec3D<T>> velForStress(nodes.size());
460 for (std::size_t i = 0; i < nodes.size(); ++i)
461 velForStress[i] = (useElasticU && i < elasticU_.size())
465 VD stressR0, stressR1, stressR2;
466 for (ConstSparseIterator sit(maskInterface->getDomain()); !sit.isFinished();
468 if (!sit.isDefined())
470 const std::size_t nId =
lookupNode(sit.getStartIndices());
471 Vec3D<T> row0{
T(0),
T(0),
T(0)};
472 Vec3D<T> row1{
T(0),
T(0),
T(0)};
473 Vec3D<T> row2{
T(0),
T(0),
T(0)};
476 std::array<Vec3D<T>, 3> grad{};
477 for (
unsigned j = 0; j < static_cast<unsigned>(
D); ++j) {
478 const auto vp = simpleNeighborVelocity(velForStress, nId, j, +1);
479 const auto vm = simpleNeighborVelocity(velForStress, nId, j, -1);
480 for (
unsigned i = 0; i < 3; ++i)
481 grad[j][i] = (vp[i] - vm[i]) / (
T(2) *
gridDelta);
484 for (
unsigned i = 0; i < static_cast<unsigned>(
D); ++i)
487 row0[0] = lambda * div +
T(2) * mu * grad[0][0];
488 row0[1] = mu * (grad[1][0] + grad[0][1]);
489 row0[2] = (
D > 2) ? mu * (grad[2][0] + grad[0][2]) :
T(0);
491 row1[1] = lambda * div +
T(2) * mu * grad[1][1];
492 row1[2] = (
D > 2) ? mu * (grad[2][1] + grad[1][2]) :
T(0);
495 row2[2] = (
D > 2) ? lambda * div +
T(2) * mu * grad[2][2] :
T(0);
497 stressR0.push_back(row0);
498 stressR1.push_back(row1);
499 stressR2.push_back(row2);
501 maskInterface->getPointData().insertReplaceVectorData(std::move(stressR0),
503 maskInterface->getPointData().insertReplaceVectorData(std::move(stressR1),
505 maskInterface->getPointData().insertReplaceVectorData(std::move(stressR2),
510 bool initialiseGrid() {
512 maskInterface, useRequestedBounds, requestedMinIndex, requestedMaxIndex,
518 void seedFromLevelSet() {
519 if (maskInterface ==
nullptr || nodes.empty())
522 maskInterface->getPointData().getVectorDataIndex(
"MaskVelocity");
525 const auto *vd = maskInterface->getPointData().getVectorData(vIdx);
530 for (; !it.isFinished(); ++it) {
533 const auto ptId = it.getPointId();
534 if (ptId >=
static_cast<decltype(ptId)
>(vd->size()))
536 const std::size_t nId =
lookupNode(it.getStartIndices());
539 if (!isFinite((*vd)[ptId]))
540 throwNonFinite(
"stored mask velocity point data");
542 nodes[nId].velocity = (*vd)[ptId];
547 seedFromPrevious(
const std::unordered_map<std::size_t, Vec3D<T>> &previous) {
548 if (previous.empty())
550 for (
auto &node : nodes) {
551 const auto found = previous.find(
linearIndex(node.index));
552 if (found != previous.end() && isFinite(found->second))
553 node.velocity = found->second;
555 node.velocity = {
T(0),
T(0),
T(0)};
559 std::unordered_map<std::size_t, Vec3D<T>>
560 collectVelocitiesByGridPoint()
const {
561 std::unordered_map<std::size_t, Vec3D<T>> result;
562 result.reserve(nodes.size());
563 for (
const auto &node : nodes)
564 if (
inBounds(node.index) && isFinite(node.velocity))
565 result.emplace(
linearIndex(node.index), node.velocity);
569 static bool isFinite(
const Vec3D<T> &v) {
570 for (
unsigned i = 0; i < 3; ++i)
571 if (!std::isfinite(v[i]))
576 static bool isFiniteTensor(
const std::array<T, 9> &tensor) {
577 for (
T value : tensor)
578 if (!std::isfinite(value))
583 [[noreturn]]
void throwNonFinite(
const std::string &stage)
const {
584 const std::string message =
585 "OxidationMaskBending: " + stage +
" produced non-finite values.";
586 Logger::getInstance().addError(message).print();
587 throw std::runtime_error(message);
590 void validateNodeVelocities(
const std::string &stage)
const {
591 for (
const auto &node : nodes)
592 for (
unsigned i = 0; i <
D; ++i)
593 if (!std::isfinite(node.velocity[i]))
594 throwNonFinite(stage);
598 const std::unordered_map<std::size_t, Vec3D<T>> &previous)
const {
599 if (previous.empty())
600 return std::numeric_limits<T>::max();
602 T changeSquaredSum = 0.;
603 T magnitudeSquaredSum = std::numeric_limits<T>::epsilon();
604 for (
const auto &node : nodes) {
607 const auto found = previous.find(
linearIndex(node.index));
608 if (found == previous.end())
611 for (
unsigned i = 0; i <
D; ++i) {
612 const T delta = node.velocity[i] - found->second[i];
613 changeSquaredSum += delta * delta;
614 magnitudeSquaredSum += node.velocity[i] * node.velocity[i];
615 magnitudeSquaredSum += found->second[i] * found->second[i];
619 const T change = std::sqrt(changeSquaredSum / magnitudeSquaredSum);
620 if (!std::isfinite(change))
621 throwNonFinite(
"mask velocity coupling residual");
625 T absoluteVelocityChange(
626 const std::unordered_map<std::size_t, Vec3D<T>> &previous)
const {
627 if (previous.empty())
628 return std::numeric_limits<T>::max();
630 T changeSquaredSum = 0.;
631 std::size_t components = 0;
632 for (
const auto &node : nodes) {
635 const auto found = previous.find(
linearIndex(node.index));
636 if (found == previous.end())
639 for (
unsigned i = 0; i <
D; ++i) {
640 const T delta = node.velocity[i] - found->second[i];
641 if (!std::isfinite(delta))
642 throwNonFinite(
"mask absolute velocity coupling residual");
643 changeSquaredSum += delta * delta;
649 return std::numeric_limits<T>::max();
650 const T change = std::sqrt(changeSquaredSum /
static_cast<T>(components));
651 if (!std::isfinite(change))
652 throwNonFinite(
"mask absolute velocity coupling residual");
656 std::vector<T> contactResidualVector(
657 const std::unordered_map<std::size_t, Vec3D<T>> &previous)
const {
658 std::vector<T> residualVector;
659 residualVector.reserve(contactNodes *
D);
660 if (previous.empty())
661 return residualVector;
663 for (
const auto &node : nodes) {
666 const auto found = previous.find(
linearIndex(node.index));
667 if (found == previous.end())
669 for (
unsigned i = 0; i <
D; ++i) {
670 const T residualComponent = node.velocity[i] - found->second[i];
671 if (!std::isfinite(residualComponent))
672 throwNonFinite(
"mask contact residual");
673 residualVector.push_back(residualComponent);
676 return residualVector;
679 T aitkenRelaxation(
const std::vector<T> &residualVector) {
680 const T baseOmega = std::clamp(parameters.relaxation,
T(0.01),
T(1));
681 if (residualVector.empty()) {
682 previousAitkenResidual.clear();
683 aitkenOmega = baseOmega;
687 T omega = std::clamp(aitkenOmega,
T(0.01), baseOmega);
688 if (previousAitkenResidual.size() != residualVector.size())
690 if (previousAitkenResidual.size() == residualVector.size()) {
693 T residualNorm2 = 0.;
694 T previousNorm2 = 0.;
695 for (std::size_t i = 0; i < residualVector.size(); ++i) {
696 if (!std::isfinite(residualVector[i]) ||
697 !std::isfinite(previousAitkenResidual[i]))
698 throwNonFinite(
"mask Aitken residual");
699 const T delta = residualVector[i] - previousAitkenResidual[i];
700 numerator += previousAitkenResidual[i] * delta;
701 denominator += delta * delta;
702 residualNorm2 += residualVector[i] * residualVector[i];
703 previousNorm2 += previousAitkenResidual[i] * previousAitkenResidual[i];
706 if (!std::isfinite(numerator) || !std::isfinite(denominator))
707 throwNonFinite(
"mask Aitken coefficient");
709 if (denominator > std::numeric_limits<T>::epsilon()) {
710 omega = -aitkenOmega * numerator / denominator;
711 if (std::isfinite(omega)) {
717 const T omegaMax = (parameters.contactMode > 0) ? baseOmega :
T(1.5);
718 omega = std::clamp(omega,
T(0.05), omegaMax);
720 throwNonFinite(
"mask Aitken coefficient");
724 if (std::isfinite(residualNorm2) && std::isfinite(previousNorm2) &&
725 residualNorm2 > previousNorm2 *
T(1.05)) {
726 omega = std::min(omega, std::max(
T(0.05), aitkenOmega *
T(0.5)));
730 previousAitkenResidual = residualVector;
739 void smoothVelocityField() {
740 const std::size_t n = nodes.size();
744 std::vector<Vec3D<T>> smoothed(n);
745 for (std::size_t
id = 0;
id < n; ++id) {
746 if (nodes[
id].fixed) {
747 smoothed[id] = {
T(0),
T(0),
T(0)};
750 Vec3D<T> sum = nodes[id].velocity;
752 for (
unsigned dir = 0; dir <
D; ++dir) {
753 for (
int off : {-1, 1}) {
754 IndexType nb = nodes[id].index;
761 for (
unsigned c = 0; c <
D; ++c)
762 sum[c] += nodes[nbId].velocity[c];
766 const T w =
T(1) /
static_cast<T>(count);
767 for (
unsigned c = 0; c <
D; ++c)
768 smoothed[
id][c] = sum[c] * w;
771 for (std::size_t
id = 0;
id < n; ++id)
772 nodes[
id].velocity = smoothed[
id];
776 relaxVelocities(
const std::unordered_map<std::size_t, Vec3D<T>> &previous,
778 if (!std::isfinite(omega))
779 throwNonFinite(
"mask Aitken coefficient");
780 if (previous.empty() || omega ==
T(1))
783 for (
auto &node : nodes) {
784 const auto found = previous.find(
linearIndex(node.index));
785 if (found == previous.end())
787 for (
unsigned i = 0; i <
D; ++i) {
789 found->second[i] + omega * (node.velocity[i] - found->second[i]);
790 if (!std::isfinite(relaxed))
791 throwNonFinite(
"mask velocity relaxation");
792 node.velocity[i] = relaxed;
797 void buildAmbientPhiCache() {
798 ambientPhiCache_.clear();
799 if (ambientInterface ==
nullptr)
801 for (ConstSparseIterator it(ambientInterface->getDomain());
802 !it.isFinished(); ++it) {
806 static_cast<T>(ambientSign) * it.getValue();
811 auto oldContactTraction = std::move(previousContactTraction_);
812 auto oldContactReleaseScale = std::move(previousContactReleaseScale_);
813 previousContactTraction_.clear();
814 previousContactReleaseScale_.clear();
815 contactReleaseThreshold_ =
T(0);
818 buildAmbientPhiCache();
820 ConstSparseIterator maskIt(maskInterface->getDomain());
823 if (isInsideMask(maskIt, index)) {
824 const std::size_t
id = nodes.size();
826 nodes.push_back({index});
833 const std::size_t n = nodes.size();
834 contactFaceActive_.assign(2 *
D * n, uint8_t(0));
835 contactFaceVelocity_.assign(2 *
D * n, Vec3D<T>{
T(0),
T(0),
T(0)});
836 contactFaceTraction_.assign(2 *
D * n, Vec3D<T>{
T(0),
T(0),
T(0)});
837 contactFaceDistance_.assign(2 *
D * n,
gridDelta);
840 candidateContactFaces_ = 0;
841 activeContactFaces_ = 0;
842 tensileContactFaces_ = 0;
843 minContactNormalTraction_ = std::numeric_limits<T>::max();
844 maxContactNormalTraction_ = std::numeric_limits<T>::lowest();
845 for (std::size_t
id = 0;
id < n; ++id) {
846 auto &node = nodes[id];
847 node.contact = touchesContactBoundary(maskIt, node.index);
851 for (
unsigned dir = 0; dir <
D; ++dir) {
852 for (
int offset : {-1, 1}) {
853 const unsigned faceIdx = dir * 2u + (offset == 1 ? 1u : 0u);
854 IndexType neighbor = node.index;
855 neighbor[dir] += offset;
856 const bool neighborIsNode =
858 if (neighborIsNode ||
859 !isContactBoundary(node.index, dir, offset, maskIt))
862 const T faceDistance = maskFaceDistance(maskIt, node.index, neighbor);
863 ++candidateContactFaces_;
864 Vec3D<T> faceNormal{0., 0., 0.};
865 faceNormal[dir] =
static_cast<T>(offset);
866 Vec3D<T> oxidePt{0., 0., 0.};
867 for (
unsigned i = 0; i <
D; ++i)
871 const auto sigma = deformationField->getStressTensor(oxidePt);
872 if (!isFiniteTensor(sigma))
873 throwNonFinite(
"oxide stress at mask contact");
874 Vec3D<T> t{0., 0., 0.};
875 for (
unsigned i = 0; i <
D; ++i)
876 for (
unsigned j = 0; j <
D; ++j)
877 t[i] += sigma[3 * i + j] * faceNormal[j];
880 for (
unsigned i = 0; i <
D; ++i)
881 tn += t[i] * faceNormal[i];
883 if (!isFinite(t) || !std::isfinite(tn))
884 throwNonFinite(
"oxide traction at mask contact");
885 minContactNormalTraction_ = std::min(minContactNormalTraction_, tn);
886 maxContactNormalTraction_ = std::max(maxContactNormalTraction_, tn);
888 Vec3D<T> contactLoad = t;
889 if (parameters.unilateralContact && tn >=
T(0)) {
890 ++tensileContactFaces_;
891 contactLoad = {
T(0),
T(0),
T(0)};
894 if (parameters.contactMode > 0) {
895 const auto key = contactFaceKey(node.index, dir, offset);
896 const auto prevIt = oldContactTraction.find(key);
897 if (prevIt != oldContactTraction.end()) {
899 std::clamp(parameters.contactLoadRelaxation,
T(0.02),
T(1));
900 for (
unsigned c = 0; c <
D; ++c)
901 contactLoad[c] = prevIt->second[c] +
902 alpha * (contactLoad[c] - prevIt->second[c]);
907 for (
unsigned i = 0; i <
D; ++i)
908 loadNormal += contactLoad[i] * faceNormal[i];
910 T releaseThreshold = std::numeric_limits<T>::epsilon();
911 T releaseScale = std::max(std::abs(tn),
T(1));
912 if (parameters.contactMode > 0 && parameters.unilateralContact) {
913 const auto scaleIt = oldContactReleaseScale.find(
914 contactFaceKey(node.index, dir, offset));
915 if (scaleIt != oldContactReleaseScale.end())
916 releaseScale = std::max(releaseScale, scaleIt->second);
918 std::clamp(parameters.contactReleaseFraction,
T(0),
T(0.25)) *
920 contactReleaseThreshold_ =
921 std::max(contactReleaseThreshold_, releaseThreshold);
924 if (parameters.unilateralContact && loadNormal >= -releaseThreshold)
927 if (!isFinite(contactLoad))
928 throwNonFinite(
"relaxed oxide contact load");
930 if (parameters.contactMode > 0) {
931 const auto key = contactFaceKey(node.index, dir, offset);
932 previousContactTraction_[key] = contactLoad;
933 if (parameters.unilateralContact)
934 previousContactReleaseScale_[key] =
935 std::max(releaseScale, std::abs(loadNormal));
938 contactFaceActive_[faceIdx * n + id] = uint8_t(1);
939 ++activeContactFaces_;
940 contactFaceTraction_[faceIdx * n + id] = contactLoad;
941 contactFaceDistance_[faceIdx * n + id] = faceDistance;
942 if (usesKinematicContactBoundary()) {
943 auto oxVel = deformationField->getVectorVelocity(
944 oxidePt, parameters.material, faceNormal, 0);
945 if (isElasticContactMode()) {
951 contactFaceVelocity_[faceIdx * n + id] = oxVel;
959 std::size_t contactFaceKey(
const IndexType &index,
unsigned direction,
962 seed ^= std::hash<unsigned>{}(direction) +
963 std::size_t(0x9e3779b97f4a7c15ULL) + (seed << 6) + (seed >> 2);
964 seed ^= std::hash<int>{}(offset) + std::size_t(0x9e3779b97f4a7c15ULL) +
965 (seed << 6) + (seed >> 2);
971 template <
class SolverT>
972 Vec3D<T> simpleNeighborVelocity(
const std::vector<Vec3D<SolverT>> &velocity,
973 std::size_t nodeId,
unsigned direction,
975 IndexType neighbor = nodes[nodeId].index;
976 neighbor[direction] += offset;
977 const auto toT = [](
const Vec3D<SolverT> &
v) -> Vec3D<T> {
978 return {
static_cast<T>(
v[0]),
static_cast<T>(v[1]),
static_cast<T>(
v[2])};
981 return toT(velocity[nodeId]);
983 return (foundId !=
noNode) ? toT(velocity[foundId]) : toT(velocity[nodeId]);
987 template <
class SolverT>
988 Vec3D<T> stressBoundaryGhost(
const std::vector<Vec3D<SolverT>> &velocity,
989 std::size_t nodeId,
unsigned normalDir,
990 int offset,
T distance,
991 const Vec3D<T> &traction)
const {
992 const T lambda = lameLambda();
993 const T mu = lameMu();
995 std::max(lambda +
T(2) * mu, std::numeric_limits<T>::epsilon());
996 const T signedOffset =
static_cast<T>(offset);
997 Vec3D<T> ghost{
static_cast<T>(velocity[nodeId][0]),
998 static_cast<T>(velocity[nodeId][1]),
999 static_cast<T>(velocity[nodeId][2])};
1000 T tangentialDivergence =
T(0);
1001 std::array<T, D> normalTangentialDerivative{};
1002 for (
unsigned tanDir = 0; tanDir <
D; ++tanDir) {
1003 if (tanDir == normalDir)
1005 const Vec3D<T> vPlus =
1006 simpleNeighborVelocity(velocity, nodeId, tanDir, +1);
1007 const Vec3D<T> vMinus =
1008 simpleNeighborVelocity(velocity, nodeId, tanDir, -1);
1009 tangentialDivergence +=
1010 (vPlus[tanDir] - vMinus[tanDir]) / (
T(2) *
gridDelta);
1011 normalTangentialDerivative[tanDir] =
1012 (vPlus[normalDir] - vMinus[normalDir]) / (
T(2) *
gridDelta);
1015 const T normalTraction = traction[normalDir] * signedOffset;
1016 const T normalDerivative =
1017 (normalTraction - lambda * tangentialDivergence) / denom;
1018 ghost[normalDir] += signedOffset * distance * normalDerivative;
1020 for (
unsigned tanDir = 0; tanDir <
D; ++tanDir) {
1021 if (tanDir == normalDir)
1023 const T normalDerivativeTangential =
1024 signedOffset * traction[tanDir] /
1025 std::max(mu, std::numeric_limits<T>::epsilon()) -
1026 normalTangentialDerivative[tanDir];
1027 ghost[tanDir] += signedOffset * distance * normalDerivativeTangential;
1032 template <
class SolverT>
1033 Vec3D<T> tractionFreeGhost(
const std::vector<Vec3D<SolverT>> &velocity,
1034 std::size_t nodeId,
unsigned normalDir,
1036 return stressBoundaryGhost(velocity, nodeId, normalDir, offset,
gridDelta,
1037 Vec3D<T>{
T(0),
T(0),
T(0)});
1040 template <
class SolverT>
1041 Vec3D<T> neighborVelocity(
const std::vector<Vec3D<SolverT>> &velocity,
1042 std::size_t nodeId,
unsigned direction,
1044 IndexType neighbor = nodes[nodeId].index;
1045 neighbor[direction] += offset;
1050 return {
static_cast<T>(velocity[foundId][0]),
1051 static_cast<T>(velocity[foundId][1]),
1052 static_cast<T>(velocity[foundId][2])};
1055 const unsigned faceIdx = direction * 2u + (offset == 1 ? 1u : 0u);
1056 const std::size_t nn = nodes.size();
1057 if (contactFaceActive_[faceIdx * nn + nodeId]) {
1058 if (usesKinematicContactBoundary())
1062 Vec3D<T>{
static_cast<T>(velocity[nodeId][0]),
1063 static_cast<T>(velocity[nodeId][1]),
1064 static_cast<T>(velocity[nodeId][2])});
1066 return stressBoundaryGhost(velocity, nodeId, direction, offset,
1067 contactFaceDistance_[faceIdx * nn + nodeId],
1068 contactFaceTraction_[faceIdx * nn + nodeId]);
1071 return tractionFreeGhost(velocity, nodeId, direction, offset);
1074 template <
class SolverT>
1075 Vec3D<T> divergenceGradient(
const std::vector<Vec3D<SolverT>> &velocity,
1076 const IndexType &index)
const {
1077 Vec3D<T> gradient{
T(0),
T(0),
T(0)};
1078 for (
unsigned component = 0; component <
D; ++component) {
1079 IndexType plus = index;
1080 IndexType minus = index;
1081 plus[component] += 1;
1082 minus[component] -= 1;
1083 const T plusDiv = divergence(velocity, plus);
1084 const T minusDiv = divergence(velocity, minus);
1085 gradient[component] = (plusDiv - minusDiv) / (
T(2) *
gridDelta);
1090 template <
class SolverT>
1091 T divergence(
const std::vector<Vec3D<SolverT>> &velocity,
1092 const IndexType &index)
const {
1100 for (
unsigned direction = 0; direction <
D; ++direction) {
1101 const Vec3D<T> plus = neighborVelocity(velocity, nodeId, direction, 1);
1102 const Vec3D<T> minus = neighborVelocity(velocity, nodeId, direction, -1);
1103 result += (plus[direction] - minus[direction]) / (
T(2) *
gridDelta);
1111 template <
class SolverT>
1112 Vec3D<T> computeElasticStencilAt(std::size_t nodeId,
1113 const std::vector<Vec3D<SolverT>> &v,
1114 T gradDivWeight)
const {
1115 Vec3D<T> laplaceAverage{
T(0),
T(0),
T(0)};
1116 for (
unsigned direction = 0; direction <
D; ++direction)
1117 for (
int offset : {-1, 1})
1119 neighborVelocity(v, nodeId, direction, offset));
1121 const T count =
static_cast<T>(2 *
D);
1124 divergenceGradient(v, nodes[nodeId].index),
1131 template <
class SolverT>
1132 void elasticMatvec(
const std::vector<Vec3D<SolverT>> &v,
1133 const std::vector<Vec3D<T>> &b,
T gradDivWeight,
1134 std::vector<Vec3D<SolverT>> &Av)
const {
1135#pragma omp parallel for schedule(static)
1136 for (std::size_t i = 0; i < nodes.size(); ++i) {
1137 if (nodes[i].fixed) {
1138 for (
unsigned c = 0; c <
D; ++c)
1142 const Vec3D<T> Fv = computeElasticStencilAt(i, v, gradDivWeight);
1143 for (
unsigned c = 0; c <
D; ++c)
1145 static_cast<SolverT
>(
static_cast<T>(v[i][c]) - Fv[c] + b[i][c]);
1149 static constexpr std::size_t mgNoNode =
1150 std::numeric_limits<std::size_t>::max();
1152 static Vec3D<T> zeroVec() {
return Vec3D<T>{
T(0),
T(0),
T(0)}; }
1154 static IndexType coarsenIndex(
const IndexType &index) {
1156 for (
unsigned d = 0; d <
D; ++d) {
1157 const auto value = index[d];
1158 coarse[d] = (value >= 0) ? value / 2 : -((-value + 1) / 2);
1163 std::vector<MultigridLevel> buildMultigridHierarchy()
const {
1164 std::vector<MultigridLevel> levels;
1165 levels.emplace_back();
1166 auto &fine = levels.back();
1167 fine.indices.reserve(nodes.size());
1168 for (
const auto &node : nodes)
1169 fine.indices.push_back(node.index);
1171 constexpr unsigned maxLevels = 12;
1172 constexpr std::size_t minCoarsenNodes = 48;
1173 while (levels.size() < maxLevels &&
1174 levels.back().indices.size() > minCoarsenNodes) {
1175 const auto &previous = levels.back();
1176 MultigridLevel coarse;
1177 coarse.indices.reserve(previous.indices.size() / 2 + 1);
1178 coarse.children.reserve(previous.indices.size() / 2 + 1);
1179 coarse.fineToCoarse.assign(previous.indices.size(), mgNoNode);
1181 std::unordered_map<std::size_t, std::size_t> coarseLookup;
1182 coarseLookup.reserve(previous.indices.size());
1183 for (std::size_t fineId = 0; fineId < previous.indices.size(); ++fineId) {
1184 const IndexType coarseIndex = coarsenIndex(previous.indices[fineId]);
1186 auto found = coarseLookup.find(key);
1187 if (found == coarseLookup.end()) {
1188 const std::size_t coarseId = coarse.indices.size();
1189 coarseLookup.emplace(key, coarseId);
1190 coarse.indices.push_back(coarseIndex);
1191 coarse.children.emplace_back();
1192 found = coarseLookup.find(key);
1195 const std::size_t coarseId = found->second;
1196 coarse.children[coarseId].push_back(fineId);
1197 coarse.fineToCoarse[fineId] = coarseId;
1200 if (coarse.indices.empty() ||
1201 coarse.indices.size() >= previous.indices.size())
1209 IndexType lo = coarse.indices.front(), hi = coarse.indices.front();
1210 for (
const auto &idx : coarse.indices)
1211 for (
unsigned d = 0; d <
D; ++d) {
1212 lo[d] = std::min(lo[d], idx[d]);
1213 hi[d] = std::max(hi[d], idx[d]);
1215 bool degenerate =
false;
1216 for (
unsigned d = 0; d <
D; ++d)
1223 levels.push_back(std::move(coarse));
1228 static T vectorDot(
const std::vector<Vec3D<T>> &a,
1229 const std::vector<Vec3D<T>> &b) {
1231 for (std::size_t i = 0; i < a.size(); ++i)
1232 for (
unsigned c = 0; c <
D; ++c)
1233 sum += a[i][c] * b[i][c];
1237 static T vectorNorm(
const std::vector<Vec3D<T>> &v) {
1238 return std::sqrt(std::max(vectorDot(v, v),
T(0)));
1241 static void vectorScale(std::vector<Vec3D<T>> &v,
T scale) {
1242 for (
auto &entry : v)
1243 for (
unsigned c = 0; c <
D; ++c)
1247 static void vectorAxpy(std::vector<Vec3D<T>> &y,
T alpha,
1248 const std::vector<Vec3D<T>> &x) {
1249 for (std::size_t i = 0; i < y.size(); ++i)
1250 for (
unsigned c = 0; c <
D; ++c)
1251 y[i][c] += alpha * x[i][c];
1254 static void vectorSubtractInPlace(std::vector<Vec3D<T>> &y,
T alpha,
1255 const std::vector<Vec3D<T>> &x) {
1256 for (std::size_t i = 0; i < y.size(); ++i)
1257 for (
unsigned c = 0; c <
D; ++c)
1258 y[i][c] -= alpha * x[i][c];
1261 static T flatValue(
const std::vector<Vec3D<T>> &v, std::size_t row) {
1262 return v[row /
D][row %
D];
1265 static void flatSet(std::vector<Vec3D<T>> &v, std::size_t row,
T value) {
1266 v[row /
D][row %
D] = value;
1269 void collectLocalCandidatesRecursive(
unsigned dim,
const IndexType ¢er,
1270 IndexType &candidate,
1271 std::vector<std::size_t> &out)
const {
1275 const std::size_t nodeId =
lookupNode(candidate);
1277 out.push_back(nodeId);
1281 for (
int offset = -2; offset <= 2; ++offset) {
1282 candidate[dim] = center[dim] + offset;
1283 collectLocalCandidatesRecursive(dim + 1, center, candidate, out);
1287 void collectLocalCandidates(std::size_t rowNode,
1288 std::vector<std::size_t> &out)
const {
1290 IndexType candidate = nodes[rowNode].index;
1291 collectLocalCandidatesRecursive(0, nodes[rowNode].index, candidate, out);
1292 out.push_back(rowNode);
1293 std::sort(out.begin(), out.end());
1294 out.erase(std::unique(out.begin(), out.end()), out.end());
1298 compressSparseRows(std::vector<std::unordered_map<std::size_t, T>> &rows,
1299 std::size_t nodeCount)
const {
1300 SparseMatrix matrix;
1301 matrix.nodeCount = nodeCount;
1302 matrix.rowPtr.assign(rows.size() + 1, 0);
1303 matrix.invDiagonal.assign(rows.size(),
T(1));
1305 for (std::size_t row = 0; row < rows.size(); ++row) {
1306 std::vector<std::pair<std::size_t, T>> entries(rows[row].begin(),
1308 std::sort(entries.begin(), entries.end(),
1309 [](
const auto &a,
const auto &b) { return a.first < b.first; });
1311 matrix.rowPtr[row] = matrix.values.size();
1313 for (
const auto &[col, value] : entries) {
1314 if (std::abs(value) <= std::numeric_limits<T>::epsilon() *
T(100))
1316 if (!std::isfinite(value))
1317 throwNonFinite(
"traction sparse matrix assembly");
1318 matrix.colIndex.push_back(col);
1319 matrix.values.push_back(value);
1324 if (std::abs(diagonal) > std::numeric_limits<T>::epsilon())
1325 matrix.invDiagonal[row] =
T(1) / diagonal;
1327 matrix.invDiagonal[row] =
T(1);
1329 matrix.rowPtr[rows.size()] = matrix.values.size();
1333 SparseMatrix buildFineElasticMatrix(
const std::vector<Vec3D<T>> &b,
1334 T gradDivWeight)
const {
1335 const std::size_t n = nodes.size();
1336 std::vector<std::unordered_map<std::size_t, T>> rows(n *
D);
1337 std::vector<Vec3D<T>> basis(n, zeroVec());
1338 std::vector<std::size_t> candidates;
1340 for (std::size_t rowNode = 0; rowNode < n; ++rowNode) {
1341 if (nodes[rowNode].fixed) {
1342 for (
unsigned rowComponent = 0; rowComponent <
D; ++rowComponent) {
1343 const std::size_t row = rowNode *
D + rowComponent;
1344 rows[row][row] =
T(1);
1358 collectLocalCandidates(rowNode, candidates);
1359 for (std::size_t colNode : candidates) {
1360 for (
unsigned colComponent = 0; colComponent <
D; ++colComponent) {
1361 basis[colNode][colComponent] =
T(1);
1362 const Vec3D<T> Fbasis =
1363 computeElasticStencilAt(rowNode, basis, gradDivWeight);
1364 basis[colNode][colComponent] =
T(0);
1366 for (
unsigned rowComponent = 0; rowComponent <
D; ++rowComponent) {
1367 const std::size_t row = rowNode *
D + rowComponent;
1368 const std::size_t col = colNode *
D + colComponent;
1370 (rowNode == colNode && rowComponent == colComponent) ?
T(1)
1373 identity - (Fbasis[rowComponent] - b[rowNode][rowComponent]);
1374 rows[row][col] += value;
1380 return compressSparseRows(rows, n);
1383 SparseMatrix buildGalerkinMatrix(
const SparseMatrix &fine,
1384 const MultigridLevel &coarseLevel)
const {
1385 const std::size_t coarseNodes = coarseLevel.indices.size();
1386 std::vector<std::unordered_map<std::size_t, T>> rows(coarseNodes *
D);
1388 for (std::size_t fineRow = 0; fineRow < fine.nodeCount *
D; ++fineRow) {
1389 const std::size_t fineRowNode = fineRow /
D;
1390 if (fineRowNode >= coarseLevel.fineToCoarse.size())
1392 const std::size_t coarseRowNode = coarseLevel.fineToCoarse[fineRowNode];
1393 if (coarseRowNode == mgNoNode)
1396 const T restrictionWeight =
1397 T(1) /
static_cast<T>(std::max<std::size_t>(
1398 coarseLevel.children[coarseRowNode].size(), 1));
1399 const std::size_t coarseRow = coarseRowNode *
D + fineRow %
D;
1401 for (std::size_t nz = fine.rowPtr[fineRow]; nz < fine.rowPtr[fineRow + 1];
1403 const std::size_t fineCol = fine.colIndex[nz];
1404 const std::size_t fineColNode = fineCol /
D;
1405 if (fineColNode >= coarseLevel.fineToCoarse.size())
1407 const std::size_t coarseColNode = coarseLevel.fineToCoarse[fineColNode];
1408 if (coarseColNode == mgNoNode)
1410 const std::size_t coarseCol = coarseColNode *
D + fineCol %
D;
1411 rows[coarseRow][coarseCol] += restrictionWeight * fine.values[nz];
1415 return compressSparseRows(rows, coarseNodes);
1418 void sparseMatvec(
const SparseMatrix &matrix,
const std::vector<Vec3D<T>> &x,
1419 std::vector<Vec3D<T>> &Ax)
const {
1420 Ax.assign(matrix.nodeCount, zeroVec());
1421#pragma omp parallel for schedule(static)
1422 for (std::size_t row = 0; row < matrix.nodeCount *
D; ++row) {
1424 for (std::size_t nz = matrix.rowPtr[row]; nz < matrix.rowPtr[row + 1];
1426 sum += matrix.values[nz] * flatValue(x, matrix.colIndex[nz]);
1427 flatSet(Ax, row, sum);
1431 void multigridProlong(
const std::vector<MultigridLevel> &levels,
1432 std::size_t coarseLevelId,
1433 const std::vector<Vec3D<T>> &coarse,
1434 std::vector<Vec3D<T>> &fine)
const {
1435 const auto &fineLevel = levels[coarseLevelId - 1];
1436 const auto &coarseLevel = levels[coarseLevelId];
1437 fine.assign(fineLevel.indices.size(), zeroVec());
1438 for (std::size_t coarseId = 0; coarseId < coarseLevel.children.size();
1440 for (std::size_t fineId : coarseLevel.children[coarseId])
1441 for (
unsigned c = 0; c <
D; ++c)
1442 fine[fineId][c] = coarse[coarseId][c];
1445 void multigridSmooth(
const SparseMatrix &matrix,
1446 const std::vector<Vec3D<T>> &rhs,
1447 std::vector<Vec3D<T>> &x,
unsigned sweeps,
1449 const std::size_t rows = matrix.nodeCount *
D;
1450 auto relaxRow = [&](std::size_t row) {
1451 T offDiagonal =
T(0);
1452 for (std::size_t nz = matrix.rowPtr[row]; nz < matrix.rowPtr[row + 1];
1454 const std::size_t col = matrix.colIndex[nz];
1457 offDiagonal += matrix.values[nz] * flatValue(x, col);
1460 (flatValue(rhs, row) - offDiagonal) * matrix.invDiagonal[row];
1462 flatValue(x, row) + omega * (updated - flatValue(x, row)));
1465 for (
unsigned sweep = 0; sweep < sweeps; ++sweep) {
1466 for (std::size_t row = 0; row < rows; ++row)
1468 for (std::size_t row = rows; row-- > 0;)
1473 void multigridResidual(
const SparseMatrix &matrix,
1474 const std::vector<Vec3D<T>> &rhs,
1475 const std::vector<Vec3D<T>> &x,
1476 std::vector<Vec3D<T>> &residualOut)
const {
1477 sparseMatvec(matrix, x, residualOut);
1478 for (std::size_t i = 0; i < rhs.size(); ++i)
1479 for (
unsigned c = 0; c <
D; ++c)
1480 residualOut[i][c] = rhs[i][c] - residualOut[i][c];
1483 void multigridRestrict(
const std::vector<Vec3D<T>> &fineResidual,
1484 const MultigridLevel &coarseLevel,
1485 std::vector<Vec3D<T>> &coarseRhs)
const {
1486 coarseRhs.assign(coarseLevel.indices.size(), zeroVec());
1487 for (std::size_t coarseId = 0; coarseId < coarseLevel.children.size();
1489 const auto &children = coarseLevel.children[coarseId];
1490 if (children.empty())
1492 for (std::size_t fineId : children)
1493 for (
unsigned c = 0; c <
D; ++c)
1494 coarseRhs[coarseId][c] += fineResidual[fineId][c];
1495 const T scale =
T(1) /
static_cast<T>(children.size());
1496 for (
unsigned c = 0; c <
D; ++c)
1497 coarseRhs[coarseId][c] *= scale;
1501 void multigridProlongAdd(
const std::vector<Vec3D<T>> &coarseCorrection,
1502 const MultigridLevel &coarseLevel,
1503 std::vector<Vec3D<T>> &fineCorrection)
const {
1504 for (std::size_t coarseId = 0; coarseId < coarseLevel.children.size();
1506 for (std::size_t fineId : coarseLevel.children[coarseId])
1507 for (
unsigned c = 0; c <
D; ++c)
1508 fineCorrection[fineId][c] += coarseCorrection[coarseId][c];
1512 void multigridVCycle(
const std::vector<MultigridLevel> &levels,
1513 std::size_t levelId,
const std::vector<Vec3D<T>> &rhs,
1514 std::vector<Vec3D<T>> &x,
T smootherOmega)
const {
1515 const auto &level = levels[levelId];
1516 if (levelId + 1 >= levels.size() || level.indices.size() <= 32) {
1517 multigridSmooth(level.matrix, rhs, x, 40, smootherOmega);
1521 multigridSmooth(level.matrix, rhs, x, 2, smootherOmega);
1523 std::vector<Vec3D<T>> fineResidual;
1524 multigridResidual(level.matrix, rhs, x, fineResidual);
1526 std::vector<Vec3D<T>> coarseRhs;
1527 const auto &coarseLevel = levels[levelId + 1];
1528 multigridRestrict(fineResidual, coarseLevel, coarseRhs);
1530 std::vector<Vec3D<T>> coarseCorrection(coarseRhs.size(), zeroVec());
1531 multigridVCycle(levels, levelId + 1, coarseRhs, coarseCorrection,
1533 multigridProlongAdd(coarseCorrection, coarseLevel, x);
1535 multigridSmooth(level.matrix, rhs, x, 2, smootherOmega);
1538 std::vector<Vec3D<T>>
1539 multigridPrecondition(
const std::vector<MultigridLevel> &levels,
1540 const std::vector<Vec3D<T>> &rhs,
1541 T smootherOmega)
const {
1542 std::vector<Vec3D<T>> correction(rhs.size(), zeroVec());
1545 multigridVCycle(levels, 0, rhs, correction, smootherOmega);
1549 void exactElasticResidual(
const SparseMatrix &matrix,
1550 const std::vector<Vec3D<T>> &x,
1551 const std::vector<Vec3D<T>> &b,
1552 std::vector<Vec3D<T>> &r)
const {
1553 std::vector<Vec3D<T>> Ax(x.size(), zeroVec());
1554 sparseMatvec(matrix, x, Ax);
1555 r.assign(x.size(), zeroVec());
1556 for (std::size_t i = 0; i < x.size(); ++i)
1557 for (
unsigned c = 0; c <
D; ++c)
1558 r[i][c] = b[i][c] - Ax[i][c];
1561 static std::vector<T>
1562 solveUpperTriangular(
const std::vector<std::vector<T>> &h,
1563 const std::vector<T> &g,
unsigned usedColumns) {
1564 std::vector<T> y(usedColumns,
T(0));
1565 for (
int row =
static_cast<int>(usedColumns) - 1; row >= 0; --row) {
1566 T sum = g[
static_cast<std::size_t
>(row)];
1567 for (
unsigned col =
static_cast<unsigned>(row) + 1; col < usedColumns;
1569 sum -= h[
static_cast<std::size_t
>(row)][col] * y[col];
1571 h[
static_cast<std::size_t
>(row)][
static_cast<std::size_t
>(row)];
1572 if (std::abs(diag) > std::numeric_limits<T>::epsilon())
1573 y[
static_cast<std::size_t
>(row)] = sum / diag;
1578 void solveElasticVelocity() {
1579 if (parameters.contactMode > 0) {
1580 solveElasticVelocityMultigridGMRES();
1583 solveElasticVelocityBiCGSTAB();
1586 void solveElasticVelocityMultigridGMRES() {
1592 const T lambda = lameLambda();
1593 const T mu = lameMu();
1594 const T gradDivWeight =
1596 std::max(lambda +
T(2) * mu, std::numeric_limits<T>::epsilon());
1597 const T smootherOmega =
1598 std::clamp(parameters.multigridSmootherOmega,
T(0.2),
T(1.4));
1600 const std::size_t n = nodes.size();
1601 const std::vector<Vec3D<T>> zeros(n, zeroVec());
1602 std::vector<Vec3D<T>> b(n, zeroVec());
1603#pragma omp parallel for schedule(static)
1604 for (std::size_t i = 0; i < n; ++i) {
1608 b[i] = computeElasticStencilAt(i, zeros, gradDivWeight);
1611 std::vector<Vec3D<T>> x(n, zeroVec());
1612 for (std::size_t i = 0; i < n; ++i)
1613 x[i] = nodes[i].fixed ? zeroVec() : nodes[i].velocity;
1622 const bool hierarchyDirty =
1623 cachedNodeCount_ != n || cachedContactFaceActive_ != contactFaceActive_;
1625 if (hierarchyDirty) {
1626 cachedMultigridLevels_ = buildMultigridHierarchy();
1627 if (cachedMultigridLevels_.empty())
1629 cachedMultigridLevels_[0].matrix =
1630 buildFineElasticMatrix(b, gradDivWeight);
1631 for (std::size_t level = 1; level < cachedMultigridLevels_.size();
1633 cachedMultigridLevels_[level].matrix =
1634 buildGalerkinMatrix(cachedMultigridLevels_[level - 1].matrix,
1635 cachedMultigridLevels_[level]);
1636 cachedNodeCount_ = n;
1637 cachedContactFaceActive_ = contactFaceActive_;
1640 if (cachedMultigridLevels_.empty())
1642 const auto &multigridLevels = cachedMultigridLevels_;
1644 std::vector<Vec3D<T>> r;
1645 exactElasticResidual(multigridLevels[0].matrix, x, b, r);
1647 const T rhsNorm = vectorNorm(b);
1648 const T absTolerance =
1649 std::max(parameters.tolerance * rhsNorm,
1650 std::numeric_limits<T>::epsilon() *
T(100) *
1651 std::sqrt(
static_cast<T>(
1652 std::max<std::size_t>(std::size_t(1), n *
D))));
1653 const T residualNormDenom = std::max(rhsNorm, absTolerance);
1654 auto updateGmresResidual = [&](
T absResidual) {
1655 residual = (absResidual <= absTolerance)
1657 : absResidual / residualNormDenom;
1660 T absResidual = vectorNorm(r);
1661 updateGmresResidual(absResidual);
1662 if (absResidual <= absTolerance || residual < parameters.tolerance) {
1663 for (std::size_t i = 0; i < n; ++i)
1664 nodes[i].velocity = x[i];
1668 constexpr unsigned restart = 32;
1669 while (iterations < parameters.maxIterations &&
1670 residual > parameters.tolerance) {
1671 const T beta = vectorNorm(r);
1672 if (!std::isfinite(beta))
1673 throwNonFinite(
"traction multigrid GMRES residual");
1674 if (beta <= absTolerance) {
1679 const unsigned innerLimit =
1680 std::min<unsigned>(restart, parameters.maxIterations - iterations);
1681 std::vector<std::vector<Vec3D<T>>>
v(innerLimit + 1,
1682 std::vector<Vec3D<T>>(n, zeroVec()));
1683 std::vector<std::vector<Vec3D<T>>> z(innerLimit,
1684 std::vector<Vec3D<T>>(n, zeroVec()));
1686 vectorScale(v[0],
T(1) / beta);
1688 std::vector<std::vector<T>> h(innerLimit + 1,
1689 std::vector<T>(innerLimit,
T(0)));
1690 std::vector<T> cs(innerLimit,
T(0));
1691 std::vector<T> sn(innerLimit,
T(0));
1692 std::vector<T> g(innerLimit + 1,
T(0));
1695 unsigned usedColumns = 0;
1696 for (
unsigned j = 0; j < innerLimit; ++j) {
1697 z[j] = multigridPrecondition(multigridLevels, v[j], smootherOmega);
1699 std::vector<Vec3D<T>> w(n, zeroVec());
1700 sparseMatvec(multigridLevels[0].matrix, z[j], w);
1702 for (
unsigned i = 0; i <= j; ++i) {
1703 h[i][j] = vectorDot(w, v[i]);
1704 vectorSubtractInPlace(w, h[i][j], v[i]);
1707 h[j + 1][j] = vectorNorm(w);
1708 if (h[j + 1][j] > std::numeric_limits<T>::epsilon()) {
1710 vectorScale(v[j + 1],
T(1) / h[j + 1][j]);
1713 for (
unsigned i = 0; i < j; ++i) {
1714 const T h0 = h[i][j];
1715 const T h1 = h[i + 1][j];
1716 h[i][j] = cs[i] * h0 + sn[i] * h1;
1717 h[i + 1][j] = -sn[i] * h0 + cs[i] * h1;
1720 const T h0 = h[j][j];
1721 const T h1 = h[j + 1][j];
1722 const T denom = std::hypot(h0, h1);
1723 if (denom <= std::numeric_limits<T>::epsilon()) {
1730 h[j][j] = cs[j] * h0 + sn[j] * h1;
1735 g[j + 1] = -sn[j] * g0;
1738 usedColumns = j + 1;
1739 const T projectedAbsResidual = std::abs(g[j + 1]);
1740 updateGmresResidual(projectedAbsResidual);
1741 if (!std::isfinite(residual))
1742 throwNonFinite(
"traction multigrid GMRES residual");
1743 if (projectedAbsResidual <= absTolerance ||
1744 residual < parameters.tolerance)
1748 if (usedColumns == 0)
1751 const auto y = solveUpperTriangular(h, g, usedColumns);
1752 for (
unsigned col = 0; col < usedColumns; ++col)
1753 vectorAxpy(x, y[col], z[col]);
1755 exactElasticResidual(multigridLevels[0].matrix, x, b, r);
1756 absResidual = vectorNorm(r);
1757 updateGmresResidual(absResidual);
1758 if (!std::isfinite(residual))
1759 throwNonFinite(
"traction multigrid GMRES residual");
1762 for (std::size_t i = 0; i < n; ++i)
1763 nodes[i].velocity = x[i];
1765 if (residual > parameters.tolerance)
1766 Logger::getInstance()
1767 .addWarning(
"solveElasticVelocity: traction multigrid GMRES did not "
1769 std::to_string(iterations) +
"/" +
1770 std::to_string(parameters.maxIterations) +
1771 " iterations (residual=" + std::to_string(residual) +
1772 ", tolerance=" + std::to_string(parameters.tolerance) +
1777 void solveElasticVelocityRelaxation() {
1783 const T lambda = lameLambda();
1784 const T mu = lameMu();
1785 const T gradDivWeight =
1787 std::max(lambda +
T(2) * mu, std::numeric_limits<T>::epsilon());
1788 const T relaxation = std::clamp(parameters.relaxation,
T(0.01),
T(1));
1790 std::vector<Vec3D<T>> current(nodes.size());
1791 std::vector<Vec3D<T>> next(nodes.size());
1792 for (std::size_t i = 0; i < nodes.size(); ++i)
1794 nodes[i].fixed ? Vec3D<T>{
T(0),
T(0),
T(0)} : nodes[i].velocity;
1796 for (; iterations < parameters.maxIterations; ++iterations) {
1798 T maxMagnitude = std::numeric_limits<T>::epsilon();
1801#pragma omp parallel for schedule(static) \
1802 reduction(max : maxDelta, maxMagnitude) reduction(min : finiteFlag)
1803 for (std::size_t i = 0; i < nodes.size(); ++i) {
1804 Vec3D<T> candidate =
1805 nodes[i].fixed ? Vec3D<T>{
T(0),
T(0),
T(0)}
1806 : computeElasticStencilAt(i, current, gradDivWeight);
1808 for (
unsigned c = 0; c <
D; ++c) {
1809 if (!std::isfinite(candidate[c]) || !std::isfinite(current[i][c])) {
1811 next[i][c] = current[i][c];
1815 current[i][c] +
relaxation * (candidate[c] - current[i][c]);
1816 maxDelta = std::max(maxDelta, std::abs(next[i][c] - current[i][c]));
1817 maxMagnitude = std::max(maxMagnitude, std::abs(next[i][c]));
1818 maxMagnitude = std::max(maxMagnitude, std::abs(current[i][c]));
1823 residual = std::numeric_limits<T>::infinity();
1824 throwNonFinite(
"traction mask solve");
1827 residual = maxDelta / maxMagnitude;
1829 if (residual < parameters.tolerance) {
1835 for (std::size_t i = 0; i < nodes.size(); ++i)
1836 nodes[i].velocity = current[i];
1838 if (residual > parameters.tolerance)
1839 Logger::getInstance()
1840 .addWarning(
"solveElasticVelocity: traction relaxation did not "
1842 std::to_string(iterations) +
"/" +
1843 std::to_string(parameters.maxIterations) +
1844 " iterations (residual=" + std::to_string(residual) +
1845 ", tolerance=" + std::to_string(parameters.tolerance) +
1850 void solveElasticVelocityBiCGSTAB() {
1856 using SolverT = float;
1858 const T lambda = lameLambda();
1859 const T mu = lameMu();
1860 const T gradDivWeight =
1862 std::max(lambda +
T(2) * mu, std::numeric_limits<T>::epsilon());
1864 const std::size_t n = nodes.size();
1865 const Vec3D<SolverT> zero3{SolverT(0), SolverT(0), SolverT(0)};
1869 std::vector<Vec3D<T>> b(n);
1871 const std::vector<Vec3D<SolverT>> zeros(n, zero3);
1872#pragma omp parallel for schedule(static)
1873 for (std::size_t i = 0; i < n; ++i) {
1875 b[i] = Vec3D<T>{
T(0),
T(0),
T(0)};
1877 b[i] = computeElasticStencilAt(i, zeros, gradDivWeight);
1882 std::vector<Vec3D<SolverT>> x(n, zero3);
1885 std::vector<Vec3D<SolverT>> r(n), r_hat(n);
1886 for (std::size_t i = 0; i < n; ++i)
1887 for (
unsigned c = 0; c <
D; ++c) {
1888 r[i][c] =
static_cast<SolverT
>(b[i][c]);
1889 r_hat[i][c] = r[i][c];
1896 std::vector<Vec3D<SolverT>> pv(n, zero3), sv(n, zero3), y(n), z(n), s(n),
1898 T rho =
T(1), alpha =
T(1), omega =
T(1);
1900 auto vecDot = [&](
const std::vector<Vec3D<SolverT>> &a,
1901 const std::vector<Vec3D<SolverT>> &bv) {
1903 for (std::size_t i = 0; i < n; ++i)
1904 for (
unsigned c = 0; c <
D; ++c) {
1905 const T av =
static_cast<T>(a[i][c]);
1906 const T bvVal =
static_cast<T>(bv[i][c]);
1907 if (!std::isfinite(av) || !std::isfinite(bvVal))
1908 return std::numeric_limits<T>::quiet_NaN();
1914 auto vecMaxAbs = [&](
const std::vector<Vec3D<SolverT>> &vin) {
1916 for (std::size_t i = 0; i < n; ++i)
1917 for (
unsigned c = 0; c <
D; ++c) {
1918 const T value =
static_cast<T>(vin[i][c]);
1919 if (!std::isfinite(value))
1920 return std::numeric_limits<T>::infinity();
1921 m = std::max(m, std::abs(value));
1926 const T b_norm = [&] {
1928 for (std::size_t i = 0; i < n; ++i)
1929 for (
unsigned c = 0; c <
D; ++c)
1930 m = std::max(m, std::abs(b[i][c]));
1931 return (m <
T(1e-100)) ?
T(1) : m;
1934 for (; iterations < parameters.maxIterations; ++iterations) {
1935 const T rho_new = vecDot(r_hat, r);
1936 if (!std::isfinite(rho_new) || std::abs(rho_new) <
T(1e-100))
1938 if (!std::isfinite(rho) || !std::isfinite(alpha) ||
1939 !std::isfinite(omega) || std::abs(omega) <
T(1e-100))
1942 const T beta = (rho_new / rho) * (alpha / omega);
1943 if (!std::isfinite(beta))
1947 for (std::size_t i = 0; i < n; ++i)
1948 for (
unsigned c = 0; c <
D; ++c)
1949 pv[i][c] =
static_cast<SolverT
>(r[i][c] +
1950 beta * (pv[i][c] - omega * sv[i][c]));
1954 elasticMatvec(y, b, gradDivWeight, sv);
1956 const T r_hat_v = vecDot(r_hat, sv);
1957 if (!std::isfinite(r_hat_v) || std::abs(r_hat_v) <
T(1e-100))
1960 alpha = rho_new / r_hat_v;
1961 if (!std::isfinite(alpha))
1964 for (std::size_t i = 0; i < n; ++i)
1965 for (
unsigned c = 0; c <
D; ++c)
1966 s[i][c] =
static_cast<SolverT
>(r[i][c] - alpha * sv[i][c]);
1968 residual = vecMaxAbs(s);
1969 if (!std::isfinite(residual))
1971 if (residual < parameters.tolerance * b_norm) {
1972 for (std::size_t i = 0; i < n; ++i)
1973 for (
unsigned c = 0; c <
D; ++c)
1974 x[i][c] =
static_cast<SolverT
>(x[i][c] + alpha * y[i][c]);
1981 elasticMatvec(z, b, gradDivWeight, t);
1983 const T t_s = vecDot(t, s);
1984 const T t_t = vecDot(t, t);
1985 if (!std::isfinite(t_s) || !std::isfinite(t_t) || t_t <=
T(1e-100))
1988 if (!std::isfinite(omega))
1991 for (std::size_t i = 0; i < n; ++i)
1992 for (
unsigned c = 0; c <
D; ++c) {
1994 static_cast<SolverT
>(x[i][c] + alpha * y[i][c] + omega * z[i][c]);
1995 r[i][c] =
static_cast<SolverT
>(s[i][c] - omega * t[i][c]);
1998 residual = vecMaxAbs(r);
1999 if (!std::isfinite(residual))
2001 if (residual < parameters.tolerance * b_norm) {
2007 bool finiteSolution =
true;
2008 for (std::size_t i = 0; i < n; ++i)
2009 for (
unsigned c = 0; c <
D; ++c)
2010 if (!std::isfinite(
static_cast<T>(x[i][c])))
2011 finiteSolution =
false;
2013 if (finiteSolution) {
2014 for (std::size_t i = 0; i < n; ++i)
2015 for (
unsigned c = 0; c <
D; ++c)
2016 nodes[i].velocity[c] =
static_cast<T>(x[i][c]);
2018 residual = std::numeric_limits<T>::infinity();
2019 throwNonFinite(
"legacy kinematic mask solve");
2021 if (residual > parameters.tolerance * b_norm)
2022 Logger::getInstance()
2024 "solveElasticVelocity: BiCGSTAB did not converge after " +
2025 std::to_string(iterations) +
"/" +
2026 std::to_string(parameters.maxIterations) +
2027 " iterations (residual=" + std::to_string(residual / b_norm) +
2028 ", tolerance=" + std::to_string(parameters.tolerance) +
")")
2032 bool touchesContactBoundary(ConstSparseIterator &maskIt,
2033 const IndexType &index)
const {
2034 for (
unsigned direction = 0; direction <
D; ++direction) {
2035 for (
int offset : {-1, 1}) {
2036 IndexType neighbor = index;
2037 neighbor[direction] += offset;
2041 if (isContactBoundary(index, direction, offset, maskIt))
2049 if (isContactBoundary(index, direction, offset, maskIt))
2056 bool isContactBoundary(
const IndexType &index,
unsigned direction,
int offset,
2057 ConstSparseIterator &maskIt)
const {
2058 const T grad = maskGradientComponent(index, direction, maskIt);
2064 if (
static_cast<T>(maskSign) *
static_cast<T>(offset) * grad >=
T(0))
2067 if (ambientInterface ==
nullptr)
2068 return direction ==
D - 1;
2070 IndexType ghostIndex = index;
2071 ghostIndex[direction] += offset;
2072 return isInsideOxide(ghostIndex);
2075 bool isInsideOxide(
const IndexType &index)
const {
2076 if (ambientInterface ==
nullptr)
2079 return it != ambientPhiCache_.end() && it->second >=
T(0);
2082 T maskFaceDistance(ConstSparseIterator &maskIt,
const IndexType &inside,
2083 const IndexType &outside)
const {
2086 return crossingDistance(
valueAt(maskIt, inside),
valueAt(maskIt, outside));
2089 void markFixedNodes() {
2091 if (nodes.empty() || parameters.anchorBoundarySide == 0)
2093 if (parameters.anchorBoundaryDirection < 0 ||
2094 parameters.anchorBoundaryDirection >=
D)
2097 const unsigned dir =
2098 static_cast<unsigned>(parameters.anchorBoundaryDirection);
2105 if (dir ==
static_cast<unsigned>(
D - 1))
2106 Logger::getInstance()
2107 .addWarning(
"OxidationMaskBending: anchorBoundaryDirection=" +
2108 std::to_string(parameters.anchorBoundaryDirection) +
2109 " points along the oxide-growth axis (direction D-1=" +
2110 std::to_string(
D - 1) +
2111 "). The anchor is normally "
2112 "placed at a lateral edge (direction 0) so bending "
2113 "degrees of freedom are preserved. Set "
2114 "anchorBoundaryDirection=0 for a LOCOS cross-section.")
2116 IndexType nodeMin = nodes.front().index;
2117 IndexType nodeMax = nodes.front().index;
2118 for (
const auto &node : nodes) {
2119 for (
unsigned d = 0; d <
D; ++d) {
2120 nodeMin[d] = std::min(nodeMin[d], node.index[d]);
2121 nodeMax[d] = std::max(nodeMax[d], node.index[d]);
2125 const auto layers =
static_cast<viennahrle::IndexType
>(
2126 std::max(1u, parameters.anchorBoundaryLayers));
2127 for (
auto &node : nodes) {
2128 const bool onLower = parameters.anchorBoundarySide < 0 &&
2129 node.index[dir] <= nodeMin[dir] + layers - 1;
2130 const bool onUpper = parameters.anchorBoundarySide > 0 &&
2131 node.index[dir] >= nodeMax[dir] - layers + 1;
2132 if (onLower || onUpper) {
2134 node.velocity = {
T(0),
T(0),
T(0)};
2140 T maskGradientComponent(
const IndexType &index,
unsigned direction,
2141 ConstSparseIterator &maskIt)
const {
2142 IndexType pos = index;
2143 IndexType neg = index;
2144 pos[direction] += 1;
2145 neg[direction] -= 1;
2154 T clampedPoissonRatio()
const {
2155 return std::clamp(parameters.poissonRatio,
T(-0.95),
T(0.49));
2158 static constexpr T gasConstant =
T(8.31446261815324);
2160 bool isElasticContactMode()
const {
return parameters.contactMode == 2; }
2162 bool usesKinematicContactBoundary()
const {
2163 return parameters.contactMode == 0 || isElasticContactMode();
2166 T effectiveMaskViscosity()
const {
2174 if (isElasticContactMode())
2175 return parameters.youngModulus;
2176 const T temperature = std::max(parameters.temperature,
T(1.));
2177 const T referenceTemperature =
2178 std::max(parameters.referenceTemperature,
T(1.));
2179 return parameters.referenceViscosity *
2180 std::exp(parameters.creepActivationEnergy / gasConstant *
2181 (
T(1) / temperature -
T(1) / referenceTemperature));
2188 const T nu = clampedPoissonRatio();
2189 return effectiveMaskViscosity() / (
T(2) * (
T(1) + nu));
2192 T lameLambda()
const {
2193 const T nu = clampedPoissonRatio();
2194 return effectiveMaskViscosity() * nu / ((
T(1) + nu) * (
T(1) -
T(2) * nu));
2197 Vec3D<T> getVelocity(
const IndexType &index)
const {
2198 const std::size_t nodeId =
lookupNode(index);
2200 return nodes[nodeId].velocity;
2204 return {0., 0., 0.};
2205 return nodes[nearby].velocity;
2208 bool isInsideMask(ConstSparseIterator &maskIt,
const IndexType &index)
const {
2209 return maskSign *
valueAt(maskIt, index) >= 0.;
2212 T crossingDistance(
T insidePhi,
T outsidePhi)
const {
2214 insidePhi, outsidePhi, parameters.minBoundaryDistance,
gridDelta);