Actual source code: pipecgrr.c
2: #include <petsc/private/kspimpl.h>
4: /*
5: KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.
7: This is called once, usually automatically by KSPSolve() or KSPSetUp()
8: but can be called directly by KSPSetUp()
9: */
10: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
11: {
12: /* get work vectors needed by PIPECGRR */
13: KSPSetWorkVecs(ksp,9);
14: return 0;
15: }
17: /*
18: KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
19: */
20: static PetscErrorCode KSPSolve_PIPECGRR(KSP ksp)
21: {
22: PetscInt i = 0,replace = 0,totreplaces = 0,nsize;
23: PetscScalar alpha = 0.0,beta = 0.0,gamma = 0.0,gammaold = 0.0,delta = 0.0,alphap = 0.0,betap = 0.0;
24: PetscReal dp = 0.0,nsi = 0.0,sqn = 0.0,Anorm = 0.0,rnp = 0.0,pnp = 0.0,snp = 0.0,unp = 0.0,wnp = 0.0,xnp = 0.0,qnp = 0.0,znp = 0.0,mnz = 5.0,tol = PETSC_SQRT_MACHINE_EPSILON,eps = PETSC_MACHINE_EPSILON;
25: PetscReal ds = 0.0,dz = 0.0,dx = 0.0,dpp = 0.0,dq = 0.0,dm = 0.0,du = 0.0,dw = 0.0,db = 0.0,errr = 0.0,errrprev = 0.0,errs = 0.0,errw = 0.0,errz = 0.0,errncr = 0.0,errncs = 0.0,errncw = 0.0,errncz = 0.0;
26: Vec X,B,Z,P,W,Q,U,M,N,R,S;
27: Mat Amat,Pmat;
28: PetscBool diagonalscale;
30: PCGetDiagonalScale(ksp->pc,&diagonalscale);
33: X = ksp->vec_sol;
34: B = ksp->vec_rhs;
35: M = ksp->work[0];
36: Z = ksp->work[1];
37: P = ksp->work[2];
38: N = ksp->work[3];
39: W = ksp->work[4];
40: Q = ksp->work[5];
41: U = ksp->work[6];
42: R = ksp->work[7];
43: S = ksp->work[8];
45: PCGetOperators(ksp->pc,&Amat,&Pmat);
47: ksp->its = 0;
48: if (!ksp->guess_zero) {
49: KSP_MatMult(ksp,Amat,X,R); /* r <- b - Ax */
50: VecAYPX(R,-1.0,B);
51: } else {
52: VecCopy(B,R); /* r <- b (x is 0) */
53: }
55: KSP_PCApply(ksp,R,U); /* u <- Br */
57: switch (ksp->normtype) {
58: case KSP_NORM_PRECONDITIONED:
59: VecNormBegin(U,NORM_2,&dp); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
60: VecNormBegin(B,NORM_2,&db);
61: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
62: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
63: VecNormEnd(U,NORM_2,&dp);
64: VecNormEnd(B,NORM_2,&db);
65: break;
66: case KSP_NORM_UNPRECONDITIONED:
67: VecNormBegin(R,NORM_2,&dp); /* dp <- r'*r = e'*A'*A*e */
68: VecNormBegin(B,NORM_2,&db);
69: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
70: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
71: VecNormEnd(R,NORM_2,&dp);
72: VecNormEnd(B,NORM_2,&db);
73: break;
74: case KSP_NORM_NATURAL:
75: VecDotBegin(R,U,&gamma); /* gamma <- u'*r */
76: VecNormBegin(B,NORM_2,&db);
77: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
78: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
79: VecDotEnd(R,U,&gamma);
80: VecNormEnd(B,NORM_2,&db);
81: KSPCheckDot(ksp,gamma);
82: dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
83: break;
84: case KSP_NORM_NONE:
85: KSP_MatMult(ksp,Amat,U,W);
86: dp = 0.0;
87: break;
88: default: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
89: }
90: KSPLogResidualHistory(ksp,dp);
91: KSPMonitor(ksp,0,dp);
92: ksp->rnorm = dp;
93: (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
94: if (ksp->reason) return 0;
96: MatNorm(Amat,NORM_INFINITY,&Anorm);
97: VecGetSize(B,&nsize);
98: nsi = (PetscReal) nsize;
99: sqn = PetscSqrtReal(nsi);
101: do {
102: if (i > 1) {
103: pnp = dpp;
104: snp = ds;
105: qnp = dq;
106: znp = dz;
107: }
108: if (i > 0) {
109: rnp = dp;
110: unp = du;
111: wnp = dw;
112: xnp = dx;
113: alphap = alpha;
114: betap = beta;
115: }
117: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
118: VecNormBegin(R,NORM_2,&dp);
119: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
120: VecNormBegin(U,NORM_2,&dp);
121: }
122: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
123: VecDotBegin(R,U,&gamma);
124: }
125: VecDotBegin(W,U,&delta);
127: if (i > 0) {
128: VecNormBegin(S,NORM_2,&ds);
129: VecNormBegin(Z,NORM_2,&dz);
130: VecNormBegin(P,NORM_2,&dpp);
131: VecNormBegin(Q,NORM_2,&dq);
132: VecNormBegin(M,NORM_2,&dm);
133: }
134: VecNormBegin(X,NORM_2,&dx);
135: VecNormBegin(U,NORM_2,&du);
136: VecNormBegin(W,NORM_2,&dw);
138: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
139: KSP_PCApply(ksp,W,M); /* m <- Bw */
140: KSP_MatMult(ksp,Amat,M,N); /* n <- Am */
142: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
143: VecNormEnd(R,NORM_2,&dp);
144: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
145: VecNormEnd(U,NORM_2,&dp);
146: }
147: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
148: VecDotEnd(R,U,&gamma);
149: }
150: VecDotEnd(W,U,&delta);
152: if (i > 0) {
153: VecNormEnd(S,NORM_2,&ds);
154: VecNormEnd(Z,NORM_2,&dz);
155: VecNormEnd(P,NORM_2,&dpp);
156: VecNormEnd(Q,NORM_2,&dq);
157: VecNormEnd(M,NORM_2,&dm);
158: }
159: VecNormEnd(X,NORM_2,&dx);
160: VecNormEnd(U,NORM_2,&du);
161: VecNormEnd(W,NORM_2,&dw);
163: if (i > 0) {
164: if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
165: else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
167: ksp->rnorm = dp;
168: KSPLogResidualHistory(ksp,dp);
169: KSPMonitor(ksp,i,dp);
170: (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
171: if (ksp->reason) return 0;
172: }
174: if (i == 0) {
175: alpha = gamma / delta;
176: VecCopy(N,Z); /* z <- n */
177: VecCopy(M,Q); /* q <- m */
178: VecCopy(U,P); /* p <- u */
179: VecCopy(W,S); /* s <- w */
180: } else {
181: beta = gamma / gammaold;
182: alpha = gamma / (delta - beta / alpha * gamma);
183: VecAYPX(Z,beta,N); /* z <- n + beta * z */
184: VecAYPX(Q,beta,M); /* q <- m + beta * q */
185: VecAYPX(P,beta,U); /* p <- u + beta * p */
186: VecAYPX(S,beta,W); /* s <- w + beta * s */
187: }
188: VecAXPY(X, alpha,P); /* x <- x + alpha * p */
189: VecAXPY(U,-alpha,Q); /* u <- u - alpha * q */
190: VecAXPY(W,-alpha,Z); /* w <- w - alpha * z */
191: VecAXPY(R,-alpha,S); /* r <- r - alpha * s */
192: gammaold = gamma;
194: if (i > 0) {
195: errncr = PetscSqrtReal(Anorm*xnp+2.0*Anorm*PetscAbsScalar(alphap)*dpp+rnp+2.0*PetscAbsScalar(alphap)*ds)*eps;
196: errncw = PetscSqrtReal(Anorm*unp+2.0*Anorm*PetscAbsScalar(alphap)*dq+wnp+2.0*PetscAbsScalar(alphap)*dz)*eps;
197: }
198: if (i > 1) {
199: errncs = PetscSqrtReal(Anorm*unp+2.0*Anorm*PetscAbsScalar(betap)*pnp+wnp+2.0*PetscAbsScalar(betap)*snp)*eps;
200: errncz = PetscSqrtReal((mnz*sqn+2)*Anorm*dm+2.0*Anorm*PetscAbsScalar(betap)*qnp+2.0*PetscAbsScalar(betap)*znp)*eps;
201: }
203: if (i > 0) {
204: if (i == 1) {
205: errr = PetscSqrtReal((mnz*sqn+1)*Anorm*xnp+db)*eps+PetscSqrtReal(PetscAbsScalar(alphap)*mnz*sqn*Anorm*dpp)*eps+errncr;
206: errs = PetscSqrtReal(mnz*sqn*Anorm*dpp)*eps;
207: errw = PetscSqrtReal(mnz*sqn*Anorm*unp)*eps+PetscSqrtReal(PetscAbsScalar(alphap)*mnz*sqn*Anorm*dq)*eps+errncw;
208: errz = PetscSqrtReal(mnz*sqn*Anorm*dq)*eps;
209: } else if (replace == 1) {
210: errrprev = errr;
211: errr = PetscSqrtReal((mnz*sqn+1)*Anorm*dx+db)*eps;
212: errs = PetscSqrtReal(mnz*sqn*Anorm*dpp)*eps;
213: errw = PetscSqrtReal(mnz*sqn*Anorm*du)*eps;
214: errz = PetscSqrtReal(mnz*sqn*Anorm*dq)*eps;
215: replace = 0;
216: } else {
217: errrprev = errr;
218: errr = errr+PetscAbsScalar(alphap)*PetscAbsScalar(betap)*errs+PetscAbsScalar(alphap)*errw+errncr+PetscAbsScalar(alphap)*errncs;
219: errs = errw+PetscAbsScalar(betap)*errs+errncs;
220: errw = errw+PetscAbsScalar(alphap)*PetscAbsScalar(betap)*errz+errncw+PetscAbsScalar(alphap)*errncz;
221: errz = PetscAbsScalar(betap)*errz+errncz;
222: }
223: if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
224: KSP_MatMult(ksp,Amat,X,R); /* r <- Ax - b */
225: VecAYPX(R,-1.0,B);
226: KSP_PCApply(ksp,R,U); /* u <- Br */
227: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
228: KSP_MatMult(ksp,Amat,P,S); /* s <- Ap */
229: KSP_PCApply(ksp,S,Q); /* q <- Bs */
230: KSP_MatMult(ksp,Amat,Q,Z); /* z <- Aq */
231: replace = 1;
232: totreplaces++;
233: }
234: }
236: i++;
237: ksp->its = i;
239: } while (i<=ksp->max_it);
240: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
241: return 0;
242: }
244: /*MC
245: KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements.
247: This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG. The
248: non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
250: KSPPIPECGRR improves the robustness of KSPPIPECG by adding an automated residual replacement strategy.
251: True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
252: iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.
254: See also KSPPIPECG, which is identical to KSPPIPECGRR without residual replacements.
255: See also KSPPIPECR, where the reduction is only overlapped with the matrix-vector product.
257: Level: intermediate
259: Notes:
260: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
261: performance of pipelined methods. See the FAQ on the PETSc website for details.
263: Contributed by:
264: Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
265: European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)
267: Reference:
268: S. Cools, E.F. Yetkin, E. Agullo, L. Giraud, W. Vanroose, "Analyzing the effect of local rounding error
269: propagation on the maximal attainable accuracy of the pipelined Conjugate Gradients method",
270: SIAM Journal on Matrix Analysis and Applications (SIMAX), 39(1):426--450, 2018.
272: .seealso: KSPCreate(), KSPSetType(), KSPPIPECR, KSPGROPPCG, KSPPIPECG, KSPPGMRES, KSPCG, KSPPIPEBCGS, KSPCGUseSingleReduction()
273: M*/
274: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
275: {
276: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,2);
277: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
278: KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);
279: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
281: ksp->ops->setup = KSPSetUp_PIPECGRR;
282: ksp->ops->solve = KSPSolve_PIPECGRR;
283: ksp->ops->destroy = KSPDestroyDefault;
284: ksp->ops->view = NULL;
285: ksp->ops->setfromoptions = NULL;
286: ksp->ops->buildsolution = KSPBuildSolutionDefault;
287: ksp->ops->buildresidual = KSPBuildResidualDefault;
288: return 0;
289: }