Actual source code: dgefa2.c
2: /*
3: Inverts 2 by 2 matrix using gaussian elimination with partial pivoting.
5: Used by the sparse factorization routines in
6: src/mat/impls/baij/seq
8: This is a combination of the Linpack routines
9: dgefa() and dgedi() specialized for a size of 2.
11: */
12: #include <petscsys.h>
14: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
15: {
16: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
17: PetscInt k4,j3;
18: MatScalar *aa,*ax,*ay,work[4],stmp;
19: MatReal tmp,max;
21: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
22: shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
24: /* Parameter adjustments */
25: a -= 3;
27: k = 1;
28: kp1 = k + 1;
29: k3 = 2*k;
30: k4 = k3 + k;
32: /* find l = pivot index */
33: i__2 = 3 - k;
34: aa = &a[k4];
35: max = PetscAbsScalar(aa[0]);
36: l = 1;
37: for (ll=1; ll<i__2; ll++) {
38: tmp = PetscAbsScalar(aa[ll]);
39: if (tmp > max) { max = tmp; l = ll+1;}
40: }
41: l += k - 1;
42: ipvt[k-1] = l;
44: if (a[l + k3] == 0.0) {
45: if (shift == 0.0) {
46: if (allowzeropivot) {
47: PetscInfo(NULL,"Zero pivot, row %" PetscInt_FMT "\n",k-1);
48: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
49: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT,k-1);
50: } else {
51: a[l + k3] = shift;
52: }
53: }
55: /* interchange if necessary */
56: if (l != k) {
57: stmp = a[l + k3];
58: a[l + k3] = a[k4];
59: a[k4] = stmp;
60: }
62: /* compute multipliers */
63: stmp = -1. / a[k4];
64: i__2 = 2 - k;
65: aa = &a[1 + k4];
66: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
68: /* row elimination with column indexing */
69: ax = &a[k4+1];
70: for (j = kp1; j <= 2; ++j) {
71: j3 = 2*j;
72: stmp = a[l + j3];
73: if (l != k) {
74: a[l + j3] = a[k + j3];
75: a[k + j3] = stmp;
76: }
78: i__3 = 2 - k;
79: ay = &a[1+k+j3];
80: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
81: }
83: ipvt[1] = 2;
84: if (a[6] == 0.0) {
85: if (PetscLikely(allowzeropivot)) {
86: PetscInfo(NULL,"Zero pivot, row 1\n");
87: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
88: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row 1");
89: }
91: /* Now form the inverse */
92: /* compute inverse(u) */
93: for (k = 1; k <= 2; ++k) {
94: k3 = 2*k;
95: k4 = k3 + k;
96: a[k4] = 1.0 / a[k4];
97: stmp = -a[k4];
98: i__2 = k - 1;
99: aa = &a[k3 + 1];
100: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
101: kp1 = k + 1;
102: if (2 < kp1) continue;
103: ax = aa;
104: for (j = kp1; j <= 2; ++j) {
105: j3 = 2*j;
106: stmp = a[k + j3];
107: a[k + j3] = 0.0;
108: ay = &a[j3 + 1];
109: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
110: }
111: }
113: /* form inverse(u)*inverse(l) */
114: k = 1;
115: k3 = 2*k;
116: kp1 = k + 1;
117: aa = a + k3;
118: for (i = kp1; i <= 2; ++i) {
119: work[i-1] = aa[i];
120: aa[i] = 0.0;
121: }
122: for (j = kp1; j <= 2; ++j) {
123: stmp = work[j-1];
124: ax = &a[2*j + 1];
125: ay = &a[k3 + 1];
126: ay[0] += stmp*ax[0];
127: ay[1] += stmp*ax[1];
128: }
129: l = ipvt[k-1];
130: if (l != k) {
131: ax = &a[k3 + 1];
132: ay = &a[2*l + 1];
133: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
134: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
135: }
136: return 0;
137: }
139: /* gaussian elimination with partial pivoting */
140: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
141: {
142: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
143: PetscInt k4,j3;
144: MatScalar *aa,*ax,*ay,work[81],stmp;
145: MatReal tmp,max;
147: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
149: /* Parameter adjustments */
150: a -= 10;
152: for (k = 1; k <= 8; ++k) {
153: kp1 = k + 1;
154: k3 = 9*k;
155: k4 = k3 + k;
157: /* find l = pivot index */
158: i__2 = 10 - k;
159: aa = &a[k4];
160: max = PetscAbsScalar(aa[0]);
161: l = 1;
162: for (ll=1; ll<i__2; ll++) {
163: tmp = PetscAbsScalar(aa[ll]);
164: if (tmp > max) { max = tmp; l = ll+1;}
165: }
166: l += k - 1;
167: ipvt[k-1] = l;
169: if (a[l + k3] == 0.0) {
170: if (shift == 0.0) {
171: if (allowzeropivot) {
172: PetscInfo(NULL,"Zero pivot, row %" PetscInt_FMT "\n",k-1);
173: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
174: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT,k-1);
175: } else {
176: a[l + k3] = shift;
177: }
178: }
180: /* interchange if necessary */
181: if (l != k) {
182: stmp = a[l + k3];
183: a[l + k3] = a[k4];
184: a[k4] = stmp;
185: }
187: /* compute multipliers */
188: stmp = -1. / a[k4];
189: i__2 = 9 - k;
190: aa = &a[1 + k4];
191: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
193: /* row elimination with column indexing */
194: ax = &a[k4+1];
195: for (j = kp1; j <= 9; ++j) {
196: j3 = 9*j;
197: stmp = a[l + j3];
198: if (l != k) {
199: a[l + j3] = a[k + j3];
200: a[k + j3] = stmp;
201: }
203: i__3 = 9 - k;
204: ay = &a[1+k+j3];
205: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
206: }
207: }
208: ipvt[8] = 9;
209: if (a[90] == 0.0) {
210: if (PetscLikely(allowzeropivot)) {
211: PetscInfo(NULL,"Zero pivot, row 8\n");
212: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
213: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row 8");
214: }
216: /* Now form the inverse */
217: /* compute inverse(u) */
218: for (k = 1; k <= 9; ++k) {
219: k3 = 9*k;
220: k4 = k3 + k;
221: a[k4] = 1.0 / a[k4];
222: stmp = -a[k4];
223: i__2 = k - 1;
224: aa = &a[k3 + 1];
225: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
226: kp1 = k + 1;
227: if (9 < kp1) continue;
228: ax = aa;
229: for (j = kp1; j <= 9; ++j) {
230: j3 = 9*j;
231: stmp = a[k + j3];
232: a[k + j3] = 0.0;
233: ay = &a[j3 + 1];
234: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
235: }
236: }
238: /* form inverse(u)*inverse(l) */
239: for (kb = 1; kb <= 8; ++kb) {
240: k = 9 - kb;
241: k3 = 9*k;
242: kp1 = k + 1;
243: aa = a + k3;
244: for (i = kp1; i <= 9; ++i) {
245: work[i-1] = aa[i];
246: aa[i] = 0.0;
247: }
248: for (j = kp1; j <= 9; ++j) {
249: stmp = work[j-1];
250: ax = &a[9*j + 1];
251: ay = &a[k3 + 1];
252: ay[0] += stmp*ax[0];
253: ay[1] += stmp*ax[1];
254: ay[2] += stmp*ax[2];
255: ay[3] += stmp*ax[3];
256: ay[4] += stmp*ax[4];
257: ay[5] += stmp*ax[5];
258: ay[6] += stmp*ax[6];
259: ay[7] += stmp*ax[7];
260: ay[8] += stmp*ax[8];
261: }
262: l = ipvt[k-1];
263: if (l != k) {
264: ax = &a[k3 + 1];
265: ay = &a[9*l + 1];
266: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
267: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
268: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
269: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
270: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
271: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
272: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
273: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
274: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
275: }
276: }
277: return 0;
278: }
280: /*
281: Inverts 15 by 15 matrix using gaussian elimination with partial pivoting.
283: Used by the sparse factorization routines in
284: src/mat/impls/baij/seq
286: This is a combination of the Linpack routines
287: dgefa() and dgedi() specialized for a size of 15.
289: */
291: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
292: {
293: PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
294: PetscInt k4,j3;
295: MatScalar *aa,*ax,*ay,stmp;
296: MatReal tmp,max;
298: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
300: /* Parameter adjustments */
301: a -= 16;
303: for (k = 1; k <= 14; ++k) {
304: kp1 = k + 1;
305: k3 = 15*k;
306: k4 = k3 + k;
308: /* find l = pivot index */
309: i__2 = 16 - k;
310: aa = &a[k4];
311: max = PetscAbsScalar(aa[0]);
312: l = 1;
313: for (ll=1; ll<i__2; ll++) {
314: tmp = PetscAbsScalar(aa[ll]);
315: if (tmp > max) { max = tmp; l = ll+1;}
316: }
317: l += k - 1;
318: ipvt[k-1] = l;
320: if (a[l + k3] == 0.0) {
321: if (shift == 0.0) {
322: if (allowzeropivot) {
323: PetscInfo(NULL,"Zero pivot, row %" PetscInt_FMT "\n",k-1);
324: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
325: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT,k-1);
326: } else {
327: a[l + k3] = shift;
328: }
329: }
331: /* interchange if necessary */
332: if (l != k) {
333: stmp = a[l + k3];
334: a[l + k3] = a[k4];
335: a[k4] = stmp;
336: }
338: /* compute multipliers */
339: stmp = -1. / a[k4];
340: i__2 = 15 - k;
341: aa = &a[1 + k4];
342: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
344: /* row elimination with column indexing */
345: ax = &a[k4+1];
346: for (j = kp1; j <= 15; ++j) {
347: j3 = 15*j;
348: stmp = a[l + j3];
349: if (l != k) {
350: a[l + j3] = a[k + j3];
351: a[k + j3] = stmp;
352: }
354: i__3 = 15 - k;
355: ay = &a[1+k+j3];
356: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
357: }
358: }
359: ipvt[14] = 15;
360: if (a[240] == 0.0) {
361: if (PetscLikely(allowzeropivot)) {
362: PetscInfo(NULL,"Zero pivot, row 14\n");
363: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
364: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row 14");
365: }
367: /* Now form the inverse */
368: /* compute inverse(u) */
369: for (k = 1; k <= 15; ++k) {
370: k3 = 15*k;
371: k4 = k3 + k;
372: a[k4] = 1.0 / a[k4];
373: stmp = -a[k4];
374: i__2 = k - 1;
375: aa = &a[k3 + 1];
376: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
377: kp1 = k + 1;
378: if (15 < kp1) continue;
379: ax = aa;
380: for (j = kp1; j <= 15; ++j) {
381: j3 = 15*j;
382: stmp = a[k + j3];
383: a[k + j3] = 0.0;
384: ay = &a[j3 + 1];
385: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
386: }
387: }
389: /* form inverse(u)*inverse(l) */
390: for (kb = 1; kb <= 14; ++kb) {
391: k = 15 - kb;
392: k3 = 15*k;
393: kp1 = k + 1;
394: aa = a + k3;
395: for (i = kp1; i <= 15; ++i) {
396: work[i-1] = aa[i];
397: aa[i] = 0.0;
398: }
399: for (j = kp1; j <= 15; ++j) {
400: stmp = work[j-1];
401: ax = &a[15*j + 1];
402: ay = &a[k3 + 1];
403: ay[0] += stmp*ax[0];
404: ay[1] += stmp*ax[1];
405: ay[2] += stmp*ax[2];
406: ay[3] += stmp*ax[3];
407: ay[4] += stmp*ax[4];
408: ay[5] += stmp*ax[5];
409: ay[6] += stmp*ax[6];
410: ay[7] += stmp*ax[7];
411: ay[8] += stmp*ax[8];
412: ay[9] += stmp*ax[9];
413: ay[10] += stmp*ax[10];
414: ay[11] += stmp*ax[11];
415: ay[12] += stmp*ax[12];
416: ay[13] += stmp*ax[13];
417: ay[14] += stmp*ax[14];
418: }
419: l = ipvt[k-1];
420: if (l != k) {
421: ax = &a[k3 + 1];
422: ay = &a[15*l + 1];
423: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
424: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
425: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
426: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
427: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
428: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
429: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
430: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
431: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
432: stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp;
433: stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
434: stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
435: stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
436: stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
437: stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
438: }
439: }
440: return 0;
441: }