xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision e4dd521c7a718087a78a4fcbe68c46ab2c9b5c3f)
1 /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
2 /*
3   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4           C = A * B
5           C = P * A * P^T
6 */
7 
8 #include "src/mat/impls/aij/seq/aij.h"
9 #include "src/mat/utils/freespace.h"
10 
11 static int logkey_matmatmult            = 0;
12 static int logkey_matmatmult_symbolic   = 0;
13 static int logkey_matmatmult_numeric    = 0;
14 
15 static int logkey_matapplypapt          = 0;
16 static int logkey_matapplypapt_symbolic = 0;
17 static int logkey_matapplypapt_numeric  = 0;
18 
19 /*
20      MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
21            C = A * B;
22 
23      Note: C is assumed to be uncreated.
24            If this is not the case, Destroy C before calling this routine.
25 */
26 #ifdef USE_INTSORT
27 /*
28 This roution is modified by the one below for better performance.
29 The changes are:
30    -- PetscSortInt() is replace by a linked list
31    -- malloc larger Initial FreeSpace
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
35 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
36 {
37   int            ierr;
38   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
39   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
40   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
41   int            *ci,*cj,*denserow,*sparserow;
42   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
43   int            i,j,k,anzi,brow,bnzj,cnzi;
44   MatScalar      *ca;
45 
46   PetscFunctionBegin;
47   /* some error checking which could be moved into interface layer */
48   if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
49   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
50 
51   /* Set up timers */
52   if (!logkey_matmatmult_symbolic) {
53     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
54   }
55   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
56 
57   /* Set up */
58   /* Allocate ci array, arrays for fill computation and */
59   /* free space for accumulating nonzero column info */
60   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
61   ci[0] = 0;
62 
63   ierr = PetscMalloc((2*bn+1)*sizeof(int),&denserow);CHKERRQ(ierr);
64   ierr = PetscMemzero(denserow,(2*bn+1)*sizeof(int));CHKERRQ(ierr);
65   sparserow = denserow + bn;
66 
67   /* Initial FreeSpace size is nnz(B)=bi[bm] */
68   ierr          = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr);
69   current_space = free_space;
70 
71   /* Determine symbolic info for each row of the product: */
72   for (i=0;i<am;i++) {
73     anzi = ai[i+1] - ai[i];
74     cnzi = 0;
75     for (j=0;j<anzi;j++) {
76       brow = *aj++;
77       bnzj = bi[brow+1] - bi[brow];
78       bjj  = bj + bi[brow];
79       for (k=0;k<bnzj;k++) {
80         /* If column is not marked, mark it in compressed and uncompressed locations. */
81         /* For simplicity, leave uncompressed row unsorted until finished with row, */
82         /* and increment nonzero count for this row. */
83         if (!denserow[bjj[k]]) {
84           denserow[bjj[k]]  = -1;
85           sparserow[cnzi++] = bjj[k];
86         }
87       }
88     }
89 
90     /* sort sparserow */
91     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
92 
93     /* If free space is not available, make more free space */
94     /* Double the amount of total space in the list */
95     if (current_space->local_remaining<cnzi) {
96       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
97     }
98 
99     /* Copy data into free space, and zero out denserow */
100     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
101     current_space->array           += cnzi;
102     current_space->local_used      += cnzi;
103     current_space->local_remaining -= cnzi;
104     for (j=0;j<cnzi;j++) {
105       denserow[sparserow[j]] = 0;
106     }
107     ci[i+1] = ci[i] + cnzi;
108   }
109 
110   /* Column indices are in the list of free space */
111   /* Allocate space for cj, initialize cj, and */
112   /* destroy list of free space and other temporary array(s) */
113   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
114   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
115   ierr = PetscFree(denserow);CHKERRQ(ierr);
116 
117   /* Allocate space for ca */
118   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
119   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
120 
121   /* put together the new matrix */
122   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
123 
124   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
125   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
126   c = (Mat_SeqAIJ *)((*C)->data);
127   c->freedata = PETSC_TRUE;
128   c->nonew    = 0;
129 
130   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 #endif /*  USE_INTSORT */
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
137 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
138 {
139   int            ierr;
140   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
141   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
142   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
143   int            *ci,*cj,*lnk,idx0,idx,bcol;
144   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
145   int            i,j,k,anzi,brow,bnzj,cnzi;
146   MatScalar      *ca;
147 
148   PetscFunctionBegin;
149   /* some error checking which could be moved into interface layer */
150   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
151 
152   /* Set up timers */
153   if (!logkey_matmatmult_symbolic) {
154     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
155   }
156   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
157 
158   /* Set up */
159   /* Allocate ci array, arrays for fill computation and */
160   /* free space for accumulating nonzero column info */
161   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
162   ci[0] = 0;
163 
164   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
165   for (i=0; i<bn; i++) lnk[i] = -1;
166 
167   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
168   ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr);
169   current_space = free_space;
170 
171   /* Determine symbolic info for each row of the product: */
172   for (i=0;i<am;i++) {
173     anzi = ai[i+1] - ai[i];
174     cnzi = 0;
175     lnk[bn] = bn;
176     for (j=0;j<anzi;j++) {
177       brow = *aj++;
178       bnzj = bi[brow+1] - bi[brow];
179       bjj  = bj + bi[brow];
180       idx  = bn;
181       for (k=0;k<bnzj;k++) {
182         bcol = bjj[k];
183         if (lnk[bcol] == -1) { /* new col */
184           if (k>0) idx = bjj[k-1];
185           do {
186             idx0 = idx;
187             idx  = lnk[idx0];
188           } while (bcol > idx);
189           lnk[idx0] = bcol;
190           lnk[bcol] = idx;
191           cnzi++;
192         }
193       }
194     }
195 
196     /* If free space is not available, make more free space */
197     /* Double the amount of total space in the list */
198     if (current_space->local_remaining<cnzi) {
199       printf("...%d -th row, double space ...\n",i);
200       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
201     }
202 
203     /* Copy data into free space, and zero out denserow and lnk */
204     idx = bn;
205     for (j=0; j<cnzi; j++){
206       idx0 = idx;
207       idx  = lnk[idx0];
208       *current_space->array++ = idx;
209       lnk[idx0] = -1;
210     }
211     lnk[idx] = -1;
212 
213     current_space->local_used      += cnzi;
214     current_space->local_remaining -= cnzi;
215 
216     ci[i+1] = ci[i] + cnzi;
217   }
218 
219   /* Column indices are in the list of free space */
220   /* Allocate space for cj, initialize cj, and */
221   /* destroy list of free space and other temporary array(s) */
222   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
223   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
224   ierr = PetscFree(lnk);CHKERRQ(ierr);
225 
226   /* Allocate space for ca */
227   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
228   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
229 
230   /* put together the new matrix */
231   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
232 
233   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
234   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
235   c = (Mat_SeqAIJ *)((*C)->data);
236   c->freedata = PETSC_TRUE;
237   c->nonew    = 0;
238 
239   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
240   PetscFunctionReturn(0);
241 }
242 
243 /*
244      MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
245            C=A*B;
246      Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ.
247 */
248 #undef __FUNCT__
249 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
250 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
251 {
252   int        ierr,flops=0;
253   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
254   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
255   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
256   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
257   int        an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M;
258   int        i,j,k,anzi,bnzi,cnzi,brow;
259   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
260 
261   PetscFunctionBegin;
262 
263   /* This error checking should be unnecessary if the symbolic was performed */
264   if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm);
265   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
266   if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn);
267 
268   /* Set up timers */
269   if (!logkey_matmatmult_numeric) {
270     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
271   }
272   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
273 
274   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
275   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
276   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
277   /* Traverse A row-wise. */
278   /* Build the ith row in C by summing over nonzero columns in A, */
279   /* the rows of B corresponding to nonzeros of A. */
280   for (i=0;i<am;i++) {
281     anzi = ai[i+1] - ai[i];
282     for (j=0;j<anzi;j++) {
283       brow = *aj++;
284       bnzi = bi[brow+1] - bi[brow];
285       bjj  = bj + bi[brow];
286       baj  = ba + bi[brow];
287       for (k=0;k<bnzi;k++) {
288         temp[bjj[k]] += (*aa)*baj[k];
289       }
290       flops += 2*bnzi;
291       aa++;
292     }
293     /* Store row back into C, and re-zero temp */
294     cnzi = ci[i+1] - ci[i];
295     for (j=0;j<cnzi;j++) {
296       ca[j] = temp[cj[j]];
297       temp[cj[j]] = 0.0;
298     }
299     ca += cnzi;
300     cj += cnzi;
301   }
302   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
303   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
304 
305   /* Free temp */
306   ierr = PetscFree(temp);CHKERRQ(ierr);
307   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
308   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
314 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) {
315   int ierr;
316 
317   PetscFunctionBegin;
318   if (!logkey_matmatmult) {
319     ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr);
320   }
321   ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
322   ierr = MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);CHKERRQ(ierr);
323   ierr = MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
324   ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 
329 /*
330      MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
331            C = P * A * P^T;
332 
333      Note: C is assumed to be uncreated.
334            If this is not the case, Destroy C before calling this routine.
335 */
336 #undef __FUNCT__
337 #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ"
338 int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
339   /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
340   /*        and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
341   int            ierr;
342   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
343   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
344   int            *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
345   int            *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
346   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
347   int            i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
348   MatScalar      *ca;
349 
350   PetscFunctionBegin;
351 
352   /* some error checking which could be moved into interface layer */
353   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
354   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
355 
356   /* Set up timers */
357   if (!logkey_matapplypapt_symbolic) {
358     ierr = PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
359   }
360   ierr = PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr);
361 
362   /* Create ij structure of P^T */
363   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
364 
365   /* Allocate ci array, arrays for fill computation and */
366   /* free space for accumulating nonzero column info */
367   ierr = PetscMalloc(((pm+1)*1)*sizeof(int),&ci);CHKERRQ(ierr);
368   ci[0] = 0;
369 
370   ierr = PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);CHKERRQ(ierr);
371   ierr = PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));CHKERRQ(ierr);
372   pasparserow  = padenserow  + an;
373   denserow     = pasparserow + an;
374   sparserow    = denserow    + pm;
375 
376   /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
377   /* This should be reasonable if sparsity of PAPt is similar to that of A. */
378   ierr          = GetMoreSpace((ai[am]/pn)*pm,&free_space);
379   current_space = free_space;
380 
381   /* Determine fill for each row of C: */
382   for (i=0;i<pm;i++) {
383     pnzi  = pi[i+1] - pi[i];
384     panzi = 0;
385     /* Get symbolic sparse row of PA: */
386     for (j=0;j<pnzi;j++) {
387       arow = *pj++;
388       anzj = ai[arow+1] - ai[arow];
389       ajj  = aj + ai[arow];
390       for (k=0;k<anzj;k++) {
391         if (!padenserow[ajj[k]]) {
392           padenserow[ajj[k]]   = -1;
393           pasparserow[panzi++] = ajj[k];
394         }
395       }
396     }
397     /* Using symbolic row of PA, determine symbolic row of C: */
398     paj    = pasparserow;
399     cnzi   = 0;
400     for (j=0;j<panzi;j++) {
401       ptrow = *paj++;
402       ptnzj = pti[ptrow+1] - pti[ptrow];
403       ptjj  = ptj + pti[ptrow];
404       for (k=0;k<ptnzj;k++) {
405         if (!denserow[ptjj[k]]) {
406           denserow[ptjj[k]] = -1;
407           sparserow[cnzi++] = ptjj[k];
408         }
409       }
410     }
411 
412     /* sort sparse representation */
413     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
414 
415     /* If free space is not available, make more free space */
416     /* Double the amount of total space in the list */
417     if (current_space->local_remaining<cnzi) {
418       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
419     }
420 
421     /* Copy data into free space, and zero out dense row */
422     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
423     current_space->array           += cnzi;
424     current_space->local_used      += cnzi;
425     current_space->local_remaining -= cnzi;
426 
427     for (j=0;j<panzi;j++) {
428       padenserow[pasparserow[j]] = 0;
429     }
430     for (j=0;j<cnzi;j++) {
431       denserow[sparserow[j]] = 0;
432     }
433     ci[i+1] = ci[i] + cnzi;
434   }
435   /* column indices are in the list of free space */
436   /* Allocate space for cj, initialize cj, and */
437   /* destroy list of free space and other temporary array(s) */
438   ierr = PetscMalloc((ci[pm]+1)*sizeof(int),&cj);CHKERRQ(ierr);
439   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
440   ierr = PetscFree(padenserow);CHKERRQ(ierr);
441 
442   /* Allocate space for ca */
443   ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
444   ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr);
445 
446   /* put together the new matrix */
447   ierr = MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr);
448 
449   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
450   /* Since these are PETSc arrays, change flags to free them as necessary. */
451   c = (Mat_SeqAIJ *)((*C)->data);
452   c->freedata = PETSC_TRUE;
453   c->nonew    = 0;
454 
455   /* Clean up. */
456   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
457 
458   ierr = PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 
462 /*
463      MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
464            C = P * A * P^T;
465      Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
466 */
467 #undef __FUNCT__
468 #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ"
469 int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
470   int        ierr,flops=0;
471   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
472   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
473   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
474   int        *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
475   int        *ci=c->i,*cj=c->j;
476   int        an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
477   int        i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
478   MatScalar  *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;
479 
480   PetscFunctionBegin;
481 
482   /* This error checking should be unnecessary if the symbolic was performed */
483   if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm);
484   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
485   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
486   if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn);
487 
488   /* Set up timers */
489   if (!logkey_matapplypapt_numeric) {
490     ierr = PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);CHKERRQ(ierr);
491   }
492   ierr = PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr);
493 
494   ierr = PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);CHKERRQ(ierr);
495   ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
496   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
497 
498   paj      = (int *)(paa + an);
499   pajdense = paj + an;
500 
501   for (i=0;i<pm;i++) {
502     /* Form sparse row of P*A */
503     pnzi  = pi[i+1] - pi[i];
504     panzj = 0;
505     for (j=0;j<pnzi;j++) {
506       arow = *pj++;
507       anzj = ai[arow+1] - ai[arow];
508       ajj  = aj + ai[arow];
509       aaj  = aa + ai[arow];
510       for (k=0;k<anzj;k++) {
511         if (!pajdense[ajj[k]]) {
512           pajdense[ajj[k]] = -1;
513           paj[panzj++]     = ajj[k];
514         }
515         paa[ajj[k]] += (*pa)*aaj[k];
516       }
517       flops += 2*anzj;
518       pa++;
519     }
520 
521     /* Sort the j index array for quick sparse axpy. */
522     ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr);
523 
524     /* Compute P*A*P^T using sparse inner products. */
525     /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
526     cnzi = ci[i+1] - ci[i];
527     for (j=0;j<cnzi;j++) {
528       /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
529       ptcol = *cj++;
530       ptnzj = pi[ptcol+1] - pi[ptcol];
531       ptj   = pjj + pi[ptcol];
532       ptaj  = pta + pi[ptcol];
533       sum   = 0.;
534       k1    = 0;
535       k2    = 0;
536       while ((k1<panzj) && (k2<ptnzj)) {
537         if (paj[k1]==ptj[k2]) {
538           sum += paa[paj[k1++]]*ptaj[k2++];
539         } else if (paj[k1] < ptj[k2]) {
540           k1++;
541         } else /* if (paj[k1] > ptj[k2]) */ {
542           k2++;
543         }
544       }
545       *ca++ = sum;
546     }
547 
548     /* Zero the current row info for P*A */
549     for (j=0;j<panzj;j++) {
550       paa[paj[j]]      = 0.;
551       pajdense[paj[j]] = 0;
552     }
553   }
554 
555   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
556   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
558   ierr = PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr);
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatApplyPAPt_SeqAIJ_SeqAIJ"
564 int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
565   int ierr;
566 
567   PetscFunctionBegin;
568   if (!logkey_matapplypapt) {
569     ierr = PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);CHKERRQ(ierr);
570   }
571   ierr = PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr);
572   ierr = MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
573   ierr = MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
574   ierr = PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr);
575   PetscFunctionReturn(0);
576 }
577