xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision f37cbb0890cfcc075bf42ae2b4760d76e6f1168d)
1 /*
  Defines projective product routines where A is an AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAP"
13 /*@
14    MatPtAP - Creates the matrix projection C = P^T * A * P
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the matrix
20 .  P - the projection matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
31    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
36 @*/
37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76 
77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78 #undef __FUNCT__
79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81 {
82   PetscErrorCode       ierr;
83   Mat_MatMatMultMPI    *ptap;
84   PetscObjectContainer container;
85 
86   PetscFunctionBegin;
87   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88   if (container) {
89     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
90     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
91     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
92 
93     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
94     ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
95   }
96   ierr = PetscFree(ptap);CHKERRQ(ierr);
97 
98   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
99   PetscFunctionReturn(0);
100 }
101 
#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
/*
   MatPtAP_MPIAIJ_MPIAIJ - MPIAIJ implementation of MatPtAP().

   MAT_INITIAL_MATRIX: gathers the off-process rows of P needed by this
   process (B_oth) and the local rows of P (B_loc), attaches them to P in a
   Mat_MatMatMultMPI struct (via a "MatPtAPMPI" container) so that later
   MAT_REUSE_MATRIX calls can find them, then computes the symbolic product.
   Both paths finish by computing the numeric product into *C.
*/
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;       /* support data composed with P */
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse; the destructor override
       ensures the struct and gathered matrices are freed together with P */
    P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
    }

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    /* improve! */
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); /* improve! */
    /* NOTE(review): B_loc was just destroyed, yet scall==MAT_REUSE_MATRIX is
       passed here while the analogous B_oth rebuild above uses
       MAT_INITIAL_MATRIX -- confirm MatGetLocalMat() semantics; MAT_INITIAL_MATRIX
       looks intended */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
157 
158 #undef __FUNCT__
159 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
160 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
161 {
162   PetscErrorCode       ierr;
163   Mat                  C_seq;
164   Mat_MatMatMultMPI    *ptap;
165   PetscObjectContainer container;
166 
167   PetscFunctionBegin;
168 
169   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
170   if (container) {
171     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
172   } else {
173     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
174   }
175   /* compute C_seq = P_loc^T * A_loc * P_seq */
176   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);
177 
178   /* add C_seq into mpi C */
179   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
180 
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
186 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
187 {
188   PetscErrorCode       ierr;
189   Mat_Merge_SeqsToMPI  *merge;
190   Mat_MatMatMultMPI    *ptap;
191   PetscObjectContainer cont_merge,cont_ptap;
192 
193   PetscFunctionBegin;
194   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
195   if (cont_merge) {
196     ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
197   } else {
198     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
199   }
200   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
201   if (cont_ptap) {
202     ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
203   } else {
204     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
205   }
206   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr);
207   ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
213 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
214 {
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (scall == MAT_INITIAL_MATRIX){
219     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
220     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
221     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
222   }
223   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
224   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
225   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 /*
230    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
231 
232    Collective on Mat
233 
234    Input Parameters:
235 +  A - the matrix
236 -  P - the projection matrix
237 
238    Output Parameters:
239 .  C - the (i,j) structure of the product matrix
240 
241    Notes:
242    C will be created and must be destroyed by the user with MatDestroy().
243 
244    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
245    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
246    this (i,j) structure by calling MatPtAPNumeric().
247 
248    Level: intermediate
249 
250 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
251 */
252 #undef __FUNCT__
253 #define __FUNCT__ "MatPtAPSymbolic"
254 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
255 {
256   PetscErrorCode ierr;
257   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
258   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
259 
260   PetscFunctionBegin;
261 
262   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
263   PetscValidType(A,1);
264   MatPreallocated(A);
265   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
266   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
267 
268   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
269   PetscValidType(P,2);
270   MatPreallocated(P);
271   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
272   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
273 
274   PetscValidPointer(C,3);
275 
276   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
277   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
278 
279   /* For now, we do not dispatch based on the type of A and P */
280   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
281   fA = A->ops->ptapsymbolic;
282   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
283   fP = P->ops->ptapsymbolic;
284   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
285   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
286 
287   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
288   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
289   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
290 
291   PetscFunctionReturn(0);
292 }
293 
294 typedef struct {
295   Mat    symAP;
296 } Mat_PtAPstruct;
297 
298 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
299 
300 #undef __FUNCT__
301 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
302 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
303 {
304   PetscErrorCode    ierr;
305   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
306 
307   PetscFunctionBegin;
308   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
309   ierr = PetscFree(ptap);CHKERRQ(ierr);
310   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
316 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
317 {
318   PetscErrorCode ierr;
319   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
320   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
321   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
322   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
323   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
324   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
325   MatScalar      *ca;
326   PetscBT        lnkbt;
327 
328   PetscFunctionBegin;
329   /* Get ij structure of P^T */
330   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
331   ptJ=ptj;
332 
333   /* Allocate ci array, arrays for fill computation and */
334   /* free space for accumulating nonzero column info */
335   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
336   ci[0] = 0;
337 
338   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
339   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
340   ptasparserow = ptadenserow  + an;
341 
342   /* create and initialize a linked list */
343   nlnk = pn+1;
344   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
345 
346   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
347   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
348   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
349   current_space = free_space;
350 
351   /* Determine symbolic info for each row of C: */
352   for (i=0;i<pn;i++) {
353     ptnzi  = pti[i+1] - pti[i];
354     ptanzi = 0;
355     /* Determine symbolic row of PtA: */
356     for (j=0;j<ptnzi;j++) {
357       arow = *ptJ++;
358       anzj = ai[arow+1] - ai[arow];
359       ajj  = aj + ai[arow];
360       for (k=0;k<anzj;k++) {
361         if (!ptadenserow[ajj[k]]) {
362           ptadenserow[ajj[k]]    = -1;
363           ptasparserow[ptanzi++] = ajj[k];
364         }
365       }
366     }
367       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
368     ptaj = ptasparserow;
369     cnzi   = 0;
370     for (j=0;j<ptanzi;j++) {
371       prow = *ptaj++;
372       pnzj = pi[prow+1] - pi[prow];
373       pjj  = pj + pi[prow];
374       /* add non-zero cols of P into the sorted linked list lnk */
375       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
376       cnzi += nlnk;
377     }
378 
379     /* If free space is not available, make more free space */
380     /* Double the amount of total space in the list */
381     if (current_space->local_remaining<cnzi) {
382       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
383     }
384 
385     /* Copy data into free space, and zero out denserows */
386     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
387     current_space->array           += cnzi;
388     current_space->local_used      += cnzi;
389     current_space->local_remaining -= cnzi;
390 
391     for (j=0;j<ptanzi;j++) {
392       ptadenserow[ptasparserow[j]] = 0;
393     }
394     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
395     /*        For now, we will recompute what is needed. */
396     ci[i+1] = ci[i] + cnzi;
397   }
398   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
399   /* Allocate space for cj, initialize cj, and */
400   /* destroy list of free space and other temporary array(s) */
401   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
402   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
403   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
404   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
405 
406   /* Allocate space for ca */
407   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
408   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
409 
410   /* put together the new matrix */
411   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
412 
413   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
414   /* Since these are PETSc arrays, change flags to free them as necessary. */
415   c = (Mat_SeqAIJ *)((*C)->data);
416   c->freedata = PETSC_TRUE;
417   c->nonew    = 0;
418 
419   /* Clean up. */
420   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
421 
422   PetscFunctionReturn(0);
423 }
424 
425 #include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
/*
   MatPtAPSymbolic_SeqAIJ_SeqMAIJ - symbolic C = P^T*A*P where P is a SeqMAIJ
   matrix (the scalar SeqAIJ matrix pp->AIJ replicated over pp->dof components).

   NOTE(review): the author's comment below flags this routine as untested and
   probably broken; several index computations inside look incorrect (see the
   inline notes) -- do not enable without verification.
*/
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;     /* the underlying scalar (1-dof) projection */
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  /* one allocation carved into: PtA dense marker, PtA sparse list,
     C-row dense marker, C-row sparse list */
  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow  + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow     + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  /* NOTE(review): return code of GetMoreSpace is not CHKERRQ'd here */
  ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  /* NOTE(review): pn is the scalar P's column count; dividing it again by
     ppdof looks suspect -- confirm intended row range */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
    /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        /* NOTE(review): presumably arow should scale by ppdof
           (ptJ[j]*ppdof + dof) to index the dof-interleaved A -- confirm */
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi   = 0;
      for (j=0;j<ptanzi;j++) {
        /* NOTE(review): dof==0 on the first pass of the dof loop, so %dof and
           /dof divide by zero (undefined behavior); ppdof was almost certainly
           intended */
        pdof = *ptaj%dof;
        prow = (*ptaj++)/dof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /*        For now, we will recompute what is needed. */
      /* NOTE(review): the C row written here should presumably be i*ppdof+dof,
         not i+dof -- as written, rows are overwritten across iterations */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
553 
554 /*
555    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
556 
557    Collective on Mat
558 
559    Input Parameters:
560 +  A - the matrix
561 -  P - the projection matrix
562 
563    Output Parameters:
564 .  C - the product matrix
565 
566    Notes:
567    C must have been created by calling MatPtAPSymbolic and must be destroyed by
   the user using MatDestroy().
569 
570    This routine is currently only implemented for pairs of AIJ matrices and classes
571    which inherit from AIJ.  C will be of type MATAIJ.
572 
573    Level: intermediate
574 
575 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
576 */
577 #undef __FUNCT__
578 #define __FUNCT__ "MatPtAPNumeric"
579 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
580 {
581   PetscErrorCode ierr;
582   PetscErrorCode (*fA)(Mat,Mat,Mat);
583   PetscErrorCode (*fP)(Mat,Mat,Mat);
584 
585   PetscFunctionBegin;
586 
587   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
588   PetscValidType(A,1);
589   MatPreallocated(A);
590   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
591   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
592 
593   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
594   PetscValidType(P,2);
595   MatPreallocated(P);
596   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
597   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
598 
599   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
600   PetscValidType(C,3);
601   MatPreallocated(C);
602   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
603   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
604 
605   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
606   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
607   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
608   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
609 
610   /* For now, we do not dispatch based on the type of A and P */
611   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
612   fA = A->ops->ptapnumeric;
613   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
614   fP = P->ops->ptapnumeric;
615   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
616   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
617 
618   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
619   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
620   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
621 
622   PetscFunctionReturn(0);
623 }
624 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   MatPtAPNumeric_SeqAIJ_SeqAIJ - fills the values of C = P^T*A*P, where C's
   nonzero structure was previously built by MatPtAPSymbolic_SeqAIJ_SeqAIJ().

   For each row i of A: a sparse row of A*P is accumulated in a dense work
   array (apa) with its index list (apj), then scattered into C with the
   outer product (P^T)[:,i]*(A*P)[i,:] via a sorted sparse axpy.
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  /* aj/aa and pJ/pA walk the CSR data of A and P sequentially across rows */
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P:
     cn scalars (apa) followed by 2*cn ints (apj, apjdense) in one block */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;                      /* column of A = row of P */
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;         /* mark column as present in this row */
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];     /* accumulate A(i,prow)*P(prow,:) */
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;                    /* row of C = column of P in row i */
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
706 
/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;  /* registered lazily on first call */
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
/*
   MatPtAPNumeric_MPIAIJ_MPIAIJ_Local - fills the values of the local
   sequential product C = P_loc^T * A * P, where A's diagonal block pairs
   with P_loc and A's off-diagonal block pairs with P_oth (the gathered
   off-process rows of P).  Same dense-accumulator/sparse-axpy scheme as
   MatPtAPNumeric_SeqAIJ_SeqAIJ().

   NOTE(review): the prstart parameter is not referenced in this routine.
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  /* Register the profiling event on first use. */
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P:
     cn scalars (apa) followed by 2*cn ints (apj, apjdense) in one block */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
     apnzj = 0;
    /* diagonal portion of A: local columns index rows of P_loc directly */
    anzi  = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;      /* mark column as present in this row */
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A: columns index the gathered rows of P_oth */
    anzi  = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];          /* global column of this off-diag entry */
      /* NOTE(review): both accepted branches assign the same value, so this
         only verifies col lies outside the diagonal block [cstart,cend);
         presumably a pure sanity check -- confirm no distinct offsets into
         P_oth were intended for the two ranges */
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;                 /* row of C = column of P_loc in row i */
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
824 static PetscEvent logkey_matptapsymbolic_local = 0;
825 #undef __FUNCT__
826 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
827 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
828 {
829   PetscErrorCode ierr;
830   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
831   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
832   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
833   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
834   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
835   PetscInt       *ci,*cj,*ptaj;
836   PetscInt       an=A->N,am=A->m,pN=P_loc->N;
837   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
838   PetscInt       pm=P_loc->m,nlnk,*lnk;
839   MatScalar      *ca;
840   PetscBT        lnkbt;
841   PetscInt       prend,nprow_loc,nprow_oth;
842   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
843   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
844   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
845 
846   PetscFunctionBegin;
847   if (!logkey_matptapsymbolic_local) {
848     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
849   }
850   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
851 
852   prend = prstart + pm;
853 
854   /* get ij structure of P_loc^T */
855   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
856   ptJ=ptj;
857 
858   /* Allocate ci array, arrays for fill computation and */
859   /* free space for accumulating nonzero column info */
860   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
861   ci[0] = 0;
862 
863   ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
864   ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
865   ptasparserow_loc = ptadenserow_loc + an;
866   ptadenserow_oth  = ptasparserow_loc + an;
867   ptasparserow_oth = ptadenserow_oth + an;
868 
869   /* create and initialize a linked list */
870   nlnk = pN+1;
871   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
872 
873   /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */
874   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
875   nnz           = adi[am] + aoi[am];
876   ierr          = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space);
877   current_space = free_space;
878 
879   /* determine symbolic info for each row of C: */
880   for (i=0;i<pN;i++) {
881     ptnzi  = pti[i+1] - pti[i];
882     nprow_loc = 0; nprow_oth = 0;
883     /* i-th row of symbolic P_loc^T*A_loc: */
884     for (j=0;j<ptnzi;j++) {
885       arow = *ptJ++;
886       /* diagonal portion of A */
887       anzj = adi[arow+1] - adi[arow];
888       ajj  = adj + adi[arow];
889       for (k=0;k<anzj;k++) {
890         col = ajj[k]+prstart;
891         if (!ptadenserow_loc[col]) {
892           ptadenserow_loc[col]    = -1;
893           ptasparserow_loc[nprow_loc++] = col;
894         }
895       }
896       /* off-diagonal portion of A */
897       anzj = aoi[arow+1] - aoi[arow];
898       ajj  = aoj + aoi[arow];
899       for (k=0;k<anzj;k++) {
900         col = a->garray[ajj[k]];  /* global col */
901         if (col < cstart){
902           col = ajj[k];
903         } else if (col >= cend){
904           col = ajj[k] + pm;
905         } else {
906           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
907         }
908         if (!ptadenserow_oth[col]) {
909           ptadenserow_oth[col]    = -1;
910           ptasparserow_oth[nprow_oth++] = col;
911         }
912       }
913     }
914 
915     /* using symbolic info of local PtA, determine symbolic info for row of C: */
916     cnzi   = 0;
917     /* rows of P_loc */
918     ptaj = ptasparserow_loc;
919     for (j=0; j<nprow_loc; j++) {
920       prow = *ptaj++;
921       prow -= prstart; /* rm */
922       pnzj = pi_loc[prow+1] - pi_loc[prow];
923       pjj  = pj_loc + pi_loc[prow];
924       /* add non-zero cols of P into the sorted linked list lnk */
925       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
926       cnzi += nlnk;
927     }
928     /* rows of P_oth */
929     ptaj = ptasparserow_oth;
930     for (j=0; j<nprow_oth; j++) {
931       prow = *ptaj++;
932       if (prow >= prend) prow -= pm; /* rm */
933       pnzj = pi_oth[prow+1] - pi_oth[prow];
934       pjj  = pj_oth + pi_oth[prow];
935       /* add non-zero cols of P into the sorted linked list lnk */
936       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
937       cnzi += nlnk;
938     }
939 
940     /* If free space is not available, make more free space */
941     /* Double the amount of total space in the list */
942     if (current_space->local_remaining<cnzi) {
943       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
944     }
945 
946     /* Copy data into free space, and zero out denserows */
947     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
948     current_space->array           += cnzi;
949     current_space->local_used      += cnzi;
950     current_space->local_remaining -= cnzi;
951 
952     for (j=0;j<nprow_loc; j++) {
953       ptadenserow_loc[ptasparserow_loc[j]] = 0;
954     }
955     for (j=0;j<nprow_oth; j++) {
956       ptadenserow_oth[ptasparserow_oth[j]] = 0;
957     }
958 
959     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
960     /*        For now, we will recompute what is needed. */
961     ci[i+1] = ci[i] + cnzi;
962   }
963   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
964   /* Allocate space for cj, initialize cj, and */
965   /* destroy list of free space and other temporary array(s) */
966   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
967   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
968   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
969   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
970 
971   /* Allocate space for ca */
972   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
973   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
974 
975   /* put together the new matrix */
976   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
977 
978   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
979   /* Since these are PETSc arrays, change flags to free them as necessary. */
980   c = (Mat_SeqAIJ *)((*C)->data);
981   c->freedata = PETSC_TRUE;
982   c->nonew    = 0;
983 
984   /* Clean up. */
985   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
986 
987   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990