xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 589412d92bc4b292f53a960b45ffae7defff5ac1)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAP"
13 /*@
14    MatPtAP - Creates the matrix projection C = P^T * A * P
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the matrix
20 .  P - the projection matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
31    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
36 @*/
37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 EXTERN PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*);
75 EXTERN PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*);
76 EXTERN PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat,Mat,PetscInt,PetscInt,Mat);
77 
78 
79 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
80 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
84 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
85 {
86   PetscErrorCode    ierr;
87 
88   PetscFunctionBegin;
89   if (scall == MAT_INITIAL_MATRIX){
90     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
91     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
92     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
93   } else if (scall == MAT_REUSE_MATRIX){ /* not done yet! */
94     ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
95     ierr = MatDestroy(*C);CHKERRQ(ierr);
96     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);
97 
98     /*
99     ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
100     */
101     ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
102   } else {
103     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
104   }
105   PetscFunctionReturn(0);
106 }
107 #define NEW
108 #undef __FUNCT__
109 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
110 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
111 {
112   PetscErrorCode    ierr;
113   Mat               C_seq;
114   Mat               P_loc,P_oth,*ploc;
115   PetscInt          prstart,prend,m=P->m,low;
116   IS                isrow,iscol;
117 
118   PetscFunctionBegin;
119   /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
120   ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_oth);CHKERRQ(ierr);
121   prend = prstart + m;
122   /* get P_loc by taking all local rows of P */
123   ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr);
124   ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr);
125   ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr);
126   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&ploc);CHKERRQ(ierr);
127   P_loc = ploc[0];
128   ierr = ISDestroy(isrow);CHKERRQ(ierr);
129   ierr = ISDestroy(iscol);CHKERRQ(ierr);
130 
131   /* compute C_seq = P_loc^T * A_loc * P_seq */
132   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,fill,&C_seq);CHKERRQ(ierr);
133   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,C_seq);CHKERRQ(ierr);
134 
135   ierr = MatDestroyMatrices(1,&ploc);CHKERRQ(ierr);
136   ierr = MatDestroy(P_oth);CHKERRQ(ierr);
137 #ifdef OLD
138   Mat               P_seq;
139   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
140   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr);
141   /* compute C_seq = P_loc^T * A_loc * P_seq */
142   prend  = prstart + m;
143   ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
144   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
145 #endif
146   /* add C_seq into mpi C */
147   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
148 
149   PetscFunctionReturn(0);
150 }
151 
152 #undef __FUNCT__
153 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
154 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
155 {
156   PetscErrorCode       ierr;
157   Mat                  P_seq,C_seq;
158   PetscInt             prstart,prend,m=P->m;
159   Mat_Merge_SeqsToMPI  *merge;
160   PetscObjectContainer container;
161 
162   PetscFunctionBegin;
163   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
164   if (container) {
165     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
166   } else {
167     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container");
168   }
169 
170   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
171   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr);
172 
173   /* compute C_seq = P_loc^T * A_loc * P_seq */
174   prend = prstart + m;
175   C_seq = merge->C_seq;
176   ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr);
177   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
178 
179   /* add C_seq into mpi C */
180   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
181 
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
187 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
188 {
189   PetscErrorCode ierr;
190 
191   PetscFunctionBegin;
192   if (scall == MAT_INITIAL_MATRIX){
193     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
194     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
195     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
196   }
197   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
198   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
199   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
200   PetscFunctionReturn(0);
201 }
202 
203 /*
204    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
205 
206    Collective on Mat
207 
208    Input Parameters:
209 +  A - the matrix
210 -  P - the projection matrix
211 
212    Output Parameters:
213 .  C - the (i,j) structure of the product matrix
214 
215    Notes:
216    C will be created and must be destroyed by the user with MatDestroy().
217 
218    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
219    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
220    this (i,j) structure by calling MatPtAPNumeric().
221 
222    Level: intermediate
223 
224 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
225 */
226 #undef __FUNCT__
227 #define __FUNCT__ "MatPtAPSymbolic"
228 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
229 {
230   PetscErrorCode ierr;
231   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
232   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
233 
234   PetscFunctionBegin;
235 
236   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
237   PetscValidType(A,1);
238   MatPreallocated(A);
239   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
240   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
241 
242   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
243   PetscValidType(P,2);
244   MatPreallocated(P);
245   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
246   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
247 
248   PetscValidPointer(C,3);
249 
250   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
251   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
252 
253   /* For now, we do not dispatch based on the type of A and P */
254   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
255   fA = A->ops->ptapsymbolic;
256   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
257   fP = P->ops->ptapsymbolic;
258   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
259   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
260 
261   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
262   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
263   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
264 
265   PetscFunctionReturn(0);
266 }
267 
268 typedef struct {
269   Mat    symAP;
270 } Mat_PtAPstruct;
271 
272 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
276 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
277 {
278   PetscErrorCode    ierr;
279   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
280 
281   PetscFunctionBegin;
282   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
283   ierr = PetscFree(ptap);CHKERRQ(ierr);
284   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
285   PetscFunctionReturn(0);
286 }
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
290 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
291 {
292   PetscErrorCode ierr;
293   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
294   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
295   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
296   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
297   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
298   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
299   MatScalar      *ca;
300   PetscBT        lnkbt;
301 
302   PetscFunctionBegin;
303   /* Get ij structure of P^T */
304   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
305   ptJ=ptj;
306 
307   /* Allocate ci array, arrays for fill computation and */
308   /* free space for accumulating nonzero column info */
309   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
310   ci[0] = 0;
311 
312   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
313   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
314   ptasparserow = ptadenserow  + an;
315 
316   /* create and initialize a linked list */
317   nlnk = pn+1;
318   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
319 
320   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
321   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
322   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
323   current_space = free_space;
324 
325   /* Determine symbolic info for each row of C: */
326   for (i=0;i<pn;i++) {
327     ptnzi  = pti[i+1] - pti[i];
328     ptanzi = 0;
329     /* Determine symbolic row of PtA: */
330     for (j=0;j<ptnzi;j++) {
331       arow = *ptJ++;
332       anzj = ai[arow+1] - ai[arow];
333       ajj  = aj + ai[arow];
334       for (k=0;k<anzj;k++) {
335         if (!ptadenserow[ajj[k]]) {
336           ptadenserow[ajj[k]]    = -1;
337           ptasparserow[ptanzi++] = ajj[k];
338         }
339       }
340     }
341       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
342     ptaj = ptasparserow;
343     cnzi   = 0;
344     for (j=0;j<ptanzi;j++) {
345       prow = *ptaj++;
346       pnzj = pi[prow+1] - pi[prow];
347       pjj  = pj + pi[prow];
348       /* add non-zero cols of P into the sorted linked list lnk */
349       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
350       cnzi += nlnk;
351     }
352 
353     /* If free space is not available, make more free space */
354     /* Double the amount of total space in the list */
355     if (current_space->local_remaining<cnzi) {
356       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
357     }
358 
359     /* Copy data into free space, and zero out denserows */
360     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
361     current_space->array           += cnzi;
362     current_space->local_used      += cnzi;
363     current_space->local_remaining -= cnzi;
364 
365     for (j=0;j<ptanzi;j++) {
366       ptadenserow[ptasparserow[j]] = 0;
367     }
368     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
369     /*        For now, we will recompute what is needed. */
370     ci[i+1] = ci[i] + cnzi;
371   }
372   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
373   /* Allocate space for cj, initialize cj, and */
374   /* destroy list of free space and other temporary array(s) */
375   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
376   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
377   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
378   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
379 
380   /* Allocate space for ca */
381   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
382   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
383 
384   /* put together the new matrix */
385   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
386 
387   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
388   /* Since these are PETSc arrays, change flags to free them as necessary. */
389   c = (Mat_SeqAIJ *)((*C)->data);
390   c->freedata = PETSC_TRUE;
391   c->nonew    = 0;
392 
393   /* Clean up. */
394   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
395 
396   PetscFunctionReturn(0);
397 }
398 
399 #include "src/mat/impls/maij/maij.h"
400 EXTERN_C_BEGIN
401 #undef __FUNCT__
402 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
403 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
404 {
405   /* This routine requires testing -- I don't think it works. */
406   PetscErrorCode ierr;
407   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
408   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
409   Mat            P=pp->AIJ;
410   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
411   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
412   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
413   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
414   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
415   MatScalar      *ca;
416 
417   PetscFunctionBegin;
418   /* Start timer */
419   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
420 
421   /* Get ij structure of P^T */
422   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
423 
424   /* Allocate ci array, arrays for fill computation and */
425   /* free space for accumulating nonzero column info */
426   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
427   ci[0] = 0;
428 
429   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
430   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
431   ptasparserow = ptadenserow  + an;
432   denserow     = ptasparserow + an;
433   sparserow    = denserow     + pn;
434 
435   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
436   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
437   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
438   current_space = free_space;
439 
440   /* Determine symbolic info for each row of C: */
441   for (i=0;i<pn/ppdof;i++) {
442     ptnzi  = pti[i+1] - pti[i];
443     ptanzi = 0;
444     ptJ    = ptj + pti[i];
445     for (dof=0;dof<ppdof;dof++) {
446     /* Determine symbolic row of PtA: */
447       for (j=0;j<ptnzi;j++) {
448         arow = ptJ[j] + dof;
449         anzj = ai[arow+1] - ai[arow];
450         ajj  = aj + ai[arow];
451         for (k=0;k<anzj;k++) {
452           if (!ptadenserow[ajj[k]]) {
453             ptadenserow[ajj[k]]    = -1;
454             ptasparserow[ptanzi++] = ajj[k];
455           }
456         }
457       }
458       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
459       ptaj = ptasparserow;
460       cnzi   = 0;
461       for (j=0;j<ptanzi;j++) {
462         pdof = *ptaj%dof;
463         prow = (*ptaj++)/dof;
464         pnzj = pi[prow+1] - pi[prow];
465         pjj  = pj + pi[prow];
466         for (k=0;k<pnzj;k++) {
467           if (!denserow[pjj[k]+pdof]) {
468             denserow[pjj[k]+pdof] = -1;
469             sparserow[cnzi++]     = pjj[k]+pdof;
470           }
471         }
472       }
473 
474       /* sort sparserow */
475       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
476 
477       /* If free space is not available, make more free space */
478       /* Double the amount of total space in the list */
479       if (current_space->local_remaining<cnzi) {
480         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
481       }
482 
483       /* Copy data into free space, and zero out denserows */
484       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
485       current_space->array           += cnzi;
486       current_space->local_used      += cnzi;
487       current_space->local_remaining -= cnzi;
488 
489       for (j=0;j<ptanzi;j++) {
490         ptadenserow[ptasparserow[j]] = 0;
491       }
492       for (j=0;j<cnzi;j++) {
493         denserow[sparserow[j]] = 0;
494       }
495       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
496       /*        For now, we will recompute what is needed. */
497       ci[i+1+dof] = ci[i+dof] + cnzi;
498     }
499   }
500   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
501   /* Allocate space for cj, initialize cj, and */
502   /* destroy list of free space and other temporary array(s) */
503   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
504   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
505   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
506 
507   /* Allocate space for ca */
508   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
509   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
510 
511   /* put together the new matrix */
512   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
513 
514   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
515   /* Since these are PETSc arrays, change flags to free them as necessary. */
516   c = (Mat_SeqAIJ *)((*C)->data);
517   c->freedata = PETSC_TRUE;
518   c->nonew    = 0;
519 
520   /* Clean up. */
521   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
522 
523   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 EXTERN_C_END
527 
528 /*
529    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
530 
531    Collective on Mat
532 
533    Input Parameters:
534 +  A - the matrix
535 -  P - the projection matrix
536 
537    Output Parameters:
538 .  C - the product matrix
539 
540    Notes:
541    C must have been created by calling MatPtAPSymbolic and must be destroyed by
542    the user using MatDeatroy().
543 
544    This routine is currently only implemented for pairs of AIJ matrices and classes
545    which inherit from AIJ.  C will be of type MATAIJ.
546 
547    Level: intermediate
548 
549 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
550 */
551 #undef __FUNCT__
552 #define __FUNCT__ "MatPtAPNumeric"
553 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
554 {
555   PetscErrorCode ierr;
556   PetscErrorCode (*fA)(Mat,Mat,Mat);
557   PetscErrorCode (*fP)(Mat,Mat,Mat);
558 
559   PetscFunctionBegin;
560 
561   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
562   PetscValidType(A,1);
563   MatPreallocated(A);
564   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
565   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
566 
567   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
568   PetscValidType(P,2);
569   MatPreallocated(P);
570   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
571   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
572 
573   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
574   PetscValidType(C,3);
575   MatPreallocated(C);
576   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
577   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
578 
579   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
580   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
581   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
582   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
583 
584   /* For now, we do not dispatch based on the type of A and P */
585   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
586   fA = A->ops->ptapnumeric;
587   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
588   fP = P->ops->ptapnumeric;
589   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
590   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
591 
592   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
593   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
594   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
595 
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
601 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
602 {
603   PetscErrorCode ierr;
604   PetscInt       flops=0;
605   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
606   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
607   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
608   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
609   PetscInt       *ci=c->i,*cj=c->j,*cjj;
610   PetscInt       am=A->M,cn=C->N,cm=C->M;
611   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
612   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
613 
614   PetscFunctionBegin;
615   /* Allocate temporary array for storage of one row of A*P */
616   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
617   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
618 
619   apj      = (PetscInt *)(apa + cn);
620   apjdense = apj + cn;
621 
622   /* Clear old values in C */
623   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
624 
625   for (i=0;i<am;i++) {
626     /* Form sparse row of A*P */
627     anzi  = ai[i+1] - ai[i];
628     apnzj = 0;
629     for (j=0;j<anzi;j++) {
630       prow = *aj++;
631       pnzj = pi[prow+1] - pi[prow];
632       pjj  = pj + pi[prow];
633       paj  = pa + pi[prow];
634       for (k=0;k<pnzj;k++) {
635         if (!apjdense[pjj[k]]) {
636           apjdense[pjj[k]] = -1;
637           apj[apnzj++]     = pjj[k];
638         }
639         apa[pjj[k]] += (*aa)*paj[k];
640       }
641       flops += 2*pnzj;
642       aa++;
643     }
644 
645     /* Sort the j index array for quick sparse axpy. */
646     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
647 
648     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
649     pnzi = pi[i+1] - pi[i];
650     for (j=0;j<pnzi;j++) {
651       nextap = 0;
652       crow   = *pJ++;
653       cjj    = cj + ci[crow];
654       caj    = ca + ci[crow];
655       /* Perform sparse axpy operation.  Note cjj includes apj. */
656       for (k=0;nextap<apnzj;k++) {
657         if (cjj[k]==apj[nextap]) {
658           caj[k] += (*pA)*apa[apj[nextap++]];
659         }
660       }
661       flops += 2*apnzj;
662       pA++;
663     }
664 
665     /* Zero the current row info for A*P */
666     for (j=0;j<apnzj;j++) {
667       apa[apj[j]]      = 0.;
668       apjdense[apj[j]] = 0;
669     }
670   }
671 
672   /* Assemble the final matrix and clean up */
673   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
674   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
675   ierr = PetscFree(apa);CHKERRQ(ierr);
676   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
677 
678   PetscFunctionReturn(0);
679 }
680 
681 /* Compute local C = P[rstart:rend,:]^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
682 
683 #undef __FUNCT__
684 #define __FUNCT__ "MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ"
685 PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C)
686 {
687   PetscErrorCode ierr;
688   PetscInt       flops=0;
689   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
690   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
691   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
692   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
693   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
694   PetscInt       *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
695   PetscInt       *ci=c->i,*cj=c->j,*cjj;
696   PetscInt       am=A->m,cn=C->N,cm=C->M;
697   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
698   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
699 
700   PetscFunctionBegin;
701   pA=p->a+pi[prstart];
702   /* Allocate temporary array for storage of one row of A*P */
703   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
704   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
705 
706   apj      = (PetscInt *)(apa + cn);
707   apjdense = apj + cn;
708 
709   /* Clear old values in C */
710   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
711 
712   for (i=0;i<am;i++) {
713     /* Form sparse row of A*P */
714     /* diagonal portion of A */
715     anzi  = adi[i+1] - adi[i];
716     apnzj = 0;
717     for (j=0;j<anzi;j++) {
718       prow = *adj + prstart;
719       adj++;
720       pnzj = pi[prow+1] - pi[prow];
721       pjj  = pj + pi[prow];
722       paj  = pa + pi[prow];
723       for (k=0;k<pnzj;k++) {
724         if (!apjdense[pjj[k]]) {
725           apjdense[pjj[k]] = -1;
726           apj[apnzj++]     = pjj[k];
727         }
728         apa[pjj[k]] += (*ada)*paj[k];
729       }
730       flops += 2*pnzj;
731       ada++;
732     }
733     /* off-diagonal portion of A */
734     anzi  = aoi[i+1] - aoi[i];
735     for (j=0;j<anzi;j++) {
736       col = a->garray[*aoj];
737       if (col < cstart){
738         prow = *aoj;
739       } else if (col >= cend){
740         prow = *aoj + prend-prstart;
741       } else {
742         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
743       }
744       aoj++;
745       pnzj = pi[prow+1] - pi[prow];
746       pjj  = pj + pi[prow];
747       paj  = pa + pi[prow];
748       for (k=0;k<pnzj;k++) {
749         if (!apjdense[pjj[k]]) {
750           apjdense[pjj[k]] = -1;
751           apj[apnzj++]     = pjj[k];
752         }
753         apa[pjj[k]] += (*aoa)*paj[k];
754       }
755       flops += 2*pnzj;
756       aoa++;
757     }
758 
759     /* Sort the j index array for quick sparse axpy. */
760     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
761 
762     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
763     pnzi = pi[i+1+prstart] - pi[i+prstart];
764     for (j=0;j<pnzi;j++) {
765       nextap = 0;
766       crow   = *pJ++;
767       cjj    = cj + ci[crow];
768       caj    = ca + ci[crow];
769       /* Perform sparse axpy operation.  Note cjj includes apj. */
770       for (k=0;nextap<apnzj;k++) {
771         if (cjj[k]==apj[nextap]) {
772           caj[k] += (*pA)*apa[apj[nextap++]];
773         }
774       }
775       flops += 2*apnzj;
776       pA++;
777     }
778 
779     /* Zero the current row info for A*P */
780     for (j=0;j<apnzj;j++) {
781       apa[apj[j]]      = 0.;
782       apjdense[apj[j]] = 0;
783     }
784   }
785 
786   /* Assemble the final matrix and clean up */
787   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
788   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
789   ierr = PetscFree(apa);CHKERRQ(ierr);
790   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
791 
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ"
797 PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
798 {
799   PetscErrorCode ierr;
800   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
801   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
802   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
803   Mat_SeqAIJ     *p=(Mat_SeqAIJ*)P->data,*c;
804   PetscInt       *pti,*ptj,*ptJ,*ajj,*pi=p->i,*pj=p->j,*pjj;
805   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
806   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
807   PetscInt       an=A->N,am=A->m,pn=P->N,pm=P->M;
808   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
809   PetscInt       m=prend-prstart,nlnk,*lnk;
810   MatScalar      *ca;
811   PetscBT        lnkbt;
812 
813   PetscFunctionBegin;
814   int rank;
815   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
816 
817   /* Get ij structure of P[rstart:rend,:]^T */
818   ierr = MatGetSymbolicTransposeReduced_SeqAIJ(P,prstart,prend,&pti,&ptj);CHKERRQ(ierr);
819   ptJ=ptj;
820 
821   /* Allocate ci array, arrays for fill computation and */
822   /* free space for accumulating nonzero column info */
823   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
824   ci[0] = 0;
825 
826   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
827   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
828   ptasparserow = ptadenserow  + an;
829 
830   /* create and initialize a linked list */
831   nlnk = pn+1;
832   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
833 
834   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
835   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
836   nnz           = adi[am] + aoi[am];
837   ierr          = GetMoreSpace((PetscInt)(fill*nnz/pm)*pn,&free_space);
838   current_space = free_space;
839 
840   /* Determine symbolic info for each row of C: */
841   for (i=0;i<pn;i++) {
842     ptnzi  = pti[i+1] - pti[i];
843     ptanzi = 0;
844     /* Determine symbolic row of PtA_reduced: */
845     for (j=0;j<ptnzi;j++) {
846       arow = *ptJ++;
847       if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow);
848       /* diagonal portion of A */
849       anzj = adi[arow+1] - adi[arow];
850       ajj  = adj + adi[arow];
851       for (k=0;k<anzj;k++) {
852         col = ajj[k]+prstart;
853         if (!ptadenserow[col]) {
854           ptadenserow[col]    = -1;
855           ptasparserow[ptanzi++] = col;
856         }
857       }
858       /* off-diagonal portion of A */
859       anzj = aoi[arow+1] - aoi[arow];
860       ajj  = aoj + aoi[arow];
861       for (k=0;k<anzj;k++) {
862         col = a->garray[ajj[k]];  /* global col */
863         if (col < cstart){
864           col = ajj[k];
865         } else if (col >= cend){
866           col = ajj[k] + m;
867         } else {
868           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
869         }
870         if (!ptadenserow[col]) {
871           ptadenserow[col]    = -1;
872           ptasparserow[ptanzi++] = col;
873         }
874       }
875     }
876 
877     printf(" [%d] i: %d, ptanzi: %d \n",rank,i,ptanzi);
878     /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
879     ptaj = ptasparserow;
880     cnzi   = 0;
881     for (j=0;j<ptanzi;j++) {
882       prow = *ptaj++;
883       pnzj = pi[prow+1] - pi[prow];
884       pjj  = pj + pi[prow];
885       /* if (rank==1) printf(" prow: %d, pnzj: %d\n",prow,pnzj); */
886       /* add non-zero cols of P into the sorted linked list lnk */
887       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
888       cnzi += nlnk;
889     }
890 
891     /* If free space is not available, make more free space */
892     /* Double the amount of total space in the list */
893     if (current_space->local_remaining<cnzi) {
894       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
895     }
896 
897     /* Copy data into free space, and zero out denserows */
898     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
899     current_space->array           += cnzi;
900     current_space->local_used      += cnzi;
901     current_space->local_remaining -= cnzi;
902 
903     for (j=0;j<ptanzi;j++) {
904       ptadenserow[ptasparserow[j]] = 0;
905     }
906 
907     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
908     /*        For now, we will recompute what is needed. */
909     ci[i+1] = ci[i] + cnzi;
910   }
911   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
912   /* Allocate space for cj, initialize cj, and */
913   /* destroy list of free space and other temporary array(s) */
914   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
915   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
916   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
917   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
918 
919   /* Allocate space for ca */
920   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
921   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
922 
923   /* put together the new matrix */
924   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
925 
926   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
927   /* Since these are PETSc arrays, change flags to free them as necessary. */
928   c = (Mat_SeqAIJ *)((*C)->data);
929   c->freedata = PETSC_TRUE;
930   c->nonew    = 0;
931 
932   /* Clean up. */
933   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
934 
935   PetscFunctionReturn(0);
936 }
937 
938 /*@C
939    MatPtAPReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices,
940                 used by MatPtAP_MPIAIJ_MPIAIJ()
941 
942    Collective on Mat
943 
944    Input Parameters:
945 +  A - the matrix in mpiaij format
946 .  P - the projection matrix in seqaij format
947 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
948 .  fill - expected fill (not being used when scall==MAT_REUSE_MATRIX)
949 .  prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose
950 
951    Output Parameters:
952 .  C - the product matrix in seqaij format
953 
954    Level: developer
955 
956 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
957 @*/
958 static PetscEvent logkey_matptapreducedpt = 0;
959 #undef __FUNCT__
960 #define __FUNCT__ "MatPtAPReducedPt_MPIAIJ_SeqAIJ"
961 PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
962 {
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
967   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
968   if (!logkey_matptapreducedpt) {
969     ierr = PetscLogEventRegister(&logkey_matptapreducedpt,"MatPtAP_ReducedPt",MAT_COOKIE);
970   }
971   PetscLogEventBegin(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr);
972 
973   if (scall == MAT_INITIAL_MATRIX){
974     ierr = MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
975   }
976   ierr = MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(A,P,prstart,prend,*C);CHKERRQ(ierr);
977   ierr = PetscLogEventEnd(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr);
978 
979   PetscFunctionReturn(0);
980 }
981 
982 /* ------------- New --------------------*/
983 static PetscEvent logkey_matptapnumeric_local = 0;
984 #undef __FUNCT__
985 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
986 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
987 {
988   PetscErrorCode ierr;
989   PetscInt       flops=0;
990   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
991   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
992   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
993   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
994   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
995   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
996   PetscInt       *pJ=pj_loc,*pjj;
997   PetscInt       *ci=c->i,*cj=c->j,*cjj;
998   PetscInt       am=A->m,cn=C->N,cm=C->M;
999   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
1000   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
1001   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
1002 
1003   PetscFunctionBegin;
1004   if (!logkey_matptapnumeric_local) {
1005     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1006   }
1007   PetscLogEventBegin(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1008 
1009   /* Allocate temporary array for storage of one row of A*P */
1010   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
1011   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1012   apj      = (PetscInt *)(apa + cn);
1013   apjdense = apj + cn;
1014 
1015   /* Clear old values in C */
1016   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
1017 
1018   for (i=0;i<am;i++) {
1019     /* Form i-th sparse row of A*P */
1020      apnzj = 0;
1021     /* diagonal portion of A */
1022     anzi  = adi[i+1] - adi[i];
1023     for (j=0;j<anzi;j++) {
1024       prow = *adj;
1025       adj++;
1026       pnzj = pi_loc[prow+1] - pi_loc[prow];
1027       pjj  = pj_loc + pi_loc[prow];
1028       paj  = pa_loc + pi_loc[prow];
1029       for (k=0;k<pnzj;k++) {
1030         if (!apjdense[pjj[k]]) {
1031           apjdense[pjj[k]] = -1;
1032           apj[apnzj++]     = pjj[k];
1033         }
1034         apa[pjj[k]] += (*ada)*paj[k];
1035       }
1036       flops += 2*pnzj;
1037       ada++;
1038     }
1039     /* off-diagonal portion of A */
1040     anzi  = aoi[i+1] - aoi[i];
1041     for (j=0;j<anzi;j++) {
1042       col = a->garray[*aoj];
1043       if (col < cstart){
1044         prow = *aoj;
1045       } else if (col >= cend){
1046         prow = *aoj;
1047       } else {
1048         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1049       }
1050       aoj++;
1051       pnzj = pi_oth[prow+1] - pi_oth[prow];
1052       pjj  = pj_oth + pi_oth[prow];
1053       paj  = pa_oth + pi_oth[prow];
1054       for (k=0;k<pnzj;k++) {
1055         if (!apjdense[pjj[k]]) {
1056           apjdense[pjj[k]] = -1;
1057           apj[apnzj++]     = pjj[k];
1058         }
1059         apa[pjj[k]] += (*aoa)*paj[k];
1060       }
1061       flops += 2*pnzj;
1062       aoa++;
1063     }
1064     /* Sort the j index array for quick sparse axpy. */
1065     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
1066 
1067     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
1068     pnzi = pi_loc[i+1] - pi_loc[i];
1069     for (j=0;j<pnzi;j++) {
1070       nextap = 0;
1071       crow   = *pJ++;
1072       cjj    = cj + ci[crow];
1073       caj    = ca + ci[crow];
1074       /* Perform sparse axpy operation.  Note cjj includes apj. */
1075       for (k=0;nextap<apnzj;k++) {
1076         if (cjj[k]==apj[nextap]) {
1077           caj[k] += (*pA)*apa[apj[nextap++]];
1078         }
1079       }
1080       flops += 2*apnzj;
1081       pA++;
1082     }
1083 
1084     /* Zero the current row info for A*P */
1085     for (j=0;j<apnzj;j++) {
1086       apa[apj[j]]      = 0.;
1087       apjdense[apj[j]] = 0;
1088     }
1089   }
1090 
1091   /* Assemble the final matrix and clean up */
1092   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1093   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1094   ierr = PetscFree(apa);CHKERRQ(ierr);
1095   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1096   PetscLogEventEnd(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 static PetscEvent logkey_matptapsymbolic_local = 0;
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
1102 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
1103 {
1104   PetscErrorCode ierr;
1105   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1106   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1107   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
1108   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
1109   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1110   PetscInt       *ci,*cj,*ptaj;
1111   PetscInt       an=A->N,am=A->m,pN=P_loc->N;
1112   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
1113   PetscInt       pm=P_loc->m,nlnk,*lnk;
1114   MatScalar      *ca;
1115   PetscBT        lnkbt;
1116   PetscInt       prend,nprow_loc,nprow_oth;
1117   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
1118   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1119   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1120   /* PetscInt       rank; */
1121 
1122   PetscFunctionBegin;
1123   if (!logkey_matptapsymbolic_local) {
1124     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1125   }
1126   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1127 
1128   /* ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); */
1129   prend = prstart + pm;
1130 
1131   /* get ij structure of P_loc^T */
1132   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1133   ptJ=ptj;
1134 
1135   /* Allocate ci array, arrays for fill computation and */
1136   /* free space for accumulating nonzero column info */
1137   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1138   ci[0] = 0;
1139 
1140   ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
1141   ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
1142   ptasparserow_loc = ptadenserow_loc + an;
1143   ptadenserow_oth  = ptasparserow_loc + an;
1144   ptasparserow_oth = ptadenserow_oth + an;
1145 
1146   /* create and initialize a linked list */
1147   nlnk = pN+1;
1148   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1149 
1150   /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */
1151   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1152   nnz           = adi[am] + aoi[am];
1153   ierr          = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space);
1154   current_space = free_space;
1155 
1156   /* determine symbolic info for each row of C: */
1157   for (i=0;i<pN;i++) {
1158     ptnzi  = pti[i+1] - pti[i];
1159     nprow_loc = 0; nprow_oth = 0;
1160     /* i-th row of symbolic P_loc^T*A_loc: */
1161     for (j=0;j<ptnzi;j++) {
1162       arow = *ptJ++;
1163       /* if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow); */
1164       /* diagonal portion of A */
1165       anzj = adi[arow+1] - adi[arow];
1166       ajj  = adj + adi[arow];
1167       for (k=0;k<anzj;k++) {
1168         col = ajj[k]+prstart;
1169         if (!ptadenserow_loc[col]) {
1170           ptadenserow_loc[col]    = -1;
1171           ptasparserow_loc[nprow_loc++] = col;
1172         }
1173       }
1174       /* off-diagonal portion of A */
1175       anzj = aoi[arow+1] - aoi[arow];
1176       ajj  = aoj + aoi[arow];
1177       for (k=0;k<anzj;k++) {
1178         col = a->garray[ajj[k]];  /* global col */
1179         if (col < cstart){
1180           col = ajj[k];
1181         } else if (col >= cend){
1182           col = ajj[k] + pm;
1183         } else {
1184           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1185         }
1186         if (!ptadenserow_oth[col]) {
1187           ptadenserow_oth[col]    = -1;
1188           ptasparserow_oth[nprow_oth++] = col;
1189         }
1190       }
1191     }
1192 
1193     /* printf(" [%d] i: %d, nprow_loc: %d, nprow_oth: %d\n",rank,i,nprow_loc,nprow_oth); */
1194     /* using symbolic info of local PtA, determine symbolic info for row of C: */
1195     cnzi   = 0;
1196     /* rows of P_loc */
1197     ptaj = ptasparserow_loc;
1198     for (j=0; j<nprow_loc; j++) {
1199       prow = *ptaj++;
1200       prow -= prstart; /* rm */
1201       pnzj = pi_loc[prow+1] - pi_loc[prow];
1202       pjj  = pj_loc + pi_loc[prow];
1203       /* add non-zero cols of P into the sorted linked list lnk */
1204       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1205       cnzi += nlnk;
1206     }
1207     /* rows of P_oth */
1208     ptaj = ptasparserow_oth;
1209     for (j=0; j<nprow_oth; j++) {
1210       prow = *ptaj++;
1211       if (prow >= prend) prow -= pm; /* rm */
1212       pnzj = pi_oth[prow+1] - pi_oth[prow];
1213       pjj  = pj_oth + pi_oth[prow];
1214       /* add non-zero cols of P into the sorted linked list lnk */
1215       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1216       cnzi += nlnk;
1217     }
1218 
1219     /* If free space is not available, make more free space */
1220     /* Double the amount of total space in the list */
1221     if (current_space->local_remaining<cnzi) {
1222       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1223     }
1224 
1225     /* Copy data into free space, and zero out denserows */
1226     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1227     current_space->array           += cnzi;
1228     current_space->local_used      += cnzi;
1229     current_space->local_remaining -= cnzi;
1230 
1231     for (j=0;j<nprow_loc; j++) {
1232       ptadenserow_loc[ptasparserow_loc[j]] = 0;
1233     }
1234     for (j=0;j<nprow_oth; j++) {
1235       ptadenserow_oth[ptasparserow_oth[j]] = 0;
1236     }
1237 
1238     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1239     /*        For now, we will recompute what is needed. */
1240     ci[i+1] = ci[i] + cnzi;
1241   }
1242   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1243   /* Allocate space for cj, initialize cj, and */
1244   /* destroy list of free space and other temporary array(s) */
1245   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1246   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1247   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
1248   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1249 
1250   /* Allocate space for ca */
1251   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1252   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1253 
1254   /* put together the new matrix */
1255   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
1256 
1257   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1258   /* Since these are PETSc arrays, change flags to free them as necessary. */
1259   c = (Mat_SeqAIJ *)((*C)->data);
1260   c->freedata = PETSC_TRUE;
1261   c->nonew    = 0;
1262 
1263   /* Clean up. */
1264   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1265   /*
1266   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1267   if (rank == 1){
1268     printf("[%d] C: %d, %d\n",rank,(*C)->M,(*C)->N);
1269     ierr = MatView(*C, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
1270     } */
1271   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274