xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 046da0a13659bf6557c8549a2fea89880b2ef309)
/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as a ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ.  C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
@*/
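/*
   Example usage (an illustrative sketch added here, not part of the original
   source; it assumes A and P are assembled MATSEQAIJ matrices built elsewhere):

      Mat C;
      ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
      ... change the numerical values of A, keeping its nonzero pattern ...
      ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/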
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);

  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    /* free ptap only when it was retrieved above; freeing it unconditionally
       would pass an uninitialized pointer to PetscFree() */
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }
    /* update P_oth */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %D",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  MatScalar            *ca;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  /*--------------------------------------------------*/
  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ=ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
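  /* Note (added commentary) on the accumulation scheme used below:
     ptadenserow_loc/ptadenserow_oth are dense flag arrays over the columns of A,
     giving O(1) membership tests, while ptasparserow_loc/ptasparserow_oth record
     which flags were set so they can be cleared in O(row nnz) rather than O(aN).
     The PetscLL* helpers keep a sorted linked list of column indices backed by
     the bit array lnkbt: PetscLLAdd() merges a set of indices into the list,
     returning the number of newly added entries in nlnk, and PetscLLClean()
     copies the sorted indices out and resets the list for the next row. */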

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz           = adi[am] + aoi[am];
  ierr          = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi  = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]    = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]    = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow = *ptaj++;
      prow -= prstart; /* rm */
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /*        For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  /*
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
  */

  /* add C_seq into mpi C              */
  /*-----------------------------------*/
  free_space=PETSC_NULL; current_space=PETSC_NULL;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s  = merge->len_s;
  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc] = ci[owners[proc+1]] - ci[owners[proc]]; /* number of nonzeros (j-structure entries) to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ci[i+1] > ci[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
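    /* Illustrative example (added; not in the original source): if the block
       destined for [proc] contains local rows 3 and 7 with 2 and 5 nonzeros
       respectively, the i-structure message is buf_si = {2, 3, 7, 0, 2, 7} --
       the row count, the two local row indices, and the prefix sums {0,2,7}
       locating each row's entries in the j-structure message. */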
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ci[i+1] - ci[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1] = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI:   recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len  = ci[owners[rank+1]] - ci[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
  }

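  /* Note (added commentary): MatPreallocateInitialize() allocates and zeros the
     dnz/onz count arrays for the pn local rows, MatPreallocateSet() splits each
     row's column count between the diagonal and off-diagonal blocks of the
     eventual parallel matrix, and MatPreallocateFinalize() frees the arrays
     after MatMPIAIJSetPreallocation() below has consumed them. */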
  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0; i<pn; i++) {
    bnzi   = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    arow   = owners[rank] + i;
    anzi   = ci[arow+1] - ci[arow];
    cji    = cj + ci[arow];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled     = PETSC_FALSE;
  B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi            = bi;
  merge->bj            = bj;
  merge->ci            = ci;
  merge->cj            = cj;
  merge->buf_ri        = buf_ri;
  merge->buf_rj        = buf_rj;
  merge->C_seq         = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,
                       *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,
                       *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  Mat                  C_seq;
  Mat_SeqAIJ           *cseq,*p_oth;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend;
  PetscInt             *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj;
  PetscInt             i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj,**abuf_r,*ba_i,*ca;
  MatScalar            *pA_d=pd->a,*pA_o=po->a,*pa_oth;
  PetscInt             am=A->m,cN=C->N,cm=C->m;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s,**buf_ri,**buf_rj,**buf_ri_k,**nextrow,**nextcseqi;
  PetscInt             *owners,proc,nrows;
  PetscInt             *cjj,cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  PetscInt             *cseqi,*cseqj,col;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* get data from symbolic products */
  p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  cseqi = merge->ci; cseqj=merge->cj;
  ierr  = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr  = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P and one row of C */
  ierr = PetscMalloc(2*cN*(sizeof(MatScalar)+sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,2*cN*(sizeof(MatScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
  ca       = (MatScalar*)(apa + cN);
  apj      = (PetscInt *)(ca + cN);
  apjdense = (PetscInt *)(apj + cN);

  /* Clear old values in C */
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    nzi  = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj++;
      /* diagonal portion of P */
      pnzj = pd->i[prow+1] - pd->i[prow];
      pjj  = pd->j + pd->i[prow]; /* local col index of P */
      paj  = pd->a + pd->i[prow];
      for (k=0;k<pnzj;k++) {
        col = *pjj + p->cstart; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      /* off-diagonal portion of P */
      pnzj = po->i[prow+1] - po->i[prow];
      pjj  = po->j + po->i[prow]; /* local col index of P */
      paj  = po->a + po->i[prow];
      for (k=0;k<pnzj;k++) {
        col = p->garray[*pjj]; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;

      ada++;
    }
    /* off-diagonal portion of A */
    nzi  = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    /* diagonal portion of P -- gives matrix value of local C */
    pnzi = pd->i[i+1] - pd->i[i];
    for (j=0;j<pnzi;j++) {
      crow = (*pJ_d++) + owners[rank];
      cjj  = cseqj + cseqi[crow];
      /* add value into C */
      for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]];
      ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      pA_d++;
      flops += 2*apnzj;
    }

    /* off-diagonal portion of P -- gives matrix values to be sent to others */
    pnzi = po->i[i+1] - po->i[i];
    for (j=0;j<pnzi;j++) {
      crow   = p->garray[*pJ_o++];
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* add value into C_seq to be sent to other processors */
      nextap = 0;
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA_o)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA_o++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* send and recv matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);
  ierr = PetscFree(cseqa);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ba_i = apa; /* rename, temporary array for storage of one row of C */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;  /* next row index of k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj   = buf_rj[k] + *(nextcseqi[k]);
        cseqa   = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
*/
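/*
   Example usage (an illustrative sketch added here, not part of the original
   source; assumes assembled SeqAIJ matrices A and P):

      Mat C;
      ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);    <-- (i,j) structure only
      ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);          <-- fill in the values
*/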
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

typedef struct {
  Mat    symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode    ierr;
  Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ=ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow  + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /*        For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow  + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow     + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0;j<ptanzi;j++) {
        pdof = *ptaj%ppdof;     /* was %dof: dividing by the loop index is zero on the first pass */
        prow = (*ptaj++)/ppdof; /* was /dof: divide by the dof count, not the loop index */
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /*        For now, we will recompute what is needed. */
      ci[i*ppdof+dof+1] = ci[i*ppdof+dof] + cnzi; /* row i*ppdof+dof of C; the original ci[i+dof] indexing overlapped between iterations */
    }
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user using MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ.  C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
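/*
   Algorithm note (added commentary): the SeqAIJ numeric kernel invoked through
   this interface computes C = P^T*A*P one row of A at a time.  For row i it
   accumulates the sparse row AP[i,:] = A[i,:]*P into the work arrays
   apa/apj/apjdense, sorts the column indices, and then performs the
   outer-product update C[j,:] += P[i,j]*AP[i,:] for every nonzero P[i,j],
   a sparse axpy into the rows preallocated by the symbolic phase.
*/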
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1278 
1279 /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
1280 static PetscEvent logkey_matptapnumeric_local = 0;
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
1283 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
1284 {
1285   PetscErrorCode ierr;
1286   PetscInt       flops=0;
1287   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1288   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
1289   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
1290   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1291   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1292   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
1293   PetscInt       *pJ=pj_loc,*pjj;
1294   PetscInt       *ci=c->i,*cj=c->j,*cjj;
1295   PetscInt       am=A->m,cn=C->N,cm=C->M;
1296   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
1297   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
1298   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
1299 
1300   PetscFunctionBegin;
1301   if (!logkey_matptapnumeric_local) {
1302     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
1303   }
1304   ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
1305 
1306   /* Allocate temporary array for storage of one row of A*P */
1307   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
1308   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1309   apj      = (PetscInt *)(apa + cn);
1310   apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi  = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi  = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj]; /* global column; must lie outside the diagonal block [cstart,cend) */
      if (col >= cstart && col < cend) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      prow = *aoj;
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
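      /* Because the symbolic phase computed C's pattern from these same
         products, every column in apj also appears in cjj, so nextap reaches
         apnzj before k runs past the end of row crow. */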
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
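
/* Determine the nonzero structure of C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
/*
   Sketch of how the two Local routines fit together (illustrative only; the
   actual caller is responsible for extracting P_loc and P_oth from the
   parallel P and for supplying prstart, the offset of this process's block
   of rows of P):

     Mat C_loc;
     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,fill,&C_loc);CHKERRQ(ierr);
     ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,C_loc);CHKERRQ(ierr);
*/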
static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;
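  /* The single (4*aN+1)-int allocation is split into four aN-sized segments:
     dense flag / sparse index pairs for the local part of P^T*A
     (ptadenserow_loc/ptasparserow_loc) and for the off-processor part
     (ptadenserow_oth/ptasparserow_oth), mirroring the apjdense/apj pair
     used in the numeric phase */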

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
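  /* lnk is a sorted linked list over the column range [0,pN); the bit table
     lnkbt provides O(1) membership tests when rows of P are merged into it */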

  /* Set initial free space to be nnz(A) scaled by fill*pN/aN. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  nnz           = adi[am] + aoi[am];
  ierr          = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;
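  /* free_space is a chained list of buffers; each row of C is appended to it as
     it is determined and later flattened into the final cj array by
     MakeSpaceContiguous() */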

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi     = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* map back to a local row of P_loc */
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk; /* nlnk is the number of newly added columns */
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* recover the P_oth row index (undo the +pm shift) */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk; /* nlnk is the number of newly added columns */
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numeric phase. */
    /*        For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change the flags so they are freed as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}