/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(), MatPtAPNumeric(), MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
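/*
   Illustrative usage (a sketch, not part of the library): forming a coarse-grid
   operator in a multigrid setup. The names Afine, Prolong, and Acoarse are
   placeholders assumed for this example.

      Mat Afine,Prolong,Acoarse;
      ...
      ierr = MatPtAP(Afine,Prolong,MAT_INITIAL_MATRIX,1.0,&Acoarse);CHKERRQ(ierr);
      ...
      ierr = MatPtAP(Afine,Prolong,MAT_REUSE_MATRIX,1.0,&Acoarse);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(Acoarse);CHKERRQ(ierr);

   The first call computes both the symbolic and numeric product and creates
   Acoarse; the second reuses the (i,j) structure after the entries of Afine
   (but not its nonzero pattern) have changed.
*/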
EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    /* free ptap only when it was actually retrieved; freeing it unconditionally
       would free an uninitialized pointer when no container is attached */
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }
    /* update P_oth */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
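/*
   Background (illustrative sketch): the reuse machinery above relies on the
   PetscObjectContainer pattern, which lets an arbitrary C struct ride along
   on a PETSc object under a string key. With a hypothetical struct MyCtx,
   the shape of the idiom is:

      PetscObjectContainer container;
      MyCtx                *ctx;
      ierr = PetscNew(MyCtx,&ctx);CHKERRQ(ierr);
      ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
      ierr = PetscObjectContainerSetPointer(container,ctx);CHKERRQ(ierr);
      ierr = PetscObjectCompose((PetscObject)P,"MyKey",(PetscObject)container);CHKERRQ(ierr);

   and later, to retrieve it:

      ierr = PetscObjectQuery((PetscObject)P,"MyKey",(PetscObject*)&container);CHKERRQ(ierr);
      if (container) { ierr = PetscObjectContainerGetPointer(container,(void**)&ctx);CHKERRQ(ierr); }

   Overriding P->ops->destroy, as done above, ensures the struct is freed with P.
*/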
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  C_seq;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* compute C_seq = P_loc^T * A * P using the local rows of P (P_loc) and the
     needed off-processor rows of P (P_oth) */
  ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);

  /* add C_seq into mpi C */
  ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
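/*
   Background (illustrative): the routines below manipulate AIJ (CSR) storage
   directly. A Mat_SeqAIJ holds row pointers i, column indices j, and values a;
   row r of a matrix with data Mat_SeqAIJ *m is traversed as

      nz   = m->i[r+1] - m->i[r];     number of nonzeros in row r
      cols = m->j + m->i[r];          their column indices
      vals = m->a + m->i[r];          their values
      for (k=0; k<nz; k++) { ... cols[k] ... vals[k] ... }

   Every loop over adi/adj/ada, pi/pj/pa, and friends in this file is an
   instance of this pattern.
*/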
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,
                       *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,
                       *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  Mat                  C_seq;
  Mat_SeqAIJ           *cseq,*p_oth;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend;
  PetscInt             *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj;
  PetscInt             i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt             *cjj;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar            *pA_d=pd->a,*pA_o=po->a,*pa_oth;
  PetscInt             am=A->m,cN=C->N,cm=C->m;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i,*ca;
  PetscInt             *cseqi,*cseqj,col;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  p_oth  = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_oth = p_oth->i; pj_oth = p_oth->j; pa_oth = p_oth->a;

  C_seq = merge->C_seq;
  cseq  = (Mat_SeqAIJ*)C_seq->data;
  cseqi = cseq->i; cseqj = cseq->j;
  cseqa = cseq->a;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Allocate temporary MatScalar array for storage of one row of C */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,cN*(sizeof(MatScalar)));CHKERRQ(ierr);

  /* Clear old values in C_seq and C */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    nzi = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj++;
      /* diagonal portion of P */
      pnzj = pd->i[prow+1] - pd->i[prow];
      pjj  = pd->j + pd->i[prow]; /* local col index of P */
      paj  = pd->a + pd->i[prow];
      for (k=0;k<pnzj;k++) {
        col = *pjj + p->cstart; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      /* off-diagonal portion of P */
      pnzj = po->i[prow+1] - po->i[prow];
      pjj  = po->j + po->i[prow]; /* local col index of P */
      paj  = po->a + po->i[prow];
      for (k=0;k<pnzj;k++) {
        col = p->garray[*pjj]; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;

      ada++;
    }
    /* off-diagonal portion of A */
    nzi = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
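    /* Note (illustrative): the loops above use a classic sparse accumulator.
       apa is a dense length-cN scratch vector of values, apjdense marks which
       columns are occupied, and apj records the occupied columns in the order
       they first appear, so one row of A*P is accumulated in O(nnz) work and
       only the touched entries are cleared afterwards. */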
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    /* diagonal portion of P */
    pnzi = pd->i[i+1] - pd->i[i];
    for (j=0;j<pnzi;j++) {
      crow = (*pJ_d++) + owners[rank];
      /* add values into C */
      for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]];
      ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      pA_d++;
      flops += 2*apnzj;
    }

    /* off-diagonal portion of P */
    pnzi = po->i[i+1] - po->i[i];
    for (j=0;j<pnzi;j++) {
      crow = p->garray[*pJ_o++];
      cjj  = cseqj + cseqi[crow];
      caj  = cseqa + cseqi[crow];
      /* add values into C_seq to be sent to other processors */
      nextap = 0;
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA_o)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA_o++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  /*--------------------------------------------------------------*/
  /* send and recv matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k];                /* beginning of k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;            /* next row number of k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi  = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj     = buf_rj[k] + *(nextcseqi[k]);
        cseqa     = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(ca);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
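/*
   Note (illustrative): the value exchange above follows the usual nonblocking
   pattern -- all receives are posted first (PetscPostIrecvScalar allocates the
   receive buffers abuf_r and the requests r_waits), the sends are started with
   MPI_Isend, and only then does the process block in MPI_Waitall. Posting
   receives before sends avoids unexpected-message buffering. In bare MPI the
   same shape, with hypothetical buffers and counts, is:

      for (k=0; k<nrecv; k++) MPI_Irecv(rbuf[k],rlen[k],MPIU_MATSCALAR,src[k],tag,comm,&r_waits[k]);
      for (k=0; k<nsend; k++) MPI_Isend(sbuf[k],slen[k],MPIU_MATSCALAR,dst[k],tag,comm,&s_waits[k]);
      MPI_Waitall(nrecv,r_waits,statuses);
      MPI_Waitall(nsend,s_waits,statuses);
*/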
#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. The product is computed using this (i,j) structure
   by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(), MatPtAPNumeric(), MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c           = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
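/*
   Note (illustrative): the symbolic phase above relies on the PetscLL sorted
   linked-list helpers to merge the column patterns of several rows of P. The
   general shape of the idiom is:

      nlnk = n+1;
      ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      for each contributing row:
        ierr = PetscLLAdd(nz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);   nlnk returns the number of newly added cols
      ierr = PetscLLClean(n,n,cnzi,lnk,dest,lnkbt);CHKERRQ(ierr);    writes the sorted union to dest, resets the list
      ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

   giving the sorted union of the contributing rows without re-sorting each
   row of C from scratch.
*/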
#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. In particular,
     the mapping between the dof-interleaved rows/columns of PP and those of
     the underlying scalar matrix P, and the sizing of ci/denserow/sparserow,
     still need to be verified. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0;j<ptanzi;j++) {
        /* decompose into the dof component and the row of the scalar P;
           the original code divided by the loop index dof (zero on the first
           pass), which must have meant ppdof */
        pdof = *ptaj%ppdof;
        prow = (*ptaj++)/ppdof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      /* row of C is i*ppdof+dof; the original i+dof indexing skipped rows */
      ci[i*ppdof+dof+1] = ci[i*ppdof+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c           = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
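/*
   Note (illustrative): both symbolic routines above accumulate column indices
   through the FreeSpaceList helpers from src/mat/utils/freespace.h. The idiom is

      ierr = GetMoreSpace(initial_guess,&free_space);CHKERRQ(ierr);
      current_space = free_space;
      per row:
        if (current_space->local_remaining < needed)
          ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
        write the needed entries at current_space->array and advance the
        array/local_used/local_remaining fields;
      ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);

   i.e. a chain of geometrically growing buffers that is finally flattened into
   the cj array, avoiding a precise a-priori count of nnz(C).
*/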
/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user using MatDestroy().

   This routine is currently implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(), MatPtAPSymbolic(), MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
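/*
   Illustrative usage (a sketch, not part of the library): the split interface
   amortizes the symbolic work over many numeric products with the same
   sparsity pattern. The names A, P, and C are placeholders assumed here.

      ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);
      for each new set of values in A with an unchanged nonzero pattern:
        ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);

   This is the same pairing MatPtAP() performs internally for a
   MAT_INITIAL_MATRIX call followed by MAT_REUSE_MATRIX calls.
*/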
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
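/*
   Note (illustrative): the "sparse axpy" above is a two-pointer merge. Because
   the symbolic phase guarantees that the column pattern apj of the current row
   of A*P is a subset of the column pattern cjj of row crow of C, and both are
   sorted, one pass suffices:

      nextap = 0;
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) caj[k] += v*apa[apj[nextap++]];
      }

   If the subset property were violated the loop would run off the end of cjj,
   which is why the symbolic and numeric phases must agree on the structure.
*/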
/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ*)C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      /* sanity check: off-diagonal columns must lie outside [cstart,cend) */
      col = a->garray[*aoj];
      if (col >= cstart && col < cend) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      prow = *aoj;
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
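/*
   Note (illustrative): the _Local routines register their profiling events
   lazily, since there is no module init hook in which to do it:

      static PetscEvent logkey_myevent = 0;
      ...
      if (!logkey_myevent) {
        ierr = PetscLogEventRegister(&logkey_myevent,"MyEventName",MAT_COOKIE);CHKERRQ(ierr);
      }
      ierr = PetscLogEventBegin(logkey_myevent,A,0,0,0);CHKERRQ(ierr);

   The zero-initialized static key doubles as the "not yet registered" flag.
*/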
static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
  /* Set initial free space to be nnz(A) scaled by fill*P->N/A->N. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]]; /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* map back to a local row of P_loc */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* map back to a local row of P_oth */
      pnzj  = pi_oth[prow+1] - pi_oth[prow];
      pjj   = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c           = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}