/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(), MatPtAPNumeric(), MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P. */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
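
/*
   Example usage of MatPtAP() (a sketch for illustration, not taken from the PETSc
   test suite; assumes A and P are assembled AIJ matrices with P->M == A->N):

       Mat C;
       ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);   symbolic + numeric product
       ...                                     use C, e.g. as a Galerkin coarse-grid operator
       ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);     refill after the values of A change
       ierr = MatDestroy(C);CHKERRQ(ierr);     the user must destroy C
*/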

EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    /* free ptap only when the container (and hence ptap) was actually attached;
       previously ptap was freed unconditionally, i.e. uninitialized if no container */
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);
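    /* The compose/query idiom used here, shown for orientation (same calls as in
       this file): the work struct is stashed on P once under MAT_INITIAL_MATRIX,

           ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
           ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

       and retrieved on each MAT_REUSE_MATRIX call:

           ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
           ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    */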

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %D",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  C_seq;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* compute C_seq = P_loc^T * A_loc * P_seq */
  ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);

  /* add C_seq into mpi C */
  ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ  *a=(Mat_MPIAIJ*)A->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ  *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ  *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  Mat         C_seq;
  Mat_SeqAIJ  *p_loc,*p_oth;
  PetscInt    *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt    *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt    i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt    *cjj;
  MatScalar   *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar   *pa_loc,*pA,*pa_oth;
  PetscInt    am=A->m,cN=C->N,cm=C->m;
  MPI_Comm    comm=C->comm;
  PetscMPIInt size,rank,taga,*len_s;
  PetscInt    *owners,proc;
  PetscInt    **buf_ri,**buf_rj;
  PetscInt    cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt    nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request *s_waits,*r_waits;
  MPI_Status  *status;
  MatScalar   **abuf_r,*ba_i;
  Mat_SeqAIJ  *cseq;
  PetscInt    *cseqi,*cseqj;
  MatScalar   *ca;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
#ifdef OLD
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr);
#endif /* OLD */
  /*--------------------------------------------------------------*/

  p_loc = (Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  C_seq = merge->C_seq;
  cseq  = (Mat_SeqAIJ*)C_seq->data;
  cseqi = cseq->i; cseqj = cseq->j; cseqa = cseq->a;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Allocate temporary MatScalar array for storage of one row of C */
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,cN*sizeof(MatScalar));CHKERRQ(ierr);

  /* Clear old values in C_seq and C */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    nzi = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    nzi = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      crow = *pJ++;
      cjj  = cseqj + cseqi[crow];
      caj  = cseqa + cseqi[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      if (crow >= owners[rank] && crow < owners[rank+1]){ /* add value into mpi C */
        for (k=0; k<apnzj; k++) ca[k] = (*pA)*apa[apj[k]];
        ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
        ierr = PetscMemzero(ca,apnzj*sizeof(MatScalar));CHKERRQ(ierr);
      } else { /* add value into C_seq to be sent to other processors */
        nextap = 0;
        for (k=0;nextap<apnzj;k++) {
          if (cjj[k]==apj[nextap]) {
            caj[k] += (*pA)*apa[apj[nextap++]];
          }
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

#ifdef OLD1
  ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
#endif /* OLD1 */
  /*--------------------------------------------------------------*/
  /* send and recv matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj + bi[i];        /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi  = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj     = buf_rj[k] + *(nextcseqi[k]);
        cseqa     = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(ca);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(), MatPtAPNumeric(), MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P. */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
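
/*
   Sketch of the split interface (illustrative only; assumes A and P satisfy the
   checks performed above):

       Mat C;
       ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);    (i,j) structure of C only
       ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);          fill in the numerical values
       ...                                                  change values (not structure) of A
       ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);          recompute values, reusing the structure
*/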

typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0;j<ptanzi;j++) {
        /* split the PtA column by the dof of the MAIJ matrix; the previous code
           divided by the loop variable dof, a division by zero when dof==0 */
        pdof = *ptaj%ppdof;
        prow = (*ptaj++)/ppdof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i*ppdof+dof+1] = ci[i*ppdof+dof] + cnzi; /* the row of C being built is i*ppdof+dof */
    }
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user with MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(), MatPtAPSymbolic(), MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P. */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *p=(Mat_SeqAIJ*)P->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ*)C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
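      /* Illustration of the merge below (made-up indices, for orientation only):
         with cjj = {0,2,5,7} and apj = {2,7}, k sweeps cjj while nextap advances
         only on a match, so the two matches update caj[1] and caj[3]. This relies
         on both index arrays being sorted and on cjj containing every entry of apj. */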
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ*)C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
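  /* Worked example of the estimate below (numbers are illustrative only): with
     nnz = 40000 local nonzeros of A, pN/aN = 1/4 (a 4-to-1 coarsening) and
     fill = 1.0, the initial allocation is 40000/4 + 1 = 10001 entries; if a row
     of C overflows the remaining space, GetMoreSpace() doubles the list. */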
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]]; /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj  = pi_oth[prow+1] - pi_oth[prow];
      pjj   = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}