/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently implemented for pairs of AIJ matrices (SeqAIJ or MPIAIJ)
   and classes which inherit from them; C will be of the matching AIJ type.

   Level: intermediate

.seealso: MatPtAPSymbolic(), MatPtAPNumeric(), MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
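/*
   Example usage (a minimal sketch, not part of this file): a Galerkin
   coarse-grid operator is typically built once with MAT_INITIAL_MATRIX and
   then refreshed with MAT_REUSE_MATRIX after the values, but not the nonzero
   structure, of A change. The fill estimate of 1.0 is only a guess.

     Mat C;
     ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
     ...            change numerical values in A            ...
     ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/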
EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    /* free ptap only when the container was found; otherwise ptap is uninitialized */
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= nonzero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }

    /* get P_oth by taking rows of P (= nonzero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }

  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
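/*
   MatPtAP_MPIAIJ_MPIAIJ() above stashes its Mat_MatMatMultMPI work data on P
   through a named PetscObjectContainer so a MAT_REUSE_MATRIX call can find it
   again. A minimal sketch of the same attach/query pattern, using only calls
   that appear in this file (MyData and "MyKey" are hypothetical):

     PetscObjectContainer container;
     MyData               *data;
     ierr = PetscNew(MyData,&data);CHKERRQ(ierr);
     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
     ierr = PetscObjectContainerSetPointer(container,data);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)P,"MyKey",(PetscObject)container);CHKERRQ(ierr);

     ierr = PetscObjectQuery((PetscObject)P,"MyKey",(PetscObject*)&container);CHKERRQ(ierr);
     if (container) {
       ierr = PetscObjectContainerGetPointer(container,(void**)&data);CHKERRQ(ierr);
     }
*/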
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  C_seq;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* compute the local symbolic (and numeric) product C_seq = P_loc^T * A_loc * P */
  ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);

  /* merge the C_seq pieces into the parallel C */
  ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  Mat                  C_seq;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt             i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt             *cjj;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar            *pa_loc,*pA,*pa_oth;
  PetscInt             am=A->m,cN=C->N,cm=C->m;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners,proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i;
  Mat_SeqAIJ           *cseq;
  PetscInt             *cseqi,*cseqj;
  MatScalar            *ca;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

#ifdef OLD
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr);
#endif /* OLD */
  /*--------------------------------------------------------------*/

  p_loc = (Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  C_seq = merge->C_seq;
  cseq  = (Mat_SeqAIJ*)C_seq->data;
  cseqi = cseq->i; cseqj = cseq->j; cseqa = cseq->a;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Allocate temporary MatScalar array for storage of one row of C */
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,cN*sizeof(MatScalar));CHKERRQ(ierr);

  /* Clear old values in C_seq and C */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    nzi = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    nzi = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart || col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      crow = *pJ++;
      cjj  = cseqj + cseqi[crow];
      caj  = cseqa + cseqi[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      if (crow >= owners[rank] && crow < owners[rank+1]){ /* add value into mpi C */
        for (k=0; k<apnzj; k++) ca[k] = (*pA)*apa[apj[k]];
        ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
        ierr = PetscMemzero(ca,apnzj*sizeof(MatScalar));CHKERRQ(ierr);
      } else { /* add value into C_seq to be sent to other processors */
        nextap = 0;
        for (k=0;nextap<apnzj;k++) {
          if (cjj[k]==apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

#ifdef OLD1
  ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
#endif /* OLD1 */
  /*--------------------------------------------------------------*/
  /* send and recv matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr   = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr   = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj + bi[i];        /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi  = 0;
    for (k=0; k<merge->nrecv; k++){ /* i-th row of the k-th received message */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj     = buf_rj[k] + *(nextcseqi[k]);
        cseqa     = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(ca);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
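/*
   The value exchange above follows the standard nonblocking MPI pattern: post
   all receives first, then the sends, then wait on both before reading or
   reusing the buffers. A minimal self-contained sketch of that pattern, in
   plain MPI with hypothetical buffers and counts (PetscPostIrecvScalar()
   wraps the receive side of this):

     MPI_Request req[2];
     MPI_Status  stat[2];
     MPI_Irecv(rbuf,rcount,MPI_DOUBLE,src,tag,comm,&req[0]);
     MPI_Isend(sbuf,scount,MPI_DOUBLE,dest,tag,comm,&req[1]);
     MPI_Waitall(2,req,stat);

   After MPI_Waitall() returns, rbuf holds the incoming values and sbuf may be
   safely overwritten.
*/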
#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ. The numeric product is then
   computed into this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(), MatPtAPNumeric(), MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
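/*
   Example usage of the split interface (a minimal sketch): the (i,j)
   structure is computed once and the values are refreshed each time the
   entries of A change, the usual pattern when a fixed grid hierarchy is
   reused over many solves. The fill estimate of 1.0 is only a guess.

     Mat C;
     ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);
     ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);
     ...            change numerical values in A            ...
     ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/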
typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add nonzero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c           = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
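/*
   The symbolic loop above is a row merge: for each row of P^T*A it unions the
   column patterns of the selected rows of P. A standalone sketch of the same
   idea using a dense marker array instead of PETSc's PetscLL linked list (all
   names here are illustrative only):

     static int MergeRows(int nrows,const int *rows,const int *pi,const int *pj,
                          int *marker,int *out)
     {
       int r,k,nz = 0;
       for (r=0; r<nrows; r++) {
         for (k=pi[rows[r]]; k<pi[rows[r]+1]; k++) {
           if (!marker[pj[k]]) { marker[pj[k]] = 1; out[nz++] = pj[k]; }
         }
       }
       for (k=0; k<nz; k++) marker[out[k]] = 0;
       return nz;
     }

   PetscLLAdd()/PetscLLClean() additionally keep the union sorted as it is
   built, so no separate sort of the row is needed afterwards.
*/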
#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0;j<ptanzi;j++) {
        /* decode node row and component from the interleaved index; the code */
        /* originally divided by the loop counter dof, which is a division by */
        /* zero at dof=0, so the block size ppdof is used instead */
        pdof = *ptaj%ppdof;
        prow = (*ptaj++)/ppdof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c           = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
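/*
   Background for the dof arithmetic in the routine above (a sketch of the
   assumed layout, not a statement of the MAIJ implementation): a SeqMAIJ
   matrix applies one scalar interpolation to ppdof interleaved components,
   so an interleaved index packs a node number and a component,

     index     = node*ppdof + component;
     node      = index/ppdof;
     component = index%ppdof;

   which is the decode performed on *ptaj in the inner loop.
*/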
/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user with MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(), MatPtAPSymbolic(), MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ     *p=(Mat_SeqAIJ *)P->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ *)C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
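/*
   The routine above uses the classic sparse accumulator: apa holds one row of
   A*P in dense form, apjdense marks the occupied columns, and apj lists them
   compactly so only the touched entries are cleared after each row. A
   standalone sketch of one accumulator update (names are illustrative only):

     static void SparseAXPY(double v,int nz,const int *cols,const double *vals,
                            double *apa,int *apjdense,int *apj,int *apnzj)
     {
       int k;
       for (k=0; k<nz; k++) {
         if (!apjdense[cols[k]]) { apjdense[cols[k]] = 1; apj[(*apnzj)++] = cols[k]; }
         apa[cols[k]] += v*vals[k];
       }
     }

   Clearing only the apnzj touched positions keeps the per-row cost
   proportional to the row's nonzeros rather than to the matrix dimension.
*/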
/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ *)C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart || col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k] + prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]]; /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add nonzero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add nonzero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c           = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}