1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatPtAP" 13 /*@ 14 MatPtAP - Creates the matrix projection C = P^T * A * P 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the matrix 20 . P - the projection matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 31 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 32 33 Level: intermediate 34 35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 36 @*/ 37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 50 PetscValidType(P,2); 51 MatPreallocated(P); 52 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 56 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57 58 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59 60 /* For now, we do not 
dispatch based on the type of A and P */ 61 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62 fA = A->ops->ptap; 63 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64 fP = P->ops->ptap; 65 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67 68 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 70 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76 77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81 { 82 PetscErrorCode ierr; 83 Mat_MatMatMultMPI *ptap; 84 PetscObjectContainer container; 85 86 PetscFunctionBegin; 87 ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88 if (container) { 89 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 90 ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 91 ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 92 ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr); 93 ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr); 94 95 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 96 ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 97 } 98 ierr = PetscFree(ptap);CHKERRQ(ierr); 99 100 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 106 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 
107 { 108 PetscErrorCode ierr; 109 Mat_MatMatMultMPI *ptap; 110 PetscObjectContainer container; 111 112 PetscFunctionBegin; 113 if (scall == MAT_INITIAL_MATRIX){ 114 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 115 ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 116 117 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 118 ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 119 120 /* get P_loc by taking all local rows of P */ 121 ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 122 123 /* attach the supporting struct to P for reuse */ 124 P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 125 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 126 ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 127 ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 128 129 /* now, compute symbolic local P^T*A*P */ 130 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 131 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 132 } else if (scall == MAT_REUSE_MATRIX){ 133 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 134 if (container) { 135 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 136 } else { 137 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 138 } 139 140 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 141 ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 142 143 /* get P_loc by taking all local rows of P */ 144 ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 145 146 } else { 147 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 148 } 149 /* now, compute numeric local 
P^T*A*P */ 150 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 151 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 152 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 153 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 159 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 160 { 161 PetscErrorCode ierr; 162 Mat C_seq; 163 Mat_MatMatMultMPI *ptap; 164 PetscObjectContainer container; 165 166 PetscFunctionBegin; 167 168 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 169 if (container) { 170 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 171 } else { 172 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 173 } 174 /* compute C_seq = P_loc^T * A_loc * P_seq */ 175 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr); 176 177 /* add C_seq into mpi C */ 178 ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 179 180 PetscFunctionReturn(0); 181 } 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 185 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 186 { 187 PetscErrorCode ierr; 188 Mat_Merge_SeqsToMPI *merge; 189 Mat_MatMatMultMPI *ptap; 190 PetscObjectContainer cont_merge,cont_ptap; 191 192 PetscInt flops=0; 193 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 194 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 195 Mat C_seq; 196 Mat_SeqAIJ *c,*p_loc,*p_oth; 197 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 198 PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj; 199 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 200 PetscInt *cjj; 201 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj; 202 MatScalar *pa_loc,*pA,*pa_oth; 203 
PetscInt am=A->m,cN=C->N; 204 205 MPI_Comm comm=C->comm; 206 PetscMPIInt size,rank,taga,*len_s; 207 PetscInt *owners; 208 PetscInt proc; 209 PetscInt **buf_ri,**buf_rj; 210 PetscInt cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */ 211 PetscInt nrows,**buf_ri_k,**nextrow,**nextcseqi; 212 MPI_Request *s_waits,*r_waits; 213 MPI_Status *status; 214 MatScalar **abuf_r,*ba_i; 215 Mat_SeqAIJ *cseq; 216 PetscInt *cseqi,*cseqj; 217 218 PetscFunctionBegin; 219 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 220 if (cont_merge) { 221 ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 222 } else { 223 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 224 } 225 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 226 if (cont_ptap) { 227 ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 228 } else { 229 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 230 } 231 #ifdef OLD 232 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr); 233 #endif /* OLD */ 234 /*--------------------------------------------------------------*/ 235 236 p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data; 237 p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data; 238 pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc; 239 pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a; 240 241 C_seq=merge->C_seq; 242 cseq=(Mat_SeqAIJ*)C_seq->data; 243 cseqi=cseq->i; cseqj=cseq->j; 244 cseqa=cseq->a; 245 246 /* Allocate temporary array for storage of one row of A*P */ 247 ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 248 ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 249 apj = (PetscInt *)(apa + cN); 250 apjdense = apj + cN; 251 252 /* Clear old values 
in C_Seq */ 253 ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr); 254 255 for (i=0;i<am;i++) { 256 /* Form i-th sparse row of A*P */ 257 apnzj = 0; 258 /* diagonal portion of A */ 259 anzi = adi[i+1] - adi[i]; 260 for (j=0;j<anzi;j++) { 261 prow = *adj; 262 adj++; 263 pnzj = pi_loc[prow+1] - pi_loc[prow]; 264 pjj = pj_loc + pi_loc[prow]; 265 paj = pa_loc + pi_loc[prow]; 266 for (k=0;k<pnzj;k++) { 267 if (!apjdense[pjj[k]]) { 268 apjdense[pjj[k]] = -1; 269 apj[apnzj++] = pjj[k]; 270 } 271 apa[pjj[k]] += (*ada)*paj[k]; 272 } 273 flops += 2*pnzj; 274 ada++; 275 } 276 /* off-diagonal portion of A */ 277 anzi = aoi[i+1] - aoi[i]; 278 for (j=0;j<anzi;j++) { 279 col = a->garray[*aoj]; 280 if (col < cstart){ 281 prow = *aoj; 282 } else if (col >= cend){ 283 prow = *aoj; 284 } else { 285 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 286 } 287 aoj++; 288 pnzj = pi_oth[prow+1] - pi_oth[prow]; 289 pjj = pj_oth + pi_oth[prow]; 290 paj = pa_oth + pi_oth[prow]; 291 for (k=0;k<pnzj;k++) { 292 if (!apjdense[pjj[k]]) { 293 apjdense[pjj[k]] = -1; 294 apj[apnzj++] = pjj[k]; 295 } 296 apa[pjj[k]] += (*aoa)*paj[k]; 297 } 298 flops += 2*pnzj; 299 aoa++; 300 } 301 /* Sort the j index array for quick sparse axpy. */ 302 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 303 304 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 305 pnzi = pi_loc[i+1] - pi_loc[i]; 306 for (j=0;j<pnzi;j++) { 307 nextap = 0; 308 crow = *pJ++; 309 cjj = cseqj + cseqi[crow]; 310 caj = cseqa + cseqi[crow]; 311 /* Perform sparse axpy operation. Note cjj includes apj. 
*/ 312 for (k=0;nextap<apnzj;k++) { 313 if (cjj[k]==apj[nextap]) { 314 caj[k] += (*pA)*apa[apj[nextap++]]; 315 } 316 } 317 flops += 2*apnzj; 318 pA++; 319 } 320 321 /* Zero the current row info for A*P */ 322 for (j=0;j<apnzj;j++) { 323 apa[apj[j]] = 0.; 324 apjdense[apj[j]] = 0; 325 } 326 } 327 328 /* Assemble the final matrix and clean up */ 329 ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331 ierr = PetscFree(apa);CHKERRQ(ierr); 332 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 333 334 #ifdef OLD1 335 ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr); 336 #endif /* OLD1 */ 337 /*--------------------------------------------------------------*/ 338 339 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 340 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 341 342 bi = merge->bi; 343 bj = merge->bj; 344 buf_ri = merge->buf_ri; 345 buf_rj = merge->buf_rj; 346 347 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 348 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 349 350 /* send and recv matrix values */ 351 /*-----------------------------*/ 352 len_s = merge->len_s; 353 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 354 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 355 356 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 357 for (proc=0,k=0; proc<size; proc++){ 358 if (!len_s[proc]) continue; 359 i = owners[proc]; 360 ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 361 k++; 362 } 363 364 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 365 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 366 ierr = PetscFree(status);CHKERRQ(ierr); 367 368 ierr = PetscFree(s_waits);CHKERRQ(ierr); 369 ierr = PetscFree(r_waits);CHKERRQ(ierr); 370 371 /* 
insert mat values of mpimat */ 372 /*----------------------------*/ 373 ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 374 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 375 nextrow = buf_ri_k + merge->nrecv; 376 nextcseqi = nextrow + merge->nrecv; 377 378 for (k=0; k<merge->nrecv; k++){ 379 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 380 nrows = *(buf_ri_k[k]); 381 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 382 nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 383 } 384 385 /* set values of ba */ 386 for (i=0; i<C->m; i++) { 387 cseqrow = owners[rank] + i; 388 bj_i = bj+bi[i]; /* col indices of the i-th row of C */ 389 bnzi = bi[i+1] - bi[i]; 390 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 391 392 /* add local non-zero vals of this proc's C_seq into ba */ 393 cseqnzi = cseqi[cseqrow+1] - cseqi[cseqrow]; 394 cseqj = cseq->j + cseqi[cseqrow]; 395 cseqa = cseq->a + cseqi[cseqrow]; 396 nextcseqj = 0; 397 for (j=0; nextcseqj<cseqnzi; j++){ 398 if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */ 399 ba_i[j] += cseqa[nextcseqj++]; 400 } 401 } 402 403 /* add received vals into ba */ 404 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 405 /* i-th row */ 406 if (i == *nextrow[k]) { 407 cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k]; 408 cseqj = buf_rj[k] + *(nextcseqi[k]); 409 cseqa = abuf_r[k] + *(nextcseqi[k]); 410 nextcseqj = 0; 411 for (j=0; nextcseqj<cseqnzi; j++){ 412 if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */ 413 ba_i[j] += cseqa[nextcseqj++]; 414 } 415 } 416 nextrow[k]++; nextcseqi[k]++; 417 } 418 } 419 ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 422 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 423 424 ierr = 
PetscFree(abuf_r);CHKERRQ(ierr); 425 ierr = PetscFree(ba_i);CHKERRQ(ierr); 426 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 427 428 PetscFunctionReturn(0); 429 } 430 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 433 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 434 { 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 if (scall == MAT_INITIAL_MATRIX){ 439 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 440 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 441 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 442 } 443 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 444 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 445 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 /* 450 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 451 452 Collective on Mat 453 454 Input Parameters: 455 + A - the matrix 456 - P - the projection matrix 457 458 Output Parameters: 459 . C - the (i,j) structure of the product matrix 460 461 Notes: 462 C will be created and must be destroyed by the user with MatDestroy(). 463 464 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 465 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 466 this (i,j) structure by calling MatPtAPNumeric(). 
467 468 Level: intermediate 469 470 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 471 */ 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatPtAPSymbolic" 474 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 475 { 476 PetscErrorCode ierr; 477 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 478 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 479 480 PetscFunctionBegin; 481 482 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 483 PetscValidType(A,1); 484 MatPreallocated(A); 485 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 486 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 487 488 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 489 PetscValidType(P,2); 490 MatPreallocated(P); 491 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 492 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 493 494 PetscValidPointer(C,3); 495 496 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 497 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 498 499 /* For now, we do not dispatch based on the type of A and P */ 500 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. 
*/ 501 fA = A->ops->ptapsymbolic; 502 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 503 fP = P->ops->ptapsymbolic; 504 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 505 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 506 507 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 508 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 509 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 510 511 PetscFunctionReturn(0); 512 } 513 514 typedef struct { 515 Mat symAP; 516 } Mat_PtAPstruct; 517 518 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 519 520 #undef __FUNCT__ 521 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 522 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 523 { 524 PetscErrorCode ierr; 525 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 526 527 PetscFunctionBegin; 528 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 529 ierr = PetscFree(ptap);CHKERRQ(ierr); 530 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 #undef __FUNCT__ 535 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 536 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 537 { 538 PetscErrorCode ierr; 539 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 540 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 541 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 542 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 543 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 544 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 545 MatScalar *ca; 546 PetscBT lnkbt; 547 548 PetscFunctionBegin; 549 /* Get ij structure of P^T */ 550 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 551 ptJ=ptj; 552 553 /* Allocate ci array, arrays for fill computation and */ 554 /* free space for 
accumulating nonzero column info */ 555 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 556 ci[0] = 0; 557 558 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 559 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 560 ptasparserow = ptadenserow + an; 561 562 /* create and initialize a linked list */ 563 nlnk = pn+1; 564 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 565 566 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 567 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 568 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 569 current_space = free_space; 570 571 /* Determine symbolic info for each row of C: */ 572 for (i=0;i<pn;i++) { 573 ptnzi = pti[i+1] - pti[i]; 574 ptanzi = 0; 575 /* Determine symbolic row of PtA: */ 576 for (j=0;j<ptnzi;j++) { 577 arow = *ptJ++; 578 anzj = ai[arow+1] - ai[arow]; 579 ajj = aj + ai[arow]; 580 for (k=0;k<anzj;k++) { 581 if (!ptadenserow[ajj[k]]) { 582 ptadenserow[ajj[k]] = -1; 583 ptasparserow[ptanzi++] = ajj[k]; 584 } 585 } 586 } 587 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 588 ptaj = ptasparserow; 589 cnzi = 0; 590 for (j=0;j<ptanzi;j++) { 591 prow = *ptaj++; 592 pnzj = pi[prow+1] - pi[prow]; 593 pjj = pj + pi[prow]; 594 /* add non-zero cols of P into the sorted linked list lnk */ 595 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 596 cnzi += nlnk; 597 } 598 599 /* If free space is not available, make more free space */ 600 /* Double the amount of total space in the list */ 601 if (current_space->local_remaining<cnzi) { 602 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 603 } 604 605 /* Copy data into free space, and zero out denserows */ 606 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 607 current_space->array += cnzi; 608 current_space->local_used += cnzi; 609 
current_space->local_remaining -= cnzi; 610 611 for (j=0;j<ptanzi;j++) { 612 ptadenserow[ptasparserow[j]] = 0; 613 } 614 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 615 /* For now, we will recompute what is needed. */ 616 ci[i+1] = ci[i] + cnzi; 617 } 618 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 619 /* Allocate space for cj, initialize cj, and */ 620 /* destroy list of free space and other temporary array(s) */ 621 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 622 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 623 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 624 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 625 626 /* Allocate space for ca */ 627 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 628 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 629 630 /* put together the new matrix */ 631 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 632 633 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 634 /* Since these are PETSc arrays, change flags to free them as necessary. */ 635 c = (Mat_SeqAIJ *)((*C)->data); 636 c->freedata = PETSC_TRUE; 637 c->nonew = 0; 638 639 /* Clean up. */ 640 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 641 642 PetscFunctionReturn(0); 643 } 644 645 #include "src/mat/impls/maij/maij.h" 646 EXTERN_C_BEGIN 647 #undef __FUNCT__ 648 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 649 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 650 { 651 /* This routine requires testing -- I don't think it works. 
*/ 652 PetscErrorCode ierr; 653 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 654 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 655 Mat P=pp->AIJ; 656 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 657 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 658 PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 659 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 660 PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 661 MatScalar *ca; 662 663 PetscFunctionBegin; 664 /* Start timer */ 665 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 666 667 /* Get ij structure of P^T */ 668 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 669 670 /* Allocate ci array, arrays for fill computation and */ 671 /* free space for accumulating nonzero column info */ 672 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 673 ci[0] = 0; 674 675 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 676 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 677 ptasparserow = ptadenserow + an; 678 denserow = ptasparserow + an; 679 sparserow = denserow + pn; 680 681 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 682 /* This should be reasonable if sparsity of PtAP is similar to that of A. 
*/ 683 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 684 current_space = free_space; 685 686 /* Determine symbolic info for each row of C: */ 687 for (i=0;i<pn/ppdof;i++) { 688 ptnzi = pti[i+1] - pti[i]; 689 ptanzi = 0; 690 ptJ = ptj + pti[i]; 691 for (dof=0;dof<ppdof;dof++) { 692 /* Determine symbolic row of PtA: */ 693 for (j=0;j<ptnzi;j++) { 694 arow = ptJ[j] + dof; 695 anzj = ai[arow+1] - ai[arow]; 696 ajj = aj + ai[arow]; 697 for (k=0;k<anzj;k++) { 698 if (!ptadenserow[ajj[k]]) { 699 ptadenserow[ajj[k]] = -1; 700 ptasparserow[ptanzi++] = ajj[k]; 701 } 702 } 703 } 704 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 705 ptaj = ptasparserow; 706 cnzi = 0; 707 for (j=0;j<ptanzi;j++) { 708 pdof = *ptaj%dof; 709 prow = (*ptaj++)/dof; 710 pnzj = pi[prow+1] - pi[prow]; 711 pjj = pj + pi[prow]; 712 for (k=0;k<pnzj;k++) { 713 if (!denserow[pjj[k]+pdof]) { 714 denserow[pjj[k]+pdof] = -1; 715 sparserow[cnzi++] = pjj[k]+pdof; 716 } 717 } 718 } 719 720 /* sort sparserow */ 721 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 722 723 /* If free space is not available, make more free space */ 724 /* Double the amount of total space in the list */ 725 if (current_space->local_remaining<cnzi) { 726 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 727 } 728 729 /* Copy data into free space, and zero out denserows */ 730 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 731 current_space->array += cnzi; 732 current_space->local_used += cnzi; 733 current_space->local_remaining -= cnzi; 734 735 for (j=0;j<ptanzi;j++) { 736 ptadenserow[ptasparserow[j]] = 0; 737 } 738 for (j=0;j<cnzi;j++) { 739 denserow[sparserow[j]] = 0; 740 } 741 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 742 /* For now, we will recompute what is needed. 
*/ 743 ci[i+1+dof] = ci[i+dof] + cnzi; 744 } 745 } 746 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 747 /* Allocate space for cj, initialize cj, and */ 748 /* destroy list of free space and other temporary array(s) */ 749 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 750 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 751 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 752 753 /* Allocate space for ca */ 754 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 755 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 756 757 /* put together the new matrix */ 758 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 759 760 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 761 /* Since these are PETSc arrays, change flags to free them as necessary. */ 762 c = (Mat_SeqAIJ *)((*C)->data); 763 c->freedata = PETSC_TRUE; 764 c->nonew = 0; 765 766 /* Clean up. */ 767 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 768 769 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 770 PetscFunctionReturn(0); 771 } 772 EXTERN_C_END 773 774 /* 775 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 776 777 Collective on Mat 778 779 Input Parameters: 780 + A - the matrix 781 - P - the projection matrix 782 783 Output Parameters: 784 . C - the product matrix 785 786 Notes: 787 C must have been created by calling MatPtAPSymbolic and must be destroyed by 788 the user using MatDeatroy(). 789 790 This routine is currently only implemented for pairs of AIJ matrices and classes 791 which inherit from AIJ. C will be of type MATAIJ. 
   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;
  /* All three matrices must be assembled and unfactored */
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* C must be cols(P) x cols(P), A square with cols(A) == rows(P) */
  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   Numeric C = P^T*A*P for SeqAIJ, one row of A at a time:
   row i of A*P is accumulated into the dense scratch apa (pattern tracked
   in apj/apjdense), then scattered into C via the outer product with
   column i of P.  Assumes C's (i,j) structure came from
   MatPtAPSymbolic_SeqAIJ_SeqAIJ(), so each C row pattern contains apj.
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P:
     cn scalars (apa) then cn indices (apj) then cn dense markers (apjdense) */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
/* NOTE(review): this function is cut off at the end of this chunk; the
   visible portion is carried unchanged. */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  /* register the local numeric event once */
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /*
Sort the j index array for quick sparse axpy. */ 1010 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1011 1012 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1013 pnzi = pi_loc[i+1] - pi_loc[i]; 1014 for (j=0;j<pnzi;j++) { 1015 nextap = 0; 1016 crow = *pJ++; 1017 cjj = cj + ci[crow]; 1018 caj = ca + ci[crow]; 1019 /* Perform sparse axpy operation. Note cjj includes apj. */ 1020 for (k=0;nextap<apnzj;k++) { 1021 if (cjj[k]==apj[nextap]) { 1022 caj[k] += (*pA)*apa[apj[nextap++]]; 1023 } 1024 } 1025 flops += 2*apnzj; 1026 pA++; 1027 } 1028 1029 /* Zero the current row info for A*P */ 1030 for (j=0;j<apnzj;j++) { 1031 apa[apj[j]] = 0.; 1032 apjdense[apj[j]] = 0; 1033 } 1034 } 1035 1036 /* Assemble the final matrix and clean up */ 1037 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 ierr = PetscFree(apa);CHKERRQ(ierr); 1040 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1041 ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 static PetscEvent logkey_matptapsymbolic_local = 0; 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 1047 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 1048 { 1049 PetscErrorCode ierr; 1050 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1051 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1052 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 1053 PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 1054 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1055 PetscInt *ci,*cj,*ptaj; 1056 PetscInt aN=A->N,am=A->m,pN=P_loc->N; 1057 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 1058 PetscInt pm=P_loc->m,nlnk,*lnk; 1059 MatScalar *ca; 1060 PetscBT lnkbt; 1061 PetscInt prend,nprow_loc,nprow_oth; 1062 PetscInt 
*ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1063 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1064 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1065 1066 PetscFunctionBegin; 1067 if (!logkey_matptapsymbolic_local) { 1068 ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1069 } 1070 ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1071 1072 prend = prstart + pm; 1073 1074 /* get ij structure of P_loc^T */ 1075 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1076 ptJ=ptj; 1077 1078 /* Allocate ci array, arrays for fill computation and */ 1079 /* free space for accumulating nonzero column info */ 1080 ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1081 ci[0] = 0; 1082 1083 ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 1084 ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 1085 ptasparserow_loc = ptadenserow_loc + aN; 1086 ptadenserow_oth = ptasparserow_loc + aN; 1087 ptasparserow_oth = ptadenserow_oth + aN; 1088 1089 /* create and initialize a linked list */ 1090 nlnk = pN+1; 1091 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1092 1093 /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */ 1094 /* This should be reasonable if sparsity of PtAP is similar to that of A. 
*/ 1095 nnz = adi[am] + aoi[am]; 1096 ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space); 1097 current_space = free_space; 1098 1099 /* determine symbolic info for each row of C: */ 1100 for (i=0;i<pN;i++) { 1101 ptnzi = pti[i+1] - pti[i]; 1102 nprow_loc = 0; nprow_oth = 0; 1103 /* i-th row of symbolic P_loc^T*A_loc: */ 1104 for (j=0;j<ptnzi;j++) { 1105 arow = *ptJ++; 1106 /* diagonal portion of A */ 1107 anzj = adi[arow+1] - adi[arow]; 1108 ajj = adj + adi[arow]; 1109 for (k=0;k<anzj;k++) { 1110 col = ajj[k]+prstart; 1111 if (!ptadenserow_loc[col]) { 1112 ptadenserow_loc[col] = -1; 1113 ptasparserow_loc[nprow_loc++] = col; 1114 } 1115 } 1116 /* off-diagonal portion of A */ 1117 anzj = aoi[arow+1] - aoi[arow]; 1118 ajj = aoj + aoi[arow]; 1119 for (k=0;k<anzj;k++) { 1120 col = a->garray[ajj[k]]; /* global col */ 1121 if (col < cstart){ 1122 col = ajj[k]; 1123 } else if (col >= cend){ 1124 col = ajj[k] + pm; 1125 } else { 1126 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1127 } 1128 if (!ptadenserow_oth[col]) { 1129 ptadenserow_oth[col] = -1; 1130 ptasparserow_oth[nprow_oth++] = col; 1131 } 1132 } 1133 } 1134 1135 /* using symbolic info of local PtA, determine symbolic info for row of C: */ 1136 cnzi = 0; 1137 /* rows of P_loc */ 1138 ptaj = ptasparserow_loc; 1139 for (j=0; j<nprow_loc; j++) { 1140 prow = *ptaj++; 1141 prow -= prstart; /* rm */ 1142 pnzj = pi_loc[prow+1] - pi_loc[prow]; 1143 pjj = pj_loc + pi_loc[prow]; 1144 /* add non-zero cols of P into the sorted linked list lnk */ 1145 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1146 cnzi += nlnk; 1147 } 1148 /* rows of P_oth */ 1149 ptaj = ptasparserow_oth; 1150 for (j=0; j<nprow_oth; j++) { 1151 prow = *ptaj++; 1152 if (prow >= prend) prow -= pm; /* rm */ 1153 pnzj = pi_oth[prow+1] - pi_oth[prow]; 1154 pjj = pj_oth + pi_oth[prow]; 1155 /* add non-zero cols of P into the sorted linked list lnk */ 1156 ierr = 
PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1157 cnzi += nlnk; 1158 } 1159 1160 /* If free space is not available, make more free space */ 1161 /* Double the amount of total space in the list */ 1162 if (current_space->local_remaining<cnzi) { 1163 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1164 } 1165 1166 /* Copy data into free space, and zero out denserows */ 1167 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1168 current_space->array += cnzi; 1169 current_space->local_used += cnzi; 1170 current_space->local_remaining -= cnzi; 1171 1172 for (j=0;j<nprow_loc; j++) { 1173 ptadenserow_loc[ptasparserow_loc[j]] = 0; 1174 } 1175 for (j=0;j<nprow_oth; j++) { 1176 ptadenserow_oth[ptasparserow_oth[j]] = 0; 1177 } 1178 1179 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1180 /* For now, we will recompute what is needed. */ 1181 ci[i+1] = ci[i] + cnzi; 1182 } 1183 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1184 /* Allocate space for cj, initialize cj, and */ 1185 /* destroy list of free space and other temporary array(s) */ 1186 ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1187 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1188 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1189 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1190 1191 /* Allocate space for ca */ 1192 ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1193 ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1194 1195 /* put together the new matrix */ 1196 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 1197 1198 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1199 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 1200 c = (Mat_SeqAIJ *)((*C)->data); 1201 c->freedata = PETSC_TRUE; 1202 c->nonew = 0; 1203 1204 /* Clean up. */ 1205 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1206 1207 ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210