/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameter:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(), MatPtAPNumeric(), MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,5);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
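/*
   Typical usage (an illustrative sketch, not part of the original source; Amat
   and Pmat stand for any assembled AIJ matrices of compatible dimensions):

     Mat C;
     ierr = MatPtAP(Amat,Pmat,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
     ... change numerical values in Amat, keeping its nonzero pattern ...
     ierr = MatPtAP(Amat,Pmat,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);

   MAT_REUSE_MATRIX reuses the symbolic structure (and the supporting data
   attached to P below) and only recomputes the numerical values.
*/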
EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    /* free ptap only when the container was found; otherwise ptap is uninitialized */
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute the symbolic P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }

  /* now, compute the numeric P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
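/*
   Overview of the parallel algorithm (a summary added for orientation, not in
   the original source).  With A stored on each process as a diagonal block A_d
   and an off-diagonal block A_o, and with P_loc the local rows of P and P_oth
   the remote rows of P matching the nonzero columns of A_o, each process first
   forms the sequential product

        C_seq = P_loc^T * (A_d*P_loc + A_o*P_oth)

   whose rows span all of P's columns.  The rows of C_seq are then merged
   across processes by row ownership (see the send/receive of i- and
   j-structure in MatPtAPSymbolic_MPIAIJ_MPIAIJ below) to assemble the
   distributed C.
*/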
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  /*--------------------------------------------------*/
  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;
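  /*
     How each symbolic row is accumulated (explanatory note, added): the column
     pattern of row i of P^T*A is gathered with a dense flag array plus a sparse
     index list (ptadenserow_*, ptasparserow_*); then, for each flagged row of
     P, its columns are merged into the sorted linked list lnk via PetscLLAdd().
     PetscLLClean() flushes the list into the free-space buffer and resets it.
     When the buffer runs out, GetMoreSpace() doubles the total list size.
  */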
  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj  = pi_oth[prow+1] - pi_oth[prow];
      pjj   = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out dense rows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  /*
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
  */

  /* add C_seq into mpi C */
  /*-----------------------*/
  free_space=PETSC_NULL; current_space=PETSC_NULL;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  /* determine row ownership */
  /*-------------------------*/
  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* determine the number of messages to send, and their lengths */
  /*-------------------------------------------------------------*/
  ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s = merge->len_s;
  len   = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* number of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ci[i+1] > ci[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*--------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*-------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }
  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of the k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (local to the destination process)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ci[i+1] - ci[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix on each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len  = ci[owners[rank+1]] - ci[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr    = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number in the k-th received i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure entry of the k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    arow = owners[rank] + i;
    anzi = ci[arow+1] - ci[arow];
    cji   = cj + ci[arow];
    ierr  = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextci[k]+1) - *nextci[k];
        cji   = buf_rj[k] + *nextci[k];
        ierr  = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi     = bi;
  merge->bj     = bj;
  merge->ci     = ci;
  merge->cj     = cj;
  merge->buf_ri = buf_ri;
  merge->buf_rj = buf_rj;
  merge->C_seq  = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  PetscFunctionReturn(0);
}
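/*
   Note on the numeric phase (added summary): MatPtAPNumeric_MPIAIJ_MPIAIJ
   below recomputes C_seq = P_loc^T*(A*P) row by row using the ci/cj pattern
   saved in merge, ships the value blocks for non-local rows with the same
   send/receive pattern recorded above (len_s, buf_ri, buf_rj), and merges the
   local and received values into the distributed C with MatSetValues().
*/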
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt             i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt             *cjj;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar            *pa_loc,*pA,*pa_oth;
  PetscInt             am=A->m,cN=C->N;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi,bj,ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i;
  PetscInt             *cseqi,*cseqj;
  PetscInt             *cseqj_tmp;
  MatScalar            *cseqa_tmp;
  PetscInt             stages[3];

  PetscFunctionBegin;
  ierr = PetscLogStageRegister(&stages[0],"NumAP_local");CHKERRQ(ierr);
  ierr = PetscLogStageRegister(&stages[1],"NumPtAP_local");CHKERRQ(ierr);
  ierr = PetscLogStageRegister(&stages[2],"NumPtAP_Comm");CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* get data from symbolic products */
  p_loc = (Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  cseqi = merge->ci; cseqj = merge->cj;
  ierr  = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  /* clear old values in C_seq */
  ierr  = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  for (i=0;i<am;i++) {
    ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr);
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
    ierr = PetscLogStagePop();CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr);
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
    ierr = PetscLogStagePop();CHKERRQ(ierr);
  }

  ierr = PetscFree(apa);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr  = PetscLogStagePush(stages[2]);CHKERRQ(ierr);
  len_s = merge->len_s;
  ierr  = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr  = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k]; /* beginning of the k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;            /* next row number in the k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure entry of the k-th received i-structure */
  }

  /* set values of ba */
  for (i=0; i<C->m; i++) {
    cseqrow = owners[rank] + i; /* global row index of C_seq */
    bj_i    = bj+bi[i];         /* col indices of the i-th row of C */
    bnzi    = bi[i+1] - bi[i];
    ierr    = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's C_seq into ba */
    cseqnzi   = cseqi[cseqrow+1] - cseqi[cseqrow];
    cseqj_tmp = cseqj + cseqi[cseqrow];
    cseqa_tmp = cseqa + cseqi[cseqrow];
    nextcseqj = 0;
    for (j=0; nextcseqj<cseqnzi; j++){
      if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
        ba_i[j] += cseqa_tmp[nextcseqj++];
      }
    }
    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj_tmp = buf_rj[k] + *(nextcseqi[k]);
        cseqa_tmp = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa_tmp[nextcseqj++];
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
    flops += 2*bnzi;
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscLogStagePop();CHKERRQ(ierr);

  ierr = PetscFree(cseqa);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameter:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(), MatPtAPNumeric(), MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,4);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
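/*
   Two-stage usage (an illustrative sketch, not part of the original source):
   when the same nonzero structure is projected repeatedly, the symbolic
   product can be built once and the numeric product recomputed as A changes.
   Amat and Pmat stand for any assembled SeqAIJ matrices of compatible size:

     Mat C;
     ierr = MatPtAPSymbolic(Amat,Pmat,1.0,&C);CHKERRQ(ierr);  structure only
     ierr = MatPtAPNumeric(Amat,Pmat,C);CHKERRQ(ierr);        fill in values
     ... update values in Amat ...
     ierr = MatPtAPNumeric(Amat,Pmat,C);CHKERRQ(ierr);        reuse structure
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/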
typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by the aspect ratio of P. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;
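  /*
     Sizing illustration (added, with made-up numbers): for a 100x100 A with
     nnz(A)=1000 projected by a 100x50 P, the initial guess above is
     (1000/100)*50 = 500 entries for C.  If a row overflows the remaining free
     space, GetMoreSpace() doubles the total list size, so a poor guess costs
     extra allocations and a final copy rather than a failure.
  */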
  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out dense rows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }

  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change the flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set initial free space to be nnz(A) scaled by the aspect ratio of P. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0;j<ptanzi;j++) {
        /* decompose by ppdof, not the loop index dof, which is 0 on the first pass */
        pdof = *ptaj%ppdof;
        prow = (*ptaj++)/ppdof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out dense rows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      /* the row of C being finalized is i*ppdof+dof */
      ci[i*ppdof+dof+1] = ci[i*ppdof+dof] + cnzi;
    }
  }

  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change the flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameter:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user with MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(), MatPtAPSymbolic(), MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *p=(Mat_SeqAIJ*)P->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ*)C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
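      /* Illustration (added): with cjj = {0,3,5,7,9} and apj = {3,7}, the k
         loop below walks cjj once, matching cjj[1]==3 and cjj[3]==7, so only
         caj[1] and caj[3] are updated.  Correctness relies on cjj being a
         sorted superset of apj, which the symbolic phase guarantees. */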
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ*)C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj  = pi_oth[prow+1] - pi_oth[prow];
      pjj   = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out dense rows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }

  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change the flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}