1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatPtAP" 13 /*@ 14 MatPtAP - Creates the matrix projection C = P^T * A * P 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the matrix 20 . P - the projection matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 31 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 32 33 Level: intermediate 34 35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 36 @*/ 37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 50 PetscValidType(P,2); 51 MatPreallocated(P); 52 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 56 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 57 58 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59 60 /* For now, we do not dispatch based on the type of A and P */ 61 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. 
*/ 62 fA = A->ops->ptap; 63 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64 fP = P->ops->ptap; 65 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67 68 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 70 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76 77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81 { 82 PetscErrorCode ierr; 83 Mat_MatMatMultMPI *ptap; 84 PetscObjectContainer container; 85 86 PetscFunctionBegin; 87 ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88 if (container) { 89 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 90 ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 91 ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 92 ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr); 93 ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr); 94 95 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 96 ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 97 } 98 ierr = PetscFree(ptap);CHKERRQ(ierr); 99 100 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 106 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 107 { 108 PetscErrorCode ierr; 109 Mat_MatMatMultMPI *ptap; 110 PetscObjectContainer container; 111 112 PetscFunctionBegin; 113 if (scall == MAT_INITIAL_MATRIX){ 114 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 115 ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 116 117 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 118 ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 119 120 /* get P_loc by taking all local rows of P */ 121 ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 122 123 /* attach the supporting struct to P for reuse */ 124 P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 125 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 126 ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 127 ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 128 129 /* now, compute symbolic local P^T*A*P */ 130 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 131 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 132 } else if (scall == MAT_REUSE_MATRIX){ 133 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 134 if (container) { 135 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 136 } else { 137 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 138 } 139 140 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 141 ierr = 
MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 142 143 /* get P_loc by taking all local rows of P */ 144 ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 145 146 } else { 147 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 148 } 149 /* now, compute numeric local P^T*A*P */ 150 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 151 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 152 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 153 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 159 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 160 { 161 PetscErrorCode ierr; 162 Mat P_loc,P_oth; 163 Mat_MatMatMultMPI *ptap; 164 PetscObjectContainer container; 165 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 166 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 167 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 168 Mat_SeqAIJ *p_loc,*p_oth; 169 PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj; 170 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 171 PetscInt pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj; 172 PetscInt aN=A->N,am=A->m,pN=P->N; 173 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 174 PetscBT lnkbt; 175 PetscInt prstart,prend,nprow_loc,nprow_oth; 176 PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 177 178 MPI_Comm comm=A->comm; 179 Mat B_mpi; 180 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 181 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 182 PetscInt len,proc,*dnz,*onz,*owners; 183 PetscInt anzi,*bi,*bj,bnzi,nspacedouble=0; 184 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci; 185 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 186 MPI_Status *status; 187 Mat_Merge_SeqsToMPI *merge; 188 189 PetscFunctionBegin; 190 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 191 if (container) { 192 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 193 } else { 194 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 195 } 196 /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */ 197 /*--------------------------------------------------*/ 198 /* get data from P_loc and P_oth */ 199 P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart; 200 p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data; 201 pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j; 202 prend = prstart + pm; 203 204 /* get ij structure of P_loc^T */ 205 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 206 ptJ=ptj; 207 208 /* Allocate ci array, arrays for fill computation and */ 209 /* free space for accumulating nonzero column info */ 210 ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 211 ci[0] = 0; 212 213 ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 214 ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 215 ptasparserow_loc = ptadenserow_loc + aN; 216 ptadenserow_oth = ptasparserow_loc + aN; 217 ptasparserow_oth = ptadenserow_oth + aN; 218 219 /* create and initialize a linked list */ 220 nlnk = pN+1; 221 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 222 223 /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. 
*/ 224 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 225 nnz = adi[am] + aoi[am]; 226 ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space); 227 current_space = free_space; 228 229 /* determine symbolic info for each row of C: */ 230 for (i=0;i<pN;i++) { 231 ptnzi = pti[i+1] - pti[i]; 232 nprow_loc = 0; nprow_oth = 0; 233 /* i-th row of symbolic P_loc^T*A_loc: */ 234 for (j=0;j<ptnzi;j++) { 235 arow = *ptJ++; 236 /* diagonal portion of A */ 237 anzj = adi[arow+1] - adi[arow]; 238 ajj = adj + adi[arow]; 239 for (k=0;k<anzj;k++) { 240 col = ajj[k]+prstart; 241 if (!ptadenserow_loc[col]) { 242 ptadenserow_loc[col] = -1; 243 ptasparserow_loc[nprow_loc++] = col; 244 } 245 } 246 /* off-diagonal portion of A */ 247 anzj = aoi[arow+1] - aoi[arow]; 248 ajj = aoj + aoi[arow]; 249 for (k=0;k<anzj;k++) { 250 col = a->garray[ajj[k]]; /* global col */ 251 if (col < cstart){ 252 col = ajj[k]; 253 } else if (col >= cend){ 254 col = ajj[k] + pm; 255 } else { 256 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 257 } 258 if (!ptadenserow_oth[col]) { 259 ptadenserow_oth[col] = -1; 260 ptasparserow_oth[nprow_oth++] = col; 261 } 262 } 263 } 264 265 /* using symbolic info of local PtA, determine symbolic info for row of C: */ 266 cnzi = 0; 267 /* rows of P_loc */ 268 ptaj = ptasparserow_loc; 269 for (j=0; j<nprow_loc; j++) { 270 prow = *ptaj++; 271 prow -= prstart; /* rm */ 272 pnzj = pi_loc[prow+1] - pi_loc[prow]; 273 pjj = pj_loc + pi_loc[prow]; 274 /* add non-zero cols of P into the sorted linked list lnk */ 275 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 276 cnzi += nlnk; 277 } 278 /* rows of P_oth */ 279 ptaj = ptasparserow_oth; 280 for (j=0; j<nprow_oth; j++) { 281 prow = *ptaj++; 282 if (prow >= prend) prow -= pm; /* rm */ 283 pnzj = pi_oth[prow+1] - pi_oth[prow]; 284 pjj = pj_oth + pi_oth[prow]; 285 /* add non-zero cols of P into the sorted linked list lnk */ 286 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 287 cnzi += nlnk; 288 } 289 290 /* If free space is not available, make more free space */ 291 /* Double the amount of total space in the list */ 292 if (current_space->local_remaining<cnzi) { 293 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 294 } 295 296 /* Copy data into free space, and zero out denserows */ 297 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 298 current_space->array += cnzi; 299 current_space->local_used += cnzi; 300 current_space->local_remaining -= cnzi; 301 302 for (j=0;j<nprow_loc; j++) { 303 ptadenserow_loc[ptasparserow_loc[j]] = 0; 304 } 305 for (j=0;j<nprow_oth; j++) { 306 ptadenserow_oth[ptasparserow_oth[j]] = 0; 307 } 308 309 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 310 /* For now, we will recompute what is needed. */ 311 ci[i+1] = ci[i] + cnzi; 312 } 313 /* Clean up. 
*/ 314 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 315 316 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 317 /* Allocate space for cj, initialize cj, and */ 318 /* destroy list of free space and other temporary array(s) */ 319 ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 320 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 321 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 322 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 323 324 /* Allocate space for ca */ 325 /* 326 ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 327 ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 328 */ 329 330 /* add C_seq into mpi C */ 331 /*-----------------------------------*/ 332 free_space=PETSC_NULL; current_space=PETSC_NULL; 333 334 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 335 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 336 337 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 338 339 340 /* determine row ownership */ 341 /*---------------------------------------------------------*/ 342 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 343 ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr); 344 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 345 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 346 347 /* determine the number of messages to send, their lengths */ 348 /*---------------------------------------------------------*/ 349 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 350 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 351 len_s = merge->len_s; 352 len = 0; /* length of buf_si[] */ 353 merge->nsend = 0; 354 for (proc=0; proc<size; proc++){ 355 len_si[proc] = 0; 356 if (proc == rank){ 357 len_s[proc] = 0; 358 } else { 359 len_si[proc] = owners[proc+1] - owners[proc] + 1; 360 len_s[proc] = ci[owners[proc+1]] - ci[owners[proc]]; /* num of rows to be sent to [proc] */ 361 } 362 if (len_s[proc]) { 363 merge->nsend++; 364 nrows = 0; 365 for (i=owners[proc]; i<owners[proc+1]; i++){ 366 if (ci[i+1] > ci[i]) nrows++; 367 } 368 len_si[proc] = 2*(nrows+1); 369 len += len_si[proc]; 370 } 371 } 372 373 /* determine the number and length of messages to receive for ij-structure */ 374 /*-------------------------------------------------------------------------*/ 375 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 376 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 377 378 /* post the Irecv of j-structure */ 379 /*-------------------------------*/ 380 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 381 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 382 383 /* post the Isend of j-structure */ 384 /*--------------------------------*/ 385 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 386 sj_waits = si_waits + merge->nsend; 387 388 for (proc=0, k=0; proc<size; proc++){ 389 if (!len_s[proc]) continue; 390 i = owners[proc]; 391 ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 392 k++; 393 } 394 395 /* receives and sends of j-structure are complete */ 396 /*------------------------------------------------*/ 397 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 398 ierr = 
MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 399 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 400 401 /* send and recv i-structure */ 402 /*---------------------------*/ 403 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 404 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 405 406 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 407 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 408 for (proc=0,k=0; proc<size; proc++){ 409 if (!len_s[proc]) continue; 410 /* form outgoing message for i-structure: 411 buf_si[0]: nrows to be sent 412 [1:nrows]: row index (global) 413 [nrows+1:2*nrows+1]: i-structure index 414 */ 415 /*-------------------------------------------*/ 416 nrows = len_si[proc]/2 - 1; 417 buf_si_i = buf_si + nrows+1; 418 buf_si[0] = nrows; 419 buf_si_i[0] = 0; 420 nrows = 0; 421 for (i=owners[proc]; i<owners[proc+1]; i++){ 422 anzi = ci[i+1] - ci[i]; 423 if (anzi) { 424 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 425 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 426 nrows++; 427 } 428 } 429 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 430 k++; 431 buf_si += len_si[proc]; 432 } 433 434 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 435 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 436 437 ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 438 for (i=0; i<merge->nrecv; i++){ 439 ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 440 } 441 442 ierr = PetscFree(len_si);CHKERRQ(ierr); 443 ierr = PetscFree(len_ri);CHKERRQ(ierr); 444 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 445 ierr = PetscFree(si_waits);CHKERRQ(ierr); 446 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 447 ierr = PetscFree(buf_s);CHKERRQ(ierr); 448 ierr = PetscFree(status);CHKERRQ(ierr); 449 450 /* compute a local seq matrix in each processor */ 451 /*----------------------------------------------*/ 452 /* allocate bi array and free space for accumulating nonzero column info */ 453 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 454 bi[0] = 0; 455 456 /* create and initialize a linked list */ 457 nlnk = pN+1; 458 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 459 460 /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */ 461 len = 0; 462 len = ci[owners[rank+1]] - ci[owners[rank]]; 463 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 464 current_space = free_space; 465 466 /* determine symbolic info for each local row */ 467 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 468 nextrow = buf_ri_k + merge->nrecv; 469 nextci = nextrow + merge->nrecv; 470 for (k=0; k<merge->nrecv; k++){ 471 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 472 nrows = *buf_ri_k[k]; 473 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 474 nextci[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 475 } 476 477 ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr); 478 len = 0; 479 for (i=0; i<pn; i++) { 480 bnzi = 0; 481 /* add local non-zero cols of this proc's C_seq into lnk */ 482 arow = owners[rank] + i; 483 anzi = ci[arow+1] - ci[arow]; 
484 cji = cj + ci[arow]; 485 ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 486 bnzi += nlnk; 487 /* add received col data into lnk */ 488 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 489 if (i == *nextrow[k]) { /* i-th row */ 490 anzi = *(nextci[k]+1) - *nextci[k]; 491 cji = buf_rj[k] + *nextci[k]; 492 ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 493 bnzi += nlnk; 494 nextrow[k]++; nextci[k]++; 495 } 496 } 497 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 498 499 /* if free space is not available, make more free space */ 500 if (current_space->local_remaining<bnzi) { 501 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 502 nspacedouble++; 503 } 504 /* copy data into free space, then initialize lnk */ 505 ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 506 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 507 508 current_space->array += bnzi; 509 current_space->local_used += bnzi; 510 current_space->local_remaining -= bnzi; 511 512 bi[i+1] = bi[i] + bnzi; 513 } 514 515 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 516 517 ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 518 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 519 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 520 521 /* create symbolic parallel matrix B_mpi */ 522 /*---------------------------------------*/ 523 ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 524 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 525 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 526 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 527 528 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 529 B_mpi->assembled = PETSC_FALSE; 530 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 531 merge->bi = bi; 532 merge->bj = bj; 533 merge->ci = ci; 534 merge->cj = cj; 535 merge->buf_ri = buf_ri; 536 merge->buf_rj = buf_rj; 537 merge->C_seq = PETSC_NULL; 538 539 /* attach the supporting struct to B_mpi for reuse */ 540 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 541 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 542 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 543 *C = B_mpi; 544 545 PetscFunctionReturn(0); 546 547 #ifdef TOBEREMOVED 548 PetscErrorCode ierr; 549 Mat C_seq; 550 Mat_MatMatMultMPI *ptap; 551 PetscObjectContainer container; 552 553 PetscFunctionBegin; 554 555 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 556 if (container) { 557 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 558 } else { 559 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 560 } 561 /* compute C_seq = P_loc^T * A_loc * P_seq */ 562 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr); 563 564 /* add C_seq into mpi C */ 565 ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 566 567 PetscFunctionReturn(0); 568 #endif /* TOBEREMOVED */ 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 573 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 574 { 575 PetscErrorCode ierr; 576 Mat_Merge_SeqsToMPI *merge; 577 Mat_MatMatMultMPI *ptap; 578 PetscObjectContainer 
cont_merge,cont_ptap; 579 580 PetscInt flops=0; 581 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 582 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 583 Mat_SeqAIJ *p_loc,*p_oth; 584 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 585 PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj; 586 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 587 PetscInt *cjj; 588 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj; 589 MatScalar *pa_loc,*pA,*pa_oth; 590 PetscInt am=A->m,cN=C->N; 591 592 MPI_Comm comm=C->comm; 593 PetscMPIInt size,rank,taga,*len_s; 594 PetscInt *owners; 595 PetscInt proc; 596 PetscInt **buf_ri,**buf_rj; 597 PetscInt cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */ 598 PetscInt nrows,**buf_ri_k,**nextrow,**nextcseqi; 599 MPI_Request *s_waits,*r_waits; 600 MPI_Status *status; 601 MatScalar **abuf_r,*ba_i; 602 PetscInt *cseqi,*cseqj; 603 PetscInt *cseqj_tmp; 604 MatScalar *cseqa_tmp; 605 606 PetscFunctionBegin; 607 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 608 if (cont_merge) { 609 ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 610 } else { 611 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 612 } 613 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 614 if (cont_ptap) { 615 ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 616 } else { 617 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 618 } 619 /* get data from symbolic products */ 620 p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data; 621 p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data; 622 pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc; 623 pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a; 624 625 cseqi = merge->ci; cseqj=merge->cj; 626 ierr = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr); 627 ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr); 628 629 /* Allocate temporary array for storage of one row of A*P */ 630 ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 631 ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 632 apj = (PetscInt *)(apa + cN); 633 apjdense = apj + cN; 634 635 /* Clear old values in C_Seq */ 636 ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr); 637 638 for (i=0;i<am;i++) { 639 /* Form i-th sparse row of A*P */ 640 apnzj = 0; 641 /* diagonal portion of A */ 642 anzi = adi[i+1] - adi[i]; 643 for (j=0;j<anzi;j++) { 644 prow = *adj; 645 adj++; 646 pnzj = pi_loc[prow+1] - pi_loc[prow]; 647 pjj = pj_loc + pi_loc[prow]; 648 paj = pa_loc + pi_loc[prow]; 649 for (k=0;k<pnzj;k++) { 650 if (!apjdense[pjj[k]]) { 651 apjdense[pjj[k]] = -1; 652 apj[apnzj++] = pjj[k]; 653 } 654 apa[pjj[k]] += (*ada)*paj[k]; 655 } 656 flops += 2*pnzj; 657 ada++; 658 } 659 /* off-diagonal portion of A */ 660 anzi = aoi[i+1] - aoi[i]; 661 for (j=0;j<anzi;j++) { 662 col = a->garray[*aoj]; 663 if (col < cstart){ 664 prow = *aoj; 665 } else if (col >= cend){ 666 prow = *aoj; 667 } else { 668 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 669 } 670 aoj++; 671 pnzj = pi_oth[prow+1] - pi_oth[prow]; 672 pjj = pj_oth + pi_oth[prow]; 673 paj = pa_oth + pi_oth[prow]; 674 for (k=0;k<pnzj;k++) { 675 if (!apjdense[pjj[k]]) { 676 apjdense[pjj[k]] = 
-1; 677 apj[apnzj++] = pjj[k]; 678 } 679 apa[pjj[k]] += (*aoa)*paj[k]; 680 } 681 flops += 2*pnzj; 682 aoa++; 683 } 684 /* Sort the j index array for quick sparse axpy. */ 685 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 686 687 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 688 pnzi = pi_loc[i+1] - pi_loc[i]; 689 for (j=0;j<pnzi;j++) { 690 nextap = 0; 691 crow = *pJ++; 692 cjj = cseqj + cseqi[crow]; 693 caj = cseqa + cseqi[crow]; 694 /* Perform sparse axpy operation. Note cjj includes apj. */ 695 for (k=0;nextap<apnzj;k++) { 696 if (cjj[k]==apj[nextap]) { 697 caj[k] += (*pA)*apa[apj[nextap++]]; 698 } 699 } 700 flops += 2*apnzj; 701 pA++; 702 } 703 704 /* Zero the current row info for A*P */ 705 for (j=0;j<apnzj;j++) { 706 apa[apj[j]] = 0.; 707 apjdense[apj[j]] = 0; 708 } 709 } 710 711 ierr = PetscFree(apa);CHKERRQ(ierr); 712 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 713 714 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 715 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 716 717 bi = merge->bi; 718 bj = merge->bj; 719 buf_ri = merge->buf_ri; 720 buf_rj = merge->buf_rj; 721 722 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 723 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 724 725 /* send and recv matrix values */ 726 /*-----------------------------*/ 727 len_s = merge->len_s; 728 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 729 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 730 731 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 732 for (proc=0,k=0; proc<size; proc++){ 733 if (!len_s[proc]) continue; 734 i = owners[proc]; 735 ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 736 k++; 737 } 738 739 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 740 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 741 ierr = PetscFree(status);CHKERRQ(ierr); 742 743 ierr = PetscFree(s_waits);CHKERRQ(ierr); 744 ierr = PetscFree(r_waits);CHKERRQ(ierr); 745 746 /* insert mat values of mpimat */ 747 /*----------------------------*/ 748 ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 749 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 750 nextrow = buf_ri_k + merge->nrecv; 751 nextcseqi = nextrow + merge->nrecv; 752 753 for (k=0; k<merge->nrecv; k++){ 754 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 755 nrows = *(buf_ri_k[k]); 756 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 757 nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 758 } 759 760 /* set values of ba */ 761 for (i=0; i<C->m; i++) { 762 cseqrow = owners[rank] + i; 763 bj_i = bj+bi[i]; /* col indices of the i-th row of C */ 764 bnzi = bi[i+1] - bi[i]; 765 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 766 767 /* add local non-zero vals of this proc's C_seq into ba */ 768 cseqnzi = cseqi[cseqrow+1] - cseqi[cseqrow]; 769 cseqj_tmp = cseqj + cseqi[cseqrow]; 770 cseqa_tmp = cseqa + cseqi[cseqrow]; 771 nextcseqj = 0; 772 for (j=0; nextcseqj<cseqnzi; j++){ 773 if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */ 774 ba_i[j] += cseqa_tmp[nextcseqj++]; 775 } 776 } 777 778 /* add received vals into ba */ 779 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 780 /* i-th row */ 781 if (i == *nextrow[k]) { 782 cseqnzi = 
*(nextcseqi[k]+1) - *nextcseqi[k]; 783 cseqj_tmp = buf_rj[k] + *(nextcseqi[k]); 784 cseqa_tmp = abuf_r[k] + *(nextcseqi[k]); 785 nextcseqj = 0; 786 for (j=0; nextcseqj<cseqnzi; j++){ 787 if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */ 788 ba_i[j] += cseqa_tmp[nextcseqj++]; 789 } 790 } 791 nextrow[k]++; nextcseqi[k]++; 792 } 793 } 794 ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 795 } 796 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 797 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 798 799 ierr = PetscFree(cseqa);CHKERRQ(ierr); 800 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 801 ierr = PetscFree(ba_i);CHKERRQ(ierr); 802 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 803 804 PetscFunctionReturn(0); 805 } 806 807 #ifdef TOBEREMOVED 808 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 811 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 812 { 813 PetscErrorCode ierr; 814 Mat_MatMatMultMPI *ptap; 815 PetscObjectContainer container; 816 817 PetscFunctionBegin; 818 ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 819 if (container) { 820 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 821 ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 822 ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 823 ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr); 824 ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr); 825 826 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 827 ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 828 } 829 ierr = PetscFree(ptap);CHKERRQ(ierr); 830 831 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 832 PetscFunctionReturn(0); 833 } 834 835 #undef __FUNCT__ 836 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 837 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 838 { 839 PetscErrorCode ierr; 840 Mat_MatMatMultMPI *ptap; 841 PetscObjectContainer container; 842 843 PetscFunctionBegin; 844 if (scall == MAT_INITIAL_MATRIX){ 845 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 846 ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 847 848 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 849 ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 850 851 /* get P_loc by taking all local rows of P */ 852 ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 853 854 /* attach the supporting struct to P for reuse */ 855 P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 856 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 857 ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 858 ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 859 860 /* now, compute symbolic local P^T*A*P */ 861 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 862 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 863 } else if (scall == MAT_REUSE_MATRIX){ 864 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 865 if (container) { 866 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 867 } else { 868 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 869 } 870 /* update P_oth */ 871 ierr = 
MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 872 873 } else { 874 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 875 } 876 /* now, compute numeric local P^T*A*P */ 877 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 878 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 879 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 880 881 PetscFunctionReturn(0); 882 } 883 /* Set symbolic info for i-th row of local product C=P^T*A*P */ 884 #define MatPtAPSymbolicLocal_Private(ptM,pti,ptj,ci,cj) 0;\ 885 {\ 886 /* allocate ci array, arrays for fill computation and */\ 887 /* free space for accumulating nonzero column info */\ 888 ierr = PetscMalloc((ptM+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);\ 889 ci[0] = 0;\ 890 \ 891 /* set initial free space to be nnz(A) scaled by fill*P->N/PtM. */\ 892 /* this should be reasonable if sparsity of PtAP is similar to that of A. */\ 893 nnz = adi[am] + aoi[am];\ 894 ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/ptM+1),&free_space);\ 895 current_space = free_space;\ 896 \ 897 ptJ=ptj;\ 898 for (i=0; i<ptM; i++) {\ 899 ptnzi = pti[i+1] - pti[i];\ 900 nprow_loc = 0; nprow_oth = 0;\ 901 /* i-th row of symbolic Pt*A: */\ 902 for (j=0;j<ptnzi;j++) {\ 903 arow = *ptJ++;\ 904 /* diagonal portion of A */\ 905 anzj = adi[arow+1] - adi[arow];\ 906 ajj = adj + adi[arow];\ 907 for (k=0;k<anzj;k++) {\ 908 col = ajj[k]+prstart;\ 909 if (!ptadenserow_loc[col]) {\ 910 ptadenserow_loc[col] = -1;\ 911 ptasparserow_loc[nprow_loc++] = col;\ 912 }\ 913 }\ 914 /* off-diagonal portion of A */\ 915 anzj = aoi[arow+1] - aoi[arow];\ 916 ajj = aoj + aoi[arow];\ 917 for (k=0;k<anzj;k++) {\ 918 col = a->garray[ajj[k]]; /* global col */\ 919 if (col < cstart){\ 920 col = ajj[k];\ 921 } else if (col >= cend){\ 922 col = ajj[k] + pm;\ 923 } else {\ 924 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");\ 925 }\ 926 if (!ptadenserow_oth[col]) {\ 927 ptadenserow_oth[col] = -1;\ 928 ptasparserow_oth[nprow_oth++] = col;\ 929 }\ 930 }\ 931 }\ 932 /* determine symbolic info for row of C_seq: */\ 933 cnzi = 0;\ 934 /* rows of P_loc */\ 935 ptaj = ptasparserow_loc;\ 936 for (j=0; j<nprow_loc; j++) {\ 937 prow = *ptaj++; \ 938 prow -= prstart; /* rm */\ 939 pnzj = pi_loc[prow+1] - pi_loc[prow];\ 940 pjj = pj_loc + pi_loc[prow];\ 941 /* add non-zero cols of P into the sorted linked list lnk */\ 942 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\ 943 cnzi += nlnk;\ 944 }\ 945 /* rows of P_oth */\ 946 ptaj = ptasparserow_oth;\ 947 for (j=0; j<nprow_oth; j++) {\ 948 prow = *ptaj++; \ 949 if (prow >= prend) prow -= pm; /* rm */\ 950 pnzj = pi_oth[prow+1] - pi_oth[prow];\ 951 pjj = pj_oth + pi_oth[prow];\ 952 /* add non-zero cols of P into the sorted linked list lnk */\ 953 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\ 954 cnzi += nlnk;\ 955 }\ 956 \ 957 /* If free space is not available, make more free space */\ 958 /* Double the amount of total space in the list */\ 959 if (current_space->local_remaining<cnzi) {\ 960 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr);\ 961 }\ 962 \ 963 /* Copy data into free space, and zero out denserows */\ 964 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);\ 965 current_space->array += cnzi;\ 966 current_space->local_used += cnzi;\ 967 current_space->local_remaining -= cnzi;\ 968 \ 969 for (j=0;j<nprow_loc; j++) {\ 970 ptadenserow_loc[ptasparserow_loc[j]] = 
0;\ 971 }\ 972 for (j=0;j<nprow_oth; j++) {\ 973 ptadenserow_oth[ptasparserow_oth[j]] = 0;\ 974 }\ 975 \ 976 /* Aside: Perhaps we should save the pta info for the numerical factorization. */\ 977 /* For now, we will recompute what is needed. */ \ 978 ci[i+1] = ci[i] + cnzi;\ 979 }\ 980 /* nnz is now stored in ci[ptm], column indices are in the list of free space */\ 981 /* Allocate space for cj, initialize cj, and */\ 982 /* destroy list of free space and other temporary array(s) */\ 983 ierr = PetscMalloc((ci[ptM]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);\ 984 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);\ 985 } 986 #undef __FUNCT__ 987 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 988 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 989 { 990 PetscErrorCode ierr; 991 Mat P_loc,P_oth; 992 Mat_MatMatMultMPI *ptap; 993 PetscObjectContainer container; 994 FreeSpaceList free_space=PETSC_NULL,current_space; 995 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 996 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data, 997 *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 998 Mat_SeqAIJ *p_loc,*p_oth; 999 PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj,*rmap=p->garray; 1000 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1001 PetscInt pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj; 1002 PetscInt aN=A->N,am=A->m,pN=P->N; 1003 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi,row; 1004 PetscBT lnkbt; 1005 PetscInt prstart,prend,nprow_loc,nprow_oth; 1006 PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1007 1008 MPI_Comm comm=A->comm; 1009 Mat B_mpi; 1010 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 1011 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 1012 PetscInt len,proc,*dnz,*onz,*owners; 1013 PetscInt anzi,*bi,*bj,bnzi,nspacedouble=0; 1014 PetscInt nrows,tnrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci; 1015 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 1016 MPI_Status *status; 1017 Mat_Merge_SeqsToMPI *merge; 1018 PetscInt *ptdi,*ptdj,*cdi,*cdj; 1019 PetscInt *ptoi,*ptoj,*coi,*coj; 1020 1021 PetscFunctionBegin; 1022 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 1023 if (container) { 1024 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 1025 } else { 1026 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 1027 } 1028 1029 /* determine row ownership of C */ 1030 /*---------------------------------------------------------*/ 1031 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1032 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1033 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 1034 1035 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 1036 ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr); 1037 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 1038 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 1039 1040 /* get data from P_loc and P_oth */ 1041 P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart; 1042 p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data; 1043 pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j; 1044 prend = prstart + pm; 1045 1046 /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */ 1047 ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 1048 ierr = 
PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 1049 ptasparserow_loc = ptadenserow_loc + aN; 1050 ptadenserow_oth = ptasparserow_loc + aN; 1051 ptasparserow_oth = ptadenserow_oth + aN; 1052 1053 /* create and initialize a linked list */ 1054 nlnk = pN+1; 1055 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1056 1057 /* Pt = P_loc^T */ 1058 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1059 ierr = MatPtAPSymbolicLocal_Private(pN,pti,ptj,ci,cj);CHKERRQ(ierr); 1060 1061 /* Pt = Pd^T - diagonal portion of P_loc */ 1062 ierr = MatGetSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr); 1063 ierr = MatPtAPSymbolicLocal_Private(pn,ptdi,ptdj,cdi,cdj);CHKERRQ(ierr); 1064 ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr); 1065 #ifdef NEW 1066 /* Pt = Po^T - off-diagonal portion of P_loc */ 1067 ierr = MatGetSymbolicTranspose_SeqAIJ(p->B,&ptoi,&ptoj);CHKERRQ(ierr); 1068 ierr = MatPtAPSymbolicLocal_Private(tnrows,ptoi,ptoj,coi,coj);CHKERRQ(ierr); 1069 ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&ptoi,&ptoj);CHKERRQ(ierr); 1070 #endif 1071 /* clean up */ 1072 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1073 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1074 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1075 1076 /* determine the number of messages to send, their lengths */ 1077 /*---------------------------------------------------------*/ 1078 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 1079 ierr = PetscMemzero(len_si,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 1080 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 1081 len_s = merge->len_s; 1082 len = 0; /* max length of buf_si[] */ 1083 merge->nsend = 0; 1084 tnrows = (p->B)->N; /* total num of rows to be sent to other processors */ 1085 proc = 0; 1086 for (i=0; i<tnrows; i++){ 1087 while (rmap[i] >= owners[proc+1]) proc++; 1088 len_si[proc]++; 1089 } 1090 for (proc=0; proc<size; proc++){ 1091 len_s[proc] = 0; 1092 if (len_si[proc]){ 1093 merge->nsend++; 1094 len_si[proc] = 2*(len_si[proc] + 1); 1095 len_s[proc] = ci[owners[proc+1]] - ci[owners[proc]]; /* num of col indices to be sent to [proc] */ 1096 len += len_si[proc]; 1097 } 1098 } 1099 1100 /* determine the number and length of messages to receive for ij-structure */ 1101 /*-------------------------------------------------------------------------*/ 1102 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 1103 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 1104 1105 /* post the Irecv of j-structure */ 1106 /*-------------------------------*/ 1107 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 1108 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 1109 1110 /* post the Isend of j-structure */ 1111 /*--------------------------------*/ 1112 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 1113 sj_waits = si_waits + merge->nsend; 1114 1115 for (proc=0, k=0; proc<size; proc++){ 1116 if (!len_s[proc]) continue; 1117 i = owners[proc]; 1118 ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 1119 k++; 1120 } 1121 1122 /* receives and sends of j-structure are complete */ 1123 /*------------------------------------------------*/ 1124 ierr = 
PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 1125 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 1126 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 1127 1128 /* send and recv i-structure */ 1129 /*---------------------------*/ 1130 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 1131 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 1132 1133 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 1134 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 1135 1136 j = 0; /* row index to be sent */ 1137 for (proc=0,k=0; proc<size; proc++){ 1138 if (!len_s[proc]) continue; 1139 /* form outgoing message for i-structure: 1140 buf_si[0]: nrows to be sent 1141 [1:nrows]: row index (global) 1142 [nrows+1:2*nrows+1]: i-structure index 1143 */ 1144 /*-------------------------------------------*/ 1145 nrows = len_si[proc]/2 - 1; 1146 buf_si_i = buf_si + nrows+1; 1147 buf_si[0] = nrows; 1148 buf_si_i[0] = 0; 1149 for (i=0; i<nrows; i++){ 1150 row = rmap[j++]; 1151 anzi = ci[row+1] - ci[row]; 1152 buf_si_i[i+1] = buf_si_i[i] + anzi; /* i-structure */ 1153 buf_si[i+1] = row - owners[proc]; /* local row index */ 1154 } 1155 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 1156 k++; 1157 buf_si += len_si[proc]; 1158 } 1159 1160 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 1161 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 1162 1163 ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 1164 for (i=0; i<merge->nrecv; i++){ 1165 ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 1166 } 1167 1168 ierr = PetscFree(len_si);CHKERRQ(ierr); 1169 ierr = PetscFree(len_ri);CHKERRQ(ierr); 1170 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 1171 ierr = PetscFree(si_waits);CHKERRQ(ierr); 1172 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 1173 ierr = PetscFree(buf_s);CHKERRQ(ierr); 1174 ierr = PetscFree(status);CHKERRQ(ierr); 1175 1176 /* compute a local seq matrix in each processor */ 1177 /*----------------------------------------------*/ 1178 /* allocate bi array and free space for accumulating nonzero column info */ 1179 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1180 bi[0] = 0; 1181 1182 /* create and initialize a linked list */ 1183 nlnk = pN+1; 1184 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1185 1186 /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */ 1187 len = 0; 1188 len = ci[owners[rank+1]] - ci[owners[rank]]; 1189 free_space=PETSC_NULL; 1190 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 1191 current_space = free_space; 1192 1193 /* determine symbolic info for each local row of C */ 1194 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 1195 nextrow = buf_ri_k + merge->nrecv; 1196 nextci = nextrow + merge->nrecv; 1197 for (k=0; k<merge->nrecv; k++){ 1198 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 1199 nrows = *buf_ri_k[k]; 1200 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 1201 nextci[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 1202 } 1203 1204 ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr); 1205 len = 
0; 1206 for (i=0; i<pn; i++) { 1207 bnzi = 0; 1208 /* add local non-zero cols of this proc's C_seq into lnk */ 1209 anzi = cdi[i+1] - cdi[i]; 1210 cji = cdj + cdi[i]; 1211 ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1212 bnzi += nlnk; 1213 /* add received col data into lnk */ 1214 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 1215 if (i == *nextrow[k]) { /* i-th row */ 1216 anzi = *(nextci[k]+1) - *nextci[k]; 1217 cji = buf_rj[k] + *nextci[k]; 1218 ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1219 bnzi += nlnk; 1220 nextrow[k]++; nextci[k]++; 1221 } 1222 } 1223 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 1224 1225 /* if free space is not available, make more free space */ 1226 if (current_space->local_remaining<bnzi) { 1227 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1228 nspacedouble++; 1229 } 1230 /* copy data into free space, then initialize lnk */ 1231 ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1232 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 1233 1234 current_space->array += bnzi; 1235 current_space->local_used += bnzi; 1236 current_space->local_remaining -= bnzi; 1237 1238 bi[i+1] = bi[i] + bnzi; 1239 } 1240 1241 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 1242 1243 ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1244 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1245 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1246 1247 /* create symbolic parallel matrix B_mpi */ 1248 /*---------------------------------------*/ 1249 ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 1250 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 1251 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 1252 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1253 1254 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 1255 B_mpi->assembled = PETSC_FALSE; 1256 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 1257 merge->bi = bi; 1258 merge->bj = bj; 1259 merge->ci = ci; 1260 merge->cj = cj; 1261 merge->buf_ri = buf_ri; 1262 merge->buf_rj = buf_rj; 1263 merge->C_seq = PETSC_NULL; 1264 1265 /* attach the supporting struct to B_mpi for reuse */ 1266 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 1267 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 1268 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 1269 *C = B_mpi; 1270 1271 ierr = PetscFree(cdi);CHKERRQ(ierr); 1272 ierr = PetscFree(cdj);CHKERRQ(ierr); 1273 #ifdef NEW 1274 ierr = PetscFree(coi);CHKERRQ(ierr); 1275 ierr = PetscFree(coj);CHKERRQ(ierr); 1276 #endif 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 1282 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 1283 { 1284 PetscErrorCode ierr; 1285 Mat_Merge_SeqsToMPI *merge; 1286 Mat_MatMatMultMPI *ptap; 1287 PetscObjectContainer cont_merge,cont_ptap; 1288 PetscInt flops=0; 1289 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data; 1290 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 1291 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 1292 Mat_SeqAIJ *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data; 1293 PetscInt 
*adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense; 1294 PetscInt *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj; 1295 PetscInt i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow; 1296 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj,**abuf_r,*ba_i,*ca; 1297 MatScalar *pA_d=pd->a,*pA_o=po->a,*pa_oth; 1298 PetscInt am=A->m,cN=C->N,cm=C->m; 1299 MPI_Comm comm=C->comm; 1300 PetscMPIInt size,rank,taga,*len_s,**buf_ri,**buf_rj,**buf_ri_k,**nextrow,**nextcseqi; 1301 PetscInt *owners,proc,nrows; 1302 PetscInt *cjj,cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */ 1303 MPI_Request *s_waits,*r_waits; 1304 MPI_Status *status; 1305 PetscInt *cseqi,*cseqj,col; 1306 PetscInt stages[3]; 1307 1308 PetscFunctionBegin; 1309 ierr = PetscLogStageRegister(&stages[0],"NumAP_local");CHKERRQ(ierr); 1310 ierr = PetscLogStageRegister(&stages[1],"NumPtAP_local");CHKERRQ(ierr); 1311 ierr = PetscLogStageRegister(&stages[2],"NumPtAP_Comm");CHKERRQ(ierr); 1312 1313 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 1314 if (cont_merge) { 1315 ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 1316 } else { 1317 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 1318 } 1319 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 1320 if (cont_ptap) { 1321 ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 1322 } else { 1323 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 1324 } 1325 /* get data from symbolic products */ 1326 p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data; 1327 pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a; 1328 1329 cseqi = merge->ci; cseqj=merge->cj; 1330 ierr = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr); 1331 ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr); 1332 1333 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1334 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1335 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 1336 1337 /* Allocate temporary array for storage of one row of A*P and one row of C */ 1338 ierr = PetscMalloc(2*cN*(sizeof(MatScalar)+sizeof(PetscInt)),&apa);CHKERRQ(ierr); 1339 ierr = PetscMemzero(apa,2*cN*(sizeof(MatScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1340 ca = (MatScalar*)(apa + cN); 1341 apj = (PetscInt *)(ca + cN); 1342 apjdense = (PetscInt *)(apj + cN); 1343 1344 /* Clear old values in C */ 1345 ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1346 ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1347 1348 for (i=0;i<am;i++) { 1349 ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr); 1350 /* Form i-th sparse row of A*P */ 1351 apnzj = 0; 1352 /* diagonal portion of A */ 1353 nzi = adi[i+1] - adi[i]; 1354 for (j=0;j<nzi;j++) { 1355 prow = *adj++; 1356 /* diagonal portion of P */ 1357 pnzj = pd->i[prow+1] - pd->i[prow]; 1358 pjj = pd->j + pd->i[prow]; /* local col index of P */ 1359 paj = pd->a + pd->i[prow]; 1360 for (k=0;k<pnzj;k++) { 1361 col = *pjj + p->cstart; pjj++; /* global col index of P */ 1362 if (!apjdense[col]) { 1363 apjdense[col] = -1; 1364 apj[apnzj++] = col; 1365 } 1366 apa[col] += (*ada)*paj[k]; 1367 } 1368 flops += 2*pnzj; 1369 /* off-diagonal portion of P */ 1370 pnzj = po->i[prow+1] - po->i[prow]; 1371 pjj = po->j + po->i[prow]; /* local col index of P */ 1372 paj = po->a + po->i[prow]; 1373 for (k=0;k<pnzj;k++) { 1374 col 
= p->garray[*pjj]; pjj++; /* global col index of P */ 1375 if (!apjdense[col]) { 1376 apjdense[col] = -1; 1377 apj[apnzj++] = col; 1378 } 1379 apa[col] += (*ada)*paj[k]; 1380 } 1381 flops += 2*pnzj; 1382 1383 ada++; 1384 } 1385 /* off-diagonal portion of A */ 1386 nzi = aoi[i+1] - aoi[i]; 1387 for (j=0;j<nzi;j++) { 1388 prow = *aoj++; 1389 pnzj = pi_oth[prow+1] - pi_oth[prow]; 1390 pjj = pj_oth + pi_oth[prow]; 1391 paj = pa_oth + pi_oth[prow]; 1392 for (k=0;k<pnzj;k++) { 1393 if (!apjdense[pjj[k]]) { 1394 apjdense[pjj[k]] = -1; 1395 apj[apnzj++] = pjj[k]; 1396 } 1397 apa[pjj[k]] += (*aoa)*paj[k]; 1398 } 1399 flops += 2*pnzj; 1400 aoa++; 1401 } 1402 /* Sort the j index array for quick sparse axpy. */ 1403 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1404 1405 ierr = PetscLogStagePop(); 1406 ierr = PetscLogStagePush(stages[1]); 1407 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1408 /* diagonal portion of P -- gives matrix value of local C */ 1409 pnzi = pd->i[i+1] - pd->i[i]; 1410 for (j=0;j<pnzi;j++) { 1411 /* add value into C */ 1412 for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]]; 1413 crow = (*pJ_d++) + owners[rank]; 1414 ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr); 1415 ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr); 1416 pA_d++; 1417 flops += 2*apnzj; 1418 } 1419 1420 /* off-diagonal portion of P -- gives matrix values to be sent to others */ 1421 pnzi = po->i[i+1] - po->i[i]; 1422 for (j=0;j<pnzi;j++) { 1423 crow = p->garray[*pJ_o++]; 1424 cjj = cseqj + cseqi[crow]; 1425 caj = cseqa + cseqi[crow]; 1426 /* add value into C_seq to be sent to other processors */ 1427 nextap = 0; 1428 for (k=0;nextap<apnzj;k++) { 1429 if (cjj[k]==apj[nextap]) { 1430 caj[k] += (*pA_o)*apa[apj[nextap++]]; 1431 } 1432 } 1433 flops += 2*apnzj; 1434 pA_o++; 1435 } 1436 1437 /* Zero the current row info for A*P */ 1438 for (j=0;j<apnzj;j++) { 1439 apa[apj[j]] = 0.; 1440 apjdense[apj[j]] = 0; 1441 } 1442 ierr = PetscLogStagePop(); 1443 } 1444 1445 /* send and recv matrix values */ 1446 /*-----------------------------*/ 1447 ierr = PetscLogStagePush(stages[2]);CHKERRQ(ierr); 1448 bi = merge->bi; 1449 bj = merge->bj; 1450 buf_ri = merge->buf_ri; 1451 buf_rj = merge->buf_rj; 1452 len_s = merge->len_s; 1453 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 1454 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 1455 1456 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 1457 for (proc=0,k=0; proc<size; proc++){ 1458 if (!len_s[proc]) continue; 1459 i = owners[proc]; 1460 ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 1461 k++; 1462 } 1463 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 1464 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 1465 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 1466 ierr = PetscFree(status);CHKERRQ(ierr); 1467 1468 ierr = PetscFree(s_waits);CHKERRQ(ierr); 1469 ierr = PetscFree(r_waits);CHKERRQ(ierr); 1470 ierr = PetscFree(cseqa);CHKERRQ(ierr); 1471 1472 /* insert mat values of mpimat */ 1473 /*----------------------------*/ 1474 ba_i = apa; /* rename, temporary array for storage of one row of C */ 1475 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 1476 nextrow = buf_ri_k + merge->nrecv; 1477 nextcseqi = nextrow + merge->nrecv; 1478 1479 for (k=0; k<merge->nrecv; k++){ 1480 
    buf_ri_k[k]  = buf_ri[k];               /* beginning of k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;           /* next row index of k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi  = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj     = buf_rj[k] + *(nextcseqi[k]);
        cseqa     = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscLogStagePop();CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#endif /* TOBEREMOVED */

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
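
/*
   Illustrative usage sketch (not part of the library): how a caller might drive the
   SeqAIJ routines above through the MatPtAP() interface, creating C once and then
   reusing its nonzero pattern after the numerical entries of A change.  The matrices
   A and P below are assumed to be assembled SeqAIJ matrices created elsewhere.

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);  // symbolic + numeric
     // ... modify the values of A, keeping its nonzero pattern ...
     ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);    // numeric phase only
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/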
/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0;j<ptanzi;j++) {
        /* split the combined index into its dof component and P row; note ppdof
           (the dof count), not the loop counter dof, must be used as the divisor */
        pdof = *ptaj%ppdof;
        prow = (*ptaj++)/ppdof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
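
/*
   Illustrative sketch (not part of the library; assumes A and P are assembled SeqAIJ
   matrices created elsewhere): the explicit symbolic/numeric split documented below lets
   the nonzero pattern of C be computed once while the numerical product is recomputed
   whenever the entries of A change.

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);   // (i,j) pattern of C only
     ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);         // fill in the values
     // ... change the entries (not the pattern) of A ...
     ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);         // recompute the values in place
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/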
/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic and must be destroyed by
   the user using MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
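
/*
   Illustrative sketch (not library code) of the sparse accumulation pattern used by the
   numeric routines above and below: a dense work row apa[] accumulates A[i,:]*P,
   apjdense[] marks which columns have been touched, and apj[] records them so that only
   the touched entries are read back and reset.  All names here are hypothetical.

     for (j = 0; j < anzi; j++) {                   // nonzeros of row i of A
       prow = acol[j];                              // matching row of P
       for (k = pi[prow]; k < pi[prow+1]; k++) {
         col = pcol[k];
         if (!apjdense[col]) { apjdense[col] = -1; apj[apnzj++] = col; }
         apa[col] += aval[j]*pval[k];               // scatter-add into the dense row
       }
     }
     PetscSortInt(apnzj,apj);                       // sorted indices enable the sparse axpy
     for (j = 0; j < apnzj; j++) { apa[apj[j]] = 0.; apjdense[apj[j]] = 0; }  // O(nnz) reset
*/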
/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi     = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]]; /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}