/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(), MatPtAPNumeric(), MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);

  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
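/*
   Usage sketch (illustrative only, not part of the library): given assembled
   matrices A (n x n) and P (n x m), a caller forms C = P^T*A*P once, then
   reuses the same C after the numerical values (but not the nonzero pattern)
   of A have changed.  The fill estimate 1.0 and the guard macro
   PTAP_USAGE_EXAMPLE are assumptions made for this sketch.
*/
#ifdef PTAP_USAGE_EXAMPLE
static PetscErrorCode PtAPUsageExample(Mat A,Mat P)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr); /* creates C */
  /* ... modify the values of A, keeping its nonzero structure ... */
  ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);   /* recomputes C in place */
  ierr = MatDestroy(C);CHKERRQ(ierr);                          /* user must destroy C */
  PetscFunctionReturn(0);
}
#endif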
EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(ptap);CHKERRQ(ierr); /* free ptap only when it was actually retrieved */
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  MatScalar            *ca;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  /*--------------------------------------------------*/
  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ=ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0; i<pN; i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0; k<anzj; k++) {
        col = ajj[k] + prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0; k<anzj; k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj  = pi_oth[prow+1] - pi_oth[prow];
      pjj   = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0; j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  /*
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
  */

  /* add C_seq into mpi C */
  /*-----------------------------------*/
  free_space=PETSC_NULL; current_space=PETSC_NULL;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s = merge->len_s;
  len   = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ci[i+1] > ci[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ci[i+1] - ci[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len  = 0;
  len  = ci[owners[rank+1]] - ci[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    arow = owners[rank] + i;
    anzi = ci[arow+1] - ci[arow];
    cji  = cj + ci[arow];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->ci           = ci;
  merge->cj           = cj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->C_seq        = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  PetscFunctionReturn(0);

#ifdef TOBEREMOVED
  PetscErrorCode       ierr;
  Mat                  C_seq;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;

  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* compute C_seq = P_loc^T * A_loc * P_seq */
  ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);

  /* add C_seq into mpi C */
  ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
#endif /* TOBEREMOVED */
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;

  PetscInt    flops=0;
  Mat_MPIAIJ  *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ  *p_loc,*p_oth;
  PetscInt    *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt    *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt    i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt    *cjj;
  MatScalar   *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar   *pa_loc,*pA,*pa_oth;
  PetscInt    am=A->m,cN=C->N;

  MPI_Comm    comm=C->comm;
  PetscMPIInt size,rank,taga,*len_s;
  PetscInt    *owners;
  PetscInt    proc;
  PetscInt    **buf_ri,**buf_rj;
  PetscInt    cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt    nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request *s_waits,*r_waits;
  MPI_Status  *status;
  MatScalar   **abuf_r,*ba_i;
  PetscInt    *cseqi,*cseqj;
  PetscInt    *cseqj_tmp;
  MatScalar   *cseqa_tmp;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* get data from symbolic products */
  p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  cseqi = merge->ci; cseqj = merge->cj;
  ierr  = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr  = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Clear old values in C_seq */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0; j<anzi; j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0; j<anzi; j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* send and recv matrix values */
  /*-----------------------------*/
  len_s = merge->len_s;
  ierr  = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr  = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;            /* next row number of k-th recved i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  for (i=0; i<C->m; i++) {
    cseqrow = owners[rank] + i;
    bj_i    = bj+bi[i]; /* col indices of the i-th row of C */
    bnzi    = bi[i+1] - bi[i];
    ierr    = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's C_seq into ba */
    cseqnzi   = cseqi[cseqrow+1] - cseqi[cseqrow];
    cseqj_tmp = cseqj + cseqi[cseqrow];
    cseqa_tmp = cseqa + cseqi[cseqrow];
    nextcseqj = 0;
    for (j=0; nextcseqj<cseqnzi; j++){
      if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
        ba_i[j] += cseqa_tmp[nextcseqj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj_tmp = buf_rj[k] + *(nextcseqi[k]);
        cseqa_tmp = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa_tmp[nextcseqj++];
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(cseqa);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#ifdef TOBEREMOVED
EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }
    /* update P_oth */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Set symbolic info for i-th row of local product C=P^T*A*P */
#define MatPtAPSymbolicLocal_Private(ptM,pti,ptj,ci,cj) 0;\
{\
  /* allocate ci array, arrays for fill computation and */\
  /* free space for accumulating nonzero column info */\
  ierr = PetscMalloc((ptM+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);\
  ci[0] = 0;\
\
  /* set initial free space to be nnz(A) scaled by fill*P->N/ptM. */\
  /* this should be reasonable if sparsity of PtAP is similar to that of A. */\
  nnz = adi[am] + aoi[am];\
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/ptM+1),&free_space);\
  current_space = free_space;\
\
  ptJ=ptj;\
  for (i=0; i<ptM; i++) {\
    ptnzi = pti[i+1] - pti[i];\
    nprow_loc = 0; nprow_oth = 0;\
    /* i-th row of symbolic Pt*A: */\
    for (j=0;j<ptnzi;j++) {\
      arow = *ptJ++;\
      /* diagonal portion of A */\
      anzj = adi[arow+1] - adi[arow];\
      ajj = adj + adi[arow];\
      for (k=0;k<anzj;k++) {\
        col = ajj[k]+prstart;\
        if (!ptadenserow_loc[col]) {\
          ptadenserow_loc[col] = -1;\
          ptasparserow_loc[nprow_loc++] = col;\
        }\
      }\
      /* off-diagonal portion of A */\
      anzj = aoi[arow+1] - aoi[arow];\
      ajj = aoj + aoi[arow];\
      for (k=0;k<anzj;k++) {\
        col = a->garray[ajj[k]]; /* global col */\
        if (col < cstart){\
          col = ajj[k];\
        } else if (col >= cend){\
          col = ajj[k] + pm;\
        } else {\
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");\
        }\
        if (!ptadenserow_oth[col]) {\
          ptadenserow_oth[col] = -1;\
          ptasparserow_oth[nprow_oth++] = col;\
        }\
      }\
    }\
    /* determine symbolic info for row of C_seq: */\
    cnzi = 0;\
    /* rows of P_loc */\
    ptaj = ptasparserow_loc;\
    for (j=0; j<nprow_loc; j++) {\
      prow = *ptaj++;\
      prow -= prstart; /* rm */\
      pnzj = pi_loc[prow+1] - pi_loc[prow];\
      pjj = pj_loc + pi_loc[prow];\
      /* add non-zero cols of P into the sorted linked list lnk */\
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\
      cnzi += nlnk;\
    }\
    /* rows of P_oth */\
    ptaj = ptasparserow_oth;\
    for (j=0; j<nprow_oth; j++) {\
      prow = *ptaj++;\
      if (prow >= prend) prow -= pm; /* rm */\
      pnzj = pi_oth[prow+1] - pi_oth[prow];\
      pjj = pj_oth + pi_oth[prow];\
      /* add non-zero cols of P into the sorted linked list lnk */\
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\
      cnzi += nlnk;\
    }\
\
    /* If free space is not available, make more free space */\
    /* Double the amount of total space in the list */\
    if (current_space->local_remaining<cnzi) {\
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);\
    }\
\
    /* Copy data into free space, and zero out denserows */\
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);\
    current_space->array += cnzi;\
    current_space->local_used += cnzi;\
    current_space->local_remaining -= cnzi;\
\
    for (j=0;j<nprow_loc; j++) {\
      ptadenserow_loc[ptasparserow_loc[j]] = 0;\
    }\
    for (j=0;j<nprow_oth; j++) {\
      ptadenserow_oth[ptasparserow_oth[j]] = 0;\
    }\
\
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */\
    /* For now, we will recompute what is needed. */\
    ci[i+1] = ci[i] + cnzi;\
  }\
  /* nnz is now stored in ci[ptM], column indices are in the list of free space */\
  /* Allocate space for cj, initialize cj, and */\
  /* destroy list of free space and other temporary array(s) */\
  ierr = PetscMalloc((ci[ptM]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);\
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);\
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,
                       *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj,*rmap=p->garray;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi,row;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,tnrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;
  PetscInt             *ptdi,*ptdj,*cdi,*cdj;
  PetscInt             *ptoi,*ptoj,*coi,*coj;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* determine row ownership of C */
  /*---------------------------------------------------------*/
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Pt = P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(pN,pti,ptj,ci,cj);CHKERRQ(ierr);

  /* Pt = Pd^T - diagonal portion of P_loc */
  ierr = MatGetSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(pn,ptdi,ptdj,cdi,cdj);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr);
#ifdef NEW
  /* Pt = Po^T - off-diagonal portion of P_loc */
  ierr = MatGetSymbolicTranspose_SeqAIJ(p->B,&ptoi,&ptoj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(tnrows,ptoi,ptoj,coi,coj);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&ptoi,&ptoj);CHKERRQ(ierr);
#endif
  /* clean up */
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMemzero(len_si,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s = merge->len_s;
  len   = 0; /* max length of buf_si[] */
  merge->nsend = 0;
  tnrows = (p->B)->N; /* total num of rows to be sent to other processors */
  proc   = 0;
  for (i=0; i<tnrows; i++){
    while (rmap[i] >= owners[proc+1]) proc++;
    len_si[proc]++;
  }
  for (proc=0; proc<size; proc++){
    len_s[proc] = 0;
    if (len_si[proc]){
      merge->nsend++;
      len_si[proc] = 2*(len_si[proc] + 1);
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* num of col indices to be sent to [proc] */
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */

  j = 0; /* row index to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    for (i=0; i<nrows; i++){
      row  = rmap[j++];
      anzi = ci[row+1] - ci[row];
      buf_si_i[i+1] = buf_si_i[i] + anzi; /* i-structure */
      buf_si[i+1]   = row - owners[proc]; /* local row index */
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len  = 0;
  len  = ci[owners[rank+1]] - ci[owners[rank]];
  free_space = PETSC_NULL;
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row of C */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    anzi = cdi[i+1] - cdi[i];
    cji  = cdj + cdi[i];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->ci           = ci;
  merge->cj           = cj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->C_seq        = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  ierr = PetscFree(cdi);CHKERRQ(ierr);
  ierr = PetscFree(cdj);CHKERRQ(ierr);
#ifdef NEW
  ierr = PetscFree(coi);CHKERRQ(ierr);
  ierr = PetscFree(coj);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  Mat_SeqAIJ           *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  PetscInt    *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense;
  PetscInt    *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj;
  PetscInt    i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar   *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj,**abuf_r,*ba_i,*ca;
  MatScalar   *pA_d=pd->a,*pA_o=po->a,*pa_oth;
  PetscInt    am=A->m,cN=C->N,cm=C->m;
  MPI_Comm    comm=C->comm;
  PetscMPIInt size,rank,taga,*len_s,**buf_ri,**buf_rj,**buf_ri_k,**nextrow,**nextcseqi;
  PetscInt    *owners,proc,nrows;
  PetscInt    *cjj,cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  MPI_Request *s_waits,*r_waits;
  MPI_Status  *status;
  PetscInt    *cseqi,*cseqj,col;
  PetscInt    stages[3];

  PetscFunctionBegin;
  ierr = PetscLogStageRegister(&stages[0],"NumAP_local");CHKERRQ(ierr);
  ierr = PetscLogStageRegister(&stages[1],"NumPtAP_local");CHKERRQ(ierr);
  ierr = PetscLogStageRegister(&stages[2],"NumPtAP_Comm");CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* get data from symbolic products */
  p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  cseqi = merge->ci; cseqj = merge->cj;
  ierr  = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr  = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P and one row of C */
  ierr = PetscMalloc(2*cN*(sizeof(MatScalar)+sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,2*cN*(sizeof(MatScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
  ca       = (MatScalar*)(apa + cN);
  apj      = (PetscInt *)(ca + cN);
  apjdense = (PetscInt *)(apj + cN);

  /* Clear old values in C */
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr);
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    nzi = adi[i+1] - adi[i];
    for (j=0; j<nzi; j++) {
      prow = *adj++;
      /* diagonal portion of P */
      pnzj = pd->i[prow+1] - pd->i[prow];
      pjj  = pd->j + pd->i[prow]; /* local col index of P */
      paj  = pd->a + pd->i[prow];
      for (k=0; k<pnzj; k++) {
        col = *pjj + p->cstart; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      /* off-diagonal portion of P */
      pnzj = po->i[prow+1] - po->i[prow];
      pjj  = po->j + po->i[prow]; /* local col index of P */
      paj  = po->a + po->i[prow];
      for (k=0; k<pnzj; k++) {
        col = p->garray[*pjj]; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;

      ada++;
    }
    /* off-diagonal portion of A */
    nzi = aoi[i+1] - aoi[i];
    for (j=0; j<nzi; j++) {
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    ierr = PetscLogStagePop();
    ierr = PetscLogStagePush(stages[1]);
    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    /* diagonal portion of P -- gives matrix value of local C */
    pnzi = pd->i[i+1] - pd->i[i];
    for (j=0; j<pnzi; j++) {
      /* add value into C */
      for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]];
      crow = (*pJ_d++) + owners[rank];
      ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      pA_d++;
      flops += 2*apnzj;
    }

    /* off-diagonal portion of P -- gives matrix values to be sent to others */
    pnzi = po->i[i+1] - po->i[i];
    for (j=0; j<pnzi; j++) {
      crow = p->garray[*pJ_o++];
      cjj  = cseqj + cseqi[crow];
      caj  = cseqa + cseqi[crow];
      /* add value into C_seq to be sent to other processors */
      nextap = 0;
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA_o)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA_o++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
    ierr = PetscLogStagePop();
  }

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscLogStagePush(stages[2]);CHKERRQ(ierr);
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr   = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr   = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);
  ierr = PetscFree(cseqa);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ba_i = apa; /* rename, temporary array for storage of one row of C */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 1482 nrows = *(buf_ri_k[k]); 1483 nextrow[k] = buf_ri_k[k]+1; /* next row index of k-th received i-structure */ 1484 nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* points to the row offsets within the k-th received i-structure */ 1485 } 1486 1487 /* add received local values to C */ 1488 for (i=0; i<cm; i++) { 1489 crow = owners[rank] + i; 1490 bj_i = bj+bi[i]; /* col indices of the i-th row of C */ 1491 bnzi = bi[i+1] - bi[i]; 1492 nzi = 0; 1493 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 1494 /* i-th row */ 1495 if (i == *nextrow[k]) { 1496 cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k]; 1497 cseqj = buf_rj[k] + *(nextcseqi[k]); 1498 cseqa = abuf_r[k] + *(nextcseqi[k]); 1499 nextcseqj = 0; 1500 for (j=0; nextcseqj<cseqnzi; j++){ 1501 if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */ 1502 ba_i[j] += cseqa[nextcseqj++]; 1503 nzi++; 1504 } 1505 } 1506 nextrow[k]++; nextcseqi[k]++; 1507 } 1508 } 1509 if (nzi>0){ 1510 ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr); 1511 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 1512 flops += 2*nzi; 1513 } 1514 } 1515 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1516 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1517 ierr = PetscLogStagePop();CHKERRQ(ierr); 1518 1519 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 1520 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 1521 ierr = PetscFree(apa);CHKERRQ(ierr); 1522 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1523 1524 PetscFunctionReturn(0); 1525 } 1526 #endif /* TOBEREMOVED */ 1527 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 1530 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 1531 { 1532 PetscErrorCode ierr; 1533 1534 PetscFunctionBegin; 1535 if (scall == MAT_INITIAL_MATRIX){ 1536 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1537 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 1538 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1539 } 1540 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1541 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 1542 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 /* 1547 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 1548 1549 Collective on Mat 1550 1551 Input Parameters: 1552 + A - the matrix 1553 - P - the projection matrix 1554 1555 Output Parameters: 1556 . C - the (i,j) structure of the product matrix 1557 1558 Notes: 1559 C will be created and must be destroyed by the user with MatDestroy(). 1560 1561 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 1562 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 1563 this (i,j) structure by calling MatPtAPNumeric().
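   A typical calling sequence is sketched below (not taken from an existing PETSc
   example; it assumes ierr is declared, A and P are assembled SeqAIJ matrices of
   compatible size, and only the numerical values of A change between the two
   numeric calls):

     Mat C;
     ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);
     ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);
     ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);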
1564 1565 Level: intermediate 1566 1567 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 1568 */ 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatPtAPSymbolic" 1571 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 1572 { 1573 PetscErrorCode ierr; 1574 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 1575 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 1576 1577 PetscFunctionBegin; 1578 1579 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 1580 PetscValidType(A,1); 1581 MatPreallocated(A); 1582 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1583 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1584 1585 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 1586 PetscValidType(P,2); 1587 MatPreallocated(P); 1588 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1589 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1590 1591 PetscValidPointer(C,3); 1592 1593 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 1594 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N); 1595 1596 /* For now, we do not dispatch based on the type of A and P */ 1597 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 1598 fA = A->ops->ptapsymbolic; 1599 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 1600 fP = P->ops->ptapsymbolic; 1601 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 1602 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 1603 1604 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1605 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 1606 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1607 1608 PetscFunctionReturn(0); 1609 } 1610 1611 typedef struct { 1612 Mat symAP; 1613 } Mat_PtAPstruct; 1614 1615 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 1619 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 1620 { 1621 PetscErrorCode ierr; 1622 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 1623 1624 PetscFunctionBegin; 1625 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 1626 ierr = PetscFree(ptap);CHKERRQ(ierr); 1627 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 1628 PetscFunctionReturn(0); 1629 } 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 1633 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 1634 { 1635 PetscErrorCode ierr; 1636 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1637 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 1638 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 1639 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 1640 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 1641 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 1642 MatScalar *ca; 1643 PetscBT lnkbt; 1644 1645 PetscFunctionBegin; 1646 /* Get ij structure of P^T */ 1647 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 1648 ptJ=ptj; 1649 1650 /* Allocate ci array, arrays for fill computation and */ 1651 /* free space for accumulating nonzero column info */ 1652 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1653 ci[0] = 0; 1654 1655 ierr = 
PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 1656 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1657 ptasparserow = ptadenserow + an; 1658 1659 /* create and initialize a linked list */ 1660 nlnk = pn+1; 1661 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1662 1663 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 1664 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1665 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr); 1666 current_space = free_space; 1667 1668 /* Determine symbolic info for each row of C: */ 1669 for (i=0;i<pn;i++) { 1670 ptnzi = pti[i+1] - pti[i]; 1671 ptanzi = 0; 1672 /* Determine symbolic row of PtA: */ 1673 for (j=0;j<ptnzi;j++) { 1674 arow = *ptJ++; 1675 anzj = ai[arow+1] - ai[arow]; 1676 ajj = aj + ai[arow]; 1677 for (k=0;k<anzj;k++) { 1678 if (!ptadenserow[ajj[k]]) { 1679 ptadenserow[ajj[k]] = -1; 1680 ptasparserow[ptanzi++] = ajj[k]; 1681 } 1682 } 1683 } 1684 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 1685 ptaj = ptasparserow; 1686 cnzi = 0; 1687 for (j=0;j<ptanzi;j++) { 1688 prow = *ptaj++; 1689 pnzj = pi[prow+1] - pi[prow]; 1690 pjj = pj + pi[prow]; 1691 /* add non-zero cols of P into the sorted linked list lnk */ 1692 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1693 cnzi += nlnk; 1694 } 1695 1696 /* If free space is not available, make more free space */ 1697 /* Double the amount of total space in the list */ 1698 if (current_space->local_remaining<cnzi) { 1699 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 1700 } 1701 1702 /* Copy data into free space, and zero out denserows */ 1703 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1704 current_space->array += cnzi; 1705 current_space->local_used += cnzi; 1706 current_space->local_remaining -= cnzi; 1707 1708 for (j=0;j<ptanzi;j++) { 1709 ptadenserow[ptasparserow[j]] = 0; 1710 } 1711 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1712 /* For now, we will recompute what is needed. */ 1713 ci[i+1] = ci[i] + cnzi; 1714 } 1715 /* nnz is now stored in ci[pn], column indices are in the list of free space */ 1716 /* Allocate space for cj, initialize cj, and */ 1717 /* destroy list of free space and other temporary array(s) */ 1718 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1719 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1720 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 1721 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1722 1723 /* Allocate space for ca */ 1724 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1725 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1726 1727 /* put together the new matrix */ 1728 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 1729 1730 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1731 /* Since these are PETSc arrays, change flags to free them as necessary. */ 1732 c = (Mat_SeqAIJ *)((*C)->data); 1733 c->freedata = PETSC_TRUE; 1734 c->nonew = 0; 1735 1736 /* Clean up. 
*/ 1737 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 1738 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #include "src/mat/impls/maij/maij.h" 1743 EXTERN_C_BEGIN 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 1746 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 1747 { 1748 /* This routine requires testing -- I don't think it works. */ 1749 PetscErrorCode ierr; 1750 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1751 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 1752 Mat P=pp->AIJ; 1753 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 1754 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 1755 PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 1756 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 1757 PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 1758 MatScalar *ca; 1759 1760 PetscFunctionBegin; 1761 /* Start timer */ 1762 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 1763 1764 /* Get ij structure of P^T */ 1765 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 1766 1767 /* Allocate ci array, arrays for fill computation and */ 1768 /* free space for accumulating nonzero column info */ 1769 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1770 ci[0] = 0; 1771 1772 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 1773 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1774 ptasparserow = ptadenserow + an; 1775 denserow = ptasparserow + an; 1776 sparserow = denserow + pn; 1777 1778 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 1779 /* This should be reasonable if sparsity of PtAP is similar to that of A. 
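      Concretely, ai[am]/pm is the average number of nonzeros per row of A, so (ai[am]/pm)*pn estimates nnz(C) for the pn rows of C under that assumption.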
*/ 1780 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr); 1781 current_space = free_space; 1782 1783 /* Determine symbolic info for each row of C: */ 1784 for (i=0;i<pn/ppdof;i++) { 1785 ptnzi = pti[i+1] - pti[i]; 1786 ptanzi = 0; 1787 ptJ = ptj + pti[i]; 1788 for (dof=0;dof<ppdof;dof++) { 1789 /* Determine symbolic row of PtA: */ 1790 for (j=0;j<ptnzi;j++) { 1791 arow = ptJ[j] + dof; 1792 anzj = ai[arow+1] - ai[arow]; 1793 ajj = aj + ai[arow]; 1794 for (k=0;k<anzj;k++) { 1795 if (!ptadenserow[ajj[k]]) { 1796 ptadenserow[ajj[k]] = -1; 1797 ptasparserow[ptanzi++] = ajj[k]; 1798 } 1799 } 1800 } 1801 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 1802 ptaj = ptasparserow; 1803 cnzi = 0; 1804 for (j=0;j<ptanzi;j++) { 1805 pdof = *ptaj%dof; 1806 prow = (*ptaj++)/dof; 1807 pnzj = pi[prow+1] - pi[prow]; 1808 pjj = pj + pi[prow]; 1809 for (k=0;k<pnzj;k++) { 1810 if (!denserow[pjj[k]+pdof]) { 1811 denserow[pjj[k]+pdof] = -1; 1812 sparserow[cnzi++] = pjj[k]+pdof; 1813 } 1814 } 1815 } 1816 1817 /* sort sparserow */ 1818 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 1819 1820 /* If free space is not available, make more free space */ 1821 /* Double the amount of total space in the list */ 1822 if (current_space->local_remaining<cnzi) { 1823 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 1824 } 1825 1826 /* Copy data into free space, and zero out denserows */ 1827 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 1828 current_space->array += cnzi; 1829 current_space->local_used += cnzi; 1830 current_space->local_remaining -= cnzi; 1831 1832 for (j=0;j<ptanzi;j++) { 1833 ptadenserow[ptasparserow[j]] = 0; 1834 } 1835 for (j=0;j<cnzi;j++) { 1836 denserow[sparserow[j]] = 0; 1837 } 1838 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1839 /* For now, we will recompute what is needed. */ 1840 ci[i+1+dof] = ci[i+dof] + cnzi; 1841 } 1842 } 1843 /* nnz is now stored in ci[pn], column indices are in the list of free space */ 1844 /* Allocate space for cj, initialize cj, and */ 1845 /* destroy list of free space and other temporary array(s) */ 1846 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1847 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1848 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 1849 1850 /* Allocate space for ca */ 1851 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1852 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1853 1854 /* put together the new matrix */ 1855 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 1856 1857 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1858 /* Since these are PETSc arrays, change flags to free them as necessary. */ 1859 c = (Mat_SeqAIJ *)((*C)->data); 1860 c->freedata = PETSC_TRUE; 1861 c->nonew = 0; 1862 1863 /* Clean up. */ 1864 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 1865 1866 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 1867 PetscFunctionReturn(0); 1868 } 1869 EXTERN_C_END 1870 1871 /* 1872 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 1873 1874 Collective on Mat 1875 1876 Input Parameters: 1877 + A - the matrix 1878 - P - the projection matrix 1879 1880 Output Parameters: 1881 . 
C - the product matrix 1882 1883 Notes: 1884 C must have been created by calling MatPtAPSymbolic() and must be destroyed by 1885 the user using MatDestroy(). 1886 1887 This routine is currently only implemented for pairs of AIJ matrices and classes 1888 which inherit from AIJ. C will be of type MATAIJ. 1889 1890 Level: intermediate 1891 1892 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 1893 */ 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatPtAPNumeric" 1896 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 1897 { 1898 PetscErrorCode ierr; 1899 PetscErrorCode (*fA)(Mat,Mat,Mat); 1900 PetscErrorCode (*fP)(Mat,Mat,Mat); 1901 1902 PetscFunctionBegin; 1903 1904 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 1905 PetscValidType(A,1); 1906 MatPreallocated(A); 1907 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1908 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1909 1910 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 1911 PetscValidType(P,2); 1912 MatPreallocated(P); 1913 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1914 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1915 1916 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 1917 PetscValidType(C,3); 1918 MatPreallocated(C); 1919 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1920 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1921 1922 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M); 1923 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 1924 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N); 1925 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N); 1926 1927 /* For now, we do not dispatch based on the type of A and P */ 1928 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. 
*/ 1929 fA = A->ops->ptapnumeric; 1930 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 1931 fP = P->ops->ptapnumeric; 1932 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 1933 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 1934 1935 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1936 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 1937 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1938 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 1944 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 1945 { 1946 PetscErrorCode ierr; 1947 PetscInt flops=0; 1948 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 1949 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 1950 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 1951 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 1952 PetscInt *ci=c->i,*cj=c->j,*cjj; 1953 PetscInt am=A->M,cn=C->N,cm=C->M; 1954 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 1955 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 1956 1957 PetscFunctionBegin; 1958 /* Allocate temporary array for storage of one row of A*P */ 1959 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 1960 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1961 1962 apj = (PetscInt *)(apa + cn); 1963 apjdense = apj + cn; 1964 1965 /* Clear old values in C */ 1966 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1967 1968 for (i=0;i<am;i++) { 1969 /* Form sparse row of A*P */ 1970 anzi = ai[i+1] - ai[i]; 1971 apnzj = 0; 1972 for (j=0;j<anzi;j++) { 1973 prow = *aj++; 1974 pnzj = pi[prow+1] - pi[prow]; 1975 pjj = pj + pi[prow]; 1976 paj = pa + pi[prow]; 1977 for (k=0;k<pnzj;k++) { 1978 if (!apjdense[pjj[k]]) { 1979 apjdense[pjj[k]] = -1; 1980 apj[apnzj++] = pjj[k]; 1981 } 1982 apa[pjj[k]] += (*aa)*paj[k]; 1983 } 1984 flops += 2*pnzj; 1985 aa++; 1986 } 1987 1988 /* Sort the j index array for quick sparse axpy. */ 1989 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1990 1991 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 1992 pnzi = pi[i+1] - pi[i]; 1993 for (j=0;j<pnzi;j++) { 1994 nextap = 0; 1995 crow = *pJ++; 1996 cjj = cj + ci[crow]; 1997 caj = ca + ci[crow]; 1998 /* Perform sparse axpy operation. Note cjj includes apj. 
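         Since both cjj and apj are sorted and every column in apj also appears in cjj (the symbolic phase built row crow of C from these contributions), a single forward scan of cjj finds every match without running past the end of the row.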
*/ 1999 for (k=0;nextap<apnzj;k++) { 2000 if (cjj[k]==apj[nextap]) { 2001 caj[k] += (*pA)*apa[apj[nextap++]]; 2002 } 2003 } 2004 flops += 2*apnzj; 2005 pA++; 2006 } 2007 2008 /* Zero the current row info for A*P */ 2009 for (j=0;j<apnzj;j++) { 2010 apa[apj[j]] = 0.; 2011 apjdense[apj[j]] = 0; 2012 } 2013 } 2014 2015 /* Assemble the final matrix and clean up */ 2016 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2017 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2018 ierr = PetscFree(apa);CHKERRQ(ierr); 2019 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 2020 2021 PetscFunctionReturn(0); 2022 } 2023 2024 /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 2025 static PetscEvent logkey_matptapnumeric_local = 0; 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 2028 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 2029 { 2030 PetscErrorCode ierr; 2031 PetscInt flops=0; 2032 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 2033 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 2034 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2035 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 2036 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 2037 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 2038 PetscInt *pJ=pj_loc,*pjj; 2039 PetscInt *ci=c->i,*cj=c->j,*cjj; 2040 PetscInt am=A->m,cn=C->N,cm=C->M; 2041 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2042 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 2043 MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 2044 2045 PetscFunctionBegin; 2046 if (!logkey_matptapnumeric_local) { 2047 ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 2048 } 2049 ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 2050 2051 /* Allocate temporary array for storage of one row of A*P */ 2052 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2053 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2054 apj = (PetscInt *)(apa + cn); 2055 apjdense = apj + cn; 2056 2057 /* Clear old values in C */ 2058 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2059 2060 for (i=0;i<am;i++) { 2061 /* Form i-th sparse row of A*P */ 2062 apnzj = 0; 2063 /* diagonal portion of A */ 2064 anzi = adi[i+1] - adi[i]; 2065 for (j=0;j<anzi;j++) { 2066 prow = *adj; 2067 adj++; 2068 pnzj = pi_loc[prow+1] - pi_loc[prow]; 2069 pjj = pj_loc + pi_loc[prow]; 2070 paj = pa_loc + pi_loc[prow]; 2071 for (k=0;k<pnzj;k++) { 2072 if (!apjdense[pjj[k]]) { 2073 apjdense[pjj[k]] = -1; 2074 apj[apnzj++] = pjj[k]; 2075 } 2076 apa[pjj[k]] += (*ada)*paj[k]; 2077 } 2078 flops += 2*pnzj; 2079 ada++; 2080 } 2081 /* off-diagonal portion of A */ 2082 anzi = aoi[i+1] - aoi[i]; 2083 for (j=0;j<anzi;j++) { 2084 col = a->garray[*aoj]; 2085 if (col < cstart){ 2086 prow = *aoj; 2087 } else if (col >= cend){ 2088 prow = *aoj; 2089 } else { 2090 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 2091 } 2092 aoj++; 2093 pnzj = pi_oth[prow+1] - pi_oth[prow]; 2094 pjj = pj_oth + pi_oth[prow]; 2095 paj = pa_oth + pi_oth[prow]; 2096 for (k=0;k<pnzj;k++) { 2097 if (!apjdense[pjj[k]]) { 2098 apjdense[pjj[k]] = -1; 2099 apj[apnzj++] = pjj[k]; 2100 } 2101 apa[pjj[k]] += 
(*aoa)*paj[k]; 2102 } 2103 flops += 2*pnzj; 2104 aoa++; 2105 } 2106 /* Sort the j index array for quick sparse axpy. */ 2107 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2108 2109 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 2110 pnzi = pi_loc[i+1] - pi_loc[i]; 2111 for (j=0;j<pnzi;j++) { 2112 nextap = 0; 2113 crow = *pJ++; 2114 cjj = cj + ci[crow]; 2115 caj = ca + ci[crow]; 2116 /* Perform sparse axpy operation. Note cjj includes apj. */ 2117 for (k=0;nextap<apnzj;k++) { 2118 if (cjj[k]==apj[nextap]) { 2119 caj[k] += (*pA)*apa[apj[nextap++]]; 2120 } 2121 } 2122 flops += 2*apnzj; 2123 pA++; 2124 } 2125 2126 /* Zero the current row info for A*P */ 2127 for (j=0;j<apnzj;j++) { 2128 apa[apj[j]] = 0.; 2129 apjdense[apj[j]] = 0; 2130 } 2131 } 2132 2133 /* Assemble the final matrix and clean up */ 2134 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2135 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2136 ierr = PetscFree(apa);CHKERRQ(ierr); 2137 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 2138 ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 static PetscEvent logkey_matptapsymbolic_local = 0; 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 2144 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 2145 { 2146 PetscErrorCode ierr; 2147 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2148 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 2149 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 2150 PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 2151 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 2152 PetscInt *ci,*cj,*ptaj; 2153 PetscInt aN=A->N,am=A->m,pN=P_loc->N; 2154 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 2155 PetscInt pm=P_loc->m,nlnk,*lnk; 2156 MatScalar *ca; 2157 PetscBT lnkbt; 2158 PetscInt prend,nprow_loc,nprow_oth; 2159 PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 2160 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 2161 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 2162 2163 PetscFunctionBegin; 2164 if (!logkey_matptapsymbolic_local) { 2165 ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 2166 } 2167 ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 2168 2169 prend = prstart + pm; 2170 2171 /* get ij structure of P_loc^T */ 2172 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 2173 ptJ=ptj; 2174 2175 /* Allocate ci array, arrays for fill computation and */ 2176 /* free space for accumulating nonzero column info */ 2177 ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2178 ci[0] = 0; 2179 2180 ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 2181 ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 2182 ptasparserow_loc = ptadenserow_loc + aN; 2183 ptadenserow_oth = ptasparserow_loc + aN; 2184 ptasparserow_oth = ptadenserow_oth + aN; 2185 2186 /* create and initialize a linked list */ 2187 nlnk = pN+1; 2188 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2189 2190 /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */ 2191 /* This should be reasonable if sparsity of PtAP is similar to that of A. 
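      Here nnz below is the number of nonzeros in the local diagonal and off-diagonal blocks of A, so the request is roughly fill*nnz(A_loc) scaled by the column ratio pN/aN.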
*/ 2192 nnz = adi[am] + aoi[am]; 2193 ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr); 2194 current_space = free_space; 2195 2196 /* determine symbolic info for each row of C: */ 2197 for (i=0;i<pN;i++) { 2198 ptnzi = pti[i+1] - pti[i]; 2199 nprow_loc = 0; nprow_oth = 0; 2200 /* i-th row of symbolic P_loc^T*A_loc: */ 2201 for (j=0;j<ptnzi;j++) { 2202 arow = *ptJ++; 2203 /* diagonal portion of A */ 2204 anzj = adi[arow+1] - adi[arow]; 2205 ajj = adj + adi[arow]; 2206 for (k=0;k<anzj;k++) { 2207 col = ajj[k]+prstart; 2208 if (!ptadenserow_loc[col]) { 2209 ptadenserow_loc[col] = -1; 2210 ptasparserow_loc[nprow_loc++] = col; 2211 } 2212 } 2213 /* off-diagonal portion of A */ 2214 anzj = aoi[arow+1] - aoi[arow]; 2215 ajj = aoj + aoi[arow]; 2216 for (k=0;k<anzj;k++) { 2217 col = a->garray[ajj[k]]; /* global col */ 2218 if (col < cstart){ 2219 col = ajj[k]; 2220 } else if (col >= cend){ 2221 col = ajj[k] + pm; 2222 } else { 2223 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 2224 } 2225 if (!ptadenserow_oth[col]) { 2226 ptadenserow_oth[col] = -1; 2227 ptasparserow_oth[nprow_oth++] = col; 2228 } 2229 } 2230 } 2231 2232 /* using symbolic info of local PtA, determine symbolic info for row of C: */ 2233 cnzi = 0; 2234 /* rows of P_loc */ 2235 ptaj = ptasparserow_loc; 2236 for (j=0; j<nprow_loc; j++) { 2237 prow = *ptaj++; 2238 prow -= prstart; /* rm */ 2239 pnzj = pi_loc[prow+1] - pi_loc[prow]; 2240 pjj = pj_loc + pi_loc[prow]; 2241 /* add non-zero cols of P into the sorted linked list lnk */ 2242 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2243 cnzi += nlnk; 2244 } 2245 /* rows of P_oth */ 2246 ptaj = ptasparserow_oth; 2247 for (j=0; j<nprow_oth; j++) { 2248 prow = *ptaj++; 2249 if (prow >= prend) prow -= pm; /* rm */ 2250 pnzj = pi_oth[prow+1] - pi_oth[prow]; 2251 pjj = pj_oth + pi_oth[prow]; 2252 /* add non-zero cols of P into the sorted linked list lnk */ 2253 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2254 cnzi += nlnk; 2255 } 2256 2257 /* If free space is not available, make more free space */ 2258 /* Double the amount of total space in the list */ 2259 if (current_space->local_remaining<cnzi) { 2260 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 2261 } 2262 2263 /* Copy data into free space, and zero out denserows */ 2264 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2265 current_space->array += cnzi; 2266 current_space->local_used += cnzi; 2267 current_space->local_remaining -= cnzi; 2268 2269 for (j=0;j<nprow_loc; j++) { 2270 ptadenserow_loc[ptasparserow_loc[j]] = 0; 2271 } 2272 for (j=0;j<nprow_oth; j++) { 2273 ptadenserow_oth[ptasparserow_oth[j]] = 0; 2274 } 2275 2276 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2277 /* For now, we will recompute what is needed. 
*/ 2278 ci[i+1] = ci[i] + cnzi; 2279 } 2280 /* nnz is now stored in ci[pN], column indices are in the list of free space */ 2281 /* Allocate space for cj, initialize cj, and */ 2282 /* destroy list of free space and other temporary array(s) */ 2283 ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2284 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2285 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 2286 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2287 2288 /* Allocate space for ca */ 2289 ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2290 ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2291 2292 /* put together the new matrix */ 2293 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 2294 2295 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2296 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2297 c = (Mat_SeqAIJ *)((*C)->data); 2298 c->freedata = PETSC_TRUE; 2299 c->nonew = 0; 2300 2301 /* Clean up. */ 2302 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 2303 2304 ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307
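/*
   Usage sketch for the driver routine MatPtAP() (not taken from a PETSc example;
   it assumes ierr is declared, A and P are assembled AIJ matrices on the same
   communicator, and only the numerical values of A change between the two calls):

     Mat C;
     ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
     ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/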