/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ.  C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(), MatPtAPNumeric(), MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);

  if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
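/*
   A minimal usage sketch (hypothetical caller code, not part of this file), assuming A and P
   are assembled AIJ matrices with compatible dimensions.  The first call builds C; later calls
   with MAT_REUSE_MATRIX refill the same C after the numerical values of A (or P) have changed:

     Mat C;
     ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
     ...  change values in A  ...
     ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/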
EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
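/*
   The reuse machinery here follows the PetscObjectContainer idiom used throughout this file:
   a Mat_MatMatMultMPI work structure is wrapped in a container and composed with P under the
   name "MatPtAPMPI"; later calls (and the destroy routine above) retrieve it with
   PetscObjectQuery().  A schematic of the pattern, using the same calls as the code below:

     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
     ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);
     ...
     ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject*)&container);CHKERRQ(ierr);
     ierr = PetscObjectContainerGetPointer(container,(void**)&ptap);CHKERRQ(ierr);
*/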

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  MatScalar            *ca;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  /*--------------------------------------------------*/
  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ=ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]]; /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /*        For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
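  /*
     Note on the estimate above (illustrative numbers only, not from any particular run): with
     fill = 1.0, nnz(A) = adi[am]+aoi[am] = 10000, pN = 500 and aN = 1000, the initial free
     space holds fill*nnz*pN/aN + 1 = 5001 column entries, and it is doubled on demand by
     GetMoreSpace().  The loop produces the usual CSR row pointer/column index pair: row i of
     the local product C_seq has its ci[i+1]-ci[i] column indices stored in
     cj[ci[i]], ..., cj[ci[i+1]-1] once the free-space segments are made contiguous below.
  */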
  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  /*
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
  */

  /* add C_seq into mpi C */
  /*-----------------------------------*/
  free_space=PETSC_NULL; current_space=PETSC_NULL;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s = merge->len_s;
  len   = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ci[i+1] > ci[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ci[i+1] - ci[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len = 0;
  len = ci[owners[rank+1]] - ci[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    arow = owners[rank] + i;
    anzi = ci[arow+1] - ci[arow];
    cji  = cj + ci[arow];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi     = bi;
  merge->bj     = bj;
  merge->ci     = ci;
  merge->cj     = cj;
  merge->buf_ri = buf_ri;
  merge->buf_rj = buf_rj;
  merge->C_seq  = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  PetscFunctionReturn(0);

#ifdef TOBEREMOVED
  PetscErrorCode       ierr;
  Mat                  C_seq;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;

  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* compute C_seq = P_loc^T * A_loc * P_seq */
  ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);

  /* add C_seq into mpi C */
  ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
#endif /* TOBEREMOVED */
}
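/*
   The "merge" object assembled above carries everything the numeric phase needs to add the
   off-processor rows of C_seq into the final MPIAIJ matrix: the CSR pattern ci/cj of the local
   sequential product, the pattern bi/bj of the owned rows, and the received i/j-structures
   buf_ri/buf_rj.  A received i-structure message is laid out as described in the send loop,
   e.g. (illustrative values) a message describing 2 nonempty rows with 3 and 5 entries reads

     buf_si = { 2,  r0, r1,  0, 3, 8 }

   i.e. buf_si[0] = nrows, buf_si[1..nrows] = local row indices, and the trailing nrows+1
   entries are the prefix offsets into the accompanying j-structure (and value) message.
*/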
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;

  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat                  C_seq;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt             i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt             *cjj;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar            *pa_loc,*pA,*pa_oth;
  PetscInt             am=A->m,cN=C->N;

  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i;
  Mat_SeqAIJ           *cseq;
  PetscInt             *cseqi,*cseqj;
  PetscInt             *cseqj_tmp;
  MatScalar            *cseqa_tmp;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* get data from symbolic products */
  p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  cseqi = merge->ci; cseqj=merge->cj;
  ierr = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Clear old values in C_seq */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* send and recv matrix values */
  /*-----------------------------*/
  len_s = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;            /* next row number of k-th recved i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  for (i=0; i<C->m; i++) {
    cseqrow = owners[rank] + i;
    bj_i    = bj+bi[i]; /* col indices of the i-th row of C */
    bnzi    = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's C_seq into ba */
    cseqnzi   = cseqi[cseqrow+1] - cseqi[cseqrow];
    cseqj_tmp = cseqj + cseqi[cseqrow];
    cseqa_tmp = cseqa + cseqi[cseqrow];
    nextcseqj = 0;
    for (j=0; nextcseqj<cseqnzi; j++){
      if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
        ba_i[j] += cseqa_tmp[nextcseqj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj_tmp = buf_rj[k] + *(nextcseqi[k]);
        cseqa_tmp = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa_tmp[nextcseqj++];
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(cseqa);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#ifdef TOBEREMOVED
EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
  }
  ierr = PetscFree(ptap);CHKERRQ(ierr);

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }
    /* update
P_oth */ 874 ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 875 876 } else { 877 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 878 } 879 /* now, compute numeric local P^T*A*P */ 880 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 881 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 882 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 883 884 PetscFunctionReturn(0); 885 } 886 /* Set symbolic info for i-th row of local product C=P^T*A*P */ 887 #define MatPtAPSymbolicLocal_Private(ptM,pti,ptj,ci,cj) 0;\ 888 {\ 889 /* allocate ci array, arrays for fill computation and */\ 890 /* free space for accumulating nonzero column info */\ 891 ierr = PetscMalloc((ptM+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);\ 892 ci[0] = 0;\ 893 \ 894 /* set initial free space to be nnz(A) scaled by fill*P->N/PtM. */\ 895 /* this should be reasonable if sparsity of PtAP is similar to that of A. */\ 896 nnz = adi[am] + aoi[am];\ 897 ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/ptM+1),&free_space);\ 898 current_space = free_space;\ 899 \ 900 ptJ=ptj;\ 901 for (i=0; i<ptM; i++) {\ 902 ptnzi = pti[i+1] - pti[i];\ 903 nprow_loc = 0; nprow_oth = 0;\ 904 /* i-th row of symbolic Pt*A: */\ 905 for (j=0;j<ptnzi;j++) {\ 906 arow = *ptJ++;\ 907 /* diagonal portion of A */\ 908 anzj = adi[arow+1] - adi[arow];\ 909 ajj = adj + adi[arow];\ 910 for (k=0;k<anzj;k++) {\ 911 col = ajj[k]+prstart;\ 912 if (!ptadenserow_loc[col]) {\ 913 ptadenserow_loc[col] = -1;\ 914 ptasparserow_loc[nprow_loc++] = col;\ 915 }\ 916 }\ 917 /* off-diagonal portion of A */\ 918 anzj = aoi[arow+1] - aoi[arow];\ 919 ajj = aoj + aoi[arow];\ 920 for (k=0;k<anzj;k++) {\ 921 col = a->garray[ajj[k]]; /* global col */\ 922 if (col < cstart){\ 923 col = ajj[k];\ 924 } else if (col >= cend){\ 925 col = ajj[k] + pm;\ 926 } else {\ 927 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");\ 928 }\ 929 if (!ptadenserow_oth[col]) {\ 930 ptadenserow_oth[col] = -1;\ 931 ptasparserow_oth[nprow_oth++] = col;\ 932 }\ 933 }\ 934 }\ 935 /* determine symbolic info for row of C_seq: */\ 936 cnzi = 0;\ 937 /* rows of P_loc */\ 938 ptaj = ptasparserow_loc;\ 939 for (j=0; j<nprow_loc; j++) {\ 940 prow = *ptaj++; \ 941 prow -= prstart; /* rm */\ 942 pnzj = pi_loc[prow+1] - pi_loc[prow];\ 943 pjj = pj_loc + pi_loc[prow];\ 944 /* add non-zero cols of P into the sorted linked list lnk */\ 945 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\ 946 cnzi += nlnk;\ 947 }\ 948 /* rows of P_oth */\ 949 ptaj = ptasparserow_oth;\ 950 for (j=0; j<nprow_oth; j++) {\ 951 prow = *ptaj++; \ 952 if (prow >= prend) prow -= pm; /* rm */\ 953 pnzj = pi_oth[prow+1] - pi_oth[prow];\ 954 pjj = pj_oth + pi_oth[prow];\ 955 /* add non-zero cols of P into the sorted linked list lnk */\ 956 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\ 957 cnzi += nlnk;\ 958 }\ 959 \ 960 /* If free space is not available, make more free space */\ 961 /* Double the amount of total space in the list */\ 962 if (current_space->local_remaining<cnzi) {\ 963 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr);\ 964 }\ 965 \ 966 /* Copy data into free space, and zero out denserows */\ 967 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);\ 968 current_space->array += cnzi;\ 969 current_space->local_used += cnzi;\ 970 current_space->local_remaining -= cnzi;\ 971 \ 972 for (j=0;j<nprow_loc; j++) {\ 973 
ptadenserow_loc[ptasparserow_loc[j]] = 0;\ 974 }\ 975 for (j=0;j<nprow_oth; j++) {\ 976 ptadenserow_oth[ptasparserow_oth[j]] = 0;\ 977 }\ 978 \ 979 /* Aside: Perhaps we should save the pta info for the numerical factorization. */\ 980 /* For now, we will recompute what is needed. */ \ 981 ci[i+1] = ci[i] + cnzi;\ 982 }\ 983 /* nnz is now stored in ci[ptm], column indices are in the list of free space */\ 984 /* Allocate space for cj, initialize cj, and */\ 985 /* destroy list of free space and other temporary array(s) */\ 986 ierr = PetscMalloc((ci[ptM]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);\ 987 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);\ 988 } 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 991 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 992 { 993 PetscErrorCode ierr; 994 Mat P_loc,P_oth; 995 Mat_MatMatMultMPI *ptap; 996 PetscObjectContainer container; 997 FreeSpaceList free_space=PETSC_NULL,current_space; 998 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 999 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data, 1000 *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 1001 Mat_SeqAIJ *p_loc,*p_oth; 1002 PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj,*rmap=p->garray; 1003 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1004 PetscInt pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj; 1005 PetscInt aN=A->N,am=A->m,pN=P->N; 1006 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi,row; 1007 PetscBT lnkbt; 1008 PetscInt prstart,prend,nprow_loc,nprow_oth; 1009 PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1010 1011 MPI_Comm comm=A->comm; 1012 Mat B_mpi; 1013 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 1014 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 1015 PetscInt len,proc,*dnz,*onz,*owners; 1016 PetscInt anzi,*bi,*bj,bnzi,nspacedouble=0; 1017 PetscInt nrows,tnrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci; 1018 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 1019 MPI_Status *status; 1020 Mat_Merge_SeqsToMPI *merge; 1021 PetscInt *ptdi,*ptdj,*cdi,*cdj; 1022 PetscInt *ptoi,*ptoj,*coi,*coj; 1023 1024 PetscFunctionBegin; 1025 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 1026 if (container) { 1027 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 1028 } else { 1029 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 1030 } 1031 1032 /* determine row ownership of C */ 1033 /*---------------------------------------------------------*/ 1034 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1035 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1036 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 1037 1038 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 1039 ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr); 1040 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 1041 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 1042 1043 /* get data from P_loc and P_oth */ 1044 P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart; 1045 p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data; 1046 pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j; 1047 prend = prstart + pm; 1048 1049 /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */ 1050 ierr = 
PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 1051 ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 1052 ptasparserow_loc = ptadenserow_loc + aN; 1053 ptadenserow_oth = ptasparserow_loc + aN; 1054 ptasparserow_oth = ptadenserow_oth + aN; 1055 1056 /* create and initialize a linked list */ 1057 nlnk = pN+1; 1058 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1059 1060 /* Pt = P_loc^T */ 1061 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1062 ierr = MatPtAPSymbolicLocal_Private(pN,pti,ptj,ci,cj);CHKERRQ(ierr); 1063 1064 /* Pt = Pd^T - diagonal portion of P_loc */ 1065 ierr = MatGetSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr); 1066 ierr = MatPtAPSymbolicLocal_Private(pn,ptdi,ptdj,cdi,cdj);CHKERRQ(ierr); 1067 ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr); 1068 #ifdef NEW 1069 /* Pt = Po^T - off-diagonal portion of P_loc */ 1070 ierr = MatGetSymbolicTranspose_SeqAIJ(p->B,&ptoi,&ptoj);CHKERRQ(ierr); 1071 ierr = MatPtAPSymbolicLocal_Private(tnrows,ptoi,ptoj,coi,coj);CHKERRQ(ierr); 1072 ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&ptoi,&ptoj);CHKERRQ(ierr); 1073 #endif 1074 /* clean up */ 1075 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1076 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1077 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1078 1079 /* determine the number of messages to send, their lengths */ 1080 /*---------------------------------------------------------*/ 1081 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 1082 ierr = PetscMemzero(len_si,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 1083 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 1084 len_s = merge->len_s; 1085 len = 0; /* max length of buf_si[] */ 1086 merge->nsend = 0; 1087 tnrows = (p->B)->N; /* total num of rows to be sent to other processors */ 1088 proc = 0; 1089 for (i=0; i<tnrows; i++){ 1090 while (rmap[i] >= owners[proc+1]) proc++; 1091 len_si[proc]++; 1092 } 1093 for (proc=0; proc<size; proc++){ 1094 len_s[proc] = 0; 1095 if (len_si[proc]){ 1096 merge->nsend++; 1097 len_si[proc] = 2*(len_si[proc] + 1); 1098 len_s[proc] = ci[owners[proc+1]] - ci[owners[proc]]; /* num of col indices to be sent to [proc] */ 1099 len += len_si[proc]; 1100 } 1101 } 1102 1103 /* determine the number and length of messages to receive for ij-structure */ 1104 /*-------------------------------------------------------------------------*/ 1105 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 1106 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 1107 1108 /* post the Irecv of j-structure */ 1109 /*-------------------------------*/ 1110 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 1111 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 1112 1113 /* post the Isend of j-structure */ 1114 /*--------------------------------*/ 1115 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 1116 sj_waits = si_waits + merge->nsend; 1117 1118 for (proc=0, k=0; proc<size; proc++){ 1119 if (!len_s[proc]) continue; 1120 i = owners[proc]; 1121 ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 1122 k++; 1123 } 1124 1125 /* receives and sends of j-structure are complete */ 1126 
/*------------------------------------------------*/ 1127 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 1128 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 1129 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 1130 1131 /* send and recv i-structure */ 1132 /*---------------------------*/ 1133 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 1134 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 1135 1136 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 1137 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 1138 1139 j = 0; /* row index to be sent */ 1140 for (proc=0,k=0; proc<size; proc++){ 1141 if (!len_s[proc]) continue; 1142 /* form outgoing message for i-structure: 1143 buf_si[0]: nrows to be sent 1144 [1:nrows]: row index (global) 1145 [nrows+1:2*nrows+1]: i-structure index 1146 */ 1147 /*-------------------------------------------*/ 1148 nrows = len_si[proc]/2 - 1; 1149 buf_si_i = buf_si + nrows+1; 1150 buf_si[0] = nrows; 1151 buf_si_i[0] = 0; 1152 for (i=0; i<nrows; i++){ 1153 row = rmap[j++]; 1154 anzi = ci[row+1] - ci[row]; 1155 buf_si_i[i+1] = buf_si_i[i] + anzi; /* i-structure */ 1156 buf_si[i+1] = row - owners[proc]; /* local row index */ 1157 } 1158 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 1159 k++; 1160 buf_si += len_si[proc]; 1161 } 1162 1163 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 1164 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 1165 1166 ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 1167 for (i=0; i<merge->nrecv; i++){ 1168 ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 1169 } 1170 1171 ierr = PetscFree(len_si);CHKERRQ(ierr); 1172 ierr = PetscFree(len_ri);CHKERRQ(ierr); 1173 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 1174 ierr = PetscFree(si_waits);CHKERRQ(ierr); 1175 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 1176 ierr = PetscFree(buf_s);CHKERRQ(ierr); 1177 ierr = PetscFree(status);CHKERRQ(ierr); 1178 1179 /* compute a local seq matrix in each processor */ 1180 /*----------------------------------------------*/ 1181 /* allocate bi array and free space for accumulating nonzero column info */ 1182 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1183 bi[0] = 0; 1184 1185 /* create and initialize a linked list */ 1186 nlnk = pN+1; 1187 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1188 1189 /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */ 1190 len = 0; 1191 len = ci[owners[rank+1]] - ci[owners[rank]]; 1192 free_space=PETSC_NULL; 1193 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 1194 current_space = free_space; 1195 1196 /* determine symbolic info for each local row of C */ 1197 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 1198 nextrow = buf_ri_k + merge->nrecv; 1199 nextci = nextrow + merge->nrecv; 1200 for (k=0; k<merge->nrecv; k++){ 1201 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 1202 nrows = *buf_ri_k[k]; 1203 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 1204 nextci[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 1205 } 1206 1207 ierr = 
MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr); 1208 len = 0; 1209 for (i=0; i<pn; i++) { 1210 bnzi = 0; 1211 /* add local non-zero cols of this proc's C_seq into lnk */ 1212 anzi = cdi[i+1] - cdi[i]; 1213 cji = cdj + cdi[i]; 1214 ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1215 bnzi += nlnk; 1216 /* add received col data into lnk */ 1217 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 1218 if (i == *nextrow[k]) { /* i-th row */ 1219 anzi = *(nextci[k]+1) - *nextci[k]; 1220 cji = buf_rj[k] + *nextci[k]; 1221 ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1222 bnzi += nlnk; 1223 nextrow[k]++; nextci[k]++; 1224 } 1225 } 1226 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 1227 1228 /* if free space is not available, make more free space */ 1229 if (current_space->local_remaining<bnzi) { 1230 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1231 nspacedouble++; 1232 } 1233 /* copy data into free space, then initialize lnk */ 1234 ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1235 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 1236 1237 current_space->array += bnzi; 1238 current_space->local_used += bnzi; 1239 current_space->local_remaining -= bnzi; 1240 1241 bi[i+1] = bi[i] + bnzi; 1242 } 1243 1244 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 1245 1246 ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1247 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1248 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1249 1250 /* create symbolic parallel matrix B_mpi */ 1251 /*---------------------------------------*/ 1252 ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 1253 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 1254 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 1255 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1256 1257 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 1258 B_mpi->assembled = PETSC_FALSE; 1259 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 1260 merge->bi = bi; 1261 merge->bj = bj; 1262 merge->ci = ci; 1263 merge->cj = cj; 1264 merge->buf_ri = buf_ri; 1265 merge->buf_rj = buf_rj; 1266 merge->C_seq = PETSC_NULL; 1267 1268 /* attach the supporting struct to B_mpi for reuse */ 1269 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 1270 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 1271 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 1272 *C = B_mpi; 1273 1274 ierr = PetscFree(cdi);CHKERRQ(ierr); 1275 ierr = PetscFree(cdj);CHKERRQ(ierr); 1276 #ifdef NEW 1277 ierr = PetscFree(coi);CHKERRQ(ierr); 1278 ierr = PetscFree(coj);CHKERRQ(ierr); 1279 #endif 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 1285 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 1286 { 1287 PetscErrorCode ierr; 1288 Mat_Merge_SeqsToMPI *merge; 1289 Mat_MatMatMultMPI *ptap; 1290 PetscObjectContainer cont_merge,cont_ptap; 1291 PetscInt flops=0; 1292 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data; 1293 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 1294 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 1295 Mat_SeqAIJ 
*cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data; 1296 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense; 1297 PetscInt *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj; 1298 PetscInt i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow; 1299 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj,**abuf_r,*ba_i,*ca; 1300 MatScalar *pA_d=pd->a,*pA_o=po->a,*pa_oth; 1301 PetscInt am=A->m,cN=C->N,cm=C->m; 1302 MPI_Comm comm=C->comm; 1303 PetscMPIInt size,rank,taga,*len_s,**buf_ri,**buf_rj,**buf_ri_k,**nextrow,**nextcseqi; 1304 PetscInt *owners,proc,nrows; 1305 PetscInt *cjj,cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */ 1306 MPI_Request *s_waits,*r_waits; 1307 MPI_Status *status; 1308 PetscInt *cseqi,*cseqj,col; 1309 PetscInt stages[3]; 1310 1311 PetscFunctionBegin; 1312 ierr = PetscLogStageRegister(&stages[0],"NumAP_local");CHKERRQ(ierr); 1313 ierr = PetscLogStageRegister(&stages[1],"NumPtAP_local");CHKERRQ(ierr); 1314 ierr = PetscLogStageRegister(&stages[2],"NumPtAP_Comm");CHKERRQ(ierr); 1315 1316 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 1317 if (cont_merge) { 1318 ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 1319 } else { 1320 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 1321 } 1322 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 1323 if (cont_ptap) { 1324 ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 1325 } else { 1326 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 1327 } 1328 /* get data from symbolic products */ 1329 p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data; 1330 pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a; 1331 1332 cseqi = merge->ci; cseqj=merge->cj; 1333 ierr = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr); 1334 ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr); 1335 1336 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1337 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1338 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 1339 1340 /* Allocate temporary array for storage of one row of A*P and one row of C */ 1341 ierr = PetscMalloc(2*cN*(sizeof(MatScalar)+sizeof(PetscInt)),&apa);CHKERRQ(ierr); 1342 ierr = PetscMemzero(apa,2*cN*(sizeof(MatScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1343 ca = (MatScalar*)(apa + cN); 1344 apj = (PetscInt *)(ca + cN); 1345 apjdense = (PetscInt *)(apj + cN); 1346 1347 /* Clear old values in C */ 1348 ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1349 ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1350 1351 for (i=0;i<am;i++) { 1352 ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr); 1353 /* Form i-th sparse row of A*P */ 1354 apnzj = 0; 1355 /* diagonal portion of A */ 1356 nzi = adi[i+1] - adi[i]; 1357 for (j=0;j<nzi;j++) { 1358 prow = *adj++; 1359 /* diagonal portion of P */ 1360 pnzj = pd->i[prow+1] - pd->i[prow]; 1361 pjj = pd->j + pd->i[prow]; /* local col index of P */ 1362 paj = pd->a + pd->i[prow]; 1363 for (k=0;k<pnzj;k++) { 1364 col = *pjj + p->cstart; pjj++; /* global col index of P */ 1365 if (!apjdense[col]) { 1366 apjdense[col] = -1; 1367 apj[apnzj++] = col; 1368 } 1369 apa[col] += (*ada)*paj[k]; 1370 } 1371 flops += 2*pnzj; 1372 /* off-diagonal portion of P */ 1373 pnzj = po->i[prow+1] - po->i[prow]; 1374 pjj = po->j + po->i[prow]; /* local col index 
of P */ 1375 paj = po->a + po->i[prow]; 1376 for (k=0;k<pnzj;k++) { 1377 col = p->garray[*pjj]; pjj++; /* global col index of P */ 1378 if (!apjdense[col]) { 1379 apjdense[col] = -1; 1380 apj[apnzj++] = col; 1381 } 1382 apa[col] += (*ada)*paj[k]; 1383 } 1384 flops += 2*pnzj; 1385 1386 ada++; 1387 } 1388 /* off-diagonal portion of A */ 1389 nzi = aoi[i+1] - aoi[i]; 1390 for (j=0;j<nzi;j++) { 1391 prow = *aoj++; 1392 pnzj = pi_oth[prow+1] - pi_oth[prow]; 1393 pjj = pj_oth + pi_oth[prow]; 1394 paj = pa_oth + pi_oth[prow]; 1395 for (k=0;k<pnzj;k++) { 1396 if (!apjdense[pjj[k]]) { 1397 apjdense[pjj[k]] = -1; 1398 apj[apnzj++] = pjj[k]; 1399 } 1400 apa[pjj[k]] += (*aoa)*paj[k]; 1401 } 1402 flops += 2*pnzj; 1403 aoa++; 1404 } 1405 /* Sort the j index array for quick sparse axpy. */ 1406 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1407 1408 ierr = PetscLogStagePop(); 1409 ierr = PetscLogStagePush(stages[1]); 1410 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1411 /* diagonal portion of P -- gives matrix value of local C */ 1412 pnzi = pd->i[i+1] - pd->i[i]; 1413 for (j=0;j<pnzi;j++) { 1414 /* add value into C */ 1415 for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]]; 1416 crow = (*pJ_d++) + owners[rank]; 1417 ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr); 1418 ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr); 1419 pA_d++; 1420 flops += 2*apnzj; 1421 } 1422 1423 /* off-diagonal portion of P -- gives matrix values to be sent to others */ 1424 pnzi = po->i[i+1] - po->i[i]; 1425 for (j=0;j<pnzi;j++) { 1426 crow = p->garray[*pJ_o++]; 1427 cjj = cseqj + cseqi[crow]; 1428 caj = cseqa + cseqi[crow]; 1429 /* add value into C_seq to be sent to other processors */ 1430 nextap = 0; 1431 for (k=0;nextap<apnzj;k++) { 1432 if (cjj[k]==apj[nextap]) { 1433 caj[k] += (*pA_o)*apa[apj[nextap++]]; 1434 } 1435 } 1436 flops += 2*apnzj; 1437 pA_o++; 1438 } 1439 1440 /* Zero the current row info for A*P */ 1441 for (j=0;j<apnzj;j++) { 1442 apa[apj[j]] = 0.; 1443 apjdense[apj[j]] = 0; 1444 } 1445 ierr = PetscLogStagePop(); 1446 } 1447 1448 /* send and recv matrix values */ 1449 /*-----------------------------*/ 1450 ierr = PetscLogStagePush(stages[2]);CHKERRQ(ierr); 1451 bi = merge->bi; 1452 bj = merge->bj; 1453 buf_ri = merge->buf_ri; 1454 buf_rj = merge->buf_rj; 1455 len_s = merge->len_s; 1456 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 1457 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 1458 1459 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 1460 for (proc=0,k=0; proc<size; proc++){ 1461 if (!len_s[proc]) continue; 1462 i = owners[proc]; 1463 ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 1464 k++; 1465 } 1466 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 1467 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 1468 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 1469 ierr = PetscFree(status);CHKERRQ(ierr); 1470 1471 ierr = PetscFree(s_waits);CHKERRQ(ierr); 1472 ierr = PetscFree(r_waits);CHKERRQ(ierr); 1473 ierr = PetscFree(cseqa);CHKERRQ(ierr); 1474 1475 /* insert mat values of mpimat */ 1476 /*----------------------------*/ 1477 ba_i = apa; /* rename, temporary array for storage of one row of C */ 1478 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 1479 nextrow = buf_ri_k + merge->nrecv; 1480 nextcseqi = 
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;             /* next row index of k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj+bi[i]; /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi  = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj     = buf_rj[k] + *(nextcseqi[k]);
        cseqa     = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscLogStagePop();CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#endif /* TOBEREMOVED */

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
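/*
   Usage sketch (illustrative only, not part of the implementation in this file).
   A typical driver computes C once with MAT_INITIAL_MATRIX, then refreshes only the
   numerical values with MAT_REUSE_MATRIX when the entries of A change but its nonzero
   structure does not.  A and P are assumed to be assembled AIJ matrices created
   elsewhere; the value 1.0 is only a guess for the fill ratio.

     Mat            A,P,C;
     PetscErrorCode ierr;

     ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);   symbolic and numeric phases
     ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);     numeric phase only
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/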
/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
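/*
   The symbolic routine below relies on a standard sparse-accumulator idiom: a dense
   work array flags which columns have already been seen for the current row, while a
   compact list records those columns, so each row can be built and reset in time
   proportional to its number of nonzeros rather than to the matrix dimension.
   A minimal sketch of the idiom, with illustrative names that are not taken from this
   file (flag[] is assumed allocated with one entry per column and zeroed once):

     PetscInt j,col,nrow = 0;
     for (j=0; j<ncontrib; j++) {
       col = contrib[j];
       if (!flag[col]) {
         flag[col]       = -1;
         rowcols[nrow++] = col;
       }
     }
     for (j=0; j<nrow; j++) flag[rowcols[j]] = 0;
*/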
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
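/*
   Note on array ownership (a sketch, assuming the usual MatCreateSeqAIJWithArrays
   semantics): the routine above hands freshly PetscMalloc'ed CSR arrays (ci,cj,ca) to
   MatCreateSeqAIJWithArrays(), which by default neither copies nor frees user-supplied
   arrays; setting c->freedata lets MatDestroy() on the product release them.  A caller
   that keeps ownership of its own arrays would instead leave the flag unset, destroy
   the matrix, and free the arrays itself, roughly:

     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n,n,ci,cj,ca,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
     ierr = PetscFree(ci);CHKERRQ(ierr);
     ierr = PetscFree(cj);CHKERRQ(ierr);
     ierr = PetscFree(ca);CHKERRQ(ierr);
*/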
#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0;j<ptanzi;j++) {
        pdof = *ptaj%dof;
        prow = (*ptaj++)/dof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user with MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
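/*
   Usage sketch for the symbolic/numeric pair (illustrative only, not part of this
   file's implementation): the (i,j) structure of C is computed once, after which the
   numeric phase can be repeated for each new set of values of A with the same nonzero
   pattern.  A and P are assumed to be assembled AIJ matrices created elsewhere; 1.0 is
   only a guess for the fill ratio.

     Mat            A,P,C;
     PetscErrorCode ierr;

     ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);   structure of C only
     ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);         fill in the values
     ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);         repeat after A's values change
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/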
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
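/*
   The inner loop above exploits an invariant guaranteed by the symbolic phase: the
   column pattern of row crow of C contains every column of the current row of A*P.
   Both index lists are sorted, so a single pass that walks through the row of C and
   consumes A*P entries as they match performs the sparse axpy.  A minimal sketch of
   that merge, with standalone names that are not the variables used above:

     PetscInt k,next = 0;
     for (k=0; next<apnz; k++) {
       if (ccol[k] == apcol[next]) {
         cval[k] += alpha*apval[next];
         next++;
       }
     }
*/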
/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi     = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]]; /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}