/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
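
/*
   Example (a minimal usage sketch added for illustration, not part of the library;
   it assumes A and P are assembled AIJ matrices created elsewhere and uses a guessed
   fill ratio of 1.0):

      Mat C;
      ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
      ....  change the numerical values (not the nonzero pattern) of A  ....
      ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/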

EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(ptap);CHKERRQ(ierr); /* free ptap only if it was actually attached */
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  /*--------------------------------------------------*/
  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ=ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc  + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth  + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  /*
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
  */

  /* add C_seq into mpi C */
  /*-----------------------------------*/
  free_space=PETSC_NULL; current_space=PETSC_NULL;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s = merge->len_s;
  len   = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ci[i+1] > ci[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ci[i+1] - ci[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len  = 0;
  len  = ci[owners[rank+1]] - ci[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    arow = owners[rank] + i;
    anzi = ci[arow+1] - ci[arow];
    cji  = cj + ci[arow];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->ci           = ci;
  merge->cj           = cj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->C_seq        = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;

  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt             i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt             *cjj;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar            *pa_loc,*pA,*pa_oth;
  PetscInt             am=A->m,cN=C->N;

  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i;
  PetscInt             *cseqi,*cseqj;
  PetscInt             *cseqj_tmp;
  MatScalar            *cseqa_tmp;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* get data from symbolic products */
  p_loc = (Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  cseqi = merge->ci; cseqj = merge->cj;
  ierr = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Clear old values in C_seq */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  ierr = PetscFree(apa);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* send and recv matrix values */
  /*-----------------------------*/
  len_s = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  for (i=0; i<C->m; i++) {
    cseqrow = owners[rank] + i;
    bj_i    = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi    = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's C_seq into ba */
    cseqnzi   = cseqi[cseqrow+1] - cseqi[cseqrow];
    cseqj_tmp = cseqj + cseqi[cseqrow];
    cseqa_tmp = cseqa + cseqi[cseqrow];
    nextcseqj = 0;
    for (j=0; nextcseqj<cseqnzi; j++){
      if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
        ba_i[j] += cseqa_tmp[nextcseqj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj_tmp = buf_rj[k] + *(nextcseqi[k]);
        cseqa_tmp = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa_tmp[nextcseqj++];
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
    flops += 2*bnzi;
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(cseqa);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
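
/*
   Editorial note on the numeric kernel above (a sketch of the idea, not PETSc API):
   each row of A*P is accumulated with a dense work array plus a compressed list of
   the columns touched, so the per-row reset costs O(row nnz) rather than O(N):

      MatScalar *apa;        dense accumulator of length cN, zeroed once
      PetscInt  *apjdense;   per-column "touched" flags, zeroed once
      PetscInt  *apj,apnzj;  compressed list of columns hit in the current row

      for each contribution (col,val) to row i of A*P:
        if (!apjdense[col]) { apjdense[col] = -1; apj[apnzj++] = col; }
        apa[col] += val;
      then use apa[apj[0..apnzj-1]] in the P^T*(AP) axpy, and reset only those entries:
      for (j=0; j<apnzj; j++) { apa[apj[j]] = 0.; apjdense[apj[j]] = 0; }
*/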

#ifdef TOBEREMOVED
EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(ptap);CHKERRQ(ierr); /* free ptap only if it was actually attached */
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }
    /* update P_oth */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
/* Set symbolic info for i-th row of local product C=P^T*A*P */
#define MatPtAPSymbolicLocal_Private(ptM,pti,ptj,ci,cj) 0;\
{\
  /* allocate ci array, arrays for fill computation and */\
  /* free space for accumulating nonzero column info */\
  ierr = PetscMalloc((ptM+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);\
  ci[0] = 0;\
  \
  /* set initial free space to be nnz(A) scaled by fill*P->N/ptM. */\
  /* this should be reasonable if sparsity of PtAP is similar to that of A. */\
  nnz  = adi[am] + aoi[am];\
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/ptM+1),&free_space);CHKERRQ(ierr);\
  current_space = free_space;\
  \
  ptJ=ptj;\
  for (i=0; i<ptM; i++) {\
    ptnzi = pti[i+1] - pti[i];\
    nprow_loc = 0; nprow_oth = 0;\
    /* i-th row of symbolic Pt*A: */\
    for (j=0;j<ptnzi;j++) {\
      arow = *ptJ++;\
      /* diagonal portion of A */\
      anzj = adi[arow+1] - adi[arow];\
      ajj  = adj + adi[arow];\
      for (k=0;k<anzj;k++) {\
        col = ajj[k]+prstart;\
        if (!ptadenserow_loc[col]) {\
          ptadenserow_loc[col]          = -1;\
          ptasparserow_loc[nprow_loc++] = col;\
        }\
      }\
      /* off-diagonal portion of A */\
      anzj = aoi[arow+1] - aoi[arow];\
      ajj  = aoj + aoi[arow];\
      for (k=0;k<anzj;k++) {\
        col = a->garray[ajj[k]]; /* global col */\
        if (col < cstart){\
          col = ajj[k];\
        } else if (col >= cend){\
          col = ajj[k] + pm;\
        } else {\
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");\
        }\
        if (!ptadenserow_oth[col]) {\
          ptadenserow_oth[col]          = -1;\
          ptasparserow_oth[nprow_oth++] = col;\
        }\
      }\
    }\
    /* determine symbolic info for row of C_seq: */\
    cnzi = 0;\
    /* rows of P_loc */\
    ptaj = ptasparserow_loc;\
    for (j=0; j<nprow_loc; j++) {\
      prow  = *ptaj++; \
      prow -= prstart; /* rm */\
      pnzj = pi_loc[prow+1] - pi_loc[prow];\
      pjj  = pj_loc + pi_loc[prow];\
      /* add non-zero cols of P into the sorted linked list lnk */\
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\
      cnzi += nlnk;\
    }\
    /* rows of P_oth */\
    ptaj = ptasparserow_oth;\
    for (j=0; j<nprow_oth; j++) {\
      prow = *ptaj++; \
      if (prow >= prend) prow -= pm; /* rm */\
      pnzj = pi_oth[prow+1] - pi_oth[prow];\
      pjj  = pj_oth + pi_oth[prow];\
      /* add non-zero cols of P into the sorted linked list lnk */\
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\
      cnzi += nlnk;\
    }\
    \
    /* If free space is not available, make more free space */\
    /* Double the amount of total space in the list */\
    if (current_space->local_remaining<cnzi) {\
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);\
    }\
    \
    /* Copy data into free space, and zero out denserows */\
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);\
    current_space->array           += cnzi;\
    current_space->local_used      += cnzi;\
    current_space->local_remaining -= cnzi;\
    \
    for (j=0;j<nprow_loc; j++) {\
      ptadenserow_loc[ptasparserow_loc[j]] = 0;\
    }\
    for (j=0;j<nprow_oth; j++) {\
      ptadenserow_oth[ptasparserow_oth[j]] = 0;\
    }\
    \
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */\
    /* For now, we will recompute what is needed. */ \
    ci[i+1] = ci[i] + cnzi;\
  }\
  /* nnz is now stored in ci[ptM], column indices are in the list of free space */\
  /* Allocate space for cj, initialize cj, and */\
  /* destroy list of free space and other temporary array(s) */\
  ierr = PetscMalloc((ci[ptM]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);\
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);\
}
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,
                       *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj,*rmap=p->garray;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi,row;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,tnrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;
  PetscInt             *ptdi,*ptdj,*cdi,*cdj;
  PetscInt             *ptoi,*ptoj,*coi,*coj;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* determine row ownership of C */
  /*---------------------------------------------------------*/
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc  + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth  + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Pt = P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(pN,pti,ptj,ci,cj);CHKERRQ(ierr);

  /* Pt = Pd^T - diagonal portion of P_loc */
  ierr = MatGetSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(pn,ptdi,ptdj,cdi,cdj);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr);
#ifdef NEW
  /* Pt = Po^T - off-diagonal portion of P_loc */
  ierr = MatGetSymbolicTranspose_SeqAIJ(p->B,&ptoi,&ptoj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(tnrows,ptoi,ptoj,coi,coj);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->B,&ptoi,&ptoj);CHKERRQ(ierr);
#endif
  /* clean up */
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMemzero(len_si,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s = merge->len_s;
  len   = 0; /* max length of buf_si[] */
  merge->nsend = 0;
  tnrows = (p->B)->N; /* total num of rows to be sent to other processors */
  proc   = 0;
  for (i=0; i<tnrows; i++){
    while (rmap[i] >= owners[proc+1]) proc++;
    len_si[proc]++;
  }
  for (proc=0; proc<size; proc++){
    len_s[proc] = 0;
    if (len_si[proc]){
      merge->nsend++;
      len_si[proc] = 2*(len_si[proc] + 1);
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* num of col indices to be sent to [proc] */
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */

  j = 0; /* row index to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    for (i=0; i<nrows; i++){
      row  = rmap[j++];
      anzi = ci[row+1] - ci[row];
      buf_si_i[i+1] = buf_si_i[i] + anzi;  /* i-structure */
      buf_si[i+1]   = row - owners[proc];  /* local row index */
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len = 0;
  len = ci[owners[rank+1]] - ci[owners[rank]];
  free_space = PETSC_NULL;
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row of C */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    anzi = cdi[i+1] - cdi[i];
    cji  = cdj + cdi[i];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->ci           = ci;
  merge->cj           = cj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->C_seq        = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  ierr = PetscFree(cdi);CHKERRQ(ierr);
  ierr = PetscFree(cdj);CHKERRQ(ierr);
#ifdef NEW
  ierr = PetscFree(coi);CHKERRQ(ierr);
  ierr = PetscFree(coj);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  Mat_SeqAIJ           *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense;
  PetscInt             *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj;
  PetscInt             i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj,**abuf_r,*ba_i,*ca;
  MatScalar            *pA_d=pd->a,*pA_o=po->a,*pa_oth;
  PetscInt             am=A->m,cN=C->N,cm=C->m;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s,**buf_ri,**buf_rj,**buf_ri_k,**nextrow,**nextcseqi;
  PetscInt             *owners,proc,nrows;
  PetscInt             *cjj,cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  PetscInt             *cseqi,*cseqj,col;
  PetscInt             stages[3];

  PetscFunctionBegin;
  ierr = PetscLogStageRegister(&stages[0],"NumAP_local");CHKERRQ(ierr);
  ierr = PetscLogStageRegister(&stages[1],"NumPtAP_local");CHKERRQ(ierr);
  ierr = PetscLogStageRegister(&stages[2],"NumPtAP_Comm");CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* get data from symbolic products */
  p_oth = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  cseqi = merge->ci; cseqj = merge->cj;
  ierr = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P and one row of C */
  ierr = PetscMalloc(2*cN*(sizeof(MatScalar)+sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,2*cN*(sizeof(MatScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
  ca       = (MatScalar*)(apa + cN);
  apj      = (PetscInt *)(ca + cN);
  apjdense = (PetscInt *)(apj + cN);

  /* Clear old values in C */
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr);
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    nzi = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj++;
      /* diagonal portion of P */
      pnzj = pd->i[prow+1] - pd->i[prow];
      pjj  = pd->j + pd->i[prow]; /* local col index of P */
      paj  = pd->a + pd->i[prow];
      for (k=0;k<pnzj;k++) {
        col = *pjj + p->cstart; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      /* off-diagonal portion of P */
      pnzj = po->i[prow+1] - po->i[prow];
      pjj  = po->j + po->i[prow]; /* local col index of P */
      paj  = po->a + po->i[prow];
      for (k=0;k<pnzj;k++) {
        col = p->garray[*pjj]; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;

      ada++;
    }
    /* off-diagonal portion of A */
    nzi = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    ierr = PetscLogStagePop();CHKERRQ(ierr);
    ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr);
    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    /* diagonal portion of P -- gives matrix value of local C */
    pnzi = pd->i[i+1] - pd->i[i];
    for (j=0;j<pnzi;j++) {
      /* add value into C */
      for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]];
      crow = (*pJ_d++) + owners[rank];
      ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      pA_d++;
      flops += 2*apnzj;
    }

    /* off-diagonal portion of P -- gives matrix values to be sent to others */
    pnzi = po->i[i+1] - po->i[i];
    for (j=0;j<pnzi;j++) {
      crow = p->garray[*pJ_o++];
      cjj  = cseqj + cseqi[crow];
      caj  = cseqa + cseqi[crow];
      /* add value into C_seq to be sent to other processors */
      nextap = 0;
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA_o)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA_o++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
    ierr = PetscLogStagePop();CHKERRQ(ierr);
  }

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscLogStagePush(stages[2]);CHKERRQ(ierr);
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);
  ierr = PetscFree(cseqa);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ba_i = apa; /* rename, temporary array for storage of one row of C */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;  /* next row index of k-th recved i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi  = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj     = buf_rj[k] + *(nextcseqi[k]);
        cseqa     = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscLogStagePop();CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#endif /* TOBEREMOVED */

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
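/*
   The wrapper above is normally reached through MatPtAP(); a typical call sequence from
   user code (a sketch only -- the names Afine, Prolong, and Acoarse are placeholders and
   0.6 is just an example fill value, not anything prescribed by this file) looks like:

     Mat Afine,Prolong,Acoarse;
     ...
     ierr = MatPtAP(Afine,Prolong,MAT_INITIAL_MATRIX,0.6,&Acoarse);CHKERRQ(ierr);
     ...   (later, after the values of Afine have changed but not its nonzero pattern)
     ierr = MatPtAP(Afine,Prolong,MAT_REUSE_MATRIX,0.6,&Acoarse);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(Acoarse);CHKERRQ(ierr);

   With MAT_INITIAL_MATRIX the symbolic and numeric phases both run; with MAT_REUSE_MATRIX
   only the numeric phase is repeated on the existing nonzero structure of Acoarse.
*/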
/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;
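  /* ptadenserow/ptasparserow below form the usual dense-flag/sparse-list pair for one row of
     P^T*A: ptadenserow has an entries indexed by column of A, so membership tests are O(1),
     while ptasparserow records only the columns actually hit so that just those flags need to
     be reset once the row has been processed. */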
  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
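  /* The estimate (ai[am]/pm)*pn is the average number of nonzeros per row of A times the
     number of rows of C.  As an illustration (made-up numbers, not from any test case):
     with nnz(A)=5000, pm=1000 and pn=250 the initial allocation is (5000/1000)*250 = 1250
     entries; if a row of C later needs more, GetMoreSpace() doubles the list as required. */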
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0;j<ptanzi;j++) {
        pdof = *ptaj%dof;
        prow = (*ptaj++)/dof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user using MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
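      /* Both cjj and apj are sorted by column, and the symbolic phase guarantees that the
         pattern of row crow of C contains every column of the current A*P row, so a single
         forward scan (k over cjj, nextap over apj) merges the two lists in linear time; no
         per-entry search into the row of C is needed. */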
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
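  /* Unlike the sequential routine, which guesses from the aspect ratio of P alone, this
     estimate scales the local nnz of A by the caller-supplied fill parameter, so the user
     can tune the initial allocation through the fill argument passed down from MatPtAP(). */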
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
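
/*
   Sketch of the explicit two-phase interface documented above (user-level illustration only;
   A and P are placeholders for already-assembled AIJ matrices and the fill value is arbitrary):

     Mat C;
     ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);   (creates the nonzero structure of C)
     ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);         (fills in the values)
     ierr = MatDestroy(C);CHKERRQ(ierr);

   If only the numerical values of A change, MatPtAPNumeric() can be called again with the
   same C; calling MatPtAP() with MAT_INITIAL_MATRIX performs both phases in a single call.
*/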