/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ.  C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);

  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
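/*
   Usage sketch (illustration only, not part of the library source), assuming A and P
   are assembled AIJ matrices of compatible size:

      Mat C;
      ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
      ...
      (change the numerical values of A, keeping the same nonzero pattern)
      ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);

   The fill argument (1.0 here) is only a hint: some of the implementations below use it
   to size the initial symbolic workspace; see the GetMoreSpace() calls.
*/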

EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(ptap);CHKERRQ(ierr); /* free the supporting struct only when it was actually attached */
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }
    /* update P_oth */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
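    /* Reuse note (descriptive comment, not in the original source): on this path only
       P_oth (the off-process rows of P) is re-fetched above; P_loc and the symbolic data
       stored in the "MatPtAPMPI" container attached to P are taken unchanged from the
       MAT_INITIAL_MATRIX call. */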
141 142 } else { 143 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 144 } 145 /* now, compute numeric local P^T*A*P */ 146 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 147 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 148 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 149 150 PetscFunctionReturn(0); 151 } 152 153 #undef __FUNCT__ 154 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 155 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 156 { 157 PetscErrorCode ierr; 158 Mat P_loc,P_oth; 159 Mat_MatMatMultMPI *ptap; 160 PetscObjectContainer container; 161 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 162 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 163 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 164 Mat_SeqAIJ *p_loc,*p_oth; 165 PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj; 166 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 167 PetscInt pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj; 168 PetscInt aN=A->N,am=A->m,pN=P->N; 169 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 170 MatScalar *ca; 171 PetscBT lnkbt; 172 PetscInt prstart,prend,nprow_loc,nprow_oth; 173 PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 174 175 MPI_Comm comm=A->comm; 176 Mat B_mpi; 177 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 178 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 179 PetscInt len,proc,*dnz,*onz,*owners; 180 PetscInt anzi,*bi,*bj,bnzi,nspacedouble=0; 181 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci; 182 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 183 MPI_Status *status; 184 Mat_Merge_SeqsToMPI *merge; 185 186 PetscFunctionBegin; 187 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 188 if (container) { 189 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 190 } else { 191 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 192 } 193 /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */ 194 /*--------------------------------------------------*/ 195 /* get data from P_loc and P_oth */ 196 P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart; 197 p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data; 198 pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j; 199 prend = prstart + pm; 200 201 /* get ij structure of P_loc^T */ 202 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 203 ptJ=ptj; 204 205 /* Allocate ci array, arrays for fill computation and */ 206 /* free space for accumulating nonzero column info */ 207 ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 208 ci[0] = 0; 209 210 ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 211 ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 212 ptasparserow_loc = ptadenserow_loc + aN; 213 ptadenserow_oth = ptasparserow_loc + aN; 214 ptasparserow_oth = ptadenserow_oth + aN; 215 216 /* create and initialize a linked list */ 217 nlnk = pN+1; 218 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 219 220 /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */ 221 /* This should be reasonable if sparsity of PtAP is similar to that of A. 
*/ 222 nnz = adi[am] + aoi[am]; 223 ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space); 224 current_space = free_space; 225 226 /* determine symbolic info for each row of C: */ 227 for (i=0;i<pN;i++) { 228 ptnzi = pti[i+1] - pti[i]; 229 nprow_loc = 0; nprow_oth = 0; 230 /* i-th row of symbolic P_loc^T*A_loc: */ 231 for (j=0;j<ptnzi;j++) { 232 arow = *ptJ++; 233 /* diagonal portion of A */ 234 anzj = adi[arow+1] - adi[arow]; 235 ajj = adj + adi[arow]; 236 for (k=0;k<anzj;k++) { 237 col = ajj[k]+prstart; 238 if (!ptadenserow_loc[col]) { 239 ptadenserow_loc[col] = -1; 240 ptasparserow_loc[nprow_loc++] = col; 241 } 242 } 243 /* off-diagonal portion of A */ 244 anzj = aoi[arow+1] - aoi[arow]; 245 ajj = aoj + aoi[arow]; 246 for (k=0;k<anzj;k++) { 247 col = a->garray[ajj[k]]; /* global col */ 248 if (col < cstart){ 249 col = ajj[k]; 250 } else if (col >= cend){ 251 col = ajj[k] + pm; 252 } else { 253 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 254 } 255 if (!ptadenserow_oth[col]) { 256 ptadenserow_oth[col] = -1; 257 ptasparserow_oth[nprow_oth++] = col; 258 } 259 } 260 } 261 262 /* using symbolic info of local PtA, determine symbolic info for row of C: */ 263 cnzi = 0; 264 /* rows of P_loc */ 265 ptaj = ptasparserow_loc; 266 for (j=0; j<nprow_loc; j++) { 267 prow = *ptaj++; 268 prow -= prstart; /* rm */ 269 pnzj = pi_loc[prow+1] - pi_loc[prow]; 270 pjj = pj_loc + pi_loc[prow]; 271 /* add non-zero cols of P into the sorted linked list lnk */ 272 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 273 cnzi += nlnk; 274 } 275 /* rows of P_oth */ 276 ptaj = ptasparserow_oth; 277 for (j=0; j<nprow_oth; j++) { 278 prow = *ptaj++; 279 if (prow >= prend) prow -= pm; /* rm */ 280 pnzj = pi_oth[prow+1] - pi_oth[prow]; 281 pjj = pj_oth + pi_oth[prow]; 282 /* add non-zero cols of P into the sorted linked list lnk */ 283 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 284 cnzi += nlnk; 285 } 286 287 /* If free space is not available, make more free space */ 288 /* Double the amount of total space in the list */ 289 if (current_space->local_remaining<cnzi) { 290 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 291 } 292 293 /* Copy data into free space, and zero out denserows */ 294 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 295 current_space->array += cnzi; 296 current_space->local_used += cnzi; 297 current_space->local_remaining -= cnzi; 298 299 for (j=0;j<nprow_loc; j++) { 300 ptadenserow_loc[ptasparserow_loc[j]] = 0; 301 } 302 for (j=0;j<nprow_oth; j++) { 303 ptadenserow_oth[ptasparserow_oth[j]] = 0; 304 } 305 306 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 307 /* For now, we will recompute what is needed. */ 308 ci[i+1] = ci[i] + cnzi; 309 } 310 /* Clean up. 
*/ 311 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 312 313 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 314 /* Allocate space for cj, initialize cj, and */ 315 /* destroy list of free space and other temporary array(s) */ 316 ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 317 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 318 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 319 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 320 321 /* Allocate space for ca */ 322 /* 323 ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 324 ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 325 */ 326 327 /* add C_seq into mpi C */ 328 /*-----------------------------------*/ 329 free_space=PETSC_NULL; current_space=PETSC_NULL; 330 331 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 332 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 333 334 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 335 336 337 /* determine row ownership */ 338 /*---------------------------------------------------------*/ 339 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 340 ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr); 341 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 342 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 343 344 /* determine the number of messages to send, their lengths */ 345 /*---------------------------------------------------------*/ 346 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 347 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 348 len_s = merge->len_s; 349 len = 0; /* length of buf_si[] */ 350 merge->nsend = 0; 351 for (proc=0; proc<size; proc++){ 352 len_si[proc] = 0; 353 if (proc == rank){ 354 len_s[proc] = 0; 355 } else { 356 len_si[proc] = owners[proc+1] - owners[proc] + 1; 357 len_s[proc] = ci[owners[proc+1]] - ci[owners[proc]]; /* num of rows to be sent to [proc] */ 358 } 359 if (len_s[proc]) { 360 merge->nsend++; 361 nrows = 0; 362 for (i=owners[proc]; i<owners[proc+1]; i++){ 363 if (ci[i+1] > ci[i]) nrows++; 364 } 365 len_si[proc] = 2*(nrows+1); 366 len += len_si[proc]; 367 } 368 } 369 370 /* determine the number and length of messages to receive for ij-structure */ 371 /*-------------------------------------------------------------------------*/ 372 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 373 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 374 375 /* post the Irecv of j-structure */ 376 /*-------------------------------*/ 377 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 378 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 379 380 /* post the Isend of j-structure */ 381 /*--------------------------------*/ 382 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 383 sj_waits = si_waits + merge->nsend; 384 385 for (proc=0, k=0; proc<size; proc++){ 386 if (!len_s[proc]) continue; 387 i = owners[proc]; 388 ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 389 k++; 390 } 391 392 /* receives and sends of j-structure are complete */ 393 /*------------------------------------------------*/ 394 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 395 ierr = 
MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 396 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 397 398 /* send and recv i-structure */ 399 /*---------------------------*/ 400 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 401 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 402 403 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 404 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 405 for (proc=0,k=0; proc<size; proc++){ 406 if (!len_s[proc]) continue; 407 /* form outgoing message for i-structure: 408 buf_si[0]: nrows to be sent 409 [1:nrows]: row index (global) 410 [nrows+1:2*nrows+1]: i-structure index 411 */ 412 /*-------------------------------------------*/ 413 nrows = len_si[proc]/2 - 1; 414 buf_si_i = buf_si + nrows+1; 415 buf_si[0] = nrows; 416 buf_si_i[0] = 0; 417 nrows = 0; 418 for (i=owners[proc]; i<owners[proc+1]; i++){ 419 anzi = ci[i+1] - ci[i]; 420 if (anzi) { 421 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 422 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 423 nrows++; 424 } 425 } 426 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 427 k++; 428 buf_si += len_si[proc]; 429 } 430 431 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 432 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 433 434 ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 435 for (i=0; i<merge->nrecv; i++){ 436 ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 437 } 438 439 ierr = PetscFree(len_si);CHKERRQ(ierr); 440 ierr = PetscFree(len_ri);CHKERRQ(ierr); 441 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 442 ierr = PetscFree(si_waits);CHKERRQ(ierr); 443 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 444 ierr = PetscFree(buf_s);CHKERRQ(ierr); 445 ierr = PetscFree(status);CHKERRQ(ierr); 446 447 /* compute a local seq matrix in each processor */ 448 /*----------------------------------------------*/ 449 /* allocate bi array and free space for accumulating nonzero column info */ 450 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 451 bi[0] = 0; 452 453 /* create and initialize a linked list */ 454 nlnk = pN+1; 455 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 456 457 /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */ 458 len = 0; 459 len = ci[owners[rank+1]] - ci[owners[rank]]; 460 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 461 current_space = free_space; 462 463 /* determine symbolic info for each local row */ 464 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 465 nextrow = buf_ri_k + merge->nrecv; 466 nextci = nextrow + merge->nrecv; 467 for (k=0; k<merge->nrecv; k++){ 468 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 469 nrows = *buf_ri_k[k]; 470 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 471 nextci[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 472 } 473 474 ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr); 475 len = 0; 476 for (i=0; i<pn; i++) { 477 bnzi = 0; 478 /* add local non-zero cols of this proc's C_seq into lnk */ 479 arow = owners[rank] + i; 480 anzi = ci[arow+1] - ci[arow]; 
481 cji = cj + ci[arow]; 482 ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 483 bnzi += nlnk; 484 /* add received col data into lnk */ 485 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 486 if (i == *nextrow[k]) { /* i-th row */ 487 anzi = *(nextci[k]+1) - *nextci[k]; 488 cji = buf_rj[k] + *nextci[k]; 489 ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 490 bnzi += nlnk; 491 nextrow[k]++; nextci[k]++; 492 } 493 } 494 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 495 496 /* if free space is not available, make more free space */ 497 if (current_space->local_remaining<bnzi) { 498 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 499 nspacedouble++; 500 } 501 /* copy data into free space, then initialize lnk */ 502 ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 503 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 504 505 current_space->array += bnzi; 506 current_space->local_used += bnzi; 507 current_space->local_remaining -= bnzi; 508 509 bi[i+1] = bi[i] + bnzi; 510 } 511 512 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 513 514 ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 515 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 516 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 517 518 /* create symbolic parallel matrix B_mpi */ 519 /*---------------------------------------*/ 520 ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 521 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 522 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 523 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 524 525 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 526 B_mpi->assembled = PETSC_FALSE; 527 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 528 merge->bi = bi; 529 merge->bj = bj; 530 merge->ci = ci; 531 merge->cj = cj; 532 merge->buf_ri = buf_ri; 533 merge->buf_rj = buf_rj; 534 merge->C_seq = PETSC_NULL; 535 536 /* attach the supporting struct to B_mpi for reuse */ 537 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 538 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 539 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 540 *C = B_mpi; 541 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 547 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 548 { 549 PetscErrorCode ierr; 550 Mat_Merge_SeqsToMPI *merge; 551 Mat_MatMatMultMPI *ptap; 552 PetscObjectContainer cont_merge,cont_ptap; 553 PetscInt flops=0; 554 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data; 555 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data, 556 *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data, 557 *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data; 558 Mat C_seq; 559 Mat_SeqAIJ *cseq,*p_oth; 560 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend; 561 PetscInt *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj; 562 PetscInt i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow; 563 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj,**abuf_r,*ba_i,*ca; 564 MatScalar *pA_d=pd->a,*pA_o=po->a,*pa_oth; 565 PetscInt am=A->m,cN=C->N,cm=C->m; 566 MPI_Comm comm=C->comm; 567 PetscMPIInt 
size,rank,taga,*len_s,**buf_ri,**buf_rj,**buf_ri_k,**nextrow,**nextcseqi; 568 PetscInt *owners,proc,nrows; 569 PetscInt *cjj,cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */ 570 MPI_Request *s_waits,*r_waits; 571 MPI_Status *status; 572 PetscInt *cseqi,*cseqj,col; 573 574 PetscFunctionBegin; 575 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 576 if (cont_merge) { 577 ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 578 } else { 579 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 580 } 581 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 582 if (cont_ptap) { 583 ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 584 } else { 585 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 586 } 587 /* get data from symbolic products */ 588 p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data; 589 pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a; 590 591 cseqi = merge->ci; cseqj=merge->cj; 592 ierr = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr); 593 ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr); 594 595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 596 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 597 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 598 599 /* Allocate temporary array for storage of one row of A*P and one row of C */ 600 ierr = PetscMalloc(2*cN*(sizeof(MatScalar)+sizeof(PetscInt)),&apa);CHKERRQ(ierr); 601 ierr = PetscMemzero(apa,2*cN*(sizeof(MatScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 602 ca = (MatScalar*)(apa + cN); 603 apj = (PetscInt *)(ca + cN); 604 apjdense = (PetscInt *)(apj + cN); 605 606 /* Clear old values in C */ 607 ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr); 608 ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr); 609 610 for (i=0;i<am;i++) { 611 /* Form i-th sparse row of A*P */ 612 apnzj = 0; 613 /* diagonal portion of A */ 614 nzi = adi[i+1] - adi[i]; 615 for (j=0;j<nzi;j++) { 616 prow = *adj++; 617 /* diagonal portion of P */ 618 pnzj = pd->i[prow+1] - pd->i[prow]; 619 pjj = pd->j + pd->i[prow]; /* local col index of P */ 620 paj = pd->a + pd->i[prow]; 621 for (k=0;k<pnzj;k++) { 622 col = *pjj + p->cstart; pjj++; /* global col index of P */ 623 if (!apjdense[col]) { 624 apjdense[col] = -1; 625 apj[apnzj++] = col; 626 } 627 apa[col] += (*ada)*paj[k]; 628 } 629 flops += 2*pnzj; 630 /* off-diagonal portion of P */ 631 pnzj = po->i[prow+1] - po->i[prow]; 632 pjj = po->j + po->i[prow]; /* local col index of P */ 633 paj = po->a + po->i[prow]; 634 for (k=0;k<pnzj;k++) { 635 col = p->garray[*pjj]; pjj++; /* global col index of P */ 636 if (!apjdense[col]) { 637 apjdense[col] = -1; 638 apj[apnzj++] = col; 639 } 640 apa[col] += (*ada)*paj[k]; 641 } 642 flops += 2*pnzj; 643 644 ada++; 645 } 646 /* off-diagonal portion of A */ 647 nzi = aoi[i+1] - aoi[i]; 648 for (j=0;j<nzi;j++) { 649 prow = *aoj++; 650 pnzj = pi_oth[prow+1] - pi_oth[prow]; 651 pjj = pj_oth + pi_oth[prow]; 652 paj = pa_oth + pi_oth[prow]; 653 for (k=0;k<pnzj;k++) { 654 if (!apjdense[pjj[k]]) { 655 apjdense[pjj[k]] = -1; 656 apj[apnzj++] = pjj[k]; 657 } 658 apa[pjj[k]] += (*aoa)*paj[k]; 659 } 660 flops += 2*pnzj; 661 aoa++; 662 } 663 /* Sort the j index array for quick sparse axpy. 
*/ 664 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 665 666 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 667 /* diagonal portion of P -- gives matrix value of local C */ 668 pnzi = pd->i[i+1] - pd->i[i]; 669 for (j=0;j<pnzi;j++) { 670 crow = (*pJ_d++) + owners[rank]; 671 cjj = cseqj + cseqi[crow]; 672 /* add value into C */ 673 for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]]; 674 ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr); 675 ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr); 676 pA_d++; 677 flops += 2*apnzj; 678 } 679 680 /* off-diagonal portion of P -- gives matrix values to be sent to others */ 681 pnzi = po->i[i+1] - po->i[i]; 682 for (j=0;j<pnzi;j++) { 683 crow = p->garray[*pJ_o++]; 684 cjj = cseqj + cseqi[crow]; 685 caj = cseqa + cseqi[crow]; 686 /* add value into C_seq to be sent to other processors */ 687 nextap = 0; 688 for (k=0;nextap<apnzj;k++) { 689 if (cjj[k]==apj[nextap]) { 690 caj[k] += (*pA_o)*apa[apj[nextap++]]; 691 } 692 } 693 flops += 2*apnzj; 694 pA_o++; 695 } 696 697 /* Zero the current row info for A*P */ 698 for (j=0;j<apnzj;j++) { 699 apa[apj[j]] = 0.; 700 apjdense[apj[j]] = 0; 701 } 702 } 703 704 /* send and recv matrix values */ 705 /*-----------------------------*/ 706 bi = merge->bi; 707 bj = merge->bj; 708 buf_ri = merge->buf_ri; 709 buf_rj = merge->buf_rj; 710 len_s = merge->len_s; 711 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 712 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 713 714 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 715 for (proc=0,k=0; proc<size; proc++){ 716 if (!len_s[proc]) continue; 717 i = owners[proc]; 718 ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 719 k++; 720 } 721 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 722 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 723 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 724 ierr = PetscFree(status);CHKERRQ(ierr); 725 726 ierr = PetscFree(s_waits);CHKERRQ(ierr); 727 ierr = PetscFree(r_waits);CHKERRQ(ierr); 728 ierr = PetscFree(cseqa);CHKERRQ(ierr); 729 730 /* insert mat values of mpimat */ 731 /*----------------------------*/ 732 ba_i = apa; /* rename, temporary array for storage of one row of C */ 733 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 734 nextrow = buf_ri_k + merge->nrecv; 735 nextcseqi = nextrow + merge->nrecv; 736 737 for (k=0; k<merge->nrecv; k++){ 738 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 739 nrows = *(buf_ri_k[k]); 740 nextrow[k] = buf_ri_k[k]+1; /* next row index of k-th recved i-structure */ 741 nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 742 } 743 744 /* add received local vals to C */ 745 for (i=0; i<cm; i++) { 746 crow = owners[rank] + i; 747 bj_i = bj+bi[i]; /* col indices of the i-th row of C */ 748 bnzi = bi[i+1] - bi[i]; 749 nzi = 0; 750 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 751 /* i-th row */ 752 if (i == *nextrow[k]) { 753 cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k]; 754 cseqj = buf_rj[k] + *(nextcseqi[k]); 755 cseqa = abuf_r[k] + *(nextcseqi[k]); 756 nextcseqj = 0; 757 for (j=0; nextcseqj<cseqnzi; j++){ 758 if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */ 759 ba_i[j] += cseqa[nextcseqj++]; 760 nzi++; 761 } 762 } 763 
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
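/*
   Usage sketch (illustration only, not part of the library source): the symbolic/numeric
   split lets the nonzero structure of C be reused when the values of A change but the
   sparsity pattern and P stay fixed.  Assuming assembled SeqAIJ matrices A and P:

      Mat C;
      ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);   - build the (i,j) structure of C
      ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);         - fill in the numerical values of C
      ...                                                  (change the values of A, same pattern)
      ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);         - recompute the values only
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/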

typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ=ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
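  /* Worked illustration (not from the original source): the estimate below does not use
     the fill argument; it is nnz(A)*(pn/pm), e.g. nnz(A)=100000 with a 10000-by-1000 P
     gives (100000/10000)*1000 = 10000 entries (integer division first, as coded).
     If a row of C overflows this workspace, GetMoreSpace() doubles the total list size. */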
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works.
*/ 1004 PetscErrorCode ierr; 1005 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1006 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 1007 Mat P=pp->AIJ; 1008 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 1009 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 1010 PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 1011 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 1012 PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 1013 MatScalar *ca; 1014 1015 PetscFunctionBegin; 1016 /* Start timer */ 1017 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 1018 1019 /* Get ij structure of P^T */ 1020 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 1021 1022 /* Allocate ci array, arrays for fill computation and */ 1023 /* free space for accumulating nonzero column info */ 1024 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1025 ci[0] = 0; 1026 1027 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 1028 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1029 ptasparserow = ptadenserow + an; 1030 denserow = ptasparserow + an; 1031 sparserow = denserow + pn; 1032 1033 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 1034 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1035 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 1036 current_space = free_space; 1037 1038 /* Determine symbolic info for each row of C: */ 1039 for (i=0;i<pn/ppdof;i++) { 1040 ptnzi = pti[i+1] - pti[i]; 1041 ptanzi = 0; 1042 ptJ = ptj + pti[i]; 1043 for (dof=0;dof<ppdof;dof++) { 1044 /* Determine symbolic row of PtA: */ 1045 for (j=0;j<ptnzi;j++) { 1046 arow = ptJ[j] + dof; 1047 anzj = ai[arow+1] - ai[arow]; 1048 ajj = aj + ai[arow]; 1049 for (k=0;k<anzj;k++) { 1050 if (!ptadenserow[ajj[k]]) { 1051 ptadenserow[ajj[k]] = -1; 1052 ptasparserow[ptanzi++] = ajj[k]; 1053 } 1054 } 1055 } 1056 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 1057 ptaj = ptasparserow; 1058 cnzi = 0; 1059 for (j=0;j<ptanzi;j++) { 1060 pdof = *ptaj%dof; 1061 prow = (*ptaj++)/dof; 1062 pnzj = pi[prow+1] - pi[prow]; 1063 pjj = pj + pi[prow]; 1064 for (k=0;k<pnzj;k++) { 1065 if (!denserow[pjj[k]+pdof]) { 1066 denserow[pjj[k]+pdof] = -1; 1067 sparserow[cnzi++] = pjj[k]+pdof; 1068 } 1069 } 1070 } 1071 1072 /* sort sparserow */ 1073 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 1074 1075 /* If free space is not available, make more free space */ 1076 /* Double the amount of total space in the list */ 1077 if (current_space->local_remaining<cnzi) { 1078 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1079 } 1080 1081 /* Copy data into free space, and zero out denserows */ 1082 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 1083 current_space->array += cnzi; 1084 current_space->local_used += cnzi; 1085 current_space->local_remaining -= cnzi; 1086 1087 for (j=0;j<ptanzi;j++) { 1088 ptadenserow[ptasparserow[j]] = 0; 1089 } 1090 for (j=0;j<cnzi;j++) { 1091 denserow[sparserow[j]] = 0; 1092 } 1093 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1094 /* For now, we will recompute what is needed. 
 */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user using MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ.  C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem.
*/ 1184 fA = A->ops->ptapnumeric; 1185 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 1186 fP = P->ops->ptapnumeric; 1187 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 1188 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 1189 1190 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1191 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 1192 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1193 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 1199 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 1200 { 1201 PetscErrorCode ierr; 1202 PetscInt flops=0; 1203 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 1204 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 1205 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 1206 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 1207 PetscInt *ci=c->i,*cj=c->j,*cjj; 1208 PetscInt am=A->M,cn=C->N,cm=C->M; 1209 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 1210 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 1211 1212 PetscFunctionBegin; 1213 /* Allocate temporary array for storage of one row of A*P */ 1214 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 1215 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1216 1217 apj = (PetscInt *)(apa + cn); 1218 apjdense = apj + cn; 1219 1220 /* Clear old values in C */ 1221 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1222 1223 for (i=0;i<am;i++) { 1224 /* Form sparse row of A*P */ 1225 anzi = ai[i+1] - ai[i]; 1226 apnzj = 0; 1227 for (j=0;j<anzi;j++) { 1228 prow = *aj++; 1229 pnzj = pi[prow+1] - pi[prow]; 1230 pjj = pj + pi[prow]; 1231 paj = pa + pi[prow]; 1232 for (k=0;k<pnzj;k++) { 1233 if (!apjdense[pjj[k]]) { 1234 apjdense[pjj[k]] = -1; 1235 apj[apnzj++] = pjj[k]; 1236 } 1237 apa[pjj[k]] += (*aa)*paj[k]; 1238 } 1239 flops += 2*pnzj; 1240 aa++; 1241 } 1242 1243 /* Sort the j index array for quick sparse axpy. */ 1244 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1245 1246 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 1247 pnzi = pi[i+1] - pi[i]; 1248 for (j=0;j<pnzi;j++) { 1249 nextap = 0; 1250 crow = *pJ++; 1251 cjj = cj + ci[crow]; 1252 caj = ca + ci[crow]; 1253 /* Perform sparse axpy operation. Note cjj includes apj. 
*/ 1254 for (k=0;nextap<apnzj;k++) { 1255 if (cjj[k]==apj[nextap]) { 1256 caj[k] += (*pA)*apa[apj[nextap++]]; 1257 } 1258 } 1259 flops += 2*apnzj; 1260 pA++; 1261 } 1262 1263 /* Zero the current row info for A*P */ 1264 for (j=0;j<apnzj;j++) { 1265 apa[apj[j]] = 0.; 1266 apjdense[apj[j]] = 0; 1267 } 1268 } 1269 1270 /* Assemble the final matrix and clean up */ 1271 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1272 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1273 ierr = PetscFree(apa);CHKERRQ(ierr); 1274 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1275 1276 PetscFunctionReturn(0); 1277 } 1278 1279 /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 1280 static PetscEvent logkey_matptapnumeric_local = 0; 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 1283 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 1284 { 1285 PetscErrorCode ierr; 1286 PetscInt flops=0; 1287 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1288 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 1289 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 1290 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1291 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1292 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 1293 PetscInt *pJ=pj_loc,*pjj; 1294 PetscInt *ci=c->i,*cj=c->j,*cjj; 1295 PetscInt am=A->m,cn=C->N,cm=C->M; 1296 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 1297 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 1298 MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 1299 1300 PetscFunctionBegin; 1301 if (!logkey_matptapnumeric_local) { 1302 ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 1303 } 1304 ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 1305 1306 /* Allocate temporary array for storage of one row of A*P */ 1307 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 1308 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1309 apj = (PetscInt *)(apa + cn); 1310 apjdense = apj + cn; 1311 1312 /* Clear old values in C */ 1313 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1314 1315 for (i=0;i<am;i++) { 1316 /* Form i-th sparse row of A*P */ 1317 apnzj = 0; 1318 /* diagonal portion of A */ 1319 anzi = adi[i+1] - adi[i]; 1320 for (j=0;j<anzi;j++) { 1321 prow = *adj; 1322 adj++; 1323 pnzj = pi_loc[prow+1] - pi_loc[prow]; 1324 pjj = pj_loc + pi_loc[prow]; 1325 paj = pa_loc + pi_loc[prow]; 1326 for (k=0;k<pnzj;k++) { 1327 if (!apjdense[pjj[k]]) { 1328 apjdense[pjj[k]] = -1; 1329 apj[apnzj++] = pjj[k]; 1330 } 1331 apa[pjj[k]] += (*ada)*paj[k]; 1332 } 1333 flops += 2*pnzj; 1334 ada++; 1335 } 1336 /* off-diagonal portion of A */ 1337 anzi = aoi[i+1] - aoi[i]; 1338 for (j=0;j<anzi;j++) { 1339 col = a->garray[*aoj]; 1340 if (col < cstart){ 1341 prow = *aoj; 1342 } else if (col >= cend){ 1343 prow = *aoj; 1344 } else { 1345 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1346 } 1347 aoj++; 1348 pnzj = pi_oth[prow+1] - pi_oth[prow]; 1349 pjj = pj_oth + pi_oth[prow]; 1350 paj = pa_oth + pi_oth[prow]; 1351 for (k=0;k<pnzj;k++) { 1352 if (!apjdense[pjj[k]]) { 1353 apjdense[pjj[k]] = -1; 1354 apj[apnzj++] = pjj[k]; 1355 } 1356 apa[pjj[k]] += 
(*aoa)*paj[k]; 1357 } 1358 flops += 2*pnzj; 1359 aoa++; 1360 } 1361 /* Sort the j index array for quick sparse axpy. */ 1362 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1363 1364 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1365 pnzi = pi_loc[i+1] - pi_loc[i]; 1366 for (j=0;j<pnzi;j++) { 1367 nextap = 0; 1368 crow = *pJ++; 1369 cjj = cj + ci[crow]; 1370 caj = ca + ci[crow]; 1371 /* Perform sparse axpy operation. Note cjj includes apj. */ 1372 for (k=0;nextap<apnzj;k++) { 1373 if (cjj[k]==apj[nextap]) { 1374 caj[k] += (*pA)*apa[apj[nextap++]]; 1375 } 1376 } 1377 flops += 2*apnzj; 1378 pA++; 1379 } 1380 1381 /* Zero the current row info for A*P */ 1382 for (j=0;j<apnzj;j++) { 1383 apa[apj[j]] = 0.; 1384 apjdense[apj[j]] = 0; 1385 } 1386 } 1387 1388 /* Assemble the final matrix and clean up */ 1389 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 ierr = PetscFree(apa);CHKERRQ(ierr); 1392 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1393 ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 1394 PetscFunctionReturn(0); 1395 } 1396 static PetscEvent logkey_matptapsymbolic_local = 0; 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 1399 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 1400 { 1401 PetscErrorCode ierr; 1402 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1403 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1404 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 1405 PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 1406 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1407 PetscInt *ci,*cj,*ptaj; 1408 PetscInt aN=A->N,am=A->m,pN=P_loc->N; 1409 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 1410 PetscInt pm=P_loc->m,nlnk,*lnk; 1411 MatScalar *ca; 1412 PetscBT lnkbt; 1413 PetscInt prend,nprow_loc,nprow_oth; 1414 PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1415 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1416 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1417 1418 PetscFunctionBegin; 1419 if (!logkey_matptapsymbolic_local) { 1420 ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1421 } 1422 ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1423 1424 prend = prstart + pm; 1425 1426 /* get ij structure of P_loc^T */ 1427 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1428 ptJ=ptj; 1429 1430 /* Allocate ci array, arrays for fill computation and */ 1431 /* free space for accumulating nonzero column info */ 1432 ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1433 ci[0] = 0; 1434 1435 ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 1436 ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 1437 ptasparserow_loc = ptadenserow_loc + aN; 1438 ptadenserow_oth = ptasparserow_loc + aN; 1439 ptasparserow_oth = ptadenserow_oth + aN; 1440 1441 /* create and initialize a linked list */ 1442 nlnk = pN+1; 1443 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1444 1445 /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */ 1446 /* This should be reasonable if sparsity of PtAP is similar to that of A. 
*/ 1447 nnz = adi[am] + aoi[am]; 1448 ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space); 1449 current_space = free_space; 1450 1451 /* determine symbolic info for each row of C: */ 1452 for (i=0;i<pN;i++) { 1453 ptnzi = pti[i+1] - pti[i]; 1454 nprow_loc = 0; nprow_oth = 0; 1455 /* i-th row of symbolic P_loc^T*A_loc: */ 1456 for (j=0;j<ptnzi;j++) { 1457 arow = *ptJ++; 1458 /* diagonal portion of A */ 1459 anzj = adi[arow+1] - adi[arow]; 1460 ajj = adj + adi[arow]; 1461 for (k=0;k<anzj;k++) { 1462 col = ajj[k]+prstart; 1463 if (!ptadenserow_loc[col]) { 1464 ptadenserow_loc[col] = -1; 1465 ptasparserow_loc[nprow_loc++] = col; 1466 } 1467 } 1468 /* off-diagonal portion of A */ 1469 anzj = aoi[arow+1] - aoi[arow]; 1470 ajj = aoj + aoi[arow]; 1471 for (k=0;k<anzj;k++) { 1472 col = a->garray[ajj[k]]; /* global col */ 1473 if (col < cstart){ 1474 col = ajj[k]; 1475 } else if (col >= cend){ 1476 col = ajj[k] + pm; 1477 } else { 1478 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1479 } 1480 if (!ptadenserow_oth[col]) { 1481 ptadenserow_oth[col] = -1; 1482 ptasparserow_oth[nprow_oth++] = col; 1483 } 1484 } 1485 } 1486 1487 /* using symbolic info of local PtA, determine symbolic info for row of C: */ 1488 cnzi = 0; 1489 /* rows of P_loc */ 1490 ptaj = ptasparserow_loc; 1491 for (j=0; j<nprow_loc; j++) { 1492 prow = *ptaj++; 1493 prow -= prstart; /* rm */ 1494 pnzj = pi_loc[prow+1] - pi_loc[prow]; 1495 pjj = pj_loc + pi_loc[prow]; 1496 /* add non-zero cols of P into the sorted linked list lnk */ 1497 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1498 cnzi += nlnk; 1499 } 1500 /* rows of P_oth */ 1501 ptaj = ptasparserow_oth; 1502 for (j=0; j<nprow_oth; j++) { 1503 prow = *ptaj++; 1504 if (prow >= prend) prow -= pm; /* rm */ 1505 pnzj = pi_oth[prow+1] - pi_oth[prow]; 1506 pjj = pj_oth + pi_oth[prow]; 1507 /* add non-zero cols of P into the sorted linked list lnk */ 1508 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1509 cnzi += nlnk; 1510 } 1511 1512 /* If free space is not available, make more free space */ 1513 /* Double the amount of total space in the list */ 1514 if (current_space->local_remaining<cnzi) { 1515 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1516 } 1517 1518 /* Copy data into free space, and zero out denserows */ 1519 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1520 current_space->array += cnzi; 1521 current_space->local_used += cnzi; 1522 current_space->local_remaining -= cnzi; 1523 1524 for (j=0;j<nprow_loc; j++) { 1525 ptadenserow_loc[ptasparserow_loc[j]] = 0; 1526 } 1527 for (j=0;j<nprow_oth; j++) { 1528 ptadenserow_oth[ptasparserow_oth[j]] = 0; 1529 } 1530 1531 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1532 /* For now, we will recompute what is needed. 
*/ 1533 ci[i+1] = ci[i] + cnzi; 1534 } 1535 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1536 /* Allocate space for cj, initialize cj, and */ 1537 /* destroy list of free space and other temporary array(s) */ 1538 ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1539 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1540 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1541 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1542 1543 /* Allocate space for ca */ 1544 ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1545 ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1546 1547 /* put together the new matrix */ 1548 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 1549 1550 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1551 /* Since these are PETSc arrays, change flags to free them as necessary. */ 1552 c = (Mat_SeqAIJ *)((*C)->data); 1553 c->freedata = PETSC_TRUE; 1554 c->nonew = 0; 1555 1556 /* Clean up. */ 1557 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1558 1559 ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1560 PetscFunctionReturn(0); 1561 } 1562