/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(), MatPtAPNumeric(), MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,5);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);

  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
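
/*
   A minimal usage sketch (hypothetical matrices A and P, assembled elsewhere;
   not taken from this file):

     Mat C;
     ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
     ...  change numerical values in A, keeping the same nonzero pattern  ...
     ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/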

EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);
    if (ptap->abi) {ierr = PetscFree(ptap->abi);CHKERRQ(ierr);}
    if (ptap->abj) {ierr = PetscFree(ptap->abj);CHKERRQ(ierr);}

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);
    ptap->abi = PETSC_NULL; ptap->abj = PETSC_NULL;
    ptap->abnz_max = 0;  /* symbolic A*P is not done yet */

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  /*--------------------------------------------------*/
  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;
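
  /*
     A note on FreeSpaceList (a sketch of the semantics assumed here, see
     src/mat/utils/freespace.h): GetMoreSpace() appends a segment of the
     requested size to a chain of buffers; rows of column indices are written
     into current_space->array segment by segment, and MakeSpaceContiguous()
     later copies the chain into one malloc'ed array and frees the chain.
     This avoids having to guess the final nnz(C) up front.
  */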

  /* determine symbolic info for each row of C: */
  for (i=0; i<pN; i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0; k<anzj; k++) {
        col = ajj[k] + prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col] = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0; k<anzj; k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col] = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0; j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  /*
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
  */
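
  /*
     For reference, (ci,cj) is standard AIJ/CSR storage: row i of C_seq has its
     column indices in cj[ci[i]..ci[i+1]-1].  For example (illustrative values
     only), a 3-row matrix with rows {0,2}, {1}, {0,1,2} is stored as
     ci = {0,2,3,6}, cj = {0,2, 1, 0,1,2}.
  */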

  /* add C_seq into mpi C */
  /*----------------------*/
  free_space=PETSC_NULL; current_space=PETSC_NULL;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s        = merge->len_s;
  len          = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ci[i+1] > ci[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*-------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ci[i+1] - ci[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi;  /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];          /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }
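
  /*
     A worked example of the i-structure message (illustrative values only):
     if the rows owned by [proc] have local indices 0..3 with 3, 0, 2 and 1
     nonzeros respectively, only the 3 nonempty rows are encoded and the
     message is

       buf_si = {3,  0, 2, 3,  0, 3, 5, 6}

     i.e. nrows, then nrows local row indices, then nrows+1 running offsets
     into the j-values, which are sent separately.
  */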

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len  = ci[owners[rank+1]] - ci[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    arow = owners[rank] + i;
    anzi = ci[arow+1] - ci[arow];
    cji  = cj + ci[arow];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  /* ierr = MatSetOption(B_mpi,MAT_COLUMNS_SORTED);CHKERRQ(ierr); -cause delay? */

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi     = bi;
  merge->bj     = bj;
  merge->ci     = ci;
  merge->cj     = cj;
  merge->buf_ri = buf_ri;
  merge->buf_rj = buf_rj;
  merge->C_seq  = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  PetscFunctionReturn(0);
}
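
/*
   Data flow of the symbolic phase above, in brief: each process first builds
   the full pN x pN pattern of C_seq = P_loc^T*A_loc*P from its local rows of A,
   then rows of C_seq are redistributed according to the row ownership of
   merge->rowmap; the union of the locally computed and the received row
   patterns gives the dnz/onz preallocation for the parallel matrix B_mpi.
*/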

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;

  PetscInt      flops=0;
  Mat_MPIAIJ    *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ    *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ    *p_loc,*p_oth;
  PetscInt      *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt      *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt      i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt      *cjj;
  MatScalar     *ada=ad->a,*aoa=ao->a,*ada_tmp=ad->a,*aoa_tmp=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar     *pa_loc,*pA,*pa_oth;
  PetscInt      am=A->m,cN=C->N;
  PetscInt      nextp,*adj_tmp=ad->j,*aoj_tmp=ao->j;

  MPI_Comm      comm=C->comm;
  PetscMPIInt   size,rank,taga,*len_s;
  PetscInt      *owners;
  PetscInt      proc;
  PetscInt      **buf_ri,**buf_rj;
  PetscInt      cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt      nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request   *s_waits,*r_waits;
  MPI_Status    *status;
  MatScalar     **abuf_r,*ba_i;
  PetscInt      *cseqi,*cseqj;
  PetscInt      *cseqj_tmp;
  MatScalar     *cseqa_tmp;
  PetscInt      stages[2];
  PetscInt      *apsymi,*apsymj;
  PetscTruth    HasSymAP;
  FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscLogStageRegister(&stages[0],"NumPtAP_local");CHKERRQ(ierr);
  ierr = PetscLogStageRegister(&stages[1],"NumPtAP_Comm");CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* set or get data from symbolic A*P */
  if (!ptap->abnz_max) {
    HasSymAP = PETSC_FALSE;
    ierr = PetscMalloc((am+2)*sizeof(PetscInt),&apsymi);CHKERRQ(ierr);
    ptap->abi = apsymi;
    apsymi[0] = 0;
    ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
    ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    apj      = (PetscInt *)(apa + cN);
    apjdense = apj + cN;
    /* initial FreeSpace size is 2*nnz(A) */
    ierr = GetMoreSpace((PetscInt)(2*(adi[am]+aoi[am])),&free_space);CHKERRQ(ierr);
    current_space = free_space;
  } else {
    HasSymAP = PETSC_TRUE;
    ierr = PetscMalloc((ptap->abnz_max+1)*sizeof(MatScalar),&apa);CHKERRQ(ierr);
    ierr = PetscMemzero(apa,ptap->abnz_max*sizeof(MatScalar));CHKERRQ(ierr);
    apsymi = ptap->abi; apsymj = ptap->abj;
  }
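
  /*
     Note on apa: on the first numeric call (!HasSymAP) the symbolic pattern of
     A*P is built on the fly, so apa is sized to a full row (cN entries) with
     apjdense marking occupied columns; on reuse (HasSymAP) the pattern abi/abj
     is already known and apa only needs abnz_max entries, indexed by position
     within the row pattern rather than by column.
  */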

  /* get data from symbolic products */
  p_loc = (Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  cseqi = merge->ci; cseqj = merge->cj;
  ierr = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr);
  for (i=0; i<am; i++) {
    /* Form i-th sparse row of A*P */
    if (!HasSymAP){ /* Compute symbolic i-th sparse row of A*P */
      apnzj = 0;
      /* diagonal portion of A */
      anzi = adi[i+1] - adi[i];
      for (j=0; j<anzi; j++) {
        prow = *adj;
        adj++;
        pnzj = pi_loc[prow+1] - pi_loc[prow];
        pjj  = pj_loc + pi_loc[prow];
        paj  = pa_loc + pi_loc[prow];
        for (k=0; k<pnzj; k++) {
          if (!apjdense[pjj[k]]) {
            apjdense[pjj[k]] = -1;
            apj[apnzj++] = pjj[k];
          }
        }
        flops += 2*pnzj;
        ada++;
      }
      /* off-diagonal portion of A */
      anzi = aoi[i+1] - aoi[i];
      for (j=0; j<anzi; j++) {
        col = a->garray[*aoj];
        if (col < cstart){
          prow = *aoj;
        } else if (col >= cend){
          prow = *aoj;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        aoj++;
        pnzj = pi_oth[prow+1] - pi_oth[prow];
        pjj  = pj_oth + pi_oth[prow];
        paj  = pa_oth + pi_oth[prow];
        for (k=0; k<pnzj; k++) {
          if (!apjdense[pjj[k]]) {
            apjdense[pjj[k]] = -1;
            apj[apnzj++] = pjj[k];
          }
        }
        flops += 2*pnzj;
        aoa++;
      }
      /* Sort the j index array for quick sparse axpy. */
      ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
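
      /*
         Sorting matters because the accumulation loops below walk apj and the
         (sorted) AIJ column arrays in lock step, e.g. apj = {1,4,7} against
         pjj = {1,7}: k advances through apj, matching each pjj entry exactly
         once - a linear merge instead of a search per entry.
      */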

      apsymi[i+1] = apsymi[i] + apnzj;
      if (ptap->abnz_max < apnzj) ptap->abnz_max = apnzj;

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<apnzj) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space */
      for (j=0; j<apnzj; j++) current_space->array[j] = apj[j];
      current_space->array           += apnzj;
      current_space->local_used      += apnzj;
      current_space->local_remaining -= apnzj;
    } else { /* get symbolic AP[i,:] */
      apnzj = apsymi[i+1] - apsymi[i];
      apj   = apsymj + apsymi[i];
    }

    /* use symbolic AP[i,:] to form a condensed array apa */
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0; j<anzi; j++) {
      prow = *adj_tmp;
      adj_tmp++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      nextp = 0;
      for (k=0; nextp<pnzj; k++) {
        if (apj[k] == pjj[nextp]) { /* col of AP == col of P */
          apa[k] += (*ada_tmp)*paj[nextp++];
        }
      }
      flops += 2*pnzj;
      ada_tmp++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0; j<anzi; j++) {
      col = a->garray[*aoj_tmp];
      if (col < cstart){
        prow = *aoj_tmp;
      } else if (col >= cend){
        prow = *aoj_tmp;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj_tmp++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      nextp = 0;
      for (k=0; nextp<pnzj; k++) {
        if (apj[k] == pjj[nextp]) { /* col of AP == col of P */
          apa[k] += (*aoa_tmp)*paj[nextp++];
        }
      }
      flops += 2*pnzj;
      aoa_tmp++;
    }

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow = *pJ++;
      cjj  = cseqj + cseqi[crow];
      caj  = cseqa + cseqi[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) caj[k] += (*pA)*apa[nextap++];
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      if (!HasSymAP){
        apjdense[apj[j]] = 0;
      }
      apa[j] = 0;
    }
  }
  if (!HasSymAP){
    /* Column indices of AP are in the list of free space */
    /* Allocate space for apsymj, initialize apsymj, and */
    /* destroy list of free space and other temporary array(s) */
    ierr = PetscMalloc((apsymi[am]+1)*sizeof(PetscInt),&ptap->abj);CHKERRQ(ierr);
    ierr = MakeSpaceContiguous(&free_space,ptap->abj);CHKERRQ(ierr);
  }
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogStagePop();CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr);
  len_s = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  for (i=0; i<C->m; i++) {
    cseqrow = owners[rank] + i;  /* global row index of C_seq */
    bj_i    = bj + bi[i];        /* col indices of the i-th row of C */
    bnzi    = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's C_seq into ba */
    cseqnzi   = cseqi[cseqrow+1] - cseqi[cseqrow];
    cseqj_tmp = cseqj + cseqi[cseqrow];
    cseqa_tmp = cseqa + cseqi[cseqrow];
    nextcseqj = 0;
    for (j=0; nextcseqj<cseqnzi; j++){
      if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
        ba_i[j] += cseqa_tmp[nextcseqj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj_tmp = buf_rj[k] + *(nextcseqi[k]);
        cseqa_tmp = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa_tmp[nextcseqj++];
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
    flops += 2*bnzi;
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscLogStagePop();CHKERRQ(ierr);

  ierr = PetscFree(cseqa);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
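
/*
   Note on the merge above: all contributions to a local row of C - the locally
   computed C_seq values and any received ones - are first summed into the
   scratch row ba_i, so a single MatSetValues() with INSERT_VALUES per row
   suffices and no ADD_VALUES communication is triggered during assembly.
*/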

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(), MatPtAPNumeric(), MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,4);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
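
/*
   A minimal two-step usage sketch (hypothetical matrices A and P assembled
   elsewhere), equivalent to one MatPtAP() call with MAT_INITIAL_MATRIX:

     Mat C;
     ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);   allocate the (i,j) structure of C
     ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);         fill in the numerical values
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/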
#ifdef TOBEREMOVE
typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* TOBEREMOVE */

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;
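
  /*
     The estimate above is nnz(A)*(pn/pm), written as (ai[am]/pm)*pn to stay in
     integer arithmetic: e.g. nnz(A)=1000 and a 100x50 P gives (1000/100)*50 =
     500 initial entries; the list is doubled on demand if the guess is low.
  */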

  /* Determine symbolic info for each row of C: */
  for (i=0; i<pn; i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0; k<anzj; k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]] = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0; j<ptanzi; j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<ptanzi; j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0; i<pn/ppdof; i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0; dof<ppdof; dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0; j<ptnzi; j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0; k<anzj; k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]] = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0; j<ptanzi; j++) {
        pdof = *ptaj%ppdof;
        prow = (*ptaj++)/ppdof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0; k<pnzj; k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++] = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0; j<ptanzi; j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0; j<cnzi; j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user with MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(), MatPtAPSymbolic(), MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0; j<anzi; j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++] = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
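
/*
   A tiny worked example of the row-wise scheme above (illustrative only):
   with A = [2 -1; -1 2] and P = [1; 1], row 0 gives (A*P)[0,:] = {1}, and the
   outer product adds P[0,:]^T * {1} = 1 into C; row 1 gives (A*P)[1,:] = {1}
   and adds another 1, so C = P^T*A*P = [2].
*/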

/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0; j<anzi; j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++] = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0; j<anzi; j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++] = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
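
/*
   A note on the index adjustments marked "rm" in the _Local routines (a sketch
   of the convention assumed here): columns of the diagonal block of A are
   shifted by prstart so they can share one numbering with the off-diagonal
   columns; before indexing into P_loc or P_oth the shift is undone
   (prow -= prstart, or prow -= pm for entries at or beyond prend), recovering
   local row indices of the corresponding projector block.
*/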

static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0; i<pN; i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0; k<anzj; k++) {
        col = ajj[k] + prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col] = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0; k<anzj; k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col] = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0; j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}