/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type AIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
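
/*
   A minimal usage sketch (illustrative only, not part of this file): a typical
   Galerkin coarse-grid setup computes C once with MAT_INITIAL_MATRIX and then
   refreshes it with MAT_REUSE_MATRIX whenever the entries of A change while
   the sparsity of A and P stays fixed.  A and P are assumed to be assembled
   AIJ matrices created elsewhere; fill=1.0 says nnz(C) is expected to be
   about nnz(A)+nnz(P).

     Mat C;
     ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
     ... the entries of A change, sparsity stays fixed ...
     ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/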

EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);
    if (ptap->abi) {ierr = PetscFree(ptap->abi);CHKERRQ(ierr);}
    if (ptap->abj) {ierr = PetscFree(ptap->abj);CHKERRQ(ierr);}

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    /* free ptap only when it was actually retrieved; it is uninitialized otherwise */
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
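
/*
   A sketch of the reuse mechanism used throughout this file (no new API,
   only the calls that already appear below): the symbolic phase stashes a
   Mat_MatMatMultMPI struct on P under the key "MatPtAPMPI" via a
   PetscObjectContainer, and later phases retrieve it with PetscObjectQuery():

     PetscObjectContainer container;
     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
     ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);
     ...
     ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);

   MatDestroy_MPIAIJ_PtAP() above is installed as P's destroy routine so the
   stashed data is freed exactly once, when P itself is destroyed.
*/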

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);
    ptap->abi = PETSC_NULL; ptap->abj = PETSC_NULL;
    ptap->abnz_max = 0;  /* symbolic A*P is not done yet */

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
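
/*
   Outline of the parallel algorithm implemented by the routines below (a
   reading of the code, not a spec): each process owns a block of rows of A
   and of P.  MatGetLocalMat() extracts the locally owned rows of P (P_loc);
   MatGetBrowsOfAoCols() fetches the off-process rows of P that are hit by
   the off-diagonal part of the local rows of A (P_oth), stored contiguously
   with the locally owned row range [prstart, prstart+pm) cut out.  Each
   process then forms the sequential product

       C_seq = P_loc^T * A_loc * [P_loc; P_oth]

   whose rows span all of P's columns, and the C_seq pieces are merged and
   summed across processes into the distributed product C.
*/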

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  /*--------------------------------------------------*/
  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc  + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth  + aN;
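
  /*
     The PetscLL* calls below maintain one row of C under construction as a
     sorted linked list of column indices backed by a bitmap (lnkbt) for O(1)
     duplicate detection.  A toy trace, mirroring the calls used in this file
     (illustrative values only):

       PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);       list = {}
       PetscLLAdd(3,{4,9,2},pN,nlnk,lnk,lnkbt);   list = {2,4,9},   nlnk = 3
       PetscLLAdd(2,{9,5},pN,nlnk,lnk,lnkbt);     list = {2,4,5,9}, nlnk = 1 (only 5 is new)
       PetscLLClean(pN,pN,cnzi,lnk,array,lnkbt);  copies {2,4,5,9} out and resets the list

     nlnk reports how many entries were newly added, which is why the code
     accumulates cnzi += nlnk after each PetscLLAdd().
  */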

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0; i<pN; i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0; k<anzj; k++) {
        col = ajj[k] + prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0; k<anzj; k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow = *ptaj++;
      prow -= prstart; /* map back to a local row of P_loc */
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* map back to a row of P_oth */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0; j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  /*
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
  */

  /* add C_seq into mpi C */
  /*----------------------*/
  free_space=PETSC_NULL; current_space=PETSC_NULL;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  /* determine row ownership */
  /*-------------------------*/
  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s = merge->len_s;
  len   = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ci[i+1] > ci[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*--------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*-------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of the k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
         buf_si[1:nrows]:           row index (global)
         buf_si[nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ci[i+1] - ci[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }
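
  /*
     A concrete instance of the i-structure message format described above
     (illustrative values only): if this process sends rows with local
     indices 3 and 7 to [proc], carrying 2 and 4 nonzeros respectively, then
     nrows = 2 and

       buf_si = [ 2,  3, 7,  0, 2, 6 ]

     i.e. the row count, the nonempty row indices, and the cumulative offsets
     of each row's entries in the j/value buffers, so the receiver can
     reconstruct every row's span with no extra traffic.
  */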

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len  = ci[owners[rank+1]] - ci[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    arow = owners[rank] + i;
    anzi = ci[arow+1] - ci[arow];
    cji  = cj + ci[arow];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
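
  /*
     A note on the preallocation macros used above (no new API, just the
     macros already called here): dnz/onz are per-local-row counts of
     nonzeros falling inside/outside this process's diagonal column block;
     MatPreallocateSet() classifies the column list supplied for one row, and
     MatMPIAIJSetPreallocation() consumes the two arrays.  E.g. a local row
     with columns {1, 5, 900} on a process owning columns [0,100) would
     contribute dnz = 2, onz = 1 for that row (illustrative values).
  */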
  /* ierr = MatSetOption(B_mpi,MAT_COLUMNS_SORTED);CHKERRQ(ierr); -- causes a delay? */

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->ci           = ci;
  merge->cj           = cj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->C_seq        = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;

  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt             i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt             *cjj;
  MatScalar            *ada=ad->a,*aoa=ao->a,*ada_tmp=ad->a,*aoa_tmp=ao->a,*apa_tmp,*paj,*cseqa,*caj;
  MatScalar            *pa_loc,*pA,*pa_oth;
  PetscInt             am=A->m,cN=C->N;
  PetscInt             nextp,*adj_tmp=ad->j,*aoj_tmp=ao->j;

  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i;
  PetscInt             *cseqi,*cseqj;
  PetscInt             *cseqj_tmp;
  MatScalar            *cseqa_tmp;
  PetscInt             stages[2];
  PetscInt             *apsymi,*apsymj;
  PetscTruth           HasSymAP;
  FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscLogStageRegister(&stages[0],"NumPtAP_local");CHKERRQ(ierr);
  ierr = PetscLogStageRegister(&stages[1],"NumPtAP_Comm");CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* set or get data from symbolic A*P */
  if (!ptap->abnz_max) { /* symbolic A*P is not done yet */
    HasSymAP = PETSC_FALSE;
    ierr = PetscMalloc((am+2)*sizeof(PetscInt),&apsymi);CHKERRQ(ierr);
    ptap->abi = apsymi;
    apsymi[0] = 0;
    ierr = PetscMalloc(cN*sizeof(MatScalar),&apa_tmp);CHKERRQ(ierr);
    ierr = PetscMemzero(apa_tmp,cN*sizeof(MatScalar));CHKERRQ(ierr);
    /* initial FreeSpace size is 2*nnz(A) */
    ierr = GetMoreSpace((PetscInt)(2*(adi[am]+aoi[am])),&free_space);CHKERRQ(ierr);
    current_space = free_space;
  } else { /* reuse the symbolic A*P computed in a previous call */
    /* printf(" [%d] abnz_max: %d, cN: %d\n",rank,ptap->abnz_max,cN); */
    HasSymAP = PETSC_TRUE;
    ierr   = PetscMalloc((ptap->abnz_max+1)*sizeof(MatScalar),&apa_tmp);CHKERRQ(ierr);
    ierr   = PetscMemzero(apa_tmp,ptap->abnz_max*sizeof(MatScalar));CHKERRQ(ierr);
    apsymi = ptap->abi; apsymj = ptap->abj;
  }

  /* get data from symbolic products */
  p_loc = (Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  cseqi = merge->ci; cseqj = merge->cj;
  ierr  = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr  = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  /* Allocate temporary arrays for the column structure of one row of A*P */
  /*
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj  = (PetscInt *)(apa + cN);
  */
  ierr = PetscMalloc(cN*(2*sizeof(PetscInt)),&apj);CHKERRQ(ierr);
  ierr = PetscMemzero(apj,cN*(2*sizeof(PetscInt)));CHKERRQ(ierr);
  apjdense = apj + cN;

  ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr);
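
  /*
     The loop below runs in one of two modes (as read from the code): on the
     first numeric product (HasSymAP == PETSC_FALSE) it computes the symbolic
     row structure of A*P on the fly and records it in apsymi/apsymj
     (ptap->abi/abj) for later reuse; on subsequent calls it simply replays
     that structure.  In both modes the values of row i of A*P accumulate in
     the condensed array apa_tmp, which is indexed by position within the
     sorted column list apj rather than by global column: e.g. if
     apj = {2,7,11}, a contribution to column 7 lands in apa_tmp[1]
     (illustrative indices only).
  */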

  for (i=0; i<am; i++) {
    /* Form i-th sparse row of A*P */
    if (!HasSymAP){ /* Compute symbolic i-th sparse row of A*P */
      apnzj = 0;
      /* diagonal portion of A */
      anzi  = adi[i+1] - adi[i];
      for (j=0; j<anzi; j++) {
        prow = *adj;
        adj++;
        pnzj = pi_loc[prow+1] - pi_loc[prow];
        pjj  = pj_loc + pi_loc[prow];
        paj  = pa_loc + pi_loc[prow];
        for (k=0; k<pnzj; k++) {
          if (!apjdense[pjj[k]]) {
            apjdense[pjj[k]] = -1;
            apj[apnzj++]     = pjj[k];
          }
          /* apa[pjj[k]] += (*ada)*paj[k]; */
        }
        flops += 2*pnzj;
        ada++;
      }
      /* off-diagonal portion of A */
      anzi = aoi[i+1] - aoi[i];
      for (j=0; j<anzi; j++) {
        col = a->garray[*aoj];
        if (col < cstart){
          prow = *aoj;
        } else if (col >= cend){
          prow = *aoj;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        aoj++;
        pnzj = pi_oth[prow+1] - pi_oth[prow];
        pjj  = pj_oth + pi_oth[prow];
        paj  = pa_oth + pi_oth[prow];
        for (k=0; k<pnzj; k++) {
          if (!apjdense[pjj[k]]) {
            apjdense[pjj[k]] = -1;
            apj[apnzj++]     = pjj[k];
          }
          /* apa[pjj[k]] += (*aoa)*paj[k]; */
        }
        flops += 2*pnzj;
        aoa++;
      }
      /* Sort the j index array for quick sparse axpy. */
      ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
    } else { /* get symbolic AP[i,:] */
      apnzj = apsymi[i+1] - apsymi[i];
      for (j=0; j<apnzj; j++){
        apj[j] = *(apsymj + apsymi[i]+j);
      }
    }
    if (!HasSymAP){
      apsymi[i+1] = apsymi[i] + apnzj;
      if (ptap->abnz_max < apnzj) ptap->abnz_max = apnzj;

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<apnzj) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy the symbolic row of A*P into the free space */
      for (j=0; j<apnzj; j++) current_space->array[j] = apj[j];
      current_space->array           += apnzj;
      current_space->local_used      += apnzj;
      current_space->local_remaining -= apnzj;
    }

    /* use symbolic AP[i,:] to form a condensed array apa_tmp */
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0; j<anzi; j++) {
      prow = *adj_tmp;
      adj_tmp++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      nextp = 0;
      for (k=0; nextp<pnzj; k++) {
        if (apj[k] == pjj[nextp]) { /* col of AP == col of P */
          apa_tmp[k] += (*ada_tmp)*paj[nextp++];
        }
      }
      ada_tmp++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0; j<anzi; j++) {
      col = a->garray[*aoj_tmp];
      if (col < cstart){
        prow = *aoj_tmp;
      } else if (col >= cend){
        prow = *aoj_tmp;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj_tmp++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      nextp = 0;
      for (k=0; nextp<pnzj; k++) {
        if (apj[k] == pjj[nextp]) { /* col of AP == col of P */
          apa_tmp[k] += (*aoa_tmp)*paj[nextp++];
        }
      }
      flops += 2*pnzj;
      aoa_tmp++;
    }

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          /* caj[k] += (*pA)*apa[apj[nextap++]]; */
          caj[k] += (*pA)*apa_tmp[nextap++];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apjdense[apj[j]] = 0;
      apa_tmp[j]       = 0.0;
    }
  }
  if (!HasSymAP){
    /* Column indices of AP are in the list of free space */
    /* Allocate space for apsymj, initialize apsymj, and */
    /* destroy list of free space and other temporary array(s) */
    ierr = PetscMalloc((apsymi[am]+1)*sizeof(PetscInt),&ptap->abj);CHKERRQ(ierr);
    ierr = MakeSpaceContiguous(&free_space,ptap->abj);CHKERRQ(ierr);
  }
  ierr = PetscFree(apa_tmp);CHKERRQ(ierr);
  ierr = PetscFree(apj);CHKERRQ(ierr);
  ierr = PetscLogStagePop();CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr);
  len_s = merge->len_s;
  ierr  = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr  = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;            /* next row number of k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  for (i=0; i<C->m; i++) {
    cseqrow = owners[rank] + i; /* global row index of C_seq */
    bj_i    = bj + bi[i];       /* col indices of the i-th row of C */
    bnzi    = bi[i+1] - bi[i];
    ierr    = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's C_seq into ba */
    cseqnzi   = cseqi[cseqrow+1] - cseqi[cseqrow];
    cseqj_tmp = cseqj + cseqi[cseqrow];
    cseqa_tmp = cseqa + cseqi[cseqrow];
    nextcseqj = 0;
    for (j=0; nextcseqj<cseqnzi; j++){
      if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
        ba_i[j] += cseqa_tmp[nextcseqj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj_tmp = buf_rj[k] + *(nextcseqi[k]);
        cseqa_tmp = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj_tmp[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa_tmp[nextcseqj++];
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
    flops += 2*bnzi;
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscLogStagePop();CHKERRQ(ierr);

  ierr = PetscFree(cseqa);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
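
/*
   A minimal sketch of driving the split interface below directly (same
   assumptions as the MatPtAP example above: A and P are assembled AIJ
   matrices; step/nsteps are placeholders).  The symbolic phase is paid once,
   the numeric phase once per new set of values in A:

     Mat C;
     ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);
     for (step=0; step<nsteps; step++) {
       ... update the entries of A ...
       ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);
     }
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/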

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#ifdef TOBEREMOVE
typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* TOBEREMOVE */

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;
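
  /*
     ci/cj are the usual compressed-row (CSR) pair that this routine fills
     in: row r of C has its column indices in cj[ci[r]..ci[r+1]-1].  For
     example (illustrative values), ci = {0,2,5} with cj = {0,3, 1,2,4}
     encodes a 2-row matrix whose row 0 has nonzeros in columns 0 and 3, and
     row 1 in columns 1, 2 and 4.
  */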

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0; i<pn; i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0; k<anzj; k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0; j<ptanzi; j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<ptanzi; j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow  + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow     + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;
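
  /*
     Background for the dof loop below (standard MAIJ semantics, stated here
     as a reading of the code): a SeqMAIJ matrix PP applies the scalar
     interpolation p to each of its ppdof interlaced components, i.e. PP has
     a nonzero at (i,j) exactly when p has one at (i/ppdof, j/ppdof) and
     i%ppdof == j%ppdof.  Row and column indices of PP therefore decode into
     (row, component) pairs of p, which is how the symbolic product below is
     organized.
  */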

  /* Determine symbolic info for each row of C: */
  for (i=0; i<pn/ppdof; i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0; dof<ppdof; dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0; j<ptnzi; j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0; k<anzj; k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0; j<ptanzi; j++) {
        pdof = *ptaj%ppdof;     /* component offset within the dof block */
        prow = (*ptaj++)/ppdof; /* row of the scalar matrix p */
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0; k<pnzj; k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0; j<ptanzi; j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0; j<cnzi; j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user using MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ     *p=(Mat_SeqAIJ *)P->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ *)C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0; j<anzi; j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ *)C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0; j<anzi; j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0; j<anzi; j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
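
/*
   A worked instance of the merge used in the sparse axpy loops above
   (illustrative values): each row cjj of C is a superset of the sorted row
   apj of A*P, so a single forward scan suffices.  With cjj = {1,4,7,9} and
   apj = {4,9}:

     k=0: cjj[0]=1 != apj[0]=4  -> skip
     k=1: cjj[1]=4 == apj[0]    -> caj[1] += pA*apa[4], nextap=1
     k=2: cjj[2]=7 != apj[1]=9  -> skip
     k=3: cjj[3]=9 == apj[1]    -> caj[3] += pA*apa[9], nextap=2, loop ends

   i.e. cost O(nnz of the C row), with no searching, because both index lists
   are sorted.
*/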

static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;  /* rows [prstart,prend) of P are the locally owned block P_loc */

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc  + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth  + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0; i<pN; i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0; k<anzj; k++) {
        col = ajj[k] + prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0; k<anzj; k++) {
        col = a->garray[ajj[k]];  /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow = *ptaj++;
      prow -= prstart; /* map back to a local row of P_loc */
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* map back to a row of P_oth */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0; j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}