/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(), MatPtAPNumeric(), MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);

  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P. */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
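/*
   Usage sketch (illustration only, not compiled): a typical call sequence for
   MatPtAP() in a multigrid-style setting.  The assembly of A and P and the
   choice of fill estimate are assumed to be done by the caller.
*/
#if 0
{
  Mat            A,P,C;
  PetscErrorCode ierr;

  /* ... assemble A (n x n) and P (n x m) ... */

  /* first product: C is created; fill=1.0 estimates nnz(C) = nnz(A)+nnz(P) */
  ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);

  /* after changing the numerical values (but not the nonzero pattern) of A,
     reuse the symbolic structure already stored with C and P */
  ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);

  ierr = MatDestroy(C);CHKERRQ(ierr);
}
#endif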
EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(ptap);CHKERRQ(ierr); /* only free ptap when it was actually retrieved */
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute the symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* the numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }
    /* update P_oth */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %D",scall);
  }
  /* now, compute the numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
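/*
   Sketch (illustration only, not compiled) of the container idiom used above
   to stash a supporting struct on a Mat between the MAT_INITIAL_MATRIX and
   MAT_REUSE_MATRIX calls.  MyCtx is a hypothetical payload; the
   PetscObjectContainer calls are the ones this file already uses.
*/
#if 0
{
  typedef struct { PetscInt cached; } MyCtx; /* hypothetical payload */
  Mat                  P;                    /* assumed: the matrix being annotated */
  MyCtx                *ctx;
  PetscObjectContainer container;
  PetscErrorCode       ierr;

  /* attach: done once, at MAT_INITIAL_MATRIX time */
  ierr = PetscNew(MyCtx,&ctx);CHKERRQ(ierr);
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,ctx);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

  /* query: done on every MAT_REUSE_MATRIX call */
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  ierr = PetscObjectContainerGetPointer(container,(void **)&ctx);CHKERRQ(ierr);
}
#endif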
/* Set symbolic info for each row of the local product C = P^T*A*P */
#define MatPtAPSymbolicLocal_Private(ptM,pti,ptj,ci,cj) 0;\
{\
  /* allocate ci array, arrays for fill computation and */\
  /* free space for accumulating nonzero column info */\
  ierr = PetscMalloc((ptM+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);\
  ci[0] = 0;\
\
  /* set initial free space to be nnz(A) scaled by fill*P->N/ptM; */\
  /* this should be reasonable if the sparsity of PtAP is similar to that of A */\
  nnz  = adi[am] + aoi[am];\
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/ptM+1),&free_space);CHKERRQ(ierr);\
  current_space = free_space;\
\
  ptJ = ptj;\
  for (i=0; i<ptM; i++) {\
    ptnzi = pti[i+1] - pti[i];\
    nprow_loc = 0; nprow_oth = 0;\
    /* i-th row of symbolic Pt*A: */\
    for (j=0; j<ptnzi; j++) {\
      arow = *ptJ++;\
      /* diagonal portion of A */\
      anzj = adi[arow+1] - adi[arow];\
      ajj  = adj + adi[arow];\
      for (k=0; k<anzj; k++) {\
        col = ajj[k] + prstart;\
        if (!ptadenserow_loc[col]) {\
          ptadenserow_loc[col]          = -1;\
          ptasparserow_loc[nprow_loc++] = col;\
        }\
      }\
      /* off-diagonal portion of A */\
      anzj = aoi[arow+1] - aoi[arow];\
      ajj  = aoj + aoi[arow];\
      for (k=0; k<anzj; k++) {\
        col = a->garray[ajj[k]]; /* global col */\
        if (col < cstart){\
          col = ajj[k];\
        } else if (col >= cend){\
          col = ajj[k] + pm;\
        } else {\
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");\
        }\
        if (!ptadenserow_oth[col]) {\
          ptadenserow_oth[col]          = -1;\
          ptasparserow_oth[nprow_oth++] = col;\
        }\
      }\
    }\
    /* determine symbolic info for the row of C_seq: */\
    cnzi = 0;\
    /* rows of P_loc */\
    ptaj = ptasparserow_loc;\
    for (j=0; j<nprow_loc; j++) {\
      prow  = *ptaj++;\
      prow -= prstart; /* rm */\
      pnzj = pi_loc[prow+1] - pi_loc[prow];\
      pjj  = pj_loc + pi_loc[prow];\
      /* add non-zero cols of P into the sorted linked list lnk */\
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\
      cnzi += nlnk;\
    }\
    /* rows of P_oth */\
    ptaj = ptasparserow_oth;\
    for (j=0; j<nprow_oth; j++) {\
      prow = *ptaj++;\
      if (prow >= prend) prow -= pm; /* rm */\
      pnzj = pi_oth[prow+1] - pi_oth[prow];\
      pjj  = pj_oth + pi_oth[prow];\
      /* add non-zero cols of P into the sorted linked list lnk */\
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\
      cnzi += nlnk;\
    }\
\
    /* if free space is not available, double the amount of total space in the list */\
    if (current_space->local_remaining<cnzi) {\
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);\
    }\
\
    /* copy data into free space, and zero out the dense rows */\
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);\
    current_space->array           += cnzi;\
    current_space->local_used      += cnzi;\
    current_space->local_remaining -= cnzi;\
\
    for (j=0; j<nprow_loc; j++) {\
      ptadenserow_loc[ptasparserow_loc[j]] = 0;\
    }\
    for (j=0; j<nprow_oth; j++) {\
      ptadenserow_oth[ptasparserow_oth[j]] = 0;\
    }\
\
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */\
    /* For now, we will recompute what is needed. */\
    ci[i+1] = ci[i] + cnzi;\
  }\
  /* nnz is now stored in ci[ptM], the column indices are in the list of free space; */\
  /* allocate space for cj, initialize cj, and */\
  /* destroy the list of free space and other temporary array(s) */\
  ierr = PetscMalloc((ci[ptM]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);\
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);\
}
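/*
   Minimal self-contained sketch (illustration only, not compiled) of the
   sorted linked-list merge that PetscLLCreate/PetscLLAdd/PetscLLClean
   implement above: the column indices of several sparse rows are merged, in
   sorted order and without duplicates, using an array lnk where lnk[col]
   holds the next column in the list and index n serves as the head/sentinel.
   This variant marks membership with -1 instead of a PetscBT bit table.
*/
#if 0
static int MergeRowsSorted(int n,int nrows,const int *ri,const int *rj,int *outj)
{
  int *lnk,i,j,col,loc,next,cnt = 0;

  lnk = (int*)malloc((n+1)*sizeof(int));
  for (i=0; i<=n; i++) lnk[i] = -1; /* -1: column not in the list */
  lnk[n] = n;                       /* empty list: head points to itself */

  for (i=0; i<nrows; i++) {
    loc = n;                        /* columns within a CSR row are sorted, */
    for (j=ri[i]; j<ri[i+1]; j++) { /* so resume the scan from the last insert */
      col = rj[j];
      if (lnk[col] >= 0) { loc = col; continue; } /* already present */
      next = lnk[loc];
      while (next != n && next < col) { loc = next; next = lnk[loc]; }
      lnk[col] = next;              /* splice col between loc and next */
      lnk[loc] = col;
      loc = col;
      cnt++;
    }
  }
  /* harvest in sorted order and reset the list, as PetscLLClean does */
  col = lnk[n];
  for (i=0; i<cnt; i++) { outj[i] = col; next = lnk[col]; lnk[col] = -1; col = next; }
  lnk[n] = n;
  free(lnk);
  return cnt;                       /* number of distinct columns */
}
#endif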
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c,
                       *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj,*rmap=p->garray;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi,row;
  MatScalar            *ca;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,tnrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* determine the row ownership of C */
  /*---------------------------------------------------------*/
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Pt = P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(pN,pti,ptj,ci,cj);CHKERRQ(ierr);

  /* clean up */
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* determine the number of messages to send and their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMemzero(len_si,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s = merge->len_s;
  len   = 0; /* max length of buf_si[] */
  merge->nsend = 0;
  tnrows = (p->B)->N; /* total number of rows to be sent to other processors */
  proc   = 0;
  for (i=0; i<tnrows; i++){
    while (rmap[i] >= owners[proc+1]) proc++;
    len_si[proc]++;
  }
  for (proc=0; proc<size; proc++){
    len_s[proc] = 0;
    if (len_si[proc]){
      merge->nsend++;
      len_si[proc] = 2*(len_si[proc] + 1);
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* number of column indices to be sent to [proc] */
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for the ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of the j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of the j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* wait until the receives and sends of the j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv the i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of the k-th message to be sent */

  j = 0; /* row index to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form the outgoing message for the i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows    = len_si[proc]/2 - 1;
    buf_si_i = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    for (i=0; i<nrows; i++){
      row  = rmap[j++];
      anzi = ci[row+1] - ci[row];
      buf_si_i[i+1] = buf_si_i[i] + anzi; /* i-structure */
      buf_si[i+1]   = row - owners[proc]; /* local row index */
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix on each processor */
  /*----------------------------------------------*/
  /* allocate the bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* the initial FreeSpace size is 2*(number of local nnz(C_seq)) */
  len = ci[owners[rank+1]] - ci[owners[rank]];
  free_space = PETSC_NULL;
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine the symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of the k-th received i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add the local non-zero cols of this proc's C_seq into lnk */
    arow = owners[rank] + i;
    anzi = ci[arow+1] - ci[arow];
    cji  = cj + ci[arow];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add the received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into the free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create the symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi     = bi;
  merge->bj     = bj;
  merge->ci     = ci;
  merge->cj     = cj;
  merge->buf_ri = buf_ri;
  merge->buf_rj = buf_rj;
  merge->C_seq  = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  PetscFunctionReturn(0);
}
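/*
   Sketch (illustration only, not compiled) of the i-structure message format
   described in the comment above: an encoded buf_si as built by the sender,
   and the nrows/nextrow/nextci walk the receiver performs.  All names and
   numbers here are local to this example.
*/
#if 0
{
  PetscInt buf_si[6],*rowp,*cip,nrows,r,nz;

  /* sender side: buf_si = [nrows, row_0..row_{nrows-1}, off_0..off_nrows],
     so len_si = 2*(nrows+1) as computed above */
  nrows     = 2;
  buf_si[0] = nrows;
  buf_si[1] = 3; buf_si[2] = 7;                /* local row indices of the sent rows */
  buf_si[3] = 0; buf_si[4] = 4; buf_si[5] = 9; /* prefix sums of their nnz counts    */

  /* receiver side: walk the row list and the offset list in lock step,
     exactly as nextrow[k]/nextci[k] do in the code above */
  rowp = buf_si + 1;
  cip  = buf_si + (nrows + 1);
  for (r=0; r<nrows; r++) {
    nz = *(cip+1) - *cip; /* number of column indices sent for row *rowp */
    rowp++; cip++;
  }
}
#endif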
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,
                       *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,
                       *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  Mat                  C_seq;
  Mat_SeqAIJ           *cseq,*p_oth;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend;
  PetscInt             *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj;
  PetscInt             i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj,**abuf_r,*ba_i,*ca;
  MatScalar            *pA_d=pd->a,*pA_o=po->a,*pa_oth;
  PetscInt             am=A->m,cN=C->N,cm=C->m;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             **buf_ri,**buf_rj,**buf_ri_k,**nextrow,**nextcseqi; /* PetscInt buffers, matching the symbolic phase */
  PetscInt             *owners,proc,nrows;
  PetscInt             *cjj,cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  PetscInt             *cseqi,*cseqj,col;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* get data from the symbolic products */
  p_oth  = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_oth = p_oth->i; pj_oth = p_oth->j; pa_oth = p_oth->a;

  cseqi = merge->ci; cseqj = merge->cj;
  ierr = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* allocate temporary arrays for the storage of one row of A*P and one row of C */
  ierr = PetscMalloc(2*cN*(sizeof(MatScalar)+sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,2*cN*(sizeof(MatScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
  ca       = (MatScalar*)(apa + cN);
  apj      = (PetscInt *)(ca + cN);
  apjdense = (PetscInt *)(apj + cN);

  /* clear the old values in C */
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* form the i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    nzi = adi[i+1] - adi[i];
    for (j=0; j<nzi; j++) {
      prow = *adj++;
      /* diagonal portion of P */
      pnzj = pd->i[prow+1] - pd->i[prow];
      pjj  = pd->j + pd->i[prow]; /* local col index of P */
      paj  = pd->a + pd->i[prow];
      for (k=0; k<pnzj; k++) {
        col = *pjj + p->cstart; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      /* off-diagonal portion of P */
      pnzj = po->i[prow+1] - po->i[prow];
      pjj  = po->j + po->i[prow]; /* local col index of P */
      paj  = po->a + po->i[prow];
      for (k=0; k<pnzj; k++) {
        col = p->garray[*pjj]; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;

      ada++;
    }
    /* off-diagonal portion of A */
    nzi = aoi[i+1] - aoi[i];
    for (j=0; j<nzi; j++) {
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* sort the j index array for a quick sparse axpy */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* compute P_loc[i,:]^T*AP[i,:] using an outer product */
    /* diagonal portion of P -- gives the matrix values of local C */
    pnzi = pd->i[i+1] - pd->i[i];
    for (j=0; j<pnzi; j++) {
      crow = (*pJ_d++) + owners[rank];
      cjj  = cseqj + cseqi[crow];
      /* add the values into C */
      for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]];
      ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      pA_d++;
      flops += 2*apnzj;
    }

    /* off-diagonal portion of P -- gives matrix values to be sent to others */
    pnzi = po->i[i+1] - po->i[i];
    for (j=0; j<pnzi; j++) {
      crow = p->garray[*pJ_o++];
      cjj  = cseqj + cseqi[crow];
      caj  = cseqa + cseqi[crow];
      /* add the values into C_seq to be sent to other processors */
      nextap = 0;
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA_o)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA_o++;
    }

    /* zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* send and recv the matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);
  ierr = PetscFree(cseqa);CHKERRQ(ierr);

  /* insert the values into the mpi matrix */
  /*----------------------------*/
  ba_i = apa; /* rename; temporary array for the storage of one row of C */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k]; /* beginning of the k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;            /* next row index of the k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
  }

  /* add the received local values to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj+bi[i]; /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi  = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj     = buf_rj[k] + *(nextcseqi[k]);
        cseqa     = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(), MatPtAPNumeric(), MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P. */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
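/*
   Usage sketch (illustration only, not compiled): the symbolic/numeric split
   lets one symbolic factorization serve many numeric products when the
   nonzero patterns of A and P are fixed but their values change.
*/
#if 0
{
  Mat            A,P,C;
  PetscInt       step,nsteps = 10;
  PetscErrorCode ierr;

  /* ... assemble A and P ... */
  ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr); /* (i,j) structure only */
  for (step=0; step<nsteps; step++) {
    /* ... update the values of A (pattern unchanged) ... */
    ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);     /* fill in the values   */
    /* ... use C ... */
  }
  ierr = MatDestroy(C);CHKERRQ(ierr);
}
#endif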
typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* get the ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ = ptj;

  /* allocate the ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set the initial free space to be nnz(A) scaled by the aspect ratio of P. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine the symbolic info for each row of C: */
  for (i=0; i<pn; i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* determine the symbolic row of PtA: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0; k<anzj; k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* using the symbolic info for the row of PtA, determine the symbolic info for the row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0; j<ptanzi; j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add the non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* if free space is not available, double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* copy data into the free space, and zero out the dense rows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<ptanzi; j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], the column indices are in the list of free space; */
  /* allocate space for cj, initialize cj, and */
  /* destroy the list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change the flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* clean up */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
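/*
   Sketch (illustration only, not compiled) of the two-pass CSR symbolic
   transpose that MatGetSymbolicTranspose_SeqAIJ provides above: count the
   nonzeros per column, turn the counts into row pointers of the transpose,
   then drop each entry into place.  Plain int and malloc are used to keep
   the example self-contained.
*/
#if 0
static void SymbolicTranspose(int m,int n,const int *ai,const int *aj,int **ti_out,int **tj_out)
{
  int *ti,*tj,*fill,i,j;

  ti   = (int*)calloc(n+1,sizeof(int));
  tj   = (int*)malloc(ai[m]*sizeof(int));
  fill = (int*)calloc(n,sizeof(int));

  /* pass 1: count the entries in each column of A = each row of A^T */
  for (j=0; j<ai[m]; j++) ti[aj[j]+1]++;
  for (i=0; i<n; i++) ti[i+1] += ti[i]; /* prefix sum -> row pointers */

  /* pass 2: place the row index of each entry into its transposed row;
     scanning rows in order keeps each transposed row sorted */
  for (i=0; i<m; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      tj[ti[aj[j]] + fill[aj[j]]++] = i;
    }
  }
  free(fill);
  *ti_out = ti; *tj_out = tj;
}
#endif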
#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* start the timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* get the ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* allocate the ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set the initial free space to be nnz(A) scaled by the aspect ratio of P. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine the symbolic info for each row of C: */
  for (i=0; i<pn/ppdof; i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0; dof<ppdof; dof++) {
      /* determine the symbolic row of PtA: */
      for (j=0; j<ptnzi; j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0; k<anzj; k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* using the symbolic info for the row of PtA, determine the symbolic info for the row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0; j<ptanzi; j++) {
        pdof = *ptaj%ppdof;     /* was %dof: modulo by the loop index divides by zero at dof=0 */
        prow = (*ptaj++)/ppdof; /* was /dof, same problem */
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0; k<pnzj; k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* if free space is not available, double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* copy data into the free space, and zero out the dense rows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0; j<ptanzi; j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0; j<cnzi; j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i*ppdof+dof+1] = ci[i*ppdof+dof] + cnzi; /* was ci[i+1+dof] = ci[i+dof] + cnzi, which mis-indexes the interlaced rows */
    }
  }
  /* nnz is now stored in ci[pn], the column indices are in the list of free space; */
  /* allocate space for cj, initialize cj, and */
  /* destroy the list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change the flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* clean up */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user with MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ. C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(), MatPtAPSymbolic(), MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P. */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *p=(Mat_SeqAIJ*)P->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ*)C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* allocate a temporary array for the storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* clear the old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* form the sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0; j<anzi; j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* sort the j index array for a quick sparse axpy */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* compute P^T*A*P using the outer product (P^T)[:,j]*(A*P)[j,:] */
    pnzi = pi[i+1] - pi[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* perform the sparse axpy operation; note cjj includes apj */
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
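/*
   Minimal self-contained sketch (illustration only, not compiled) of the
   sparse axpy used above: the target row (cjj,caj) has a sorted column set
   that is a superset of the sorted source columns srcj, so the values can be
   merged in a single forward scan with no searching.  Here the source values
   are compressed into srcv, whereas the code above reads them from the dense
   accumulator apa via apj.
*/
#if 0
static void SparseAxpy(double alpha,int nsrc,const int *srcj,const double *srcv,
                       const int *cjj,double *caj)
{
  int k,nextap = 0;
  /* advance through the target row; whenever the columns match, accumulate */
  for (k=0; nextap<nsrc; k++) {
    if (cjj[k] == srcj[nextap]) {
      caj[k] += alpha*srcv[nextap++];
    }
  }
}
#endif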
/* Computes the local part of C = P_loc^T * A * P -- used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ*)C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* allocate a temporary array for the storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* clear the old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* form the i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0; j<anzi; j++) {
      prow = *adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0; j<anzi; j++) {
      col = a->garray[*aoj]; /* global col; must fall outside [cstart,cend) */
      if (col >= cstart && col < cend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* sort the j index array for a quick sparse axpy */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* compute P_loc[i,:]^T*AP[i,:] using an outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* perform the sparse axpy operation; note cjj includes apj */
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get the ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ = ptj;

  /* allocate the ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set the initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if the sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine the symbolic info for each row of C: */
  for (i=0; i<pN; i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0; k<anzj; k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0; k<anzj; k++) {
        col = a->garray[ajj[k]]; /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using the symbolic info of local PtA, determine the symbolic info for the row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      /* add the non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add the non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* if free space is not available, double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* copy data into the free space, and zero out the dense rows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0; j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], the column indices are in the list of free space; */
  /* allocate space for cj, initialize cj, and */
  /* destroy the list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change the flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* clean up */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}