/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ.  C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(), MatPtAPNumeric(), MatMatMult()
@*/
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);

  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
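
/*
   Usage sketch (illustrative only; A and P stand for any assembled matrices of a supported
   type, fill=1.0 is just an example value, and error handling is abbreviated):

      Mat C;
      ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);  [symbolic and numeric product]
      ...change numerical values of A, keeping its nonzero pattern...
      ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);    [recompute the values only]
      ierr = MatDestroy(C);CHKERRQ(ierr);                           [the user destroys C]
*/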

EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    /* free the supporting struct only when it was actually attached; ptap is not set otherwise */
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }
    /* update P_oth */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
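
/*
   Overview (informal summary of the routines below, not a normative description):
   the MPIAIJ symbolic phase builds each process's sequential contribution
   C_seq = P_loc^T * A_loc * P with the macro below, exchanges the i/j structure of
   rows owned by other processes (destinations determined from P's garray), and uses
   the merged structure to preallocate the parallel result; the numeric phase then
   recomputes the values, adds the locally owned part with MatSetValues(), and ships
   the off-process contributions with MPI_Isend/MPI_Irecv before the final assembly.
*/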
/* Set symbolic info for i-th row of local product C=P^T*A*P */
#define MatPtAPSymbolicLocal_Private(ptM,pti,ptj,ci,cj) 0;\
{\
  /* allocate ci array, arrays for fill computation and */\
  /* free space for accumulating nonzero column info */\
  ierr = PetscMalloc((ptM+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);\
  ci[0] = 0;\
  \
  /* set initial free space to be nnz(A) scaled by fill*P->N/ptM. */\
  /* this should be reasonable if sparsity of PtAP is similar to that of A. */\
  nnz  = adi[am] + aoi[am];\
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/ptM+1),&free_space);\
  current_space = free_space;\
  \
  ptJ = ptj;\
  for (i=0; i<ptM; i++) {\
    ptnzi = pti[i+1] - pti[i];\
    nprow_loc = 0; nprow_oth = 0;\
    /* i-th row of symbolic Pt*A: */\
    for (j=0; j<ptnzi; j++) {\
      arow = *ptJ++;\
      /* diagonal portion of A */\
      anzj = adi[arow+1] - adi[arow];\
      ajj  = adj + adi[arow];\
      for (k=0; k<anzj; k++) {\
        col = ajj[k]+prstart;\
        if (!ptadenserow_loc[col]) {\
          ptadenserow_loc[col]          = -1;\
          ptasparserow_loc[nprow_loc++] = col;\
        }\
      }\
      /* off-diagonal portion of A */\
      anzj = aoi[arow+1] - aoi[arow];\
      ajj  = aoj + aoi[arow];\
      for (k=0; k<anzj; k++) {\
        col = a->garray[ajj[k]]; /* global col */\
        if (col < cstart){\
          col = ajj[k];\
        } else if (col >= cend){\
          col = ajj[k] + pm;\
        } else {\
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");\
        }\
        if (!ptadenserow_oth[col]) {\
          ptadenserow_oth[col]          = -1;\
          ptasparserow_oth[nprow_oth++] = col;\
        }\
      }\
    }\
    /* determine symbolic info for row of C_seq: */\
    cnzi = 0;\
    /* rows of P_loc */\
    ptaj = ptasparserow_loc;\
    for (j=0; j<nprow_loc; j++) {\
      prow  = *ptaj++;\
      prow -= prstart; /* rm */\
      pnzj  = pi_loc[prow+1] - pi_loc[prow];\
      pjj   = pj_loc + pi_loc[prow];\
      /* add non-zero cols of P into the sorted linked list lnk */\
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\
      cnzi += nlnk;\
    }\
    /* rows of P_oth */\
    ptaj = ptasparserow_oth;\
    for (j=0; j<nprow_oth; j++) {\
      prow = *ptaj++;\
      if (prow >= prend) prow -= pm; /* rm */\
      pnzj = pi_oth[prow+1] - pi_oth[prow];\
      pjj  = pj_oth + pi_oth[prow];\
      /* add non-zero cols of P into the sorted linked list lnk */\
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\
      cnzi += nlnk;\
    }\
    \
    /* If free space is not available, make more free space */\
    /* Double the amount of total space in the list */\
    if (current_space->local_remaining<cnzi) {\
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);\
    }\
    \
    /* Copy data into free space, and zero out denserows */\
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);\
    current_space->array           += cnzi;\
    current_space->local_used      += cnzi;\
    current_space->local_remaining -= cnzi;\
    \
    for (j=0; j<nprow_loc; j++) {\
      ptadenserow_loc[ptasparserow_loc[j]] = 0;\
    }\
    for (j=0; j<nprow_oth; j++) {\
      ptadenserow_oth[ptasparserow_oth[j]] = 0;\
    }\
    \
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */\
    /* For now, we will recompute what is needed. */\
    ci[i+1] = ci[i] + cnzi;\
  }\
  /* nnz is now stored in ci[ptM], column indices are in the list of free space */\
  /* Allocate space for cj, initialize cj, and */\
  /* destroy list of free space and other temporary array(s) */\
  ierr = PetscMalloc((ci[ptM]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);\
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);\
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,
                       *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj,*rmap=p->garray;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi,row;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,tnrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;
  PetscInt             *ptdi,*ptdj,*cdi,*cdj;
  PetscInt             *ptoi,*ptoj,*coi,*coj;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* determine row ownership of C */
  /*---------------------------------------------------------*/
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Pt = P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(pN,pti,ptj,ci,cj);CHKERRQ(ierr);

  /* Pt = Pd^T - diagonal portion of P_loc */
  ierr = MatGetSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(pn,ptdi,ptdj,cdi,cdj);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr);
#ifdef NEW
  /* Pt = Po^T - off-diagonal portion of P_loc */
  ierr = MatGetSymbolicTranspose_SeqAIJ(p->B,&ptoi,&ptoj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(tnrows,ptoi,ptoj,coi,coj);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->B,&ptoi,&ptoj);CHKERRQ(ierr);
#endif
  /* clean up */
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr  = PetscMemzero(len_si,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
  ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s = merge->len_s;
  len   = 0;  /* max length of buf_si[] */
  merge->nsend = 0;
  tnrows = (p->B)->N;  /* total num of rows to be sent to other processors */
  proc   = 0;
  for (i=0; i<tnrows; i++){
    while (rmap[i] >= owners[proc+1]) proc++;
    len_si[proc]++;
  }
  for (proc=0; proc<size; proc++){
    len_s[proc] = 0;
    if (len_si[proc]){
      merge->nsend++;
      len_si[proc] = 2*(len_si[proc] + 1);
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* num of col indices to be sent to [proc] */
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */

  j = 0; /* row index to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the receiving process)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    for (i=0; i<nrows; i++){
      row  = rmap[j++];
      anzi = ci[row+1] - ci[row];
      buf_si_i[i+1] = buf_si_i[i] + anzi;  /* i-structure */
      buf_si[i+1]   = row - owners[proc];  /* local row index */
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }
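  /* For example, a message carrying nrows=2 rows uses len_si[proc] = 2*(2+1) = 6 entries:
     buf_si = {2, lrow0, lrow1, 0, nnz0, nnz0+nnz1}, where lrow0,lrow1 are row indices local
     to the receiving process and nnz0,nnz1 are the numbers of column indices sent for those
     rows in the matching j-structure message (the names here are purely illustrative). */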

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len = 0;
  len = ci[owners[rank+1]] - ci[owners[rank]];
  free_space = PETSC_NULL;
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row of C */
  ierr    = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    anzi = cdi[i+1] - cdi[i];
    cji  = cdj + cdi[i];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi     = bi;
  merge->bj     = bj;
  merge->ci     = ci;
  merge->cj     = cj;
  merge->buf_ri = buf_ri;
  merge->buf_rj = buf_rj;
  merge->C_seq  = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  ierr = PetscFree(cdi);CHKERRQ(ierr);
  ierr = PetscFree(cdj);CHKERRQ(ierr);
#ifdef NEW
  ierr = PetscFree(coi);CHKERRQ(ierr);
  ierr = PetscFree(coj);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  Mat_SeqAIJ           *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense;
  PetscInt             *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj;
  PetscInt             i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj,**abuf_r,*ba_i,*ca;
  MatScalar            *pA_d=pd->a,*pA_o=po->a,*pa_oth;
  PetscInt             am=A->m,cN=C->N,cm=C->m;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s,**buf_ri,**buf_rj,**buf_ri_k,**nextrow,**nextcseqi;
  PetscInt             *owners,proc,nrows;
  PetscInt             *cjj,cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  PetscInt             *cseqi,*cseqj,col;
  PetscInt             stages[3];

  PetscFunctionBegin;
  ierr = PetscLogStageRegister(&stages[0],"NumAP_local");CHKERRQ(ierr);
  ierr = PetscLogStageRegister(&stages[1],"NumPtAP_local");CHKERRQ(ierr);
  ierr = PetscLogStageRegister(&stages[2],"NumPtAP_Comm");CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* get data from symbolic products */
  p_oth  = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_oth = p_oth->i; pj_oth = p_oth->j; pa_oth = p_oth->a;

  cseqi = merge->ci; cseqj = merge->cj;
  ierr  = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr  = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P and one row of C */
  ierr = PetscMalloc(2*cN*(sizeof(MatScalar)+sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,2*cN*(sizeof(MatScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
  ca       = (MatScalar*)(apa + cN);
  apj      = (PetscInt *)(ca + cN);
  apjdense = (PetscInt *)(apj + cN);

  /* Clear old values in C */
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr);
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    nzi = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj++;
      /* diagonal portion of P */
      pnzj = pd->i[prow+1] - pd->i[prow];
      pjj  = pd->j + pd->i[prow];  /* local col index of P */
      paj  = pd->a + pd->i[prow];
      for (k=0;k<pnzj;k++) {
        col = *pjj + p->cstart; pjj++;  /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      /* off-diagonal portion of P */
      pnzj = po->i[prow+1] - po->i[prow];
      pjj  = po->j + po->i[prow];  /* local col index of P */
      paj  = po->a + po->i[prow];
      for (k=0;k<pnzj;k++) {
        col = p->garray[*pjj]; pjj++;  /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;

      ada++;
    }
    /* off-diagonal portion of A */
    nzi = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    ierr = PetscLogStagePop();
    ierr = PetscLogStagePush(stages[1]);
    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    /* diagonal portion of P -- gives matrix value of local C */
    pnzi = pd->i[i+1] - pd->i[i];
    for (j=0;j<pnzi;j++) {
      /* add value into C */
      for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]];
      crow = (*pJ_d++) + owners[rank];
      ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      pA_d++;
      flops += 2*apnzj;
    }

    /* off-diagonal portion of P -- gives matrix values to be sent to others */
    pnzi = po->i[i+1] - po->i[i];
    for (j=0;j<pnzi;j++) {
      crow = p->garray[*pJ_o++];
      cjj  = cseqj + cseqi[crow];
      caj  = cseqa + cseqi[crow];
      /* add value into C_seq to be sent to other processors */
      nextap = 0;
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA_o)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA_o++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
    ierr = PetscLogStagePop();
  }

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscLogStagePush(stages[2]);CHKERRQ(ierr);
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);
  ierr = PetscFree(cseqa);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ba_i = apa;  /* rename, temporary array for storage of one row of C */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;  /* next row index of k-th recved i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi  = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj   = buf_rj[k] + *(nextcseqi[k]);
        cseqa   = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscLogStagePop();CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
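
/*
   Two-phase usage sketch (illustrative only; A and P stand for assembled matrices of a
   supported type and fill=1.0 is just an example value):

      Mat C;
      ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);   [build the (i,j) structure of C]
      ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);         [fill in the numerical values]
      ...change numerical values of A, keeping its nonzero pattern...
      ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);         [recompute the values, reusing the structure]
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/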

typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0;j<ptanzi;j++) {
        pdof = *ptaj%dof;
        prow = (*ptaj++)/dof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user using MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ.  C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;
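  /* Note: the single allocation above is viewed three ways - apa[0..cn-1] holds the dense
     values of the current row of A*P, apj[] lists the columns actually touched, and
     apjdense[] marks membership so a column is queued only once; the touched entries are
     cleared at the end of each row, so the per-row cost stays proportional to the number
     of nonzeros rather than to cn. */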

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0;i<pN;i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0;k<anzj;k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0;k<anzj;k++) {
        col = a->garray[ajj[k]]; /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj  = pi_loc[prow+1] - pi_loc[prow];
      pjj   = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0;j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}