/*
  Defines projective product routines where A is an AIJ matrix
          C = P^T * A * P
*/

#include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
#include "src/mat/utils/freespace.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatPtAP"
/*@
   MatPtAP - Creates the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  P - the projection matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))

   Output Parameters:
.  C - the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ.  C will be of type MATSEQAIJ.

   Level: intermediate

.seealso: MatPtAPSymbolic(), MatPtAPNumeric(), MatMatMult()
@*/
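/*
   Example usage (a minimal sketch, not part of the library source; it assumes
   A and P are assembled AIJ matrices of compatible size and uses fill=1.0 as
   a rough estimate):

      Mat C;
      ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
      ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);  /* after values of A change */
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/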
PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);

  if (fill <= 0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptap;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
  fP = P->ops->ptap;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat);

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
    ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
    ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
    /* free ptap only when the container was found; otherwise ptap was never set */
    ierr = PetscFree(ptap);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); /* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
    }
    /* update P_oth */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Set symbolic info for i-th row of local product C = P^T*A*P */
#define MatPtAPSymbolicLocal_Private(ptM,pti,ptj,ci,cj) 0;\
{\
  /* allocate ci array, arrays for fill computation and */\
  /* free space for accumulating nonzero column info */\
  ierr = PetscMalloc((ptM+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);\
  ci[0] = 0;\
\
  /* set initial free space to be nnz(A) scaled by fill*P->N/ptM; */\
  /* this should be reasonable if sparsity of PtAP is similar to that of A */\
  nnz  = adi[am] + aoi[am];\
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/ptM+1),&free_space);CHKERRQ(ierr);\
  current_space = free_space;\
\
  ptJ = ptj;\
  for (i=0; i<ptM; i++) {\
    ptnzi = pti[i+1] - pti[i];\
    nprow_loc = 0; nprow_oth = 0;\
    /* i-th row of symbolic Pt*A: */\
    for (j=0; j<ptnzi; j++) {\
      arow = *ptJ++;\
      /* diagonal portion of A */\
      anzj = adi[arow+1] - adi[arow];\
      ajj  = adj + adi[arow];\
      for (k=0; k<anzj; k++) {\
        col = ajj[k]+prstart;\
        if (!ptadenserow_loc[col]) {\
          ptadenserow_loc[col]          = -1;\
          ptasparserow_loc[nprow_loc++] = col;\
        }\
      }\
      /* off-diagonal portion of A */\
      anzj = aoi[arow+1] - aoi[arow];\
      ajj  = aoj + aoi[arow];\
      for (k=0; k<anzj; k++) {\
        col = a->garray[ajj[k]]; /* global col */\
        if (col < cstart){\
          col = ajj[k];\
        } else if (col >= cend){\
          col = ajj[k] + pm;\
        } else {\
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");\
        }\
        if (!ptadenserow_oth[col]) {\
          ptadenserow_oth[col]          = -1;\
          ptasparserow_oth[nprow_oth++] = col;\
        }\
      }\
    }\
    /* determine symbolic info for row of C_seq: */\
    cnzi = 0;\
    /* rows of P_loc */\
    ptaj = ptasparserow_loc;\
    for (j=0; j<nprow_loc; j++) {\
      prow  = *ptaj++;\
      prow -= prstart; /* rm */\
      pnzj = pi_loc[prow+1] - pi_loc[prow];\
      pjj  = pj_loc + pi_loc[prow];\
      /* add non-zero cols of P into the sorted linked list lnk */\
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\
      cnzi += nlnk;\
    }\
    /* rows of P_oth */\
    ptaj = ptasparserow_oth;\
    for (j=0; j<nprow_oth; j++) {\
      prow = *ptaj++;\
      if (prow >= prend) prow -= pm; /* rm */\
      pnzj = pi_oth[prow+1] - pi_oth[prow];\
      pjj  = pj_oth + pi_oth[prow];\
      /* add non-zero cols of P into the sorted linked list lnk */\
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);\
      cnzi += nlnk;\
    }\
\
    /* If free space is not available, make more free space */\
    /* Double the amount of total space in the list */\
    if (current_space->local_remaining<cnzi) {\
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);\
    }\
\
    /* Copy data into free space, and zero out denserows */\
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);\
    current_space->array           += cnzi;\
    current_space->local_used      += cnzi;\
    current_space->local_remaining -= cnzi;\
\
    for (j=0; j<nprow_loc; j++) {\
      ptadenserow_loc[ptasparserow_loc[j]] = 0;\
    }\
    for (j=0; j<nprow_oth; j++) {\
      ptadenserow_oth[ptasparserow_oth[j]] = 0;\
    }\
\
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */\
    /* For now, we will recompute what is needed. */\
    ci[i+1] = ci[i] + cnzi;\
  }\
  /* nnz is now stored in ci[ptM], column indices are in the list of free space */\
  /* Allocate space for cj, initialize cj, and */\
  /* destroy list of free space and other temporary array(s) */\
  ierr = PetscMalloc((ci[ptM]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);\
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);\
}
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P_loc,P_oth;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer container;
  FreeSpaceList        free_space=PETSC_NULL,current_space;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,
                       *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  Mat_SeqAIJ           *p_loc,*p_oth;
  PetscInt             *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pti,*ptj,*ptJ,*ajj,*pjj,*rmap=p->garray;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt             pm=P->m,pn=P->n,nlnk,*lnk,*ci,*cj,*cji,*ptaj;
  PetscInt             aN=A->N,am=A->m,pN=P->N;
  PetscInt             i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi,row;
  PetscBT              lnkbt;
  PetscInt             prstart,prend,nprow_loc,nprow_oth;
  PetscInt             *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;

  MPI_Comm             comm=A->comm;
  Mat                  B_mpi;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             len,proc,*dnz,*onz,*owners;
  PetscInt             anzi,*bi,*bj,bnzi,nspacedouble=0;
  PetscInt             nrows,tnrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  Mat_Merge_SeqsToMPI  *merge;
  PetscInt             *ptdi,*ptdj,*cdi,*cdj;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* determine row ownership of C */
  /*---------------------------------------------------------*/
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(merge->rowmap,pn);CHKERRQ(ierr);
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* get data from P_loc and P_oth */
  P_loc=ptap->B_loc; P_oth=ptap->B_oth; prstart=ptap->brstart;
  p_loc=(Mat_SeqAIJ*)P_loc->data; p_oth=(Mat_SeqAIJ*)P_oth->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pi_oth=p_oth->i; pj_oth=p_oth->j;
  prend = prstart + pm;

  /* compute symbolic C_seq = P_loc^T * A_loc * P_seq */
  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;
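  /* Note on the workspace just allocated: it is one block of length 4*aN+1 split
     into four windows of length aN each,
        ptadenserow_loc | ptasparserow_loc | ptadenserow_oth | ptasparserow_oth,
     where the dense rows flag columns already seen in the current row of Pt*A
     and the sparse rows record those columns so that only they need resetting. */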

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Pt = P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(pN,pti,ptj,ci,cj);CHKERRQ(ierr);

  /* Pt = Pd^T -- only diagonal portion of P_loc */
  ierr = MatGetSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr);
  ierr = MatPtAPSymbolicLocal_Private(pn,ptdi,ptdj,cdi,cdj);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&ptdi,&ptdj);CHKERRQ(ierr);

  /* clean up */
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMemzero(len_si,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
  len_s = merge->len_s;
  len   = 0;  /* max length of buf_si[] */
  merge->nsend = 0;
  tnrows = (p->B)->N;  /* total num of rows to be sent to other processors */
  proc   = 0;
  for (i=0; i<tnrows; i++){
    while (rmap[i] >= owners[proc+1]) proc++;
    len_si[proc]++;
  }
  for (proc=0; proc<size; proc++){
    len_s[proc] = 0;
    if (len_si[proc]){
      merge->nsend++;
      len_si[proc] = 2*(len_si[proc] + 1);
      len_s[proc]  = ci[owners[proc+1]] - ci[owners[proc]]; /* num of col indices to be sent to [proc] */
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cj+ci[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */

  j = 0;  /* row index to be sent */
  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows    = len_si[proc]/2 - 1;
    buf_si_i = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    for (i=0; i<nrows; i++){
      row  = rmap[j++];
      anzi = ci[row+1] - ci[row];
      buf_si_i[i+1] = buf_si_i[i] + anzi; /* i-structure */
      buf_si[i+1]   = row - owners[proc]; /* local row index */
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)A,"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(C_seq)) */
  len = ci[owners[rank+1]] - ci[owners[rank]];
  free_space = PETSC_NULL;
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row of C */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextci  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];  /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1);  /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<pn; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's C_seq into lnk */
    anzi = cdi[i+1] - cdi[i];
    cji  = cdj + cdi[i];
    ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextci[k]+1) - *nextci[k];
        cji  = buf_rj[k] + *nextci[k];
        ierr = PetscLLAdd(anzi,cji,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextci[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(pN,pN,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[pn]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi     = bi;
  merge->bj     = bj;
  merge->ci     = ci;
  merge->cj     = cj;
  merge->buf_ri = buf_ri;
  merge->buf_rj = buf_rj;
  merge->C_seq  = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *C = B_mpi;

  ierr = PetscFree(cdi);CHKERRQ(ierr);
  ierr = PetscFree(cdj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ           *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  Mat_SeqAIJ           *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense;
  PetscInt             *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj;
  PetscInt             i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj,**abuf_r,*ba_i,*ca;
  MatScalar            *pA_d=pd->a,*pA_o=po->a,*pa_oth;
  PetscInt             am=A->m,cN=C->N,cm=C->m;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             **buf_ri,**buf_rj,**buf_ri_k,**nextrow,**nextcseqi;
  PetscInt             *owners,proc,nrows;
  PetscInt             *cjj,cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  PetscInt             *cseqi,*cseqj,col;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix C does not possess an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }
  /* get data from symbolic products */
  p_oth  = (Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_oth = p_oth->i; pj_oth = p_oth->j; pa_oth = p_oth->a;

  cseqi = merge->ci; cseqj = merge->cj;
  ierr = PetscMalloc((cseqi[cN]+1)*sizeof(MatScalar),&cseqa);CHKERRQ(ierr);
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P and one row of C */
  ierr = PetscMalloc(2*cN*(sizeof(MatScalar)+sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,2*cN*(sizeof(MatScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
  ca       = (MatScalar*)(apa + cN);
  apj      = (PetscInt *)(ca + cN);
  apjdense = (PetscInt *)(apj + cN);

  /* Clear old values in C */
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    nzi = adi[i+1] - adi[i];
    for (j=0; j<nzi; j++) {
      prow = *adj++;
      /* diagonal portion of P */
      pnzj = pd->i[prow+1] - pd->i[prow];
      pjj  = pd->j + pd->i[prow]; /* local col index of P */
      paj  = pd->a + pd->i[prow];
      for (k=0; k<pnzj; k++) {
        col = *pjj + p->cstart; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      /* off-diagonal portion of P */
      pnzj = po->i[prow+1] - po->i[prow];
      pjj  = po->j + po->i[prow]; /* local col index of P */
      paj  = po->a + po->i[prow];
      for (k=0; k<pnzj; k++) {
        col = p->garray[*pjj]; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;

      ada++;
    }
    /* off-diagonal portion of A */
    nzi = aoi[i+1] - aoi[i];
    for (j=0; j<nzi; j++) {
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    /* diagonal portion of P -- gives matrix values of local C */
    pnzi = pd->i[i+1] - pd->i[i];
    for (j=0; j<pnzi; j++) {
      /* add value into C */
      for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]];
      crow = (*pJ_d++) + owners[rank];
      ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ca,apnzj*sizeof(MatScalar));CHKERRQ(ierr);
      pA_d++;
      flops += 2*apnzj;
    }

    /* off-diagonal portion of P -- gives matrix values to be sent to others */
    pnzi = po->i[i+1] - po->i[i];
    for (j=0; j<pnzi; j++) {
      crow = p->garray[*pJ_o++];
      cjj  = cseqj + cseqi[crow];
      caj  = cseqa + cseqi[crow];
      /* add value into C_seq to be sent to other processors */
      nextap = 0;
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA_o)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA_o++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* send and recv matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);
  ierr = PetscFree(cseqa);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ba_i = apa;  /* rename, temporary array for storage of one row of C */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k]  = buf_ri[k];  /* beginning of k-th received i-structure */
    nrows        = *(buf_ri_k[k]);
    nextrow[k]   = buf_ri_k[k]+1;  /* next row index of k-th received i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1);  /* points to the next i-structure of k-th received i-structure */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj + bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi  = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi   = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj     = buf_rj[k] + *(nextcseqi[k]);
        cseqa     = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().

   Level: intermediate

.seealso: MatPtAP(), MatPtAPNumeric(), MatMatMultSymbolic()
*/
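/*
   Example of the symbolic/numeric split (a sketch, not part of the library
   source; it assumes A and P are assembled SeqAIJ matrices whose nonzero
   patterns do not change between numeric calls):

      Mat C;
      ierr = MatPtAPSymbolic(A,P,1.0,&C);CHKERRQ(ierr);
      ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);
      ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);  /* after values of A change */
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/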
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

typedef struct {
  Mat symAP;
} Mat_PtAPstruct;

EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode ierr;
  Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr;

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
  PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0; i<pn; i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0; k<anzj; k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0; j<ptanzi; j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<ptanzi; j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
{
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0; i<pn/ppdof; i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0; dof<ppdof; dof++) {
      /* Determine symbolic row of PtA: */
      for (j=0; j<ptnzi; j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0; k<anzj; k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j=0; j<ptanzi; j++) {
        /* divide by ppdof (the block size), not the loop index dof, which is zero on the first pass */
        pdof = *ptaj%ppdof;
        prow = (*ptaj++)/ppdof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0; k<pnzj; k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0; j<ptanzi; j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0; j<cnzi; j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user with MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ.  C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(), MatPtAPSymbolic(), MatMatMultNumeric()
*/
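/*
   Reuse sketch (an illustration, not part of the library source): once C has
   been created by MatPtAPSymbolic(), the numeric routine may be called
   repeatedly as the values (but not the structure) of A change:

      while (values of A change) {
        ierr = MatPtAPNumeric(A,P,C);CHKERRQ(ierr);
      }
*/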
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *p=(Mat_SeqAIJ*)P->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ*)C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0; j<anzi; j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
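      /* Worked instance of the scan below (illustration only): with
         cjj = {0,3,5,7} and apj = {3,7}, matches occur at k=1 and k=3;
         nextap advances only on a match, and the loop terminates because
         the symbolic phase guarantees cjj contains every entry of apj. */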
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c=(Mat_SeqAIJ*)C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    anzi = adi[i+1] - adi[i];
    for (j=0; j<anzi; j++) {
      prow = *adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi = aoi[i+1] - aoi[i];
    for (j=0; j<anzi; j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0; nextap<apnzj; k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_matptapsymbolic_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
  PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *ci,*cj,*ptaj;
  PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
  PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
  PetscInt       pm=P_loc->m,nlnk,*lnk;
  MatScalar      *ca;
  PetscBT        lnkbt;
  PetscInt       prend,nprow_loc,nprow_oth;
  PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;

  PetscFunctionBegin;
  if (!logkey_matptapsymbolic_local) {
    ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);

  prend = prstart + pm;

  /* get ij structure of P_loc^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
  ptJ = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow_loc = ptadenserow_loc + aN;
  ptadenserow_oth  = ptasparserow_loc + aN;
  ptasparserow_oth = ptadenserow_oth + aN;

  /* create and initialize a linked list */
  nlnk = pN+1;
  ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  nnz  = adi[am] + aoi[am];
  ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of C: */
  for (i=0; i<pN; i++) {
    ptnzi = pti[i+1] - pti[i];
    nprow_loc = 0; nprow_oth = 0;
    /* i-th row of symbolic P_loc^T*A_loc: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      /* diagonal portion of A */
      anzj = adi[arow+1] - adi[arow];
      ajj  = adj + adi[arow];
      for (k=0; k<anzj; k++) {
        col = ajj[k]+prstart;
        if (!ptadenserow_loc[col]) {
          ptadenserow_loc[col]          = -1;
          ptasparserow_loc[nprow_loc++] = col;
        }
      }
      /* off-diagonal portion of A */
      anzj = aoi[arow+1] - aoi[arow];
      ajj  = aoj + aoi[arow];
      for (k=0; k<anzj; k++) {
        col = a->garray[ajj[k]]; /* global col */
        if (col < cstart){
          col = ajj[k];
        } else if (col >= cend){
          col = ajj[k] + pm;
        } else {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
        }
        if (!ptadenserow_oth[col]) {
          ptadenserow_oth[col]          = -1;
          ptasparserow_oth[nprow_oth++] = col;
        }
      }
    }

    /* using symbolic info of local PtA, determine symbolic info for row of C: */
    cnzi = 0;
    /* rows of P_loc */
    ptaj = ptasparserow_loc;
    for (j=0; j<nprow_loc; j++) {
      prow  = *ptaj++;
      prow -= prstart; /* rm */
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }
    /* rows of P_oth */
    ptaj = ptasparserow_oth;
    for (j=0; j<nprow_oth; j++) {
      prow = *ptaj++;
      if (prow >= prend) prow -= pm; /* rm */
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<nprow_loc; j++) {
      ptadenserow_loc[ptasparserow_loc[j]] = 0;
    }
    for (j=0; j<nprow_oth; j++) {
      ptadenserow_oth[ptasparserow_oth[j]] = 0;
    }

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /* For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pN], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}