1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatPtAP" 13 /*@ 14 MatPtAP - Creates the matrix projection C = P^T * A * P 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the matrix 20 . P - the projection matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 31 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 32 33 Level: intermediate 34 35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 36 @*/ 37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 50 PetscValidType(P,2); 51 MatPreallocated(P); 52 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 56 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57 58 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59 60 /* For now, we do not dispatch based on the type of A and P */ 61 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62 fA = A->ops->ptap; 63 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64 fP = P->ops->ptap; 65 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67 68 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 70 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 EXTERN PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*); 75 EXTERN PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*); 76 EXTERN PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat,Mat,PetscInt,PetscInt,Mat); 77 78 79 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 80 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 84 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 85 { 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 if (scall == MAT_INITIAL_MATRIX){ 90 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 91 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 92 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 93 } else if (scall == MAT_REUSE_MATRIX){ /* not done yet! */ 94 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 95 ierr = MatDestroy(*C);CHKERRQ(ierr); 96 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); 97 98 /* 99 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 100 */ 101 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 102 } else { 103 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 104 } 105 PetscFunctionReturn(0); 106 } 107 #define NEW 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 110 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 111 { 112 PetscErrorCode ierr; 113 Mat C_seq; 114 Mat P_loc,P_oth,*ploc; 115 PetscInt prstart,prend,m=P->m,low; 116 IS isrow,iscol; 117 118 PetscFunctionBegin; 119 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 120 ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_oth);CHKERRQ(ierr); 121 prend = prstart + m; 122 /* get P_loc by taking all local rows of P */ 123 ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr); 124 ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr); 125 ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr); 126 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&ploc);CHKERRQ(ierr); 127 P_loc = ploc[0]; 128 ierr = ISDestroy(isrow);CHKERRQ(ierr); 129 ierr = ISDestroy(iscol);CHKERRQ(ierr); 130 131 /* compute C_seq = P_loc^T * A_loc * P_seq */ 132 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,fill,&C_seq);CHKERRQ(ierr); 133 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,C_seq);CHKERRQ(ierr); 134 135 ierr = MatDestroyMatrices(1,&ploc);CHKERRQ(ierr); 136 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 137 #ifdef OLD 138 Mat P_seq; 139 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 140 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr); 141 /* compute C_seq = P_loc^T * A_loc * P_seq */ 142 prend = prstart + m; 143 ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 144 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 145 #endif 146 /* add C_seq into mpi C */ 147 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 148 149 PetscFunctionReturn(0); 150 } 151 152 #undef __FUNCT__ 153 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 154 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 155 { 156 PetscErrorCode ierr; 157 Mat P_seq,C_seq; 158 PetscInt prstart,prend,m=P->m; 159 Mat_Merge_SeqsToMPI *merge; 160 PetscObjectContainer container; 161 162 PetscFunctionBegin; 163 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 164 if (container) { 165 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 166 } else { 167 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container"); 168 } 169 170 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 171 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr); 172 173 /* compute C_seq = P_loc^T * A_loc * P_seq */ 174 prend = prstart + m; 175 C_seq = merge->C_seq; 176 ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr); 177 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 178 179 /* add C_seq into mpi C */ 180 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 181 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 187 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 188 { 189 PetscErrorCode ierr; 190 191 PetscFunctionBegin; 192 if (scall == MAT_INITIAL_MATRIX){ 193 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 194 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 195 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 196 } 197 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 198 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 199 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 200 PetscFunctionReturn(0); 201 } 202 203 /* 204 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 205 206 Collective on Mat 207 208 Input Parameters: 209 + A - the matrix 210 - P - the projection matrix 211 212 Output Parameters: 213 . C - the (i,j) structure of the product matrix 214 215 Notes: 216 C will be created and must be destroyed by the user with MatDestroy(). 217 218 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 219 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 220 this (i,j) structure by calling MatPtAPNumeric(). 221 222 Level: intermediate 223 224 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 225 */ 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatPtAPSymbolic" 228 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 229 { 230 PetscErrorCode ierr; 231 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 232 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 233 234 PetscFunctionBegin; 235 236 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 237 PetscValidType(A,1); 238 MatPreallocated(A); 239 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 240 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 241 242 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 243 PetscValidType(P,2); 244 MatPreallocated(P); 245 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 246 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 247 248 PetscValidPointer(C,3); 249 250 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 251 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 252 253 /* For now, we do not dispatch based on the type of A and P */ 254 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 255 fA = A->ops->ptapsymbolic; 256 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 257 fP = P->ops->ptapsymbolic; 258 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 259 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 260 261 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 262 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 263 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 264 265 PetscFunctionReturn(0); 266 } 267 268 typedef struct { 269 Mat symAP; 270 } Mat_PtAPstruct; 271 272 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 276 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 277 { 278 PetscErrorCode ierr; 279 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 280 281 PetscFunctionBegin; 282 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 283 ierr = PetscFree(ptap);CHKERRQ(ierr); 284 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 290 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 291 { 292 PetscErrorCode ierr; 293 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 294 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 295 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 296 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 297 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 298 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 299 MatScalar *ca; 300 PetscBT lnkbt; 301 302 PetscFunctionBegin; 303 /* Get ij structure of P^T */ 304 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 305 ptJ=ptj; 306 307 /* Allocate ci array, arrays for fill computation and */ 308 /* free space for accumulating nonzero column info */ 309 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 310 ci[0] = 0; 311 312 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 313 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 314 ptasparserow = ptadenserow + an; 315 316 /* create and initialize a linked list */ 317 nlnk = pn+1; 318 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 319 320 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 321 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 322 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 323 current_space = free_space; 324 325 /* Determine symbolic info for each row of C: */ 326 for (i=0;i<pn;i++) { 327 ptnzi = pti[i+1] - pti[i]; 328 ptanzi = 0; 329 /* Determine symbolic row of PtA: */ 330 for (j=0;j<ptnzi;j++) { 331 arow = *ptJ++; 332 anzj = ai[arow+1] - ai[arow]; 333 ajj = aj + ai[arow]; 334 for (k=0;k<anzj;k++) { 335 if (!ptadenserow[ajj[k]]) { 336 ptadenserow[ajj[k]] = -1; 337 ptasparserow[ptanzi++] = ajj[k]; 338 } 339 } 340 } 341 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 342 ptaj = ptasparserow; 343 cnzi = 0; 344 for (j=0;j<ptanzi;j++) { 345 prow = *ptaj++; 346 pnzj = pi[prow+1] - pi[prow]; 347 pjj = pj + pi[prow]; 348 /* add non-zero cols of P into the sorted linked list lnk */ 349 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 350 cnzi += nlnk; 351 } 352 353 /* If free space is not available, make more free space */ 354 /* Double the amount of total space in the list */ 355 if (current_space->local_remaining<cnzi) { 356 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 357 } 358 359 /* Copy data into free space, and zero out denserows */ 360 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 361 current_space->array += cnzi; 362 current_space->local_used += cnzi; 363 current_space->local_remaining -= cnzi; 364 365 for (j=0;j<ptanzi;j++) { 366 ptadenserow[ptasparserow[j]] = 0; 367 } 368 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 369 /* For now, we will recompute what is needed. */ 370 ci[i+1] = ci[i] + cnzi; 371 } 372 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 373 /* Allocate space for cj, initialize cj, and */ 374 /* destroy list of free space and other temporary array(s) */ 375 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 376 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 377 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 378 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 379 380 /* Allocate space for ca */ 381 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 382 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 383 384 /* put together the new matrix */ 385 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 386 387 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 388 /* Since these are PETSc arrays, change flags to free them as necessary. */ 389 c = (Mat_SeqAIJ *)((*C)->data); 390 c->freedata = PETSC_TRUE; 391 c->nonew = 0; 392 393 /* Clean up. */ 394 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 395 396 PetscFunctionReturn(0); 397 } 398 399 #include "src/mat/impls/maij/maij.h" 400 EXTERN_C_BEGIN 401 #undef __FUNCT__ 402 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 403 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 404 { 405 /* This routine requires testing -- I don't think it works. */ 406 PetscErrorCode ierr; 407 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 408 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 409 Mat P=pp->AIJ; 410 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 411 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 412 PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 413 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 414 PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 415 MatScalar *ca; 416 417 PetscFunctionBegin; 418 /* Start timer */ 419 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 420 421 /* Get ij structure of P^T */ 422 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 423 424 /* Allocate ci array, arrays for fill computation and */ 425 /* free space for accumulating nonzero column info */ 426 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 427 ci[0] = 0; 428 429 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 430 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 431 ptasparserow = ptadenserow + an; 432 denserow = ptasparserow + an; 433 sparserow = denserow + pn; 434 435 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 436 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 437 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 438 current_space = free_space; 439 440 /* Determine symbolic info for each row of C: */ 441 for (i=0;i<pn/ppdof;i++) { 442 ptnzi = pti[i+1] - pti[i]; 443 ptanzi = 0; 444 ptJ = ptj + pti[i]; 445 for (dof=0;dof<ppdof;dof++) { 446 /* Determine symbolic row of PtA: */ 447 for (j=0;j<ptnzi;j++) { 448 arow = ptJ[j] + dof; 449 anzj = ai[arow+1] - ai[arow]; 450 ajj = aj + ai[arow]; 451 for (k=0;k<anzj;k++) { 452 if (!ptadenserow[ajj[k]]) { 453 ptadenserow[ajj[k]] = -1; 454 ptasparserow[ptanzi++] = ajj[k]; 455 } 456 } 457 } 458 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 459 ptaj = ptasparserow; 460 cnzi = 0; 461 for (j=0;j<ptanzi;j++) { 462 pdof = *ptaj%dof; 463 prow = (*ptaj++)/dof; 464 pnzj = pi[prow+1] - pi[prow]; 465 pjj = pj + pi[prow]; 466 for (k=0;k<pnzj;k++) { 467 if (!denserow[pjj[k]+pdof]) { 468 denserow[pjj[k]+pdof] = -1; 469 sparserow[cnzi++] = pjj[k]+pdof; 470 } 471 } 472 } 473 474 /* sort sparserow */ 475 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 476 477 /* If free space is not available, make more free space */ 478 /* Double the amount of total space in the list */ 479 if (current_space->local_remaining<cnzi) { 480 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 481 } 482 483 /* Copy data into free space, and zero out denserows */ 484 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 485 current_space->array += cnzi; 486 current_space->local_used += cnzi; 487 current_space->local_remaining -= cnzi; 488 489 for (j=0;j<ptanzi;j++) { 490 ptadenserow[ptasparserow[j]] = 0; 491 } 492 for (j=0;j<cnzi;j++) { 493 denserow[sparserow[j]] = 0; 494 } 495 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 496 /* For now, we will recompute what is needed. */ 497 ci[i+1+dof] = ci[i+dof] + cnzi; 498 } 499 } 500 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 501 /* Allocate space for cj, initialize cj, and */ 502 /* destroy list of free space and other temporary array(s) */ 503 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 504 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 505 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 506 507 /* Allocate space for ca */ 508 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 509 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 510 511 /* put together the new matrix */ 512 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 513 514 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 515 /* Since these are PETSc arrays, change flags to free them as necessary. */ 516 c = (Mat_SeqAIJ *)((*C)->data); 517 c->freedata = PETSC_TRUE; 518 c->nonew = 0; 519 520 /* Clean up. */ 521 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 522 523 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 EXTERN_C_END 527 528 /* 529 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 530 531 Collective on Mat 532 533 Input Parameters: 534 + A - the matrix 535 - P - the projection matrix 536 537 Output Parameters: 538 . C - the product matrix 539 540 Notes: 541 C must have been created by calling MatPtAPSymbolic and must be destroyed by 542 the user using MatDeatroy(). 543 544 This routine is currently only implemented for pairs of AIJ matrices and classes 545 which inherit from AIJ. C will be of type MATAIJ. 546 547 Level: intermediate 548 549 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 550 */ 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatPtAPNumeric" 553 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 554 { 555 PetscErrorCode ierr; 556 PetscErrorCode (*fA)(Mat,Mat,Mat); 557 PetscErrorCode (*fP)(Mat,Mat,Mat); 558 559 PetscFunctionBegin; 560 561 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 562 PetscValidType(A,1); 563 MatPreallocated(A); 564 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 565 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 566 567 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 568 PetscValidType(P,2); 569 MatPreallocated(P); 570 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 571 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 572 573 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 574 PetscValidType(C,3); 575 MatPreallocated(C); 576 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 577 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 578 579 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 580 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 581 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 582 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 583 584 /* For now, we do not dispatch based on the type of A and P */ 585 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 586 fA = A->ops->ptapnumeric; 587 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 588 fP = P->ops->ptapnumeric; 589 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 590 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 591 592 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 593 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 594 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 595 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 601 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 602 { 603 PetscErrorCode ierr; 604 PetscInt flops=0; 605 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 606 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 607 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 608 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 609 PetscInt *ci=c->i,*cj=c->j,*cjj; 610 PetscInt am=A->M,cn=C->N,cm=C->M; 611 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 612 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 613 614 PetscFunctionBegin; 615 /* Allocate temporary array for storage of one row of A*P */ 616 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 617 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 618 619 apj = (PetscInt *)(apa + cn); 620 apjdense = apj + cn; 621 622 /* Clear old values in C */ 623 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 624 625 for (i=0;i<am;i++) { 626 /* Form sparse row of A*P */ 627 anzi = ai[i+1] - ai[i]; 628 apnzj = 0; 629 for (j=0;j<anzi;j++) { 630 prow = *aj++; 631 pnzj = pi[prow+1] - pi[prow]; 632 pjj = pj + pi[prow]; 633 paj = pa + pi[prow]; 634 for (k=0;k<pnzj;k++) { 635 if (!apjdense[pjj[k]]) { 636 apjdense[pjj[k]] = -1; 637 apj[apnzj++] = pjj[k]; 638 } 639 apa[pjj[k]] += (*aa)*paj[k]; 640 } 641 flops += 2*pnzj; 642 aa++; 643 } 644 645 /* Sort the j index array for quick sparse axpy. */ 646 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 647 648 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 649 pnzi = pi[i+1] - pi[i]; 650 for (j=0;j<pnzi;j++) { 651 nextap = 0; 652 crow = *pJ++; 653 cjj = cj + ci[crow]; 654 caj = ca + ci[crow]; 655 /* Perform sparse axpy operation. Note cjj includes apj. */ 656 for (k=0;nextap<apnzj;k++) { 657 if (cjj[k]==apj[nextap]) { 658 caj[k] += (*pA)*apa[apj[nextap++]]; 659 } 660 } 661 flops += 2*apnzj; 662 pA++; 663 } 664 665 /* Zero the current row info for A*P */ 666 for (j=0;j<apnzj;j++) { 667 apa[apj[j]] = 0.; 668 apjdense[apj[j]] = 0; 669 } 670 } 671 672 /* Assemble the final matrix and clean up */ 673 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 674 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 675 ierr = PetscFree(apa);CHKERRQ(ierr); 676 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 677 678 PetscFunctionReturn(0); 679 } 680 681 /* Compute local C = P[rstart:rend,:]^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 682 683 #undef __FUNCT__ 684 #define __FUNCT__ "MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ" 685 PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C) 686 { 687 PetscErrorCode ierr; 688 PetscInt flops=0; 689 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 690 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 691 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 692 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 693 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 694 PetscInt *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 695 PetscInt *ci=c->i,*cj=c->j,*cjj; 696 PetscInt am=A->m,cn=C->N,cm=C->M; 697 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 698 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 699 700 PetscFunctionBegin; 701 pA=p->a+pi[prstart]; 702 /* Allocate temporary array for storage of one row of A*P */ 703 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 704 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 705 706 apj = (PetscInt *)(apa + cn); 707 apjdense = apj + cn; 708 709 /* Clear old values in C */ 710 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 711 712 for (i=0;i<am;i++) { 713 /* Form sparse row of A*P */ 714 /* diagonal portion of A */ 715 anzi = adi[i+1] - adi[i]; 716 apnzj = 0; 717 for (j=0;j<anzi;j++) { 718 prow = *adj + prstart; 719 adj++; 720 pnzj = pi[prow+1] - pi[prow]; 721 pjj = pj + pi[prow]; 722 paj = pa + pi[prow]; 723 for (k=0;k<pnzj;k++) { 724 if (!apjdense[pjj[k]]) { 725 apjdense[pjj[k]] = -1; 726 apj[apnzj++] = pjj[k]; 727 } 728 apa[pjj[k]] += (*ada)*paj[k]; 729 } 730 flops += 2*pnzj; 731 ada++; 732 } 733 /* off-diagonal portion of A */ 734 anzi = aoi[i+1] - aoi[i]; 735 for (j=0;j<anzi;j++) { 736 col = a->garray[*aoj]; 737 if (col < cstart){ 738 prow = *aoj; 739 } else if (col >= cend){ 740 prow = *aoj + prend-prstart; 741 } else { 742 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 743 } 744 aoj++; 745 pnzj = pi[prow+1] - pi[prow]; 746 pjj = pj + pi[prow]; 747 paj = pa + pi[prow]; 748 for (k=0;k<pnzj;k++) { 749 if (!apjdense[pjj[k]]) { 750 apjdense[pjj[k]] = -1; 751 apj[apnzj++] = pjj[k]; 752 } 753 apa[pjj[k]] += (*aoa)*paj[k]; 754 } 755 flops += 2*pnzj; 756 aoa++; 757 } 758 759 /* Sort the j index array for quick sparse axpy. */ 760 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 761 762 /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 763 pnzi = pi[i+1+prstart] - pi[i+prstart]; 764 for (j=0;j<pnzi;j++) { 765 nextap = 0; 766 crow = *pJ++; 767 cjj = cj + ci[crow]; 768 caj = ca + ci[crow]; 769 /* Perform sparse axpy operation. Note cjj includes apj. */ 770 for (k=0;nextap<apnzj;k++) { 771 if (cjj[k]==apj[nextap]) { 772 caj[k] += (*pA)*apa[apj[nextap++]]; 773 } 774 } 775 flops += 2*apnzj; 776 pA++; 777 } 778 779 /* Zero the current row info for A*P */ 780 for (j=0;j<apnzj;j++) { 781 apa[apj[j]] = 0.; 782 apjdense[apj[j]] = 0; 783 } 784 } 785 786 /* Assemble the final matrix and clean up */ 787 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 788 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 789 ierr = PetscFree(apa);CHKERRQ(ierr); 790 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 791 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ" 797 PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 798 { 799 PetscErrorCode ierr; 800 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 801 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 802 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 803 Mat_SeqAIJ *p=(Mat_SeqAIJ*)P->data,*c; 804 PetscInt *pti,*ptj,*ptJ,*ajj,*pi=p->i,*pj=p->j,*pjj; 805 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 806 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 807 PetscInt an=A->N,am=A->m,pn=P->N,pm=P->M; 808 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 809 PetscInt m=prend-prstart,nlnk,*lnk; 810 MatScalar *ca; 811 PetscBT lnkbt; 812 813 PetscFunctionBegin; 814 int rank; 815 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 816 817 /* Get ij structure of P[rstart:rend,:]^T */ 818 ierr = MatGetSymbolicTransposeReduced_SeqAIJ(P,prstart,prend,&pti,&ptj);CHKERRQ(ierr); 819 ptJ=ptj; 820 821 /* Allocate ci array, arrays for fill computation and */ 822 /* free space for accumulating nonzero column info */ 823 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 824 ci[0] = 0; 825 826 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 827 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 828 ptasparserow = ptadenserow + an; 829 830 /* create and initialize a linked list */ 831 nlnk = pn+1; 832 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 833 834 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 835 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 836 nnz = adi[am] + aoi[am]; 837 ierr = GetMoreSpace((PetscInt)(fill*nnz/pm)*pn,&free_space); 838 current_space = free_space; 839 840 /* Determine symbolic info for each row of C: */ 841 for (i=0;i<pn;i++) { 842 ptnzi = pti[i+1] - pti[i]; 843 ptanzi = 0; 844 /* Determine symbolic row of PtA_reduced: */ 845 for (j=0;j<ptnzi;j++) { 846 arow = *ptJ++; 847 if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow); 848 /* diagonal portion of A */ 849 anzj = adi[arow+1] - adi[arow]; 850 ajj = adj + adi[arow]; 851 for (k=0;k<anzj;k++) { 852 col = ajj[k]+prstart; 853 if (!ptadenserow[col]) { 854 ptadenserow[col] = -1; 855 ptasparserow[ptanzi++] = col; 856 } 857 } 858 /* off-diagonal portion of A */ 859 anzj = aoi[arow+1] - aoi[arow]; 860 ajj = aoj + aoi[arow]; 861 for (k=0;k<anzj;k++) { 862 col = a->garray[ajj[k]]; /* global col */ 863 if (col < cstart){ 864 col = ajj[k]; 865 } else if (col >= cend){ 866 col = ajj[k] + m; 867 } else { 868 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 869 } 870 if (!ptadenserow[col]) { 871 ptadenserow[col] = -1; 872 ptasparserow[ptanzi++] = col; 873 } 874 } 875 } 876 877 printf(" [%d] i: %d, ptanzi: %d \n",rank,i,ptanzi); 878 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 879 ptaj = ptasparserow; 880 cnzi = 0; 881 for (j=0;j<ptanzi;j++) { 882 prow = *ptaj++; 883 pnzj = pi[prow+1] - pi[prow]; 884 pjj = pj + pi[prow]; 885 /* if (rank==1) printf(" prow: %d, pnzj: %d\n",prow,pnzj); */ 886 /* add non-zero cols of P into the sorted linked list lnk */ 887 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 888 cnzi += nlnk; 889 } 890 891 /* If free space is not available, make more free space */ 892 /* Double the amount of total space in the list */ 893 if (current_space->local_remaining<cnzi) { 894 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 895 } 896 897 /* Copy data into free space, and zero out denserows */ 898 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 899 current_space->array += cnzi; 900 current_space->local_used += cnzi; 901 current_space->local_remaining -= cnzi; 902 903 for (j=0;j<ptanzi;j++) { 904 ptadenserow[ptasparserow[j]] = 0; 905 } 906 907 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 908 /* For now, we will recompute what is needed. */ 909 ci[i+1] = ci[i] + cnzi; 910 } 911 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 912 /* Allocate space for cj, initialize cj, and */ 913 /* destroy list of free space and other temporary array(s) */ 914 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 915 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 916 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 917 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 918 919 /* Allocate space for ca */ 920 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 921 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 922 923 /* put together the new matrix */ 924 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 925 926 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 927 /* Since these are PETSc arrays, change flags to free them as necessary. */ 928 c = (Mat_SeqAIJ *)((*C)->data); 929 c->freedata = PETSC_TRUE; 930 c->nonew = 0; 931 932 /* Clean up. */ 933 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 934 935 PetscFunctionReturn(0); 936 } 937 938 /*@C 939 MatPtAPReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices, 940 used by MatPtAP_MPIAIJ_MPIAIJ() 941 942 Collective on Mat 943 944 Input Parameters: 945 + A - the matrix in mpiaij format 946 . P - the projection matrix in seqaij format 947 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 948 . fill - expected fill (not being used when scall==MAT_REUSE_MATRIX) 949 . prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose 950 951 Output Parameters: 952 . C - the product matrix in seqaij format 953 954 Level: developer 955 956 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 957 @*/ 958 static PetscEvent logkey_matptapreducedpt = 0; 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatPtAPReducedPt_MPIAIJ_SeqAIJ" 961 PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 962 { 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 967 if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 968 if (!logkey_matptapreducedpt) { 969 ierr = PetscLogEventRegister(&logkey_matptapreducedpt,"MatPtAP_ReducedPt",MAT_COOKIE); 970 } 971 PetscLogEventBegin(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr); 972 973 if (scall == MAT_INITIAL_MATRIX){ 974 ierr = MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 975 } 976 ierr = MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(A,P,prstart,prend,*C);CHKERRQ(ierr); 977 ierr = PetscLogEventEnd(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr); 978 979 PetscFunctionReturn(0); 980 } 981 982 /* ------------- New --------------------*/ 983 static PetscEvent logkey_matptapnumeric_local = 0; 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 986 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 987 { 988 PetscErrorCode ierr; 989 PetscInt flops=0; 990 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 991 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 992 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 993 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 994 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 995 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 996 PetscInt *pJ=pj_loc,*pjj; 997 PetscInt *ci=c->i,*cj=c->j,*cjj; 998 PetscInt am=A->m,cn=C->N,cm=C->M; 999 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 1000 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 1001 MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 1002 1003 PetscFunctionBegin; 1004 if (!logkey_matptapnumeric_local) { 1005 ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1006 } 1007 PetscLogEventBegin(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1008 1009 /* Allocate temporary array for storage of one row of A*P */ 1010 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 1011 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1012 apj = (PetscInt *)(apa + cn); 1013 apjdense = apj + cn; 1014 1015 /* Clear old values in C */ 1016 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1017 1018 for (i=0;i<am;i++) { 1019 /* Form i-th sparse row of A*P */ 1020 apnzj = 0; 1021 /* diagonal portion of A */ 1022 anzi = adi[i+1] - adi[i]; 1023 for (j=0;j<anzi;j++) { 1024 prow = *adj; 1025 adj++; 1026 pnzj = pi_loc[prow+1] - pi_loc[prow]; 1027 pjj = pj_loc + pi_loc[prow]; 1028 paj = pa_loc + pi_loc[prow]; 1029 for (k=0;k<pnzj;k++) { 1030 if (!apjdense[pjj[k]]) { 1031 apjdense[pjj[k]] = -1; 1032 apj[apnzj++] = pjj[k]; 1033 } 1034 apa[pjj[k]] += (*ada)*paj[k]; 1035 } 1036 flops += 2*pnzj; 1037 ada++; 1038 } 1039 /* off-diagonal portion of A */ 1040 anzi = aoi[i+1] - aoi[i]; 1041 for (j=0;j<anzi;j++) { 1042 col = a->garray[*aoj]; 1043 if (col < cstart){ 1044 prow = *aoj; 1045 } else if (col >= cend){ 1046 prow = *aoj; 1047 } else { 1048 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1049 } 1050 aoj++; 1051 pnzj = pi_oth[prow+1] - pi_oth[prow]; 1052 pjj = pj_oth + pi_oth[prow]; 1053 paj = pa_oth + pi_oth[prow]; 1054 for (k=0;k<pnzj;k++) { 1055 if (!apjdense[pjj[k]]) { 1056 apjdense[pjj[k]] = -1; 1057 apj[apnzj++] = pjj[k]; 1058 } 1059 apa[pjj[k]] += (*aoa)*paj[k]; 1060 } 1061 flops += 2*pnzj; 1062 aoa++; 1063 } 1064 /* Sort the j index array for quick sparse axpy. */ 1065 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1066 1067 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1068 pnzi = pi_loc[i+1] - pi_loc[i]; 1069 for (j=0;j<pnzi;j++) { 1070 nextap = 0; 1071 crow = *pJ++; 1072 cjj = cj + ci[crow]; 1073 caj = ca + ci[crow]; 1074 /* Perform sparse axpy operation. Note cjj includes apj. */ 1075 for (k=0;nextap<apnzj;k++) { 1076 if (cjj[k]==apj[nextap]) { 1077 caj[k] += (*pA)*apa[apj[nextap++]]; 1078 } 1079 } 1080 flops += 2*apnzj; 1081 pA++; 1082 } 1083 1084 /* Zero the current row info for A*P */ 1085 for (j=0;j<apnzj;j++) { 1086 apa[apj[j]] = 0.; 1087 apjdense[apj[j]] = 0; 1088 } 1089 } 1090 1091 /* Assemble the final matrix and clean up */ 1092 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1093 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1094 ierr = PetscFree(apa);CHKERRQ(ierr); 1095 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1096 PetscLogEventEnd(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 static PetscEvent logkey_matptapsymbolic_local = 0; 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 1102 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 1103 { 1104 PetscErrorCode ierr; 1105 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1106 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1107 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 1108 PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 1109 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1110 PetscInt *ci,*cj,*ptaj; 1111 PetscInt an=A->N,am=A->m,pN=P_loc->N; 1112 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 1113 PetscInt pm=P_loc->m,nlnk,*lnk; 1114 MatScalar *ca; 1115 PetscBT lnkbt; 1116 PetscInt prend,nprow_loc,nprow_oth; 1117 PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1118 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1119 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1120 /* PetscInt rank; */ 1121 1122 PetscFunctionBegin; 1123 if (!logkey_matptapsymbolic_local) { 1124 ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1125 } 1126 ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1127 1128 /* ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); */ 1129 prend = prstart + pm; 1130 1131 /* get ij structure of P_loc^T */ 1132 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1133 ptJ=ptj; 1134 1135 /* Allocate ci array, arrays for fill computation and */ 1136 /* free space for accumulating nonzero column info */ 1137 ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1138 ci[0] = 0; 1139 1140 ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 1141 ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1142 ptasparserow_loc = ptadenserow_loc + an; 1143 ptadenserow_oth = ptasparserow_loc + an; 1144 ptasparserow_oth = ptadenserow_oth + an; 1145 1146 /* create and initialize a linked list */ 1147 nlnk = pN+1; 1148 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1149 1150 /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */ 1151 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1152 nnz = adi[am] + aoi[am]; 1153 ierr = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space); 1154 current_space = free_space; 1155 1156 /* determine symbolic info for each row of C: */ 1157 for (i=0;i<pN;i++) { 1158 ptnzi = pti[i+1] - pti[i]; 1159 nprow_loc = 0; nprow_oth = 0; 1160 /* i-th row of symbolic P_loc^T*A_loc: */ 1161 for (j=0;j<ptnzi;j++) { 1162 arow = *ptJ++; 1163 /* if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow); */ 1164 /* diagonal portion of A */ 1165 anzj = adi[arow+1] - adi[arow]; 1166 ajj = adj + adi[arow]; 1167 for (k=0;k<anzj;k++) { 1168 col = ajj[k]+prstart; 1169 if (!ptadenserow_loc[col]) { 1170 ptadenserow_loc[col] = -1; 1171 ptasparserow_loc[nprow_loc++] = col; 1172 } 1173 } 1174 /* off-diagonal portion of A */ 1175 anzj = aoi[arow+1] - aoi[arow]; 1176 ajj = aoj + aoi[arow]; 1177 for (k=0;k<anzj;k++) { 1178 col = a->garray[ajj[k]]; /* global col */ 1179 if (col < cstart){ 1180 col = ajj[k]; 1181 } else if (col >= cend){ 1182 col = ajj[k] + pm; 1183 } else { 1184 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1185 } 1186 if (!ptadenserow_oth[col]) { 1187 ptadenserow_oth[col] = -1; 1188 ptasparserow_oth[nprow_oth++] = col; 1189 } 1190 } 1191 } 1192 1193 /* printf(" [%d] i: %d, nprow_loc: %d, nprow_oth: %d\n",rank,i,nprow_loc,nprow_oth); */ 1194 /* using symbolic info of local PtA, determine symbolic info for row of C: */ 1195 cnzi = 0; 1196 /* rows of P_loc */ 1197 ptaj = ptasparserow_loc; 1198 for (j=0; j<nprow_loc; j++) { 1199 prow = *ptaj++; 1200 prow -= prstart; /* rm */ 1201 pnzj = pi_loc[prow+1] - pi_loc[prow]; 1202 pjj = pj_loc + pi_loc[prow]; 1203 /* add non-zero cols of P into the sorted linked list lnk */ 1204 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1205 cnzi += nlnk; 1206 } 1207 /* rows of P_oth */ 1208 ptaj = ptasparserow_oth; 1209 for (j=0; j<nprow_oth; j++) { 1210 prow = *ptaj++; 1211 if (prow >= prend) prow -= pm; /* rm */ 1212 pnzj = pi_oth[prow+1] - pi_oth[prow]; 1213 pjj = pj_oth + pi_oth[prow]; 1214 /* add non-zero cols of P into the sorted linked list lnk */ 1215 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1216 cnzi += nlnk; 1217 } 1218 1219 /* If free space is not available, make more free space */ 1220 /* Double the amount of total space in the list */ 1221 if (current_space->local_remaining<cnzi) { 1222 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1223 } 1224 1225 /* Copy data into free space, and zero out denserows */ 1226 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1227 current_space->array += cnzi; 1228 current_space->local_used += cnzi; 1229 current_space->local_remaining -= cnzi; 1230 1231 for (j=0;j<nprow_loc; j++) { 1232 ptadenserow_loc[ptasparserow_loc[j]] = 0; 1233 } 1234 for (j=0;j<nprow_oth; j++) { 1235 ptadenserow_oth[ptasparserow_oth[j]] = 0; 1236 } 1237 1238 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1239 /* For now, we will recompute what is needed. */ 1240 ci[i+1] = ci[i] + cnzi; 1241 } 1242 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1243 /* Allocate space for cj, initialize cj, and */ 1244 /* destroy list of free space and other temporary array(s) */ 1245 ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1246 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1247 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1248 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1249 1250 /* Allocate space for ca */ 1251 ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1252 ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1253 1254 /* put together the new matrix */ 1255 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 1256 1257 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1258 /* Since these are PETSc arrays, change flags to free them as necessary. */ 1259 c = (Mat_SeqAIJ *)((*C)->data); 1260 c->freedata = PETSC_TRUE; 1261 c->nonew = 0; 1262 1263 /* Clean up. */ 1264 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1265 /* 1266 ierr = MPI_Barrier(PETSC_COMM_WORLD); 1267 if (rank == 1){ 1268 printf("[%d] C: %d, %d\n",rank,(*C)->M,(*C)->N); 1269 ierr = MatView(*C, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 1270 } */ 1271 ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274