1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatPtAP" 13 /*@ 14 MatPtAP - Creates the matrix projection C = P^T * A * P 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the matrix 20 . P - the projection matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 31 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 32 33 Level: intermediate 34 35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 36 @*/ 37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 50 PetscValidType(P,2); 51 MatPreallocated(P); 52 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 56 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57 58 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59 60 /* For now, we do not dispatch based on the type of A and P */ 61 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62 fA = A->ops->ptap; 63 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64 fP = P->ops->ptap; 65 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67 68 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 70 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*); 75 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*); 76 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscInt,PetscInt,Mat); 77 78 EXTERN PetscErrorCode MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*); 79 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*); 80 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscInt,PetscInt,Mat); 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 84 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 85 { 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 if (scall == MAT_INITIAL_MATRIX){ 90 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 91 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 92 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 93 } else if (scall == MAT_REUSE_MATRIX){ 94 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 95 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 96 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 97 } else { 98 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 99 } 100 PetscFunctionReturn(0); 101 } 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 105 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 106 { 107 PetscErrorCode ierr; 108 Mat P_seq,C_seq; /* A_loc */ 109 PetscInt prstart,prend,m=P->m; 110 /* IS isrowp,iscolp; */ 111 112 PetscFunctionBegin; 113 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 114 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr); 115 /* 116 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 117 ierr = ISDestroy(iscolp);CHKERRQ(ierr); */ 118 119 /* get A_loc = submatrix of A by taking all local rows of A */ 120 /* 121 ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 122 */ 123 /* ierr = ISDestroy(isrowp);CHKERRQ(ierr); */ 124 125 /* compute C_seq = P_loc^T * A_loc * P_seq */ 126 prend = prstart + m; 127 ierr = MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(A,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 128 /* 129 ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 130 */ 131 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 132 /* ierr = MatDestroy(A_loc);CHKERRQ(ierr); */ 133 134 /* add C_seq into mpi C */ 135 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 136 137 PetscFunctionReturn(0); 138 } 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 142 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 143 { 144 PetscErrorCode ierr; 145 Mat P_seq,A_loc,C_seq; 146 PetscInt prstart,prend,m=P->m; 147 IS isrowp,iscolp; 148 Mat_Merge_SeqsToMPI *merge; 149 PetscObjectContainer container; 150 151 PetscFunctionBegin; 152 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 153 if (container) { 154 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 155 } else { 156 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container"); 157 } 158 159 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 160 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 161 ierr = ISDestroy(iscolp);CHKERRQ(ierr); 162 163 /* get A_loc = submatrix of A by taking all local rows of A */ 164 ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 165 ierr = ISDestroy(isrowp);CHKERRQ(ierr); 166 167 /* compute C_seq = P_loc^T * A_loc * P_seq */ 168 prend = prstart + m; 169 C_seq = merge->C_seq; 170 ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr); 171 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 172 ierr = MatDestroy(A_loc);CHKERRQ(ierr); 173 174 /* add C_seq into mpi C */ 175 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 176 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 182 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 183 { 184 PetscErrorCode ierr; 185 186 PetscFunctionBegin; 187 if (scall == MAT_INITIAL_MATRIX){ 188 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 189 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 190 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 191 } 192 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 193 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 194 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "MatPtAPSymbolic" 200 /* 201 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 202 203 Collective on Mat 204 205 Input Parameters: 206 + A - the matrix 207 - P - the projection matrix 208 209 Output Parameters: 210 . C - the (i,j) structure of the product matrix 211 212 Notes: 213 C will be created and must be destroyed by the user with MatDestroy(). 214 215 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 216 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 217 this (i,j) structure by calling MatPtAPNumeric(). 218 219 Level: intermediate 220 221 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 222 */ 223 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 224 { 225 PetscErrorCode ierr; 226 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 227 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 228 229 PetscFunctionBegin; 230 231 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 232 PetscValidType(A,1); 233 MatPreallocated(A); 234 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 235 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 236 237 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 238 PetscValidType(P,2); 239 MatPreallocated(P); 240 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 241 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 242 243 PetscValidPointer(C,3); 244 245 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 246 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 247 248 /* For now, we do not dispatch based on the type of A and P */ 249 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 250 fA = A->ops->ptapsymbolic; 251 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 252 fP = P->ops->ptapsymbolic; 253 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 254 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 255 256 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 257 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 258 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 259 260 PetscFunctionReturn(0); 261 } 262 263 typedef struct { 264 Mat symAP; 265 } Mat_PtAPstruct; 266 267 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 271 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 272 { 273 PetscErrorCode ierr; 274 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 275 276 PetscFunctionBegin; 277 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 278 ierr = PetscFree(ptap);CHKERRQ(ierr); 279 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 285 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 286 { 287 PetscErrorCode ierr; 288 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 289 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 290 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 291 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 292 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 293 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 294 MatScalar *ca; 295 PetscBT lnkbt; 296 297 PetscFunctionBegin; 298 /* Get ij structure of P^T */ 299 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 300 ptJ=ptj; 301 302 /* Allocate ci array, arrays for fill computation and */ 303 /* free space for accumulating nonzero column info */ 304 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 305 ci[0] = 0; 306 307 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 308 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 309 ptasparserow = ptadenserow + an; 310 311 /* create and initialize a linked list */ 312 nlnk = pn+1; 313 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 314 315 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 316 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 317 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 318 current_space = free_space; 319 320 /* Determine symbolic info for each row of C: */ 321 for (i=0;i<pn;i++) { 322 ptnzi = pti[i+1] - pti[i]; 323 ptanzi = 0; 324 /* Determine symbolic row of PtA: */ 325 for (j=0;j<ptnzi;j++) { 326 arow = *ptJ++; 327 anzj = ai[arow+1] - ai[arow]; 328 ajj = aj + ai[arow]; 329 for (k=0;k<anzj;k++) { 330 if (!ptadenserow[ajj[k]]) { 331 ptadenserow[ajj[k]] = -1; 332 ptasparserow[ptanzi++] = ajj[k]; 333 } 334 } 335 } 336 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 337 ptaj = ptasparserow; 338 cnzi = 0; 339 for (j=0;j<ptanzi;j++) { 340 prow = *ptaj++; 341 pnzj = pi[prow+1] - pi[prow]; 342 pjj = pj + pi[prow]; 343 /* add non-zero cols of P into the sorted linked list lnk */ 344 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 345 cnzi += nlnk; 346 } 347 348 /* If free space is not available, make more free space */ 349 /* Double the amount of total space in the list */ 350 if (current_space->local_remaining<cnzi) { 351 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 352 } 353 354 /* Copy data into free space, and zero out denserows */ 355 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 356 current_space->array += cnzi; 357 current_space->local_used += cnzi; 358 current_space->local_remaining -= cnzi; 359 360 for (j=0;j<ptanzi;j++) { 361 ptadenserow[ptasparserow[j]] = 0; 362 } 363 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 364 /* For now, we will recompute what is needed. */ 365 ci[i+1] = ci[i] + cnzi; 366 } 367 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 368 /* Allocate space for cj, initialize cj, and */ 369 /* destroy list of free space and other temporary array(s) */ 370 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 371 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 372 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 373 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 374 375 /* Allocate space for ca */ 376 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 377 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 381 382 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 383 /* Since these are PETSc arrays, change flags to free them as necessary. */ 384 c = (Mat_SeqAIJ *)((*C)->data); 385 c->freedata = PETSC_TRUE; 386 c->nonew = 0; 387 388 /* Clean up. */ 389 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 390 391 PetscFunctionReturn(0); 392 } 393 394 #include "src/mat/impls/maij/maij.h" 395 EXTERN_C_BEGIN 396 #undef __FUNCT__ 397 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 398 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 399 { 400 /* This routine requires testing -- I don't think it works. */ 401 PetscErrorCode ierr; 402 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 403 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 404 Mat P=pp->AIJ; 405 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 406 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 407 PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 408 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 409 PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 410 MatScalar *ca; 411 412 PetscFunctionBegin; 413 /* Start timer */ 414 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 415 416 /* Get ij structure of P^T */ 417 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 418 419 /* Allocate ci array, arrays for fill computation and */ 420 /* free space for accumulating nonzero column info */ 421 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 422 ci[0] = 0; 423 424 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 425 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 426 ptasparserow = ptadenserow + an; 427 denserow = ptasparserow + an; 428 sparserow = denserow + pn; 429 430 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 431 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 432 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 433 current_space = free_space; 434 435 /* Determine symbolic info for each row of C: */ 436 for (i=0;i<pn/ppdof;i++) { 437 ptnzi = pti[i+1] - pti[i]; 438 ptanzi = 0; 439 ptJ = ptj + pti[i]; 440 for (dof=0;dof<ppdof;dof++) { 441 /* Determine symbolic row of PtA: */ 442 for (j=0;j<ptnzi;j++) { 443 arow = ptJ[j] + dof; 444 anzj = ai[arow+1] - ai[arow]; 445 ajj = aj + ai[arow]; 446 for (k=0;k<anzj;k++) { 447 if (!ptadenserow[ajj[k]]) { 448 ptadenserow[ajj[k]] = -1; 449 ptasparserow[ptanzi++] = ajj[k]; 450 } 451 } 452 } 453 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 454 ptaj = ptasparserow; 455 cnzi = 0; 456 for (j=0;j<ptanzi;j++) { 457 pdof = *ptaj%dof; 458 prow = (*ptaj++)/dof; 459 pnzj = pi[prow+1] - pi[prow]; 460 pjj = pj + pi[prow]; 461 for (k=0;k<pnzj;k++) { 462 if (!denserow[pjj[k]+pdof]) { 463 denserow[pjj[k]+pdof] = -1; 464 sparserow[cnzi++] = pjj[k]+pdof; 465 } 466 } 467 } 468 469 /* sort sparserow */ 470 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 471 472 /* If free space is not available, make more free space */ 473 /* Double the amount of total space in the list */ 474 if (current_space->local_remaining<cnzi) { 475 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 476 } 477 478 /* Copy data into free space, and zero out denserows */ 479 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 480 current_space->array += cnzi; 481 current_space->local_used += cnzi; 482 current_space->local_remaining -= cnzi; 483 484 for (j=0;j<ptanzi;j++) { 485 ptadenserow[ptasparserow[j]] = 0; 486 } 487 for (j=0;j<cnzi;j++) { 488 denserow[sparserow[j]] = 0; 489 } 490 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 491 /* For now, we will recompute what is needed. */ 492 ci[i+1+dof] = ci[i+dof] + cnzi; 493 } 494 } 495 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 496 /* Allocate space for cj, initialize cj, and */ 497 /* destroy list of free space and other temporary array(s) */ 498 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 499 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 500 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 501 502 /* Allocate space for ca */ 503 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 504 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 505 506 /* put together the new matrix */ 507 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 508 509 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 510 /* Since these are PETSc arrays, change flags to free them as necessary. */ 511 c = (Mat_SeqAIJ *)((*C)->data); 512 c->freedata = PETSC_TRUE; 513 c->nonew = 0; 514 515 /* Clean up. */ 516 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 517 518 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 EXTERN_C_END 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatPtAPNumeric" 525 /* 526 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 527 528 Collective on Mat 529 530 Input Parameters: 531 + A - the matrix 532 - P - the projection matrix 533 534 Output Parameters: 535 . C - the product matrix 536 537 Notes: 538 C must have been created by calling MatPtAPSymbolic and must be destroyed by 539 the user using MatDeatroy(). 540 541 This routine is currently only implemented for pairs of AIJ matrices and classes 542 which inherit from AIJ. C will be of type MATAIJ. 543 544 Level: intermediate 545 546 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 547 */ 548 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 549 { 550 PetscErrorCode ierr; 551 PetscErrorCode (*fA)(Mat,Mat,Mat); 552 PetscErrorCode (*fP)(Mat,Mat,Mat); 553 554 PetscFunctionBegin; 555 556 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 557 PetscValidType(A,1); 558 MatPreallocated(A); 559 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 560 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 561 562 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 563 PetscValidType(P,2); 564 MatPreallocated(P); 565 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 566 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 567 568 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 569 PetscValidType(C,3); 570 MatPreallocated(C); 571 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 572 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 573 574 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 575 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 576 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 577 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 578 579 /* For now, we do not dispatch based on the type of A and P */ 580 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 581 fA = A->ops->ptapnumeric; 582 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 583 fP = P->ops->ptapnumeric; 584 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 585 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 586 587 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 588 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 589 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 590 591 PetscFunctionReturn(0); 592 } 593 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 596 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 597 { 598 PetscErrorCode ierr; 599 PetscInt flops=0; 600 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 601 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 602 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 603 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 604 PetscInt *ci=c->i,*cj=c->j,*cjj; 605 PetscInt am=A->M,cn=C->N,cm=C->M; 606 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 607 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 608 609 PetscFunctionBegin; 610 /* Allocate temporary array for storage of one row of A*P */ 611 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 612 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 613 614 apj = (PetscInt *)(apa + cn); 615 apjdense = apj + cn; 616 617 /* Clear old values in C */ 618 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 619 620 for (i=0;i<am;i++) { 621 /* Form sparse row of A*P */ 622 anzi = ai[i+1] - ai[i]; 623 apnzj = 0; 624 for (j=0;j<anzi;j++) { 625 prow = *aj++; 626 pnzj = pi[prow+1] - pi[prow]; 627 pjj = pj + pi[prow]; 628 paj = pa + pi[prow]; 629 for (k=0;k<pnzj;k++) { 630 if (!apjdense[pjj[k]]) { 631 apjdense[pjj[k]] = -1; 632 apj[apnzj++] = pjj[k]; 633 } 634 apa[pjj[k]] += (*aa)*paj[k]; 635 } 636 flops += 2*pnzj; 637 aa++; 638 } 639 640 /* Sort the j index array for quick sparse axpy. */ 641 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 642 643 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 644 pnzi = pi[i+1] - pi[i]; 645 for (j=0;j<pnzi;j++) { 646 nextap = 0; 647 crow = *pJ++; 648 cjj = cj + ci[crow]; 649 caj = ca + ci[crow]; 650 /* Perform sparse axpy operation. Note cjj includes apj. */ 651 for (k=0;nextap<apnzj;k++) { 652 if (cjj[k]==apj[nextap]) { 653 caj[k] += (*pA)*apa[apj[nextap++]]; 654 } 655 } 656 flops += 2*apnzj; 657 pA++; 658 } 659 660 /* Zero the current row info for A*P */ 661 for (j=0;j<apnzj;j++) { 662 apa[apj[j]] = 0.; 663 apjdense[apj[j]] = 0; 664 } 665 } 666 667 /* Assemble the final matrix and clean up */ 668 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 669 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 670 ierr = PetscFree(apa);CHKERRQ(ierr); 671 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 672 673 PetscFunctionReturn(0); 674 } 675 676 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */ 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt" 680 /*@C 681 MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices, 682 used by MatPtAP_MPIAIJ_MPIAIJ() 683 684 Collective on Mat 685 686 Input Parameters: 687 + A - the matrix in seqaij format 688 . P - the projection matrix in seqaij format 689 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 690 . fill - expected fill (not being used when scall==MAT_REUSE_MATRIX) 691 . prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose 692 693 Output Parameters: 694 . C - the product matrix in seqaij format 695 696 Level: developer 697 698 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 699 @*/ 700 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C) 701 { 702 PetscErrorCode ierr; 703 PetscInt flops=0; 704 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 705 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 706 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 707 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense; 708 PetscInt *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 709 PetscInt *ci=c->i,*cj=c->j,*cjj; 710 PetscInt am=A->M,cn=C->N,cm=C->M; 711 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 712 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 713 714 PetscFunctionBegin; 715 pA=p->a+pi[prstart]; 716 /* Allocate temporary array for storage of one row of A*P */ 717 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 718 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 719 720 apj = (PetscInt *)(apa + cn); 721 apjdense = apj + cn; 722 723 /* Clear old values in C */ 724 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 725 726 for (i=0;i<am;i++) { 727 /* Form sparse row of A*P */ 728 anzi = ai[i+1] - ai[i]; 729 apnzj = 0; 730 for (j=0;j<anzi;j++) { 731 prow = *aj++; 732 pnzj = pi[prow+1] - pi[prow]; 733 pjj = pj + pi[prow]; 734 paj = pa + pi[prow]; 735 for (k=0;k<pnzj;k++) { 736 if (!apjdense[pjj[k]]) { 737 apjdense[pjj[k]] = -1; 738 apj[apnzj++] = pjj[k]; 739 } 740 apa[pjj[k]] += (*aa)*paj[k]; 741 } 742 flops += 2*pnzj; 743 aa++; 744 } 745 746 /* Sort the j index array for quick sparse axpy. */ 747 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 748 749 /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 750 pnzi = pi[i+1+prstart] - pi[i+prstart]; 751 for (j=0;j<pnzi;j++) { 752 nextap = 0; 753 crow = *pJ++; 754 cjj = cj + ci[crow]; 755 caj = ca + ci[crow]; 756 /* Perform sparse axpy operation. Note cjj includes apj. */ 757 for (k=0;nextap<apnzj;k++) { 758 if (cjj[k]==apj[nextap]) { 759 caj[k] += (*pA)*apa[apj[nextap++]]; 760 } 761 } 762 flops += 2*apnzj; 763 pA++; 764 } 765 766 /* Zero the current row info for A*P */ 767 for (j=0;j<apnzj;j++) { 768 apa[apj[j]] = 0.; 769 apjdense[apj[j]] = 0; 770 } 771 } 772 773 /* Assemble the final matrix and clean up */ 774 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 775 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 776 ierr = PetscFree(apa);CHKERRQ(ierr); 777 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 778 779 int rank; 780 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 781 if (rank==1){ 782 ierr = MatView(C,PETSC_VIEWER_STDOUT_SELF); 783 } 784 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt" 790 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 791 { 792 PetscErrorCode ierr; 793 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 794 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 795 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 796 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 797 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 798 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 799 PetscInt m = prend - prstart,nlnk,*lnk; 800 MatScalar *ca; 801 Mat *psub,P_sub; 802 IS isrow,iscol; 803 PetscBT lnkbt; 804 805 PetscFunctionBegin; 806 /* Get ij structure of P[rstart:rend,:]^T */ 807 ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 808 ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr); 809 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr); 810 ierr = ISDestroy(isrow);CHKERRQ(ierr); 811 ierr = ISDestroy(iscol);CHKERRQ(ierr); 812 P_sub = psub[0]; 813 ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr); 815 ptJ=ptj; 816 817 /* Allocate ci array, arrays for fill computation and */ 818 /* free space for accumulating nonzero column info */ 819 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 820 ci[0] = 0; 821 822 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 823 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 824 ptasparserow = ptadenserow + an; 825 826 /* create and initialize a linked list */ 827 nlnk = pn+1; 828 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 829 830 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 831 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 832 ierr = GetMoreSpace((PetscInt)(fill*ai[am]/pm)*pn,&free_space); 833 current_space = free_space; 834 835 int rank; 836 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 837 /* printf(" [%d] Symbolic_SeqAIJ_SeqAIJ_ReducedPt, an: %d, am: %d, nnz: %d\n",rank,an,am,ai[am]);*/ 838 839 /* Determine symbolic info for each row of C: */ 840 for (i=0;i<pn;i++) { 841 ptnzi = pti[i+1] - pti[i]; 842 ptanzi = 0; 843 /* Determine symbolic row of PtA_reduced: */ 844 for (j=0;j<ptnzi;j++) { 845 arow = *ptJ++; 846 anzj = ai[arow+1] - ai[arow]; 847 ajj = aj + ai[arow]; 848 for (k=0;k<anzj;k++) { 849 if (!ptadenserow[ajj[k]]) { 850 ptadenserow[ajj[k]] = -1; 851 ptasparserow[ptanzi++] = ajj[k]; 852 } 853 } 854 } 855 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 856 ptaj = ptasparserow; 857 cnzi = 0; 858 for (j=0;j<ptanzi;j++) { 859 prow = *ptaj++; 860 pnzj = pi[prow+1] - pi[prow]; 861 pjj = pj + pi[prow]; 862 /* add non-zero cols of P into the sorted linked list lnk */ 863 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 864 cnzi += nlnk; 865 } 866 867 /* If free space is not available, make more free space */ 868 /* Double the amount of total space in the list */ 869 if (current_space->local_remaining<cnzi) { 870 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 871 } 872 873 /* Copy data into free space, and zero out denserows */ 874 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 875 current_space->array += cnzi; 876 current_space->local_used += cnzi; 877 current_space->local_remaining -= cnzi; 878 879 for (j=0;j<ptanzi;j++) { 880 ptadenserow[ptasparserow[j]] = 0; 881 } 882 883 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 884 /* For now, we will recompute what is needed. */ 885 ci[i+1] = ci[i] + cnzi; 886 } 887 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 888 /* Allocate space for cj, initialize cj, and */ 889 /* destroy list of free space and other temporary array(s) */ 890 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 891 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 892 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 893 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 894 895 /* Allocate space for ca */ 896 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 897 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 898 899 /* put together the new matrix */ 900 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 901 902 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 903 /* Since these are PETSc arrays, change flags to free them as necessary. */ 904 c = (Mat_SeqAIJ *)((*C)->data); 905 c->freedata = PETSC_TRUE; 906 c->nonew = 0; 907 908 /* Clean up. */ 909 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 910 911 if (rank==1){ 912 ierr = MatView((*C), PETSC_VIEWER_STDOUT_SELF); 913 } 914 PetscFunctionReturn(0); 915 } 916 917 #undef __FUNCT__ 918 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt" 919 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 920 { 921 PetscErrorCode ierr; 922 923 PetscFunctionBegin; 924 if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 925 if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 926 if (scall == MAT_INITIAL_MATRIX){ 927 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 928 } 929 930 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 931 932 PetscFunctionReturn(0); 933 } 934 935 /* ----------- new ------------------ */ 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt" 938 PetscErrorCode MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C) 939 { 940 PetscErrorCode ierr; 941 PetscInt flops=0; 942 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 943 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 944 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 945 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 946 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 947 PetscInt *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 948 PetscInt *ci=c->i,*cj=c->j,*cjj; 949 PetscInt am=A->m,cn=C->N,cm=C->M; 950 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 951 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 952 953 PetscFunctionBegin; 954 pA=p->a+pi[prstart]; 955 /* Allocate temporary array for storage of one row of A*P */ 956 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 957 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 958 959 apj = (PetscInt *)(apa + cn); 960 apjdense = apj + cn; 961 962 /* Clear old values in C */ 963 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 964 965 for (i=0;i<am;i++) { 966 /* Form sparse row of A*P */ 967 /* diagonal portion of A */ 968 anzi = adi[i+1] - adi[i]; 969 apnzj = 0; 970 for (j=0;j<anzi;j++) { 971 prow = *adj + prstart; 972 adj++; 973 pnzj = pi[prow+1] - pi[prow]; 974 pjj = pj + pi[prow]; 975 paj = pa + pi[prow]; 976 for (k=0;k<pnzj;k++) { 977 if (!apjdense[pjj[k]]) { 978 apjdense[pjj[k]] = -1; 979 apj[apnzj++] = pjj[k]; 980 } 981 apa[pjj[k]] += (*ada)*paj[k]; 982 } 983 flops += 2*pnzj; 984 ada++; 985 } 986 /* off-diagonal portion of A */ 987 anzi = aoi[i+1] - aoi[i]; 988 for (j=0;j<anzi;j++) { 989 col = a->garray[*aoj]; 990 if (col < cstart){ 991 prow = *aoj; 992 } else if (col >= cend){ 993 prow = *aoj + prend-prstart; 994 } else { 995 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 996 } 997 aoj++; 998 pnzj = pi[prow+1] - pi[prow]; 999 pjj = pj + pi[prow]; 1000 paj = pa + pi[prow]; 1001 for (k=0;k<pnzj;k++) { 1002 if (!apjdense[pjj[k]]) { 1003 apjdense[pjj[k]] = -1; 1004 apj[apnzj++] = pjj[k]; 1005 } 1006 apa[pjj[k]] += (*aoa)*paj[k]; 1007 } 1008 flops += 2*pnzj; 1009 aoa++; 1010 } 1011 1012 /* Sort the j index array for quick sparse axpy. */ 1013 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1014 1015 /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 1016 pnzi = pi[i+1+prstart] - pi[i+prstart]; 1017 for (j=0;j<pnzi;j++) { 1018 nextap = 0; 1019 crow = *pJ++; 1020 cjj = cj + ci[crow]; 1021 caj = ca + ci[crow]; 1022 /* Perform sparse axpy operation. Note cjj includes apj. */ 1023 for (k=0;nextap<apnzj;k++) { 1024 if (cjj[k]==apj[nextap]) { 1025 caj[k] += (*pA)*apa[apj[nextap++]]; 1026 } 1027 } 1028 flops += 2*apnzj; 1029 pA++; 1030 } 1031 1032 /* Zero the current row info for A*P */ 1033 for (j=0;j<apnzj;j++) { 1034 apa[apj[j]] = 0.; 1035 apjdense[apj[j]] = 0; 1036 } 1037 } 1038 1039 /* Assemble the final matrix and clean up */ 1040 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1041 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1042 ierr = PetscFree(apa);CHKERRQ(ierr); 1043 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1044 1045 PetscFunctionReturn(0); 1046 } 1047 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt" 1050 PetscErrorCode MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 1051 { 1052 PetscErrorCode ierr; 1053 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1054 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1055 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 1056 Mat_SeqAIJ *p=(Mat_SeqAIJ*)P->data,*c; 1057 PetscInt *pti,*ptj,*ptJ,*ajj,*pi=p->i,*pj=p->j,*pjj; 1058 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1059 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 1060 PetscInt an=A->N,am=A->m,pn=P->N,pm=P->M; 1061 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 1062 PetscInt m=prend-prstart,nlnk,*lnk; 1063 MatScalar *ca; 1064 Mat *psub,P_sub; 1065 IS isrow,iscol; 1066 PetscBT lnkbt; 1067 1068 PetscFunctionBegin; 1069 1070 /* Get ij structure of P[rstart:rend,:]^T */ 1071 ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 1072 ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr); 1073 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr); 1074 ierr = ISDestroy(isrow);CHKERRQ(ierr); 1075 ierr = ISDestroy(iscol);CHKERRQ(ierr); 1076 P_sub = psub[0]; 1077 ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr); 1078 ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr); 1079 ptJ=ptj; 1080 1081 /* Allocate ci array, arrays for fill computation and */ 1082 /* free space for accumulating nonzero column info */ 1083 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1084 ci[0] = 0; 1085 1086 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 1087 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1088 ptasparserow = ptadenserow + an; 1089 1090 /* create and initialize a linked list */ 1091 nlnk = pn+1; 1092 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1093 1094 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 1095 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1096 nnz = adi[am] + aoi[am]; 1097 ierr = GetMoreSpace((PetscInt)(fill*nnz/pm)*pn,&free_space); 1098 current_space = free_space; 1099 1100 /* Determine symbolic info for each row of C: */ 1101 for (i=0;i<pn;i++) { 1102 ptnzi = pti[i+1] - pti[i]; 1103 ptanzi = 0; 1104 /* Determine symbolic row of PtA_reduced: */ 1105 for (j=0;j<ptnzi;j++) { 1106 arow = *ptJ++; 1107 /* diagonal portion of A */ 1108 anzj = adi[arow+1] - adi[arow]; 1109 ajj = adj + adi[arow]; 1110 for (k=0;k<anzj;k++) { 1111 col = ajj[k]+prstart; 1112 if (!ptadenserow[col]) { 1113 ptadenserow[col] = -1; 1114 ptasparserow[ptanzi++] = col; 1115 } 1116 } 1117 /* off-diagonal portion of A */ 1118 anzj = aoi[arow+1] - aoi[arow]; 1119 ajj = aoj + aoi[arow]; 1120 for (k=0;k<anzj;k++) { 1121 col = a->garray[ajj[k]]; /* global col */ 1122 if (col < cstart){ 1123 col = ajj[k]; 1124 } else if (col >= cend){ 1125 col = ajj[k] + m; 1126 } else { 1127 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1128 } 1129 if (!ptadenserow[col]) { 1130 ptadenserow[col] = -1; 1131 ptasparserow[ptanzi++] = col; 1132 } 1133 } 1134 } 1135 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 1136 ptaj = ptasparserow; 1137 cnzi = 0; 1138 for (j=0;j<ptanzi;j++) { 1139 prow = *ptaj++; 1140 pnzj = pi[prow+1] - pi[prow]; 1141 pjj = pj + pi[prow]; 1142 /* if (rank==1) printf(" prow: %d, pnzj: %d\n",prow,pnzj); */ 1143 /* add non-zero cols of P into the sorted linked list lnk */ 1144 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1145 cnzi += nlnk; 1146 } 1147 1148 /* If free space is not available, make more free space */ 1149 /* Double the amount of total space in the list */ 1150 if (current_space->local_remaining<cnzi) { 1151 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1152 } 1153 1154 /* Copy data into free space, and zero out denserows */ 1155 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1156 current_space->array += cnzi; 1157 current_space->local_used += cnzi; 1158 current_space->local_remaining -= cnzi; 1159 1160 for (j=0;j<ptanzi;j++) { 1161 ptadenserow[ptasparserow[j]] = 0; 1162 } 1163 1164 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1165 /* For now, we will recompute what is needed. */ 1166 ci[i+1] = ci[i] + cnzi; 1167 } 1168 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1169 /* Allocate space for cj, initialize cj, and */ 1170 /* destroy list of free space and other temporary array(s) */ 1171 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1172 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1173 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 1174 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1175 1176 /* Allocate space for ca */ 1177 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1178 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1179 1180 /* put together the new matrix */ 1181 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 1182 1183 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1184 /* Since these are PETSc arrays, change flags to free them as necessary. */ 1185 c = (Mat_SeqAIJ *)((*C)->data); 1186 c->freedata = PETSC_TRUE; 1187 c->nonew = 0; 1188 1189 /* Clean up. */ 1190 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 1191 1192 PetscFunctionReturn(0); 1193 } 1194 1195 PetscEvent PtAP_ReducedPt = 0; 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatPtAP_MPIAIJ_SeqAIJ_ReducedPt" 1198 PetscErrorCode MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 1199 { 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 1204 if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 1205 ierr = PetscLogEventRegister(&PtAP_ReducedPt,"MatPtAP_ReducedPt",MAT_COOKIE); 1206 PetscLogEventBegin(PtAP_ReducedPt,A,P,0,0);CHKERRQ(ierr); 1207 1208 if (scall == MAT_INITIAL_MATRIX){ 1209 ierr = MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 1210 } 1211 ierr = MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 1212 ierr = PetscLogEventEnd(PtAP_ReducedPt,A,P,0,0);CHKERRQ(ierr); 1213 1214 PetscFunctionReturn(0); 1215 } 1216 1217