1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatPtAP" 12 /*@ 13 MatPtAP - Creates the matrix projection C = P^T * A * P 14 15 Collective on Mat 16 17 Input Parameters: 18 + A - the matrix 19 . P - the projection matrix 20 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 22 23 Output Parameters: 24 . C - the product matrix 25 26 Notes: 27 C will be created and must be destroyed by the user with MatDestroy(). 28 29 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 30 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 31 32 Level: intermediate 33 34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 35 @*/ 36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) { 37 PetscErrorCode ierr; 38 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 39 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 40 41 PetscFunctionBegin; 42 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 43 PetscValidType(A,1); 44 MatPreallocated(A); 45 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 46 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 47 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 48 PetscValidType(P,2); 49 MatPreallocated(P); 50 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 51 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 52 PetscValidPointer(C,3); 53 54 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 55 56 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57 58 /* For now, we do not dispatch based on the type of A and P */ 59 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60 fA = A->ops->ptap; 61 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 62 fP = P->ops->ptap; 63 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 64 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 65 66 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 67 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 68 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 72 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*); 73 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*); 74 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 78 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 79 { 80 PetscErrorCode ierr; 81 82 PetscFunctionBegin; 83 if (scall == MAT_INITIAL_MATRIX){ 84 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 85 } else if (scall == MAT_REUSE_MATRIX){ 86 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 87 } else { 88 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 89 } 90 PetscFunctionReturn(0); 91 } 92 93 #undef __FUNCT__ 94 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 95 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 96 { 97 PetscErrorCode ierr; 98 Mat P_seq,A_loc,C_seq; 99 int prstart,prend,m=P->m; 100 IS isrowp,iscolp; 101 102 PetscFunctionBegin; 103 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 104 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 105 ierr = ISDestroy(iscolp);CHKERRQ(ierr); 106 107 /* get A_loc = submatrix of A by taking all local rows of A */ 108 ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 109 ierr = ISDestroy(isrowp);CHKERRQ(ierr); 110 111 /* compute C_seq = P_loc^T * A_loc * P_seq */ 112 prend = prstart + m; 113 ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 114 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 115 ierr = MatDestroy(A_loc);CHKERRQ(ierr); 116 117 /* add C_seq into mpi C */ 118 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 119 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 125 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 126 { 127 PetscErrorCode ierr; 128 Mat P_seq,A_loc,C_seq; 129 int prstart,prend,m=P->m; 130 IS isrowp,iscolp; 131 Mat_Merge_SeqsToMPI *merge; 132 PetscObjectContainer container; 133 134 PetscFunctionBegin; 135 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 136 if (container) { 137 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 138 } else { 139 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container"); 140 } 141 142 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 143 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 144 ierr = ISDestroy(iscolp);CHKERRQ(ierr); 145 146 /* get A_loc = submatrix of A by taking all local rows of A */ 147 ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 148 ierr = ISDestroy(isrowp);CHKERRQ(ierr); 149 150 /* compute C_seq = P_loc^T * A_loc * P_seq */ 151 prend = prstart + m; 152 C_seq = merge->C_seq; 153 ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr); 154 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 155 ierr = MatDestroy(A_loc);CHKERRQ(ierr); 156 157 /* add C_seq into mpi C */ 158 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 159 160 PetscFunctionReturn(0); 161 } 162 163 #undef __FUNCT__ 164 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 165 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 166 { 167 PetscErrorCode ierr; 168 PetscFunctionBegin; 169 if (scall == MAT_INITIAL_MATRIX){ 170 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 171 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 172 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 173 } 174 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 175 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 176 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatPtAPSymbolic" 182 /* 183 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 184 185 Collective on Mat 186 187 Input Parameters: 188 + A - the matrix 189 - P - the projection matrix 190 191 Output Parameters: 192 . C - the (i,j) structure of the product matrix 193 194 Notes: 195 C will be created and must be destroyed by the user with MatDestroy(). 196 197 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 198 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 199 this (i,j) structure by calling MatPtAPNumeric(). 200 201 Level: intermediate 202 203 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 204 */ 205 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) { 206 PetscErrorCode ierr; 207 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 208 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 209 210 PetscFunctionBegin; 211 212 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 213 PetscValidType(A,1); 214 MatPreallocated(A); 215 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 216 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 217 218 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 219 PetscValidType(P,2); 220 MatPreallocated(P); 221 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 222 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 223 224 PetscValidPointer(C,3); 225 226 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 227 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 228 229 /* For now, we do not dispatch based on the type of A and P */ 230 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 231 fA = A->ops->ptapsymbolic; 232 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 233 fP = P->ops->ptapsymbolic; 234 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 235 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 236 237 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 238 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 239 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 240 241 PetscFunctionReturn(0); 242 } 243 244 typedef struct { 245 Mat symAP; 246 } Mat_PtAPstruct; 247 248 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 249 250 #undef __FUNCT__ 251 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 252 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 253 { 254 PetscErrorCode ierr; 255 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 256 257 PetscFunctionBegin; 258 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 259 ierr = PetscFree(ptap);CHKERRQ(ierr); 260 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 266 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { 267 PetscErrorCode ierr; 268 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 269 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 270 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 271 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 272 int an=A->N,am=A->M,pn=P->N,pm=P->M; 273 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 274 MatScalar *ca; 275 276 PetscFunctionBegin; 277 /* Get ij structure of P^T */ 278 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 279 ptJ=ptj; 280 281 /* Allocate ci array, arrays for fill computation and */ 282 /* free space for accumulating nonzero column info */ 283 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 284 ci[0] = 0; 285 286 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 287 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 288 ptasparserow = ptadenserow + an; 289 denserow = ptasparserow + an; 290 sparserow = denserow + pn; 291 292 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 293 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 294 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 295 current_space = free_space; 296 297 /* Determine symbolic info for each row of C: */ 298 for (i=0;i<pn;i++) { 299 ptnzi = pti[i+1] - pti[i]; 300 ptanzi = 0; 301 /* Determine symbolic row of PtA: */ 302 for (j=0;j<ptnzi;j++) { 303 arow = *ptJ++; 304 anzj = ai[arow+1] - ai[arow]; 305 ajj = aj + ai[arow]; 306 for (k=0;k<anzj;k++) { 307 if (!ptadenserow[ajj[k]]) { 308 ptadenserow[ajj[k]] = -1; 309 ptasparserow[ptanzi++] = ajj[k]; 310 } 311 } 312 } 313 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 314 ptaj = ptasparserow; 315 cnzi = 0; 316 for (j=0;j<ptanzi;j++) { 317 prow = *ptaj++; 318 pnzj = pi[prow+1] - pi[prow]; 319 pjj = pj + pi[prow]; 320 for (k=0;k<pnzj;k++) { 321 if (!denserow[pjj[k]]) { 322 denserow[pjj[k]] = -1; 323 sparserow[cnzi++] = pjj[k]; 324 } 325 } 326 } 327 328 /* sort sparserow */ 329 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 330 331 /* If free space is not available, make more free space */ 332 /* Double the amount of total space in the list */ 333 if (current_space->local_remaining<cnzi) { 334 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 335 } 336 337 /* Copy data into free space, and zero out denserows */ 338 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 339 current_space->array += cnzi; 340 current_space->local_used += cnzi; 341 current_space->local_remaining -= cnzi; 342 343 for (j=0;j<ptanzi;j++) { 344 ptadenserow[ptasparserow[j]] = 0; 345 } 346 for (j=0;j<cnzi;j++) { 347 denserow[sparserow[j]] = 0; 348 } 349 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 350 /* For now, we will recompute what is needed. */ 351 ci[i+1] = ci[i] + cnzi; 352 } 353 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 354 /* Allocate space for cj, initialize cj, and */ 355 /* destroy list of free space and other temporary array(s) */ 356 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 357 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 358 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 359 360 /* Allocate space for ca */ 361 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 362 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 363 364 /* put together the new matrix */ 365 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 366 367 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 368 /* Since these are PETSc arrays, change flags to free them as necessary. */ 369 c = (Mat_SeqAIJ *)((*C)->data); 370 c->freedata = PETSC_TRUE; 371 c->nonew = 0; 372 373 /* Clean up. */ 374 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 375 376 PetscFunctionReturn(0); 377 } 378 379 #include "src/mat/impls/maij/maij.h" 380 EXTERN_C_BEGIN 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 383 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 384 /* This routine requires testing -- I don't think it works. */ 385 PetscErrorCode ierr; 386 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 387 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 388 Mat P=pp->AIJ; 389 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 390 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 391 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 392 int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 393 int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 394 MatScalar *ca; 395 396 PetscFunctionBegin; 397 /* Start timer */ 398 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 399 400 /* Get ij structure of P^T */ 401 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 402 403 /* Allocate ci array, arrays for fill computation and */ 404 /* free space for accumulating nonzero column info */ 405 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 406 ci[0] = 0; 407 408 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 409 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 410 ptasparserow = ptadenserow + an; 411 denserow = ptasparserow + an; 412 sparserow = denserow + pn; 413 414 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 415 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 416 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 417 current_space = free_space; 418 419 /* Determine symbolic info for each row of C: */ 420 for (i=0;i<pn/ppdof;i++) { 421 ptnzi = pti[i+1] - pti[i]; 422 ptanzi = 0; 423 ptJ = ptj + pti[i]; 424 for (dof=0;dof<ppdof;dof++) { 425 /* Determine symbolic row of PtA: */ 426 for (j=0;j<ptnzi;j++) { 427 arow = ptJ[j] + dof; 428 anzj = ai[arow+1] - ai[arow]; 429 ajj = aj + ai[arow]; 430 for (k=0;k<anzj;k++) { 431 if (!ptadenserow[ajj[k]]) { 432 ptadenserow[ajj[k]] = -1; 433 ptasparserow[ptanzi++] = ajj[k]; 434 } 435 } 436 } 437 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 438 ptaj = ptasparserow; 439 cnzi = 0; 440 for (j=0;j<ptanzi;j++) { 441 pdof = *ptaj%dof; 442 prow = (*ptaj++)/dof; 443 pnzj = pi[prow+1] - pi[prow]; 444 pjj = pj + pi[prow]; 445 for (k=0;k<pnzj;k++) { 446 if (!denserow[pjj[k]+pdof]) { 447 denserow[pjj[k]+pdof] = -1; 448 sparserow[cnzi++] = pjj[k]+pdof; 449 } 450 } 451 } 452 453 /* sort sparserow */ 454 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 455 456 /* If free space is not available, make more free space */ 457 /* Double the amount of total space in the list */ 458 if (current_space->local_remaining<cnzi) { 459 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 460 } 461 462 /* Copy data into free space, and zero out denserows */ 463 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 464 current_space->array += cnzi; 465 current_space->local_used += cnzi; 466 current_space->local_remaining -= cnzi; 467 468 for (j=0;j<ptanzi;j++) { 469 ptadenserow[ptasparserow[j]] = 0; 470 } 471 for (j=0;j<cnzi;j++) { 472 denserow[sparserow[j]] = 0; 473 } 474 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 475 /* For now, we will recompute what is needed. */ 476 ci[i+1+dof] = ci[i+dof] + cnzi; 477 } 478 } 479 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 480 /* Allocate space for cj, initialize cj, and */ 481 /* destroy list of free space and other temporary array(s) */ 482 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 483 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 484 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 485 486 /* Allocate space for ca */ 487 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 488 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 489 490 /* put together the new matrix */ 491 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 492 493 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 494 /* Since these are PETSc arrays, change flags to free them as necessary. */ 495 c = (Mat_SeqAIJ *)((*C)->data); 496 c->freedata = PETSC_TRUE; 497 c->nonew = 0; 498 499 /* Clean up. */ 500 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 501 502 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 EXTERN_C_END 506 507 #undef __FUNCT__ 508 #define __FUNCT__ "MatPtAPNumeric" 509 /* 510 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 511 512 Collective on Mat 513 514 Input Parameters: 515 + A - the matrix 516 - P - the projection matrix 517 518 Output Parameters: 519 . C - the product matrix 520 521 Notes: 522 C must have been created by calling MatPtAPSymbolic and must be destroyed by 523 the user using MatDeatroy(). 524 525 This routine is currently only implemented for pairs of AIJ matrices and classes 526 which inherit from AIJ. C will be of type MATAIJ. 527 528 Level: intermediate 529 530 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 531 */ 532 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) { 533 PetscErrorCode ierr; 534 PetscErrorCode (*fA)(Mat,Mat,Mat); 535 PetscErrorCode (*fP)(Mat,Mat,Mat); 536 537 PetscFunctionBegin; 538 539 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 540 PetscValidType(A,1); 541 MatPreallocated(A); 542 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 543 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 544 545 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 546 PetscValidType(P,2); 547 MatPreallocated(P); 548 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 549 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 550 551 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 552 PetscValidType(C,3); 553 MatPreallocated(C); 554 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 555 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 556 557 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 558 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 559 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 560 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 561 562 /* For now, we do not dispatch based on the type of A and P */ 563 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 564 fA = A->ops->ptapnumeric; 565 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 566 fP = P->ops->ptapnumeric; 567 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 568 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 569 570 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 571 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 572 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 573 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 579 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 580 { 581 PetscErrorCode ierr; 582 int flops=0; 583 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 584 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 585 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 586 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 587 int *ci=c->i,*cj=c->j,*cjj; 588 int am=A->M,cn=C->N,cm=C->M; 589 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 590 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 591 592 PetscFunctionBegin; 593 /* Allocate temporary array for storage of one row of A*P */ 594 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 595 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 596 597 apj = (int *)(apa + cn); 598 apjdense = apj + cn; 599 600 /* Clear old values in C */ 601 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 602 603 for (i=0;i<am;i++) { 604 /* Form sparse row of A*P */ 605 anzi = ai[i+1] - ai[i]; 606 apnzj = 0; 607 for (j=0;j<anzi;j++) { 608 prow = *aj++; 609 pnzj = pi[prow+1] - pi[prow]; 610 pjj = pj + pi[prow]; 611 paj = pa + pi[prow]; 612 for (k=0;k<pnzj;k++) { 613 if (!apjdense[pjj[k]]) { 614 apjdense[pjj[k]] = -1; 615 apj[apnzj++] = pjj[k]; 616 } 617 apa[pjj[k]] += (*aa)*paj[k]; 618 } 619 flops += 2*pnzj; 620 aa++; 621 } 622 623 /* Sort the j index array for quick sparse axpy. */ 624 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 625 626 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 627 pnzi = pi[i+1] - pi[i]; 628 for (j=0;j<pnzi;j++) { 629 nextap = 0; 630 crow = *pJ++; 631 cjj = cj + ci[crow]; 632 caj = ca + ci[crow]; 633 /* Perform sparse axpy operation. Note cjj includes apj. */ 634 for (k=0;nextap<apnzj;k++) { 635 if (cjj[k]==apj[nextap]) { 636 caj[k] += (*pA)*apa[apj[nextap++]]; 637 } 638 } 639 flops += 2*apnzj; 640 pA++; 641 } 642 643 /* Zero the current row info for A*P */ 644 for (j=0;j<apnzj;j++) { 645 apa[apj[j]] = 0.; 646 apjdense[apj[j]] = 0; 647 } 648 } 649 650 /* Assemble the final matrix and clean up */ 651 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 652 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 653 ierr = PetscFree(apa);CHKERRQ(ierr); 654 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 655 656 PetscFunctionReturn(0); 657 } 658 659 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */ 660 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt" 663 /*@C 664 MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices, 665 used by MatPtAP_MPIAIJ_MPIAIJ() 666 667 Collective on Mat 668 669 Input Parameters: 670 + A - the matrix in seqaij format 671 . P - the projection matrix in seqaij format 672 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 673 . fill - expected fill (not being used when scall==MAT_REUSE_MATRIX) 674 . prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose 675 676 Output Parameters: 677 . C - the product matrix in seqaij format 678 679 Level: developer 680 681 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 682 @*/ 683 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C) 684 { 685 PetscErrorCode ierr; 686 int flops=0; 687 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 688 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 689 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 690 int *ai=a->i,*aj=a->j,*apj,*apjdense; 691 int *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 692 int *ci=c->i,*cj=c->j,*cjj; 693 int am=A->M,cn=C->N,cm=C->M; 694 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 695 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 696 697 PetscFunctionBegin; 698 pA=p->a+pi[prstart]; 699 /* Allocate temporary array for storage of one row of A*P */ 700 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 701 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 702 703 apj = (int *)(apa + cn); 704 apjdense = apj + cn; 705 706 /* Clear old values in C */ 707 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 708 709 for (i=0;i<am;i++) { 710 /* Form sparse row of A*P */ 711 anzi = ai[i+1] - ai[i]; 712 apnzj = 0; 713 for (j=0;j<anzi;j++) { 714 prow = *aj++; 715 pnzj = pi[prow+1] - pi[prow]; 716 pjj = pj + pi[prow]; 717 paj = pa + pi[prow]; 718 for (k=0;k<pnzj;k++) { 719 if (!apjdense[pjj[k]]) { 720 apjdense[pjj[k]] = -1; 721 apj[apnzj++] = pjj[k]; 722 } 723 apa[pjj[k]] += (*aa)*paj[k]; 724 } 725 flops += 2*pnzj; 726 aa++; 727 } 728 729 /* Sort the j index array for quick sparse axpy. */ 730 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 731 732 /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 733 pnzi = pi[i+1+prstart] - pi[i+prstart]; 734 for (j=0;j<pnzi;j++) { 735 nextap = 0; 736 crow = *pJ++; 737 cjj = cj + ci[crow]; 738 caj = ca + ci[crow]; 739 /* Perform sparse axpy operation. Note cjj includes apj. */ 740 for (k=0;nextap<apnzj;k++) { 741 if (cjj[k]==apj[nextap]) { 742 caj[k] += (*pA)*apa[apj[nextap++]]; 743 } 744 } 745 flops += 2*apnzj; 746 pA++; 747 } 748 749 /* Zero the current row info for A*P */ 750 for (j=0;j<apnzj;j++) { 751 apa[apj[j]] = 0.; 752 apjdense[apj[j]] = 0; 753 } 754 } 755 756 /* Assemble the final matrix and clean up */ 757 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 758 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 759 ierr = PetscFree(apa);CHKERRQ(ierr); 760 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 761 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt" 767 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) { 768 PetscErrorCode ierr; 769 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 770 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 771 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 772 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 773 int an=A->N,am=A->M,pn=P->N,pm=P->M; 774 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 775 MatScalar *ca; 776 Mat *psub,P_sub; 777 IS isrow,iscol; 778 int m = prend - prstart; 779 780 PetscFunctionBegin; 781 /* Get ij structure of P[rstart:rend,:]^T */ 782 ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 783 ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr); 784 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr); 785 ierr = ISDestroy(isrow);CHKERRQ(ierr); 786 ierr = ISDestroy(iscol);CHKERRQ(ierr); 787 P_sub = psub[0]; 788 ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr); 789 ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr); 790 ptJ=ptj; 791 792 /* Allocate ci array, arrays for fill computation and */ 793 /* free space for accumulating nonzero column info */ 794 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 795 ci[0] = 0; 796 797 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 798 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 799 ptasparserow = ptadenserow + an; 800 denserow = ptasparserow + an; 801 sparserow = denserow + pn; 802 803 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 804 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 805 ierr = GetMoreSpace((int)(fill*ai[am]/pm)*pn,&free_space); 806 current_space = free_space; 807 808 /* Determine symbolic info for each row of C: */ 809 for (i=0;i<pn;i++) { 810 ptnzi = pti[i+1] - pti[i]; 811 ptanzi = 0; 812 /* Determine symbolic row of PtA_reduced: */ 813 for (j=0;j<ptnzi;j++) { 814 arow = *ptJ++; 815 anzj = ai[arow+1] - ai[arow]; 816 ajj = aj + ai[arow]; 817 for (k=0;k<anzj;k++) { 818 if (!ptadenserow[ajj[k]]) { 819 ptadenserow[ajj[k]] = -1; 820 ptasparserow[ptanzi++] = ajj[k]; 821 } 822 } 823 } 824 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 825 ptaj = ptasparserow; 826 cnzi = 0; 827 for (j=0;j<ptanzi;j++) { 828 prow = *ptaj++; 829 pnzj = pi[prow+1] - pi[prow]; 830 pjj = pj + pi[prow]; 831 for (k=0;k<pnzj;k++) { 832 if (!denserow[pjj[k]]) { 833 denserow[pjj[k]] = -1; 834 sparserow[cnzi++] = pjj[k]; 835 } 836 } 837 } 838 839 /* sort sparserow */ 840 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 841 842 /* If free space is not available, make more free space */ 843 /* Double the amount of total space in the list */ 844 if (current_space->local_remaining<cnzi) { 845 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 846 } 847 848 /* Copy data into free space, and zero out denserows */ 849 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 850 current_space->array += cnzi; 851 current_space->local_used += cnzi; 852 current_space->local_remaining -= cnzi; 853 854 for (j=0;j<ptanzi;j++) { 855 ptadenserow[ptasparserow[j]] = 0; 856 } 857 for (j=0;j<cnzi;j++) { 858 denserow[sparserow[j]] = 0; 859 } 860 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 861 /* For now, we will recompute what is needed. */ 862 ci[i+1] = ci[i] + cnzi; 863 } 864 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 865 /* Allocate space for cj, initialize cj, and */ 866 /* destroy list of free space and other temporary array(s) */ 867 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 868 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 869 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 870 871 /* Allocate space for ca */ 872 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 873 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 874 875 /* put together the new matrix */ 876 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 877 878 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 879 /* Since these are PETSc arrays, change flags to free them as necessary. */ 880 c = (Mat_SeqAIJ *)((*C)->data); 881 c->freedata = PETSC_TRUE; 882 c->nonew = 0; 883 884 /* Clean up. */ 885 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 886 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt" 892 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C) 893 { 894 PetscErrorCode ierr; 895 PetscFunctionBegin; 896 if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 897 if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 898 if (scall == MAT_INITIAL_MATRIX){ 899 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 900 } 901 902 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 903 904 PetscFunctionReturn(0); 905 } 906 907