1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatPtAP" 13 /*@ 14 MatPtAP - Creates the matrix projection C = P^T * A * P 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the matrix 20 . P - the projection matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 31 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 32 33 Level: intermediate 34 35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 36 @*/ 37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 50 PetscValidType(P,2); 51 MatPreallocated(P); 52 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 56 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57 58 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59 60 /* For now, we do not dispatch based on the type of A and P */ 61 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62 fA = A->ops->ptap; 63 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64 fP = P->ops->ptap; 65 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67 68 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 70 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76 77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81 { 82 PetscErrorCode ierr; 83 Mat_MatMatMultMPI *ptap; 84 PetscObjectContainer container; 85 86 PetscFunctionBegin; 87 ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88 if (container) { 89 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 90 ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 91 ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 92 93 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 94 ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 95 } 96 ierr = PetscFree(ptap);CHKERRQ(ierr); 97 98 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 99 PetscFunctionReturn(0); 100 } 101 102 #undef __FUNCT__ 103 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 104 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 105 { 106 PetscErrorCode ierr; 107 Mat_MatMatMultMPI *ptap; 108 PetscObjectContainer container; 109 110 PetscFunctionBegin; 111 if (scall == MAT_INITIAL_MATRIX){ 112 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 113 ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 114 115 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 116 ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 117 118 /* get P_loc by taking all local rows of P */ 119 ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 120 121 /* attach the supporting struct to P for reuse */ 122 P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 123 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 124 ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 125 ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 126 127 /* now, compute symbolic local P^T*A*P */ 128 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 129 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 130 } else if (scall == MAT_REUSE_MATRIX){ 131 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 132 if (container) { 133 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 134 } else { 135 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 136 } 137 138 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 139 /* improve! */ 140 ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 141 ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 142 143 /* get P_loc by taking all local rows of P */ 144 ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); /* improve! */ 145 ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 146 147 } else { 148 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 149 } 150 /* now, compute numeric local P^T*A*P */ 151 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 152 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 153 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 154 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 160 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 161 { 162 PetscErrorCode ierr; 163 Mat C_seq; 164 Mat_MatMatMultMPI *ptap; 165 PetscObjectContainer container; 166 167 PetscFunctionBegin; 168 169 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 170 if (container) { 171 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 172 } else { 173 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 174 } 175 /* compute C_seq = P_loc^T * A_loc * P_seq */ 176 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr); 177 178 /* add C_seq into mpi C */ 179 ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 180 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 186 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 187 { 188 PetscErrorCode ierr; 189 Mat_Merge_SeqsToMPI *merge; 190 Mat_MatMatMultMPI *ptap; 191 PetscObjectContainer cont_merge,cont_ptap; 192 193 PetscFunctionBegin; 194 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 195 if (cont_merge) { 196 ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 197 } else { 198 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 199 } 200 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 201 if (cont_ptap) { 202 ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 203 } else { 204 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 205 } 206 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr); 207 ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 213 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 214 { 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (scall == MAT_INITIAL_MATRIX){ 219 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 220 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 221 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 222 } 223 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 224 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 225 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 226 PetscFunctionReturn(0); 227 } 228 229 /* 230 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 231 232 Collective on Mat 233 234 Input Parameters: 235 + A - the matrix 236 - P - the projection matrix 237 238 Output Parameters: 239 . C - the (i,j) structure of the product matrix 240 241 Notes: 242 C will be created and must be destroyed by the user with MatDestroy(). 243 244 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 245 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 246 this (i,j) structure by calling MatPtAPNumeric(). 247 248 Level: intermediate 249 250 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 251 */ 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatPtAPSymbolic" 254 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 255 { 256 PetscErrorCode ierr; 257 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 258 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 259 260 PetscFunctionBegin; 261 262 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 263 PetscValidType(A,1); 264 MatPreallocated(A); 265 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 266 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 267 268 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 269 PetscValidType(P,2); 270 MatPreallocated(P); 271 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 272 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 273 274 PetscValidPointer(C,3); 275 276 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 277 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 278 279 /* For now, we do not dispatch based on the type of A and P */ 280 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 281 fA = A->ops->ptapsymbolic; 282 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 283 fP = P->ops->ptapsymbolic; 284 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 285 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 286 287 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 288 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 289 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 290 291 PetscFunctionReturn(0); 292 } 293 294 typedef struct { 295 Mat symAP; 296 } Mat_PtAPstruct; 297 298 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 302 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 303 { 304 PetscErrorCode ierr; 305 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 306 307 PetscFunctionBegin; 308 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 309 ierr = PetscFree(ptap);CHKERRQ(ierr); 310 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 316 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 317 { 318 PetscErrorCode ierr; 319 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 320 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 321 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 322 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 323 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 324 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 325 MatScalar *ca; 326 PetscBT lnkbt; 327 328 PetscFunctionBegin; 329 /* Get ij structure of P^T */ 330 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 331 ptJ=ptj; 332 333 /* Allocate ci array, arrays for fill computation and */ 334 /* free space for accumulating nonzero column info */ 335 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 336 ci[0] = 0; 337 338 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 339 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 340 ptasparserow = ptadenserow + an; 341 342 /* create and initialize a linked list */ 343 nlnk = pn+1; 344 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 345 346 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 347 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 348 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 349 current_space = free_space; 350 351 /* Determine symbolic info for each row of C: */ 352 for (i=0;i<pn;i++) { 353 ptnzi = pti[i+1] - pti[i]; 354 ptanzi = 0; 355 /* Determine symbolic row of PtA: */ 356 for (j=0;j<ptnzi;j++) { 357 arow = *ptJ++; 358 anzj = ai[arow+1] - ai[arow]; 359 ajj = aj + ai[arow]; 360 for (k=0;k<anzj;k++) { 361 if (!ptadenserow[ajj[k]]) { 362 ptadenserow[ajj[k]] = -1; 363 ptasparserow[ptanzi++] = ajj[k]; 364 } 365 } 366 } 367 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 368 ptaj = ptasparserow; 369 cnzi = 0; 370 for (j=0;j<ptanzi;j++) { 371 prow = *ptaj++; 372 pnzj = pi[prow+1] - pi[prow]; 373 pjj = pj + pi[prow]; 374 /* add non-zero cols of P into the sorted linked list lnk */ 375 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 376 cnzi += nlnk; 377 } 378 379 /* If free space is not available, make more free space */ 380 /* Double the amount of total space in the list */ 381 if (current_space->local_remaining<cnzi) { 382 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 383 } 384 385 /* Copy data into free space, and zero out denserows */ 386 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 387 current_space->array += cnzi; 388 current_space->local_used += cnzi; 389 current_space->local_remaining -= cnzi; 390 391 for (j=0;j<ptanzi;j++) { 392 ptadenserow[ptasparserow[j]] = 0; 393 } 394 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 395 /* For now, we will recompute what is needed. */ 396 ci[i+1] = ci[i] + cnzi; 397 } 398 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 399 /* Allocate space for cj, initialize cj, and */ 400 /* destroy list of free space and other temporary array(s) */ 401 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 402 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 403 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 404 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 405 406 /* Allocate space for ca */ 407 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 408 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 409 410 /* put together the new matrix */ 411 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 412 413 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 414 /* Since these are PETSc arrays, change flags to free them as necessary. */ 415 c = (Mat_SeqAIJ *)((*C)->data); 416 c->freedata = PETSC_TRUE; 417 c->nonew = 0; 418 419 /* Clean up. */ 420 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 421 422 PetscFunctionReturn(0); 423 } 424 425 #include "src/mat/impls/maij/maij.h" 426 EXTERN_C_BEGIN 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 429 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 430 { 431 /* This routine requires testing -- I don't think it works. */ 432 PetscErrorCode ierr; 433 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 434 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 435 Mat P=pp->AIJ; 436 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 437 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 438 PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 439 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 440 PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 441 MatScalar *ca; 442 443 PetscFunctionBegin; 444 /* Start timer */ 445 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 446 447 /* Get ij structure of P^T */ 448 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 449 450 /* Allocate ci array, arrays for fill computation and */ 451 /* free space for accumulating nonzero column info */ 452 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 453 ci[0] = 0; 454 455 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 456 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 457 ptasparserow = ptadenserow + an; 458 denserow = ptasparserow + an; 459 sparserow = denserow + pn; 460 461 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 462 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 463 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 464 current_space = free_space; 465 466 /* Determine symbolic info for each row of C: */ 467 for (i=0;i<pn/ppdof;i++) { 468 ptnzi = pti[i+1] - pti[i]; 469 ptanzi = 0; 470 ptJ = ptj + pti[i]; 471 for (dof=0;dof<ppdof;dof++) { 472 /* Determine symbolic row of PtA: */ 473 for (j=0;j<ptnzi;j++) { 474 arow = ptJ[j] + dof; 475 anzj = ai[arow+1] - ai[arow]; 476 ajj = aj + ai[arow]; 477 for (k=0;k<anzj;k++) { 478 if (!ptadenserow[ajj[k]]) { 479 ptadenserow[ajj[k]] = -1; 480 ptasparserow[ptanzi++] = ajj[k]; 481 } 482 } 483 } 484 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 485 ptaj = ptasparserow; 486 cnzi = 0; 487 for (j=0;j<ptanzi;j++) { 488 pdof = *ptaj%dof; 489 prow = (*ptaj++)/dof; 490 pnzj = pi[prow+1] - pi[prow]; 491 pjj = pj + pi[prow]; 492 for (k=0;k<pnzj;k++) { 493 if (!denserow[pjj[k]+pdof]) { 494 denserow[pjj[k]+pdof] = -1; 495 sparserow[cnzi++] = pjj[k]+pdof; 496 } 497 } 498 } 499 500 /* sort sparserow */ 501 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 502 503 /* If free space is not available, make more free space */ 504 /* Double the amount of total space in the list */ 505 if (current_space->local_remaining<cnzi) { 506 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 507 } 508 509 /* Copy data into free space, and zero out denserows */ 510 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 511 current_space->array += cnzi; 512 current_space->local_used += cnzi; 513 current_space->local_remaining -= cnzi; 514 515 for (j=0;j<ptanzi;j++) { 516 ptadenserow[ptasparserow[j]] = 0; 517 } 518 for (j=0;j<cnzi;j++) { 519 denserow[sparserow[j]] = 0; 520 } 521 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 522 /* For now, we will recompute what is needed. */ 523 ci[i+1+dof] = ci[i+dof] + cnzi; 524 } 525 } 526 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 527 /* Allocate space for cj, initialize cj, and */ 528 /* destroy list of free space and other temporary array(s) */ 529 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 530 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 531 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 532 533 /* Allocate space for ca */ 534 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 535 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 536 537 /* put together the new matrix */ 538 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 539 540 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 541 /* Since these are PETSc arrays, change flags to free them as necessary. */ 542 c = (Mat_SeqAIJ *)((*C)->data); 543 c->freedata = PETSC_TRUE; 544 c->nonew = 0; 545 546 /* Clean up. */ 547 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 548 549 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 550 PetscFunctionReturn(0); 551 } 552 EXTERN_C_END 553 554 /* 555 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 556 557 Collective on Mat 558 559 Input Parameters: 560 + A - the matrix 561 - P - the projection matrix 562 563 Output Parameters: 564 . C - the product matrix 565 566 Notes: 567 C must have been created by calling MatPtAPSymbolic and must be destroyed by 568 the user using MatDeatroy(). 569 570 This routine is currently only implemented for pairs of AIJ matrices and classes 571 which inherit from AIJ. C will be of type MATAIJ. 572 573 Level: intermediate 574 575 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 576 */ 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatPtAPNumeric" 579 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 580 { 581 PetscErrorCode ierr; 582 PetscErrorCode (*fA)(Mat,Mat,Mat); 583 PetscErrorCode (*fP)(Mat,Mat,Mat); 584 585 PetscFunctionBegin; 586 587 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 588 PetscValidType(A,1); 589 MatPreallocated(A); 590 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 591 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 592 593 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 594 PetscValidType(P,2); 595 MatPreallocated(P); 596 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 597 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 598 599 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 600 PetscValidType(C,3); 601 MatPreallocated(C); 602 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 603 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 604 605 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 606 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 607 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 608 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 609 610 /* For now, we do not dispatch based on the type of A and P */ 611 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 612 fA = A->ops->ptapnumeric; 613 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 614 fP = P->ops->ptapnumeric; 615 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 616 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 617 618 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 619 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 620 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 621 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 627 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 628 { 629 PetscErrorCode ierr; 630 PetscInt flops=0; 631 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 632 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 633 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 634 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 635 PetscInt *ci=c->i,*cj=c->j,*cjj; 636 PetscInt am=A->M,cn=C->N,cm=C->M; 637 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 638 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 639 640 PetscFunctionBegin; 641 /* Allocate temporary array for storage of one row of A*P */ 642 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 643 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 644 645 apj = (PetscInt *)(apa + cn); 646 apjdense = apj + cn; 647 648 /* Clear old values in C */ 649 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 650 651 for (i=0;i<am;i++) { 652 /* Form sparse row of A*P */ 653 anzi = ai[i+1] - ai[i]; 654 apnzj = 0; 655 for (j=0;j<anzi;j++) { 656 prow = *aj++; 657 pnzj = pi[prow+1] - pi[prow]; 658 pjj = pj + pi[prow]; 659 paj = pa + pi[prow]; 660 for (k=0;k<pnzj;k++) { 661 if (!apjdense[pjj[k]]) { 662 apjdense[pjj[k]] = -1; 663 apj[apnzj++] = pjj[k]; 664 } 665 apa[pjj[k]] += (*aa)*paj[k]; 666 } 667 flops += 2*pnzj; 668 aa++; 669 } 670 671 /* Sort the j index array for quick sparse axpy. */ 672 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 673 674 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 675 pnzi = pi[i+1] - pi[i]; 676 for (j=0;j<pnzi;j++) { 677 nextap = 0; 678 crow = *pJ++; 679 cjj = cj + ci[crow]; 680 caj = ca + ci[crow]; 681 /* Perform sparse axpy operation. Note cjj includes apj. */ 682 for (k=0;nextap<apnzj;k++) { 683 if (cjj[k]==apj[nextap]) { 684 caj[k] += (*pA)*apa[apj[nextap++]]; 685 } 686 } 687 flops += 2*apnzj; 688 pA++; 689 } 690 691 /* Zero the current row info for A*P */ 692 for (j=0;j<apnzj;j++) { 693 apa[apj[j]] = 0.; 694 apjdense[apj[j]] = 0; 695 } 696 } 697 698 /* Assemble the final matrix and clean up */ 699 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 700 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 701 ierr = PetscFree(apa);CHKERRQ(ierr); 702 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 703 704 PetscFunctionReturn(0); 705 } 706 707 /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 708 static PetscEvent logkey_matptapnumeric_local = 0; 709 #undef __FUNCT__ 710 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 711 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 712 { 713 PetscErrorCode ierr; 714 PetscInt flops=0; 715 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 716 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 717 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 718 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 719 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 720 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 721 PetscInt *pJ=pj_loc,*pjj; 722 PetscInt *ci=c->i,*cj=c->j,*cjj; 723 PetscInt am=A->m,cn=C->N,cm=C->M; 724 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 725 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 726 MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 727 728 PetscFunctionBegin; 729 if (!logkey_matptapnumeric_local) { 730 ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 731 } 732 ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 733 734 /* Allocate temporary array for storage of one row of A*P */ 735 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 736 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 737 apj = (PetscInt *)(apa + cn); 738 apjdense = apj + cn; 739 740 /* Clear old values in C */ 741 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 742 743 for (i=0;i<am;i++) { 744 /* Form i-th sparse row of A*P */ 745 apnzj = 0; 746 /* diagonal portion of A */ 747 anzi = adi[i+1] - adi[i]; 748 for (j=0;j<anzi;j++) { 749 prow = *adj; 750 adj++; 751 pnzj = pi_loc[prow+1] - pi_loc[prow]; 752 pjj = pj_loc + pi_loc[prow]; 753 paj = pa_loc + pi_loc[prow]; 754 for (k=0;k<pnzj;k++) { 755 if (!apjdense[pjj[k]]) { 756 apjdense[pjj[k]] = -1; 757 apj[apnzj++] = pjj[k]; 758 } 759 apa[pjj[k]] += (*ada)*paj[k]; 760 } 761 flops += 2*pnzj; 762 ada++; 763 } 764 /* off-diagonal portion of A */ 765 anzi = aoi[i+1] - aoi[i]; 766 for (j=0;j<anzi;j++) { 767 col = a->garray[*aoj]; 768 if (col < cstart){ 769 prow = *aoj; 770 } else if (col >= cend){ 771 prow = *aoj; 772 } else { 773 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 774 } 775 aoj++; 776 pnzj = pi_oth[prow+1] - pi_oth[prow]; 777 pjj = pj_oth + pi_oth[prow]; 778 paj = pa_oth + pi_oth[prow]; 779 for (k=0;k<pnzj;k++) { 780 if (!apjdense[pjj[k]]) { 781 apjdense[pjj[k]] = -1; 782 apj[apnzj++] = pjj[k]; 783 } 784 apa[pjj[k]] += (*aoa)*paj[k]; 785 } 786 flops += 2*pnzj; 787 aoa++; 788 } 789 /* Sort the j index array for quick sparse axpy. */ 790 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 791 792 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 793 pnzi = pi_loc[i+1] - pi_loc[i]; 794 for (j=0;j<pnzi;j++) { 795 nextap = 0; 796 crow = *pJ++; 797 cjj = cj + ci[crow]; 798 caj = ca + ci[crow]; 799 /* Perform sparse axpy operation. Note cjj includes apj. */ 800 for (k=0;nextap<apnzj;k++) { 801 if (cjj[k]==apj[nextap]) { 802 caj[k] += (*pA)*apa[apj[nextap++]]; 803 } 804 } 805 flops += 2*apnzj; 806 pA++; 807 } 808 809 /* Zero the current row info for A*P */ 810 for (j=0;j<apnzj;j++) { 811 apa[apj[j]] = 0.; 812 apjdense[apj[j]] = 0; 813 } 814 } 815 816 /* Assemble the final matrix and clean up */ 817 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 818 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 819 ierr = PetscFree(apa);CHKERRQ(ierr); 820 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 821 ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 static PetscEvent logkey_matptapsymbolic_local = 0; 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 827 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 828 { 829 PetscErrorCode ierr; 830 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 831 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 832 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 833 PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 834 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 835 PetscInt *ci,*cj,*ptaj; 836 PetscInt an=A->N,am=A->m,pN=P_loc->N; 837 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 838 PetscInt pm=P_loc->m,nlnk,*lnk; 839 MatScalar *ca; 840 PetscBT lnkbt; 841 PetscInt prend,nprow_loc,nprow_oth; 842 PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 843 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 844 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 845 846 PetscFunctionBegin; 847 if (!logkey_matptapsymbolic_local) { 848 ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 849 } 850 ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 851 852 prend = prstart + pm; 853 854 /* get ij structure of P_loc^T */ 855 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 856 ptJ=ptj; 857 858 /* Allocate ci array, arrays for fill computation and */ 859 /* free space for accumulating nonzero column info */ 860 ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 861 ci[0] = 0; 862 863 ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 864 ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 865 ptasparserow_loc = ptadenserow_loc + an; 866 ptadenserow_oth = ptasparserow_loc + an; 867 ptasparserow_oth = ptadenserow_oth + an; 868 869 /* create and initialize a linked list */ 870 nlnk = pN+1; 871 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 872 873 /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */ 874 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 875 nnz = adi[am] + aoi[am]; 876 ierr = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space); 877 current_space = free_space; 878 879 /* determine symbolic info for each row of C: */ 880 for (i=0;i<pN;i++) { 881 ptnzi = pti[i+1] - pti[i]; 882 nprow_loc = 0; nprow_oth = 0; 883 /* i-th row of symbolic P_loc^T*A_loc: */ 884 for (j=0;j<ptnzi;j++) { 885 arow = *ptJ++; 886 /* diagonal portion of A */ 887 anzj = adi[arow+1] - adi[arow]; 888 ajj = adj + adi[arow]; 889 for (k=0;k<anzj;k++) { 890 col = ajj[k]+prstart; 891 if (!ptadenserow_loc[col]) { 892 ptadenserow_loc[col] = -1; 893 ptasparserow_loc[nprow_loc++] = col; 894 } 895 } 896 /* off-diagonal portion of A */ 897 anzj = aoi[arow+1] - aoi[arow]; 898 ajj = aoj + aoi[arow]; 899 for (k=0;k<anzj;k++) { 900 col = a->garray[ajj[k]]; /* global col */ 901 if (col < cstart){ 902 col = ajj[k]; 903 } else if (col >= cend){ 904 col = ajj[k] + pm; 905 } else { 906 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 907 } 908 if (!ptadenserow_oth[col]) { 909 ptadenserow_oth[col] = -1; 910 ptasparserow_oth[nprow_oth++] = col; 911 } 912 } 913 } 914 915 /* using symbolic info of local PtA, determine symbolic info for row of C: */ 916 cnzi = 0; 917 /* rows of P_loc */ 918 ptaj = ptasparserow_loc; 919 for (j=0; j<nprow_loc; j++) { 920 prow = *ptaj++; 921 prow -= prstart; /* rm */ 922 pnzj = pi_loc[prow+1] - pi_loc[prow]; 923 pjj = pj_loc + pi_loc[prow]; 924 /* add non-zero cols of P into the sorted linked list lnk */ 925 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 926 cnzi += nlnk; 927 } 928 /* rows of P_oth */ 929 ptaj = ptasparserow_oth; 930 for (j=0; j<nprow_oth; j++) { 931 prow = *ptaj++; 932 if (prow >= prend) prow -= pm; /* rm */ 933 pnzj = pi_oth[prow+1] - pi_oth[prow]; 934 pjj = pj_oth + pi_oth[prow]; 935 /* add non-zero cols of P into the sorted linked list lnk */ 936 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 937 cnzi += nlnk; 938 } 939 940 /* If free space is not available, make more free space */ 941 /* Double the amount of total space in the list */ 942 if (current_space->local_remaining<cnzi) { 943 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 944 } 945 946 /* Copy data into free space, and zero out denserows */ 947 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 948 current_space->array += cnzi; 949 current_space->local_used += cnzi; 950 current_space->local_remaining -= cnzi; 951 952 for (j=0;j<nprow_loc; j++) { 953 ptadenserow_loc[ptasparserow_loc[j]] = 0; 954 } 955 for (j=0;j<nprow_oth; j++) { 956 ptadenserow_oth[ptasparserow_oth[j]] = 0; 957 } 958 959 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 960 /* For now, we will recompute what is needed. */ 961 ci[i+1] = ci[i] + cnzi; 962 } 963 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 964 /* Allocate space for cj, initialize cj, and */ 965 /* destroy list of free space and other temporary array(s) */ 966 ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 967 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 968 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 969 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 970 971 /* Allocate space for ca */ 972 ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 973 ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 974 975 /* put together the new matrix */ 976 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 977 978 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 979 /* Since these are PETSc arrays, change flags to free them as necessary. */ 980 c = (Mat_SeqAIJ *)((*C)->data); 981 c->freedata = PETSC_TRUE; 982 c->nonew = 0; 983 984 /* Clean up. */ 985 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 986 987 ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990