1 /* 2 Defines projective product routines where A is a SeqAIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/utils/freespace.h" 8 9 int MatSeqAIJPtAP(Mat,Mat,Mat*); 10 int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*); 11 int MatSeqAIJPtAPNumeric(Mat,Mat,Mat); 12 13 static int MATSeqAIJ_PtAP = 0; 14 static int MATSeqAIJ_PtAPSymbolic = 0; 15 static int MATSeqAIJ_PtAPNumeric = 0; 16 17 /* 18 MatSeqAIJPtAP - Creates the SeqAIJ matrix product, C, 19 of SeqAIJ matrix A and matrix P: 20 C = P^T * A * P; 21 22 Note: C is assumed to be uncreated. 23 If this is not the case, Destroy C before calling this routine. 24 */ 25 #undef __FUNCT__ 26 #define __FUNCT__ "MatSeqAIJPtAP" 27 int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) { 28 int ierr; 29 char funct[80]; 30 31 PetscFunctionBegin; 32 33 ierr = PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr); 34 35 ierr = MatSeqAIJPtAPSymbolic(A,P,C);CHKERRQ(ierr); 36 37 /* Avoid additional error checking included in */ 38 /* ierr = MatSeqAIJApplyPtAPNumeric(A,P,*C);CHKERRQ(ierr); */ 39 40 /* Query A for ApplyPtAPNumeric implementation based on types of P */ 41 ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");CHKERRQ(ierr); 42 ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 43 ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,*C));CHKERRQ(ierr); 44 45 ierr = PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr); 46 47 PetscFunctionReturn(0); 48 } 49 50 /* 51 MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the SeqAIJ matrix product, C, 52 of SeqAIJ matrix A and matrix P, according to: 53 C = P^T * A * P; 54 55 Note: C is assumed to be uncreated. 56 If this is not the case, Destroy C before calling this routine. 57 */ 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatSeqAIJPtAPSymbolic" 60 int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) { 61 int ierr; 62 char funct[80]; 63 64 PetscFunctionBegin; 65 66 PetscValidPointer(C); 67 68 PetscValidHeaderSpecific(A,MAT_COOKIE); 69 PetscValidType(A); 70 MatPreallocated(A); 71 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 72 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 73 74 PetscValidHeaderSpecific(P,MAT_COOKIE); 75 PetscValidType(P); 76 MatPreallocated(P); 77 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 78 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 79 80 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 81 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 82 83 /* Query A for ApplyPtAP implementation based on types of P */ 84 ierr = PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_");CHKERRQ(ierr); 85 ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 86 ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat*),(A,P,C));CHKERRQ(ierr); 87 88 PetscFunctionReturn(0); 89 } 90 91 EXTERN_C_BEGIN 92 #undef __FUNCT__ 93 #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ" 94 int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 95 int ierr; 96 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 97 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 98 int aishift=a->indexshift,pishift=p->indexshift; 99 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 100 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 101 int an=A->N,am=A->M,pn=P->N,pm=P->M; 102 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 103 MatScalar *ca; 104 105 PetscFunctionBegin; 106 107 /* some error checking which could be moved into interface layer */ 108 if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 109 110 /* Start timer */ 111 ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 112 113 /* Get ij structure of P^T */ 114 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 115 ptJ=ptj; 116 117 /* Allocate ci array, arrays for fill computation and */ 118 /* free space for accumulating nonzero column info */ 119 ierr = PetscMalloc(((pn+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 120 ci[0] = 0; 121 122 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 123 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 124 ptasparserow = ptadenserow + an; 125 denserow = ptasparserow + an; 126 sparserow = denserow + pn; 127 128 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 129 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 130 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 131 current_space = free_space; 132 133 /* Determine symbolic info for each row of C: */ 134 for (i=0;i<pn;i++) { 135 ptnzi = pti[i+1] - pti[i]; 136 ptanzi = 0; 137 /* Determine symbolic row of PtA: */ 138 for (j=0;j<ptnzi;j++) { 139 arow = *ptJ++; 140 anzj = ai[arow+1] - ai[arow]; 141 ajj = aj + ai[arow]; 142 for (k=0;k<anzj;k++) { 143 if (!ptadenserow[ajj[k]]) { 144 ptadenserow[ajj[k]] = -1; 145 ptasparserow[ptanzi++] = ajj[k]; 146 } 147 } 148 } 149 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 150 ptaj = ptasparserow; 151 cnzi = 0; 152 for (j=0;j<ptanzi;j++) { 153 prow = *ptaj++; 154 pnzj = pi[prow+1] - pi[prow]; 155 pjj = pj + pi[prow]; 156 for (k=0;k<pnzj;k++) { 157 if (!denserow[pjj[k]]) { 158 denserow[pjj[k]] = -1; 159 sparserow[cnzi++] = pjj[k]; 160 } 161 } 162 } 163 164 /* sort sparserow */ 165 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 166 167 /* If free space is not available, make more free space */ 168 /* Double the amount of total space in the list */ 169 if (current_space->local_remaining<cnzi) { 170 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 171 } 172 173 /* Copy data into free space, and zero out denserows */ 174 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 175 current_space->array += cnzi; 176 current_space->local_used += cnzi; 177 current_space->local_remaining -= cnzi; 178 179 for (j=0;j<ptanzi;j++) { 180 ptadenserow[ptasparserow[j]] = 0; 181 } 182 for (j=0;j<cnzi;j++) { 183 denserow[sparserow[j]] = 0; 184 } 185 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 186 /* For now, we will recompute what is needed. */ 187 ci[i+1] = ci[i] + cnzi; 188 } 189 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 190 /* Allocate space for cj, initialize cj, and */ 191 /* destroy list of free space and other temporary array(s) */ 192 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 193 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 194 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 195 196 /* Allocate space for ca */ 197 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 198 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 199 200 /* put together the new matrix */ 201 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 202 203 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 204 /* Since these are PETSc arrays, change flags to free them as necessary. */ 205 c = (Mat_SeqAIJ *)((*C)->data); 206 c->freedata = PETSC_TRUE; 207 c->nonew = 0; 208 209 /* Clean up. */ 210 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 211 212 ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 EXTERN_C_END 216 217 /* 218 MatSeqAIJPtAPNumeric - Computes the SeqAIJ matrix product, C, 219 of SeqAIJ matrix A and matrix P, according to: 220 C = P^T * A * P 221 Note: C must have been created by calling MatSeqAIJApplyPtAPSymbolic. 222 */ 223 #undef __FUNCT__ 224 #define __FUNCT__ "MatSeqAIJPtAPNumeric" 225 int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) { 226 int ierr; 227 char funct[80]; 228 229 PetscFunctionBegin; 230 231 PetscValidHeaderSpecific(A,MAT_COOKIE); 232 PetscValidType(A); 233 MatPreallocated(A); 234 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 235 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 236 237 PetscValidHeaderSpecific(P,MAT_COOKIE); 238 PetscValidType(P); 239 MatPreallocated(P); 240 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 241 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 242 243 PetscValidHeaderSpecific(C,MAT_COOKIE); 244 PetscValidType(C); 245 MatPreallocated(C); 246 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 247 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 248 249 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 250 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 251 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 252 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 253 254 /* Query A for ApplyPtAP implementation based on types of P */ 255 ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");CHKERRQ(ierr); 256 ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 257 ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,C));CHKERRQ(ierr); 258 259 PetscFunctionReturn(0); 260 } 261 262 EXTERN_C_BEGIN 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ" 265 int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 266 int ierr,flops=0; 267 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 268 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 269 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 270 int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 271 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 272 int *ci=c->i,*cj=c->j,*cjj; 273 int am=A->M,cn=C->N,cm=C->M; 274 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 275 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 276 277 PetscFunctionBegin; 278 279 /* Currently not for shifted matrices! */ 280 if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 281 282 ierr = PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 283 284 /* Allocate temporary array for storage of one row of A*P */ 285 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 286 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 287 288 apj = (int *)(apa + cn); 289 apjdense = apj + cn; 290 291 /* Clear old values in C */ 292 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 293 294 for (i=0;i<am;i++) { 295 /* Form sparse row of A*P */ 296 anzi = ai[i+1] - ai[i]; 297 apnzj = 0; 298 for (j=0;j<anzi;j++) { 299 prow = *aj++; 300 pnzj = pi[prow+1] - pi[prow]; 301 pjj = pj + pi[prow]; 302 paj = pa + pi[prow]; 303 for (k=0;k<pnzj;k++) { 304 if (!apjdense[pjj[k]]) { 305 apjdense[pjj[k]] = -1; 306 apj[apnzj++] = pjj[k]; 307 } 308 apa[pjj[k]] += (*aa)*paj[k]; 309 } 310 flops += 2*pnzj; 311 aa++; 312 } 313 314 /* Sort the j index array for quick sparse axpy. */ 315 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 316 317 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 318 pnzi = pi[i+1] - pi[i]; 319 for (j=0;j<pnzi;j++) { 320 nextap = 0; 321 crow = *pJ++; 322 cjj = cj + ci[crow]; 323 caj = ca + ci[crow]; 324 /* Perform sparse axpy operation. Note cjj includes apj. */ 325 for (k=0;nextap<apnzj;k++) { 326 if (cjj[k]==apj[nextap]) { 327 caj[k] += (*pA)*apa[apj[nextap++]]; 328 } 329 } 330 flops += 2*apnzj; 331 pA++; 332 } 333 334 /* Zero the current row info for A*P */ 335 for (j=0;j<apnzj;j++) { 336 apa[apj[j]] = 0.; 337 apjdense[apj[j]] = 0; 338 } 339 } 340 341 /* Assemble the final matrix and clean up */ 342 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 ierr = PetscFree(apa);CHKERRQ(ierr); 345 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 346 ierr = PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 347 348 PetscFunctionReturn(0); 349 } 350 EXTERN_C_END 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "RegisterApplyPtAPRoutines_Private" 354 int RegisterApplyPtAPRoutines_Private(Mat A) { 355 int ierr; 356 357 PetscFunctionBegin; 358 359 if (!MATSeqAIJ_PtAP) { 360 ierr = PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);CHKERRQ(ierr); 361 } 362 363 if (!MATSeqAIJ_PtAPSymbolic) { 364 ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr); 365 } 366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij", 367 "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ", 368 MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 369 370 if (!MATSeqAIJ_PtAPNumeric) { 371 ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr); 372 } 373 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij", 374 "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ", 375 MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378