1eb9c0419SKris Buschelman /* 2eb9c0419SKris Buschelman Defines projective product routines where A is a SeqAIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6eb9c0419SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 8eb9c0419SKris Buschelman 9eb9c0419SKris Buschelman int MatSeqAIJPtAP(Mat,Mat,Mat*); 10eb9c0419SKris Buschelman int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*); 11eb9c0419SKris Buschelman int MatSeqAIJPtAPNumeric(Mat,Mat,Mat); 12eb9c0419SKris Buschelman 13eb9c0419SKris Buschelman static int MATSeqAIJ_PtAP = 0; 14eb9c0419SKris Buschelman static int MATSeqAIJ_PtAPSymbolic = 0; 15eb9c0419SKris Buschelman static int MATSeqAIJ_PtAPNumeric = 0; 16eb9c0419SKris Buschelman 17eb9c0419SKris Buschelman /* 18eb9c0419SKris Buschelman MatSeqAIJPtAP - Creates the SeqAIJ matrix product, C, 19eb9c0419SKris Buschelman of SeqAIJ matrix A and matrix P: 20eb9c0419SKris Buschelman C = P^T * A * P; 21eb9c0419SKris Buschelman 22eb9c0419SKris Buschelman Note: C is assumed to be uncreated. 23eb9c0419SKris Buschelman If this is not the case, Destroy C before calling this routine. 24eb9c0419SKris Buschelman */ 25eb9c0419SKris Buschelman #undef __FUNCT__ 26eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAP" 27eb9c0419SKris Buschelman int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) { 28eb9c0419SKris Buschelman int ierr; 29eb9c0419SKris Buschelman char funct[80]; 30eb9c0419SKris Buschelman 31eb9c0419SKris Buschelman PetscFunctionBegin; 32eb9c0419SKris Buschelman 33eb9c0419SKris Buschelman ierr = PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr); 34eb9c0419SKris Buschelman 35eb9c0419SKris Buschelman ierr = MatSeqAIJPtAPSymbolic(A,P,C);CHKERRQ(ierr); 36eb9c0419SKris Buschelman 37eb9c0419SKris Buschelman /* Avoid additional error checking included in */ 38eb9c0419SKris Buschelman /* ierr = MatSeqAIJApplyPtAPNumeric(A,P,*C);CHKERRQ(ierr); */ 39eb9c0419SKris Buschelman 40eb9c0419SKris Buschelman /* Query A for ApplyPtAPNumeric implementation based on types of P */ 41eb9c0419SKris Buschelman ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");CHKERRQ(ierr); 42eb9c0419SKris Buschelman ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 43eb9c0419SKris Buschelman ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,*C));CHKERRQ(ierr); 44eb9c0419SKris Buschelman 45eb9c0419SKris Buschelman ierr = PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr); 46eb9c0419SKris Buschelman 47eb9c0419SKris Buschelman PetscFunctionReturn(0); 48eb9c0419SKris Buschelman } 49eb9c0419SKris Buschelman 50eb9c0419SKris Buschelman /* 51eb9c0419SKris Buschelman MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the SeqAIJ matrix product, C, 52eb9c0419SKris Buschelman of SeqAIJ matrix A and matrix P, according to: 53eb9c0419SKris Buschelman C = P^T * A * P; 54eb9c0419SKris Buschelman 55eb9c0419SKris Buschelman Note: C is assumed to be uncreated. 56eb9c0419SKris Buschelman If this is not the case, Destroy C before calling this routine. 57eb9c0419SKris Buschelman */ 58eb9c0419SKris Buschelman #undef __FUNCT__ 59eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAPSymbolic" 60eb9c0419SKris Buschelman int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) { 61eb9c0419SKris Buschelman int ierr; 62eb9c0419SKris Buschelman char funct[80]; 63eb9c0419SKris Buschelman 64eb9c0419SKris Buschelman PetscFunctionBegin; 65eb9c0419SKris Buschelman 66eb9c0419SKris Buschelman PetscValidPointer(C); 67eb9c0419SKris Buschelman 68eb9c0419SKris Buschelman PetscValidHeaderSpecific(A,MAT_COOKIE); 69eb9c0419SKris Buschelman PetscValidType(A); 70eb9c0419SKris Buschelman MatPreallocated(A); 71eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 72eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 73eb9c0419SKris Buschelman 74eb9c0419SKris Buschelman PetscValidHeaderSpecific(P,MAT_COOKIE); 75eb9c0419SKris Buschelman PetscValidType(P); 76eb9c0419SKris Buschelman MatPreallocated(P); 77eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 78eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 79eb9c0419SKris Buschelman 80eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 81eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 82eb9c0419SKris Buschelman 83eb9c0419SKris Buschelman /* Query A for ApplyPtAP implementation based on types of P */ 84eb9c0419SKris Buschelman ierr = PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_");CHKERRQ(ierr); 85eb9c0419SKris Buschelman ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 86eb9c0419SKris Buschelman ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat*),(A,P,C));CHKERRQ(ierr); 87eb9c0419SKris Buschelman 88eb9c0419SKris Buschelman PetscFunctionReturn(0); 89eb9c0419SKris Buschelman } 90eb9c0419SKris Buschelman 91eb9c0419SKris Buschelman EXTERN_C_BEGIN 92eb9c0419SKris Buschelman #undef __FUNCT__ 93eb9c0419SKris Buschelman #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ" 94eb9c0419SKris Buschelman int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 95eb9c0419SKris Buschelman int ierr; 96eb9c0419SKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 97eb9c0419SKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 98eb9c0419SKris Buschelman int aishift=a->indexshift,pishift=p->indexshift; 99eb9c0419SKris Buschelman int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 100eb9c0419SKris Buschelman int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 101eb9c0419SKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M; 102eb9c0419SKris Buschelman int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 103eb9c0419SKris Buschelman MatScalar *ca; 104eb9c0419SKris Buschelman 105eb9c0419SKris Buschelman PetscFunctionBegin; 106eb9c0419SKris Buschelman 107eb9c0419SKris Buschelman /* some error checking which could be moved into interface layer */ 108eb9c0419SKris Buschelman if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 109eb9c0419SKris Buschelman 110eb9c0419SKris Buschelman /* Start timer */ 111eb9c0419SKris Buschelman ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 112eb9c0419SKris Buschelman 113eb9c0419SKris Buschelman /* Get ij structure of P^T */ 114eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 115eb9c0419SKris Buschelman ptJ=ptj; 116eb9c0419SKris Buschelman 117eb9c0419SKris Buschelman /* Allocate ci array, arrays for fill computation and */ 118eb9c0419SKris Buschelman /* free space for accumulating nonzero column info */ 119*3985e5eaSKris Buschelman ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 120eb9c0419SKris Buschelman ci[0] = 0; 121eb9c0419SKris Buschelman 122eb9c0419SKris Buschelman ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 123eb9c0419SKris Buschelman ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 124eb9c0419SKris Buschelman ptasparserow = ptadenserow + an; 125eb9c0419SKris Buschelman denserow = ptasparserow + an; 126eb9c0419SKris Buschelman sparserow = denserow + pn; 127eb9c0419SKris Buschelman 128eb9c0419SKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 129eb9c0419SKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 130eb9c0419SKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 131eb9c0419SKris Buschelman current_space = free_space; 132eb9c0419SKris Buschelman 133eb9c0419SKris Buschelman /* Determine symbolic info for each row of C: */ 134eb9c0419SKris Buschelman for (i=0;i<pn;i++) { 135eb9c0419SKris Buschelman ptnzi = pti[i+1] - pti[i]; 136eb9c0419SKris Buschelman ptanzi = 0; 137eb9c0419SKris Buschelman /* Determine symbolic row of PtA: */ 138eb9c0419SKris Buschelman for (j=0;j<ptnzi;j++) { 139eb9c0419SKris Buschelman arow = *ptJ++; 140eb9c0419SKris Buschelman anzj = ai[arow+1] - ai[arow]; 141eb9c0419SKris Buschelman ajj = aj + ai[arow]; 142eb9c0419SKris Buschelman for (k=0;k<anzj;k++) { 143eb9c0419SKris Buschelman if (!ptadenserow[ajj[k]]) { 144eb9c0419SKris Buschelman ptadenserow[ajj[k]] = -1; 145eb9c0419SKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 146eb9c0419SKris Buschelman } 147eb9c0419SKris Buschelman } 148eb9c0419SKris Buschelman } 149eb9c0419SKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 150eb9c0419SKris Buschelman ptaj = ptasparserow; 151eb9c0419SKris Buschelman cnzi = 0; 152eb9c0419SKris Buschelman for (j=0;j<ptanzi;j++) { 153eb9c0419SKris Buschelman prow = *ptaj++; 154eb9c0419SKris Buschelman pnzj = pi[prow+1] - pi[prow]; 155eb9c0419SKris Buschelman pjj = pj + pi[prow]; 156eb9c0419SKris Buschelman for (k=0;k<pnzj;k++) { 157eb9c0419SKris Buschelman if (!denserow[pjj[k]]) { 158eb9c0419SKris Buschelman denserow[pjj[k]] = -1; 159eb9c0419SKris Buschelman sparserow[cnzi++] = pjj[k]; 160eb9c0419SKris Buschelman } 161eb9c0419SKris Buschelman } 162eb9c0419SKris Buschelman } 163eb9c0419SKris Buschelman 164eb9c0419SKris Buschelman /* sort sparserow */ 165eb9c0419SKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 166eb9c0419SKris Buschelman 167eb9c0419SKris Buschelman /* If free space is not available, make more free space */ 168eb9c0419SKris Buschelman /* Double the amount of total space in the list */ 169eb9c0419SKris Buschelman if (current_space->local_remaining<cnzi) { 170eb9c0419SKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 171eb9c0419SKris Buschelman } 172eb9c0419SKris Buschelman 173eb9c0419SKris Buschelman /* Copy data into free space, and zero out denserows */ 174eb9c0419SKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 175eb9c0419SKris Buschelman current_space->array += cnzi; 176eb9c0419SKris Buschelman current_space->local_used += cnzi; 177eb9c0419SKris Buschelman current_space->local_remaining -= cnzi; 178eb9c0419SKris Buschelman 179eb9c0419SKris Buschelman for (j=0;j<ptanzi;j++) { 180eb9c0419SKris Buschelman ptadenserow[ptasparserow[j]] = 0; 181eb9c0419SKris Buschelman } 182eb9c0419SKris Buschelman for (j=0;j<cnzi;j++) { 183eb9c0419SKris Buschelman denserow[sparserow[j]] = 0; 184eb9c0419SKris Buschelman } 185eb9c0419SKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 186eb9c0419SKris Buschelman /* For now, we will recompute what is needed. */ 187eb9c0419SKris Buschelman ci[i+1] = ci[i] + cnzi; 188eb9c0419SKris Buschelman } 189eb9c0419SKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 190eb9c0419SKris Buschelman /* Allocate space for cj, initialize cj, and */ 191eb9c0419SKris Buschelman /* destroy list of free space and other temporary array(s) */ 192eb9c0419SKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 193eb9c0419SKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 194eb9c0419SKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 195eb9c0419SKris Buschelman 196eb9c0419SKris Buschelman /* Allocate space for ca */ 197eb9c0419SKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 198eb9c0419SKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 199eb9c0419SKris Buschelman 200eb9c0419SKris Buschelman /* put together the new matrix */ 201eb9c0419SKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 202eb9c0419SKris Buschelman 203eb9c0419SKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 204eb9c0419SKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 205eb9c0419SKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 206eb9c0419SKris Buschelman c->freedata = PETSC_TRUE; 207eb9c0419SKris Buschelman c->nonew = 0; 208eb9c0419SKris Buschelman 209eb9c0419SKris Buschelman /* Clean up. */ 210eb9c0419SKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 211eb9c0419SKris Buschelman 212eb9c0419SKris Buschelman ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 213eb9c0419SKris Buschelman PetscFunctionReturn(0); 214eb9c0419SKris Buschelman } 215eb9c0419SKris Buschelman EXTERN_C_END 216eb9c0419SKris Buschelman 217*3985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 218*3985e5eaSKris Buschelman EXTERN_C_BEGIN 219*3985e5eaSKris Buschelman #undef __FUNCT__ 220*3985e5eaSKris Buschelman #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ" 221*3985e5eaSKris Buschelman int MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 222*3985e5eaSKris Buschelman int ierr; 223*3985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 224*3985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 225*3985e5eaSKris Buschelman Mat P=pp->AIJ; 226*3985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 227*3985e5eaSKris Buschelman int aishift=a->indexshift,pishift=p->indexshift; 228*3985e5eaSKris Buschelman int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 229*3985e5eaSKris Buschelman int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 230*3985e5eaSKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 231*3985e5eaSKris Buschelman int i,j,k,dof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 232*3985e5eaSKris Buschelman MatScalar *ca; 233*3985e5eaSKris Buschelman 234*3985e5eaSKris Buschelman PetscFunctionBegin; 235*3985e5eaSKris Buschelman 236*3985e5eaSKris Buschelman /* some error checking which could be moved into interface layer */ 237*3985e5eaSKris Buschelman if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 238*3985e5eaSKris Buschelman 239*3985e5eaSKris Buschelman /* Start timer */ 240*3985e5eaSKris Buschelman ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 241*3985e5eaSKris Buschelman 242*3985e5eaSKris Buschelman /* Get ij structure of P^T */ 243*3985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 244*3985e5eaSKris Buschelman 245*3985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 246*3985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 247*3985e5eaSKris Buschelman ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 248*3985e5eaSKris Buschelman ci[0] = 0; 249*3985e5eaSKris Buschelman 250*3985e5eaSKris Buschelman ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 251*3985e5eaSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 252*3985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 253*3985e5eaSKris Buschelman denserow = ptasparserow + an; 254*3985e5eaSKris Buschelman sparserow = denserow + pn; 255*3985e5eaSKris Buschelman 256*3985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 257*3985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 258*3985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 259*3985e5eaSKris Buschelman current_space = free_space; 260*3985e5eaSKris Buschelman 261*3985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 262*3985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 263*3985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 264*3985e5eaSKris Buschelman ptanzi = 0; 265*3985e5eaSKris Buschelman ptJ = ptj + pti[i]; 266*3985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 267*3985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 268*3985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 269*3985e5eaSKris Buschelman arow = ptJ[j] + dof; 270*3985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 271*3985e5eaSKris Buschelman ajj = aj + ai[arow]; 272*3985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 273*3985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 274*3985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 275*3985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 276*3985e5eaSKris Buschelman } 277*3985e5eaSKris Buschelman } 278*3985e5eaSKris Buschelman } 279*3985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 280*3985e5eaSKris Buschelman ptaj = ptasparserow; 281*3985e5eaSKris Buschelman cnzi = 0; 282*3985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 283*3985e5eaSKris Buschelman prow = (*ptaj++)/dof; 284*3985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 285*3985e5eaSKris Buschelman pjj = pj + pi[prow]; 286*3985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 287*3985e5eaSKris Buschelman if (!denserow[pjj[k]]) { 288*3985e5eaSKris Buschelman denserow[pjj[k]] = -1; 289*3985e5eaSKris Buschelman sparserow[cnzi++] = pjj[k]; 290*3985e5eaSKris Buschelman } 291*3985e5eaSKris Buschelman } 292*3985e5eaSKris Buschelman } 293*3985e5eaSKris Buschelman 294*3985e5eaSKris Buschelman /* sort sparserow */ 295*3985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 296*3985e5eaSKris Buschelman 297*3985e5eaSKris Buschelman /* If free space is not available, make more free space */ 298*3985e5eaSKris Buschelman /* Double the amount of total space in the list */ 299*3985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 300*3985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 301*3985e5eaSKris Buschelman } 302*3985e5eaSKris Buschelman 303*3985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 304*3985e5eaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 305*3985e5eaSKris Buschelman current_space->array += cnzi; 306*3985e5eaSKris Buschelman current_space->local_used += cnzi; 307*3985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 308*3985e5eaSKris Buschelman 309*3985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 310*3985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 311*3985e5eaSKris Buschelman } 312*3985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 313*3985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 314*3985e5eaSKris Buschelman } 315*3985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 316*3985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 317*3985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 318*3985e5eaSKris Buschelman } 319*3985e5eaSKris Buschelman } 320*3985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 321*3985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 322*3985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 323*3985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 324*3985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 325*3985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 326*3985e5eaSKris Buschelman 327*3985e5eaSKris Buschelman /* Allocate space for ca */ 328*3985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 329*3985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 330*3985e5eaSKris Buschelman 331*3985e5eaSKris Buschelman /* put together the new matrix */ 332*3985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 333*3985e5eaSKris Buschelman 334*3985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 335*3985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 336*3985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 337*3985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 338*3985e5eaSKris Buschelman c->nonew = 0; 339*3985e5eaSKris Buschelman 340*3985e5eaSKris Buschelman /* Clean up. */ 341*3985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 342*3985e5eaSKris Buschelman 343*3985e5eaSKris Buschelman ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 344*3985e5eaSKris Buschelman PetscFunctionReturn(0); 345*3985e5eaSKris Buschelman } 346*3985e5eaSKris Buschelman EXTERN_C_END 347*3985e5eaSKris Buschelman 348eb9c0419SKris Buschelman /* 349eb9c0419SKris Buschelman MatSeqAIJPtAPNumeric - Computes the SeqAIJ matrix product, C, 350eb9c0419SKris Buschelman of SeqAIJ matrix A and matrix P, according to: 351eb9c0419SKris Buschelman C = P^T * A * P 352eb9c0419SKris Buschelman Note: C must have been created by calling MatSeqAIJApplyPtAPSymbolic. 353eb9c0419SKris Buschelman */ 354eb9c0419SKris Buschelman #undef __FUNCT__ 355eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAPNumeric" 356eb9c0419SKris Buschelman int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) { 357eb9c0419SKris Buschelman int ierr; 358eb9c0419SKris Buschelman char funct[80]; 359eb9c0419SKris Buschelman 360eb9c0419SKris Buschelman PetscFunctionBegin; 361eb9c0419SKris Buschelman 362eb9c0419SKris Buschelman PetscValidHeaderSpecific(A,MAT_COOKIE); 363eb9c0419SKris Buschelman PetscValidType(A); 364eb9c0419SKris Buschelman MatPreallocated(A); 365eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 366eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 367eb9c0419SKris Buschelman 368eb9c0419SKris Buschelman PetscValidHeaderSpecific(P,MAT_COOKIE); 369eb9c0419SKris Buschelman PetscValidType(P); 370eb9c0419SKris Buschelman MatPreallocated(P); 371eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 372eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 373eb9c0419SKris Buschelman 374eb9c0419SKris Buschelman PetscValidHeaderSpecific(C,MAT_COOKIE); 375eb9c0419SKris Buschelman PetscValidType(C); 376eb9c0419SKris Buschelman MatPreallocated(C); 377eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 378eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 379eb9c0419SKris Buschelman 380eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 381eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 382eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 383eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 384eb9c0419SKris Buschelman 385eb9c0419SKris Buschelman /* Query A for ApplyPtAP implementation based on types of P */ 386eb9c0419SKris Buschelman ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");CHKERRQ(ierr); 387eb9c0419SKris Buschelman ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 388eb9c0419SKris Buschelman ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,C));CHKERRQ(ierr); 389eb9c0419SKris Buschelman 390eb9c0419SKris Buschelman PetscFunctionReturn(0); 391eb9c0419SKris Buschelman } 392eb9c0419SKris Buschelman 393eb9c0419SKris Buschelman EXTERN_C_BEGIN 394eb9c0419SKris Buschelman #undef __FUNCT__ 395eb9c0419SKris Buschelman #define __FUNCT__ "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ" 396eb9c0419SKris Buschelman int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 397eb9c0419SKris Buschelman int ierr,flops=0; 398eb9c0419SKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 399eb9c0419SKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 400eb9c0419SKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 401eb9c0419SKris Buschelman int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 402eb9c0419SKris Buschelman int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 403eb9c0419SKris Buschelman int *ci=c->i,*cj=c->j,*cjj; 404eb9c0419SKris Buschelman int am=A->M,cn=C->N,cm=C->M; 405eb9c0419SKris Buschelman int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 406eb9c0419SKris Buschelman MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 407eb9c0419SKris Buschelman 408eb9c0419SKris Buschelman PetscFunctionBegin; 409eb9c0419SKris Buschelman 410eb9c0419SKris Buschelman /* Currently not for shifted matrices! */ 411eb9c0419SKris Buschelman if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 412eb9c0419SKris Buschelman 413eb9c0419SKris Buschelman ierr = PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 414eb9c0419SKris Buschelman 415eb9c0419SKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 416eb9c0419SKris Buschelman ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 417eb9c0419SKris Buschelman ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 418eb9c0419SKris Buschelman 419eb9c0419SKris Buschelman apj = (int *)(apa + cn); 420eb9c0419SKris Buschelman apjdense = apj + cn; 421eb9c0419SKris Buschelman 422eb9c0419SKris Buschelman /* Clear old values in C */ 423eb9c0419SKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 424eb9c0419SKris Buschelman 425eb9c0419SKris Buschelman for (i=0;i<am;i++) { 426eb9c0419SKris Buschelman /* Form sparse row of A*P */ 427eb9c0419SKris Buschelman anzi = ai[i+1] - ai[i]; 428eb9c0419SKris Buschelman apnzj = 0; 429eb9c0419SKris Buschelman for (j=0;j<anzi;j++) { 430eb9c0419SKris Buschelman prow = *aj++; 431eb9c0419SKris Buschelman pnzj = pi[prow+1] - pi[prow]; 432eb9c0419SKris Buschelman pjj = pj + pi[prow]; 433eb9c0419SKris Buschelman paj = pa + pi[prow]; 434eb9c0419SKris Buschelman for (k=0;k<pnzj;k++) { 435eb9c0419SKris Buschelman if (!apjdense[pjj[k]]) { 436eb9c0419SKris Buschelman apjdense[pjj[k]] = -1; 437eb9c0419SKris Buschelman apj[apnzj++] = pjj[k]; 438eb9c0419SKris Buschelman } 439eb9c0419SKris Buschelman apa[pjj[k]] += (*aa)*paj[k]; 440eb9c0419SKris Buschelman } 441eb9c0419SKris Buschelman flops += 2*pnzj; 442eb9c0419SKris Buschelman aa++; 443eb9c0419SKris Buschelman } 444eb9c0419SKris Buschelman 445eb9c0419SKris Buschelman /* Sort the j index array for quick sparse axpy. */ 446eb9c0419SKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 447eb9c0419SKris Buschelman 448eb9c0419SKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 449eb9c0419SKris Buschelman pnzi = pi[i+1] - pi[i]; 450eb9c0419SKris Buschelman for (j=0;j<pnzi;j++) { 451eb9c0419SKris Buschelman nextap = 0; 452eb9c0419SKris Buschelman crow = *pJ++; 453eb9c0419SKris Buschelman cjj = cj + ci[crow]; 454eb9c0419SKris Buschelman caj = ca + ci[crow]; 455eb9c0419SKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 456eb9c0419SKris Buschelman for (k=0;nextap<apnzj;k++) { 457eb9c0419SKris Buschelman if (cjj[k]==apj[nextap]) { 458eb9c0419SKris Buschelman caj[k] += (*pA)*apa[apj[nextap++]]; 459eb9c0419SKris Buschelman } 460eb9c0419SKris Buschelman } 461eb9c0419SKris Buschelman flops += 2*apnzj; 462eb9c0419SKris Buschelman pA++; 463eb9c0419SKris Buschelman } 464eb9c0419SKris Buschelman 465eb9c0419SKris Buschelman /* Zero the current row info for A*P */ 466eb9c0419SKris Buschelman for (j=0;j<apnzj;j++) { 467eb9c0419SKris Buschelman apa[apj[j]] = 0.; 468eb9c0419SKris Buschelman apjdense[apj[j]] = 0; 469eb9c0419SKris Buschelman } 470eb9c0419SKris Buschelman } 471eb9c0419SKris Buschelman 472eb9c0419SKris Buschelman /* Assemble the final matrix and clean up */ 473eb9c0419SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 474eb9c0419SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 475eb9c0419SKris Buschelman ierr = PetscFree(apa);CHKERRQ(ierr); 476eb9c0419SKris Buschelman ierr = PetscLogFlops(flops);CHKERRQ(ierr); 477eb9c0419SKris Buschelman ierr = PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 478eb9c0419SKris Buschelman 479eb9c0419SKris Buschelman PetscFunctionReturn(0); 480eb9c0419SKris Buschelman } 481eb9c0419SKris Buschelman EXTERN_C_END 482eb9c0419SKris Buschelman 483eb9c0419SKris Buschelman #undef __FUNCT__ 484eb9c0419SKris Buschelman #define __FUNCT__ "RegisterApplyPtAPRoutines_Private" 485eb9c0419SKris Buschelman int RegisterApplyPtAPRoutines_Private(Mat A) { 486eb9c0419SKris Buschelman int ierr; 487eb9c0419SKris Buschelman 488eb9c0419SKris Buschelman PetscFunctionBegin; 489eb9c0419SKris Buschelman 490eb9c0419SKris Buschelman if (!MATSeqAIJ_PtAP) { 491eb9c0419SKris Buschelman ierr = PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);CHKERRQ(ierr); 492eb9c0419SKris Buschelman } 493eb9c0419SKris Buschelman 494eb9c0419SKris Buschelman if (!MATSeqAIJ_PtAPSymbolic) { 495eb9c0419SKris Buschelman ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr); 496eb9c0419SKris Buschelman } 497eb9c0419SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij", 498eb9c0419SKris Buschelman "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ", 499eb9c0419SKris Buschelman MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 500eb9c0419SKris Buschelman 501eb9c0419SKris Buschelman if (!MATSeqAIJ_PtAPNumeric) { 502eb9c0419SKris Buschelman ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr); 503eb9c0419SKris Buschelman } 504eb9c0419SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij", 505eb9c0419SKris Buschelman "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ", 506eb9c0419SKris Buschelman MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 507eb9c0419SKris Buschelman PetscFunctionReturn(0); 508eb9c0419SKris Buschelman } 509