170f19b1fSKris Buschelman /* 270f19b1fSKris Buschelman Defines symbolic transpose routines for SeqAIJ matrices. 370f19b1fSKris Buschelman 470f19b1fSKris Buschelman Currently Get/Restore only allocates/frees memory for holding the 570f19b1fSKris Buschelman (i,j) info for the transpose. Someday, this info could be 670f19b1fSKris Buschelman maintained so successive calls to Get will not recompute the info. 770f19b1fSKris Buschelman 870f19b1fSKris Buschelman Also defined is a "faster" implementation of MatTranspose for SeqAIJ 970f19b1fSKris Buschelman matrices which avoids calls to MatSetValues. This routine has not 1070f19b1fSKris Buschelman been adopted as the standard yet as it is somewhat untested. 1170f19b1fSKris Buschelman 1270f19b1fSKris Buschelman */ 1370f19b1fSKris Buschelman 1470f19b1fSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" 1570f19b1fSKris Buschelman 16*6849ba73SBarry Smith static PetscEvent logkey_matgetsymtranspose = 0; 17*6849ba73SBarry Smith static PetscEvent logkey_mattranspose = 0; 1870f19b1fSKris Buschelman 1970f19b1fSKris Buschelman 2070f19b1fSKris Buschelman #undef __FUNCT__ 2170f19b1fSKris Buschelman #define __FUNCT__ "MatGetSymbolicTranspose_SeqIJ" 22dfbe8321SBarry Smith PetscErrorCode MatGetSymbolicTranspose_SeqAIJ(Mat A,int *Ati[],int *Atj[]) 23dfbe8321SBarry Smith { 24dfbe8321SBarry Smith PetscErrorCode ierr; 25dfbe8321SBarry Smith int i,j,anzj; 2670f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data; 27010a20caSHong Zhang int an=A->N,am=A->M; 2870f19b1fSKris Buschelman int *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 2970f19b1fSKris Buschelman 3070f19b1fSKris Buschelman PetscFunctionBegin; 3170f19b1fSKris Buschelman 3270f19b1fSKris Buschelman ierr = PetscLogInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr); 3370f19b1fSKris Buschelman 3470f19b1fSKris Buschelman /* Set up timers */ 3570f19b1fSKris Buschelman if (!logkey_matgetsymtranspose) { 3670f19b1fSKris Buschelman ierr = PetscLogEventRegister(&logkey_matgetsymtranspose,"MatGetSymbolicTranspose",MAT_COOKIE);CHKERRQ(ierr); 3770f19b1fSKris Buschelman } 3870f19b1fSKris Buschelman ierr = PetscLogEventBegin(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr); 3970f19b1fSKris Buschelman 4070f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 4170f19b1fSKris Buschelman ierr = PetscMalloc((an+1)*sizeof(int),&ati);CHKERRQ(ierr); 4270f19b1fSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(int),&atj);CHKERRQ(ierr); 4370f19b1fSKris Buschelman ierr = PetscMalloc(an*sizeof(int),&atfill);CHKERRQ(ierr); 4470f19b1fSKris Buschelman ierr = PetscMemzero(ati,(an+1)*sizeof(int));CHKERRQ(ierr); 4570f19b1fSKris Buschelman 4670f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 4770f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 4870f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 4970f19b1fSKris Buschelman ati[aj[i]+1] += 1; 5070f19b1fSKris Buschelman } 5170f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 5270f19b1fSKris Buschelman for (i=0;i<an;i++) { 5370f19b1fSKris Buschelman ati[i+1] += ati[i]; 5470f19b1fSKris Buschelman } 5570f19b1fSKris Buschelman 5670f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 5770f19b1fSKris Buschelman ierr = PetscMemcpy(atfill,ati,an*sizeof(int));CHKERRQ(ierr); 5870f19b1fSKris Buschelman 5970f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 6070f19b1fSKris Buschelman for (i=0;i<am;i++) { 6170f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 6270f19b1fSKris Buschelman for (j=0;j<anzj;j++) { 6370f19b1fSKris Buschelman atj[atfill[*aj]] = i; 6470f19b1fSKris Buschelman atfill[*aj++] += 1; 6570f19b1fSKris Buschelman } 6670f19b1fSKris Buschelman } 6770f19b1fSKris Buschelman 6870f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 6970f19b1fSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 7070f19b1fSKris Buschelman *Ati = ati; 7170f19b1fSKris Buschelman *Atj = atj; 7270f19b1fSKris Buschelman 7370f19b1fSKris Buschelman ierr = PetscLogEventEnd(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr); 7470f19b1fSKris Buschelman PetscFunctionReturn(0); 7570f19b1fSKris Buschelman } 7670f19b1fSKris Buschelman 7770f19b1fSKris Buschelman #undef __FUNCT__ 7870f19b1fSKris Buschelman #define __FUNCT__ "MatTranspose_SeqIJ_FAST" 79dfbe8321SBarry Smith PetscErrorCode MatTranspose_SeqAIJ_FAST(Mat A,Mat *B) 80dfbe8321SBarry Smith { 81dfbe8321SBarry Smith PetscErrorCode ierr; 82dfbe8321SBarry Smith int i,j,anzj; 8370f19b1fSKris Buschelman Mat At; 8470f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data,*at; 85010a20caSHong Zhang int an=A->N,am=A->M; 8670f19b1fSKris Buschelman int *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 8770f19b1fSKris Buschelman MatScalar *ata,*aa=a->a; 8870f19b1fSKris Buschelman PetscFunctionBegin; 8970f19b1fSKris Buschelman 9070f19b1fSKris Buschelman /* Set up timers */ 9170f19b1fSKris Buschelman if (!logkey_mattranspose) { 9270f19b1fSKris Buschelman ierr = PetscLogEventRegister(&logkey_mattranspose,"MatTranspose_SeqAIJ_FAST",MAT_COOKIE);CHKERRQ(ierr); 9370f19b1fSKris Buschelman } 9470f19b1fSKris Buschelman ierr = PetscLogEventBegin(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr); 9570f19b1fSKris Buschelman 9670f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 9770f19b1fSKris Buschelman ierr = PetscMalloc((an+1)*sizeof(int),&ati);CHKERRQ(ierr); 9870f19b1fSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(int),&atj);CHKERRQ(ierr); 9970f19b1fSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(MatScalar),&ata);CHKERRQ(ierr); 10070f19b1fSKris Buschelman ierr = PetscMalloc(an*sizeof(int),&atfill);CHKERRQ(ierr); 10170f19b1fSKris Buschelman ierr = PetscMemzero(ati,(an+1)*sizeof(int));CHKERRQ(ierr); 10270f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 10370f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 10470f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 10570f19b1fSKris Buschelman ati[aj[i]+1] += 1; 10670f19b1fSKris Buschelman } 10770f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 10870f19b1fSKris Buschelman for (i=0;i<an;i++) { 10970f19b1fSKris Buschelman ati[i+1] += ati[i]; 11070f19b1fSKris Buschelman } 11170f19b1fSKris Buschelman 11270f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 11370f19b1fSKris Buschelman ierr = PetscMemcpy(atfill,ati,an*sizeof(int));CHKERRQ(ierr); 11470f19b1fSKris Buschelman 11570f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 11670f19b1fSKris Buschelman for (i=0;i<am;i++) { 11770f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 11870f19b1fSKris Buschelman for (j=0;j<anzj;j++) { 11970f19b1fSKris Buschelman atj[atfill[*aj]] = i; 12070f19b1fSKris Buschelman ata[atfill[*aj]] = *aa++; 12170f19b1fSKris Buschelman atfill[*aj++] += 1; 12270f19b1fSKris Buschelman } 12370f19b1fSKris Buschelman } 12470f19b1fSKris Buschelman 12570f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 12670f19b1fSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 12770f19b1fSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,an,am,ati,atj,ata,&At);CHKERRQ(ierr); 12870f19b1fSKris Buschelman at = (Mat_SeqAIJ *)(At->data); 12970f19b1fSKris Buschelman at->freedata = PETSC_TRUE; 13070f19b1fSKris Buschelman at->nonew = 0; 13170f19b1fSKris Buschelman if (B) { 13270f19b1fSKris Buschelman *B = At; 13370f19b1fSKris Buschelman } else { 13470f19b1fSKris Buschelman ierr = MatHeaderCopy(A,At); 13570f19b1fSKris Buschelman } 13670f19b1fSKris Buschelman ierr = PetscLogEventEnd(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr); 13770f19b1fSKris Buschelman PetscFunctionReturn(0); 13870f19b1fSKris Buschelman } 13970f19b1fSKris Buschelman 14070f19b1fSKris Buschelman #undef __FUNCT__ 14170f19b1fSKris Buschelman #define __FUNCT__ "MatRestoreSymbolicTranspose_SeqAIJ" 142dfbe8321SBarry Smith PetscErrorCode MatRestoreSymbolicTranspose_SeqAIJ(Mat A,int *ati[],int *atj[]) { 143dfbe8321SBarry Smith PetscErrorCode ierr; 14470f19b1fSKris Buschelman 14570f19b1fSKris Buschelman PetscFunctionBegin; 14670f19b1fSKris Buschelman ierr = PetscLogInfo(A,"Restoring Symbolic Transpose.\n");CHKERRQ(ierr); 14770f19b1fSKris Buschelman ierr = PetscFree(*ati);CHKERRQ(ierr); 14870f19b1fSKris Buschelman ati = PETSC_NULL; 14970f19b1fSKris Buschelman ierr = PetscFree(*atj);CHKERRQ(ierr); 15070f19b1fSKris Buschelman atj = PETSC_NULL; 15170f19b1fSKris Buschelman PetscFunctionReturn(0); 15270f19b1fSKris Buschelman } 15370f19b1fSKris Buschelman 154