170f19b1fSKris Buschelman /* 270f19b1fSKris Buschelman Defines symbolic transpose routines for SeqAIJ matrices. 370f19b1fSKris Buschelman 470f19b1fSKris Buschelman Currently Get/Restore only allocates/frees memory for holding the 570f19b1fSKris Buschelman (i,j) info for the transpose. Someday, this info could be 670f19b1fSKris Buschelman maintained so successive calls to Get will not recompute the info. 770f19b1fSKris Buschelman 870f19b1fSKris Buschelman Also defined is a "faster" implementation of MatTranspose for SeqAIJ 970f19b1fSKris Buschelman matrices which avoids calls to MatSetValues. This routine has not 1070f19b1fSKris Buschelman been adopted as the standard yet as it is somewhat untested. 1170f19b1fSKris Buschelman 1270f19b1fSKris Buschelman */ 1370f19b1fSKris Buschelman 1470f19b1fSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" 1570f19b1fSKris Buschelman 1670f19b1fSKris Buschelman static int logkey_matgetsymtranspose = 0; 1770f19b1fSKris Buschelman static int logkey_mattranspose = 0; 1870f19b1fSKris Buschelman 1970f19b1fSKris Buschelman 2070f19b1fSKris Buschelman #undef __FUNCT__ 2170f19b1fSKris Buschelman #define __FUNCT__ "MatGetSymbolicTranspose_SeqIJ" 2270f19b1fSKris Buschelman int MatGetSymbolicTranspose_SeqAIJ(Mat A,int *Ati[],int *Atj[]) { 2370f19b1fSKris Buschelman int ierr,i,j,anzj; 2470f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data; 25*010a20caSHong Zhang int an=A->N,am=A->M; 2670f19b1fSKris Buschelman int *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 2770f19b1fSKris Buschelman 2870f19b1fSKris Buschelman PetscFunctionBegin; 2970f19b1fSKris Buschelman 3070f19b1fSKris Buschelman ierr = PetscLogInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr); 3170f19b1fSKris Buschelman 3270f19b1fSKris Buschelman /* Set up timers */ 3370f19b1fSKris Buschelman if (!logkey_matgetsymtranspose) { 3470f19b1fSKris Buschelman ierr = PetscLogEventRegister(&logkey_matgetsymtranspose,"MatGetSymbolicTranspose",MAT_COOKIE);CHKERRQ(ierr); 3570f19b1fSKris Buschelman } 3670f19b1fSKris Buschelman ierr = PetscLogEventBegin(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr); 3770f19b1fSKris Buschelman 3870f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 3970f19b1fSKris Buschelman ierr = PetscMalloc((an+1)*sizeof(int),&ati);CHKERRQ(ierr); 4070f19b1fSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(int),&atj);CHKERRQ(ierr); 4170f19b1fSKris Buschelman ierr = PetscMalloc(an*sizeof(int),&atfill);CHKERRQ(ierr); 4270f19b1fSKris Buschelman ierr = PetscMemzero(ati,(an+1)*sizeof(int));CHKERRQ(ierr); 4370f19b1fSKris Buschelman 4470f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 4570f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 4670f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 4770f19b1fSKris Buschelman ati[aj[i]+1] += 1; 4870f19b1fSKris Buschelman } 4970f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 5070f19b1fSKris Buschelman for (i=0;i<an;i++) { 5170f19b1fSKris Buschelman ati[i+1] += ati[i]; 5270f19b1fSKris Buschelman } 5370f19b1fSKris Buschelman 5470f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 5570f19b1fSKris Buschelman ierr = PetscMemcpy(atfill,ati,an*sizeof(int));CHKERRQ(ierr); 5670f19b1fSKris Buschelman 5770f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 5870f19b1fSKris Buschelman for (i=0;i<am;i++) { 5970f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 6070f19b1fSKris Buschelman for (j=0;j<anzj;j++) { 6170f19b1fSKris Buschelman atj[atfill[*aj]] = i; 6270f19b1fSKris Buschelman atfill[*aj++] += 1; 6370f19b1fSKris Buschelman } 6470f19b1fSKris Buschelman } 6570f19b1fSKris Buschelman 6670f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 6770f19b1fSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 6870f19b1fSKris Buschelman *Ati = ati; 6970f19b1fSKris Buschelman *Atj = atj; 7070f19b1fSKris Buschelman 7170f19b1fSKris Buschelman ierr = PetscLogEventEnd(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr); 7270f19b1fSKris Buschelman PetscFunctionReturn(0); 7370f19b1fSKris Buschelman } 7470f19b1fSKris Buschelman 7570f19b1fSKris Buschelman #undef __FUNCT__ 7670f19b1fSKris Buschelman #define __FUNCT__ "MatTranspose_SeqIJ_FAST" 7770f19b1fSKris Buschelman int MatTranspose_SeqAIJ_FAST(Mat A,Mat *B) { 7870f19b1fSKris Buschelman int ierr,i,j,anzj; 7970f19b1fSKris Buschelman Mat At; 8070f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data,*at; 81*010a20caSHong Zhang int an=A->N,am=A->M; 8270f19b1fSKris Buschelman int *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 8370f19b1fSKris Buschelman MatScalar *ata,*aa=a->a; 8470f19b1fSKris Buschelman PetscFunctionBegin; 8570f19b1fSKris Buschelman 8670f19b1fSKris Buschelman /* Set up timers */ 8770f19b1fSKris Buschelman if (!logkey_mattranspose) { 8870f19b1fSKris Buschelman ierr = PetscLogEventRegister(&logkey_mattranspose,"MatTranspose_SeqAIJ_FAST",MAT_COOKIE);CHKERRQ(ierr); 8970f19b1fSKris Buschelman } 9070f19b1fSKris Buschelman ierr = PetscLogEventBegin(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr); 9170f19b1fSKris Buschelman 9270f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 9370f19b1fSKris Buschelman ierr = PetscMalloc((an+1)*sizeof(int),&ati);CHKERRQ(ierr); 9470f19b1fSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(int),&atj);CHKERRQ(ierr); 9570f19b1fSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(MatScalar),&ata);CHKERRQ(ierr); 9670f19b1fSKris Buschelman ierr = PetscMalloc(an*sizeof(int),&atfill);CHKERRQ(ierr); 9770f19b1fSKris Buschelman ierr = PetscMemzero(ati,(an+1)*sizeof(int));CHKERRQ(ierr); 9870f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 9970f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 10070f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 10170f19b1fSKris Buschelman ati[aj[i]+1] += 1; 10270f19b1fSKris Buschelman } 10370f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 10470f19b1fSKris Buschelman for (i=0;i<an;i++) { 10570f19b1fSKris Buschelman ati[i+1] += ati[i]; 10670f19b1fSKris Buschelman } 10770f19b1fSKris Buschelman 10870f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 10970f19b1fSKris Buschelman ierr = PetscMemcpy(atfill,ati,an*sizeof(int));CHKERRQ(ierr); 11070f19b1fSKris Buschelman 11170f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 11270f19b1fSKris Buschelman for (i=0;i<am;i++) { 11370f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 11470f19b1fSKris Buschelman for (j=0;j<anzj;j++) { 11570f19b1fSKris Buschelman atj[atfill[*aj]] = i; 11670f19b1fSKris Buschelman ata[atfill[*aj]] = *aa++; 11770f19b1fSKris Buschelman atfill[*aj++] += 1; 11870f19b1fSKris Buschelman } 11970f19b1fSKris Buschelman } 12070f19b1fSKris Buschelman 12170f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 12270f19b1fSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 12370f19b1fSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,an,am,ati,atj,ata,&At);CHKERRQ(ierr); 12470f19b1fSKris Buschelman at = (Mat_SeqAIJ *)(At->data); 12570f19b1fSKris Buschelman at->freedata = PETSC_TRUE; 12670f19b1fSKris Buschelman at->nonew = 0; 12770f19b1fSKris Buschelman if (B) { 12870f19b1fSKris Buschelman *B = At; 12970f19b1fSKris Buschelman } else { 13070f19b1fSKris Buschelman ierr = MatHeaderCopy(A,At); 13170f19b1fSKris Buschelman } 13270f19b1fSKris Buschelman ierr = PetscLogEventEnd(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr); 13370f19b1fSKris Buschelman PetscFunctionReturn(0); 13470f19b1fSKris Buschelman } 13570f19b1fSKris Buschelman 13670f19b1fSKris Buschelman #undef __FUNCT__ 13770f19b1fSKris Buschelman #define __FUNCT__ "MatRestoreSymbolicTranspose_SeqAIJ" 13870f19b1fSKris Buschelman int MatRestoreSymbolicTranspose_SeqAIJ(Mat A,int *ati[],int *atj[]) { 13970f19b1fSKris Buschelman int ierr; 14070f19b1fSKris Buschelman 14170f19b1fSKris Buschelman PetscFunctionBegin; 14270f19b1fSKris Buschelman ierr = PetscLogInfo(A,"Restoring Symbolic Transpose.\n");CHKERRQ(ierr); 14370f19b1fSKris Buschelman ierr = PetscFree(*ati);CHKERRQ(ierr); 14470f19b1fSKris Buschelman ati = PETSC_NULL; 14570f19b1fSKris Buschelman ierr = PetscFree(*atj);CHKERRQ(ierr); 14670f19b1fSKris Buschelman atj = PETSC_NULL; 14770f19b1fSKris Buschelman PetscFunctionReturn(0); 14870f19b1fSKris Buschelman } 14970f19b1fSKris Buschelman 150