170f19b1fSKris Buschelman /* 270f19b1fSKris Buschelman Defines symbolic transpose routines for SeqAIJ matrices. 370f19b1fSKris Buschelman 470f19b1fSKris Buschelman Currently Get/Restore only allocates/frees memory for holding the 570f19b1fSKris Buschelman (i,j) info for the transpose. Someday, this info could be 670f19b1fSKris Buschelman maintained so successive calls to Get will not recompute the info. 770f19b1fSKris Buschelman 870f19b1fSKris Buschelman Also defined is a "faster" implementation of MatTranspose for SeqAIJ 970f19b1fSKris Buschelman matrices which avoids calls to MatSetValues. This routine has not 1070f19b1fSKris Buschelman been adopted as the standard yet as it is somewhat untested. 1170f19b1fSKris Buschelman 1270f19b1fSKris Buschelman */ 1370f19b1fSKris Buschelman 1470f19b1fSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" 1570f19b1fSKris Buschelman 166849ba73SBarry Smith static PetscEvent logkey_matgetsymtranspose = 0; 176849ba73SBarry Smith static PetscEvent logkey_mattranspose = 0; 1870f19b1fSKris Buschelman 1970f19b1fSKris Buschelman #undef __FUNCT__ 2070f19b1fSKris Buschelman #define __FUNCT__ "MatGetSymbolicTranspose_SeqIJ" 212e111b49SBarry Smith PetscErrorCode MatGetSymbolicTranspose_SeqAIJ(Mat A,PetscInt *Ati[],PetscInt *Atj[]) 22dfbe8321SBarry Smith { 23dfbe8321SBarry Smith PetscErrorCode ierr; 242e111b49SBarry Smith PetscInt i,j,anzj; 2570f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data; 262e111b49SBarry Smith PetscInt an=A->N,am=A->M; 272e111b49SBarry Smith PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 2870f19b1fSKris Buschelman 2970f19b1fSKris Buschelman PetscFunctionBegin; 3070f19b1fSKris Buschelman 3170f19b1fSKris Buschelman ierr = PetscLogInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr); 3270f19b1fSKris Buschelman 3370f19b1fSKris Buschelman /* Set up timers */ 3470f19b1fSKris Buschelman if (!logkey_matgetsymtranspose) { 3570f19b1fSKris Buschelman ierr = PetscLogEventRegister(&logkey_matgetsymtranspose,"MatGetSymbolicTranspose",MAT_COOKIE);CHKERRQ(ierr); 3670f19b1fSKris Buschelman } 3770f19b1fSKris Buschelman ierr = PetscLogEventBegin(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr); 3870f19b1fSKris Buschelman 3970f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 402e111b49SBarry Smith ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr); 412e111b49SBarry Smith ierr = PetscMalloc(ai[am]*sizeof(PetscInt),&atj);CHKERRQ(ierr); 422e111b49SBarry Smith ierr = PetscMalloc(an*sizeof(PetscInt),&atfill);CHKERRQ(ierr); 432e111b49SBarry Smith ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr); 4470f19b1fSKris Buschelman 4570f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 4670f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 4770f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 4870f19b1fSKris Buschelman ati[aj[i]+1] += 1; 4970f19b1fSKris Buschelman } 5070f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 5170f19b1fSKris Buschelman for (i=0;i<an;i++) { 5270f19b1fSKris Buschelman ati[i+1] += ati[i]; 5370f19b1fSKris Buschelman } 5470f19b1fSKris Buschelman 5570f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 562e111b49SBarry Smith ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr); 5770f19b1fSKris Buschelman 5870f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 5970f19b1fSKris Buschelman for (i=0;i<am;i++) { 6070f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 6170f19b1fSKris Buschelman for (j=0;j<anzj;j++) { 6270f19b1fSKris Buschelman atj[atfill[*aj]] = i; 6370f19b1fSKris Buschelman atfill[*aj++] += 1; 6470f19b1fSKris Buschelman } 6570f19b1fSKris Buschelman } 6670f19b1fSKris Buschelman 6770f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 6870f19b1fSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 6970f19b1fSKris Buschelman *Ati = ati; 7070f19b1fSKris Buschelman *Atj = atj; 7170f19b1fSKris Buschelman 7270f19b1fSKris Buschelman ierr = PetscLogEventEnd(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr); 7370f19b1fSKris Buschelman PetscFunctionReturn(0); 7470f19b1fSKris Buschelman } 750390132cSHong Zhang /* 760390132cSHong Zhang MatGetSymbolicTransposeReduced_SeqAIJ() - Get symbolic matrix structure of submatrix A[rstart:rend,:], 770390132cSHong Zhang modified from MatGetSymbolicTranspose_SeqAIJ() 780390132cSHong Zhang */ 790390132cSHong Zhang static PetscEvent logkey_matgetsymtransreduced = 0; 800390132cSHong Zhang #undef __FUNCT__ 810390132cSHong Zhang #define __FUNCT__ "MatGetSymbolicTransposeReduced_SeqIJ" 820390132cSHong Zhang PetscErrorCode MatGetSymbolicTransposeReduced_SeqAIJ(Mat A,PetscInt rstart,PetscInt rend,PetscInt *Ati[],PetscInt *Atj[]) 830390132cSHong Zhang { 840390132cSHong Zhang PetscErrorCode ierr; 850390132cSHong Zhang PetscInt i,j,anzj; 860390132cSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data; 87*429d309bSHong Zhang PetscInt an=A->N; 880390132cSHong Zhang PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 890390132cSHong Zhang 900390132cSHong Zhang PetscFunctionBegin; 910390132cSHong Zhang 920390132cSHong Zhang ierr = PetscLogInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr); 930390132cSHong Zhang 940390132cSHong Zhang /* Set up timers */ 950390132cSHong Zhang if (!logkey_matgetsymtransreduced) { 960390132cSHong Zhang ierr = PetscLogEventRegister(&logkey_matgetsymtransreduced,"MatGetSymbolicTransposeReduced",MAT_COOKIE);CHKERRQ(ierr); 970390132cSHong Zhang } 980390132cSHong Zhang ierr = PetscLogEventBegin(logkey_matgetsymtransreduced,A,0,0,0);CHKERRQ(ierr); 990390132cSHong Zhang 1000390132cSHong Zhang /* Allocate space for symbolic transpose info and work array */ 1010390132cSHong Zhang ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr); 1020390132cSHong Zhang anzj = ai[rend] - ai[rstart]; 1030390132cSHong Zhang ierr = PetscMalloc((anzj+1)*sizeof(PetscInt),&atj);CHKERRQ(ierr); 1040390132cSHong Zhang ierr = PetscMalloc((an+1)*sizeof(PetscInt),&atfill);CHKERRQ(ierr); 1050390132cSHong Zhang ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1060390132cSHong Zhang 1070390132cSHong Zhang /* Walk through aj and count ## of non-zeros in each row of A^T. */ 1080390132cSHong Zhang /* Note: offset by 1 for fast conversion into csr format. */ 1090390132cSHong Zhang for (i=ai[rstart]; i<ai[rend]; i++) { 1100390132cSHong Zhang ati[aj[i]+1] += 1; 1110390132cSHong Zhang } 1120390132cSHong Zhang /* Form ati for csr format of A^T. */ 1130390132cSHong Zhang for (i=0;i<an;i++) { 1140390132cSHong Zhang ati[i+1] += ati[i]; 1150390132cSHong Zhang } 1160390132cSHong Zhang 1170390132cSHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 1180390132cSHong Zhang ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr); 1190390132cSHong Zhang 1200390132cSHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 1210390132cSHong Zhang aj = aj + ai[rstart]; 1220390132cSHong Zhang for (i=rstart; i<rend; i++) { 1230390132cSHong Zhang anzj = ai[i+1] - ai[i]; 1240390132cSHong Zhang for (j=0;j<anzj;j++) { 1250390132cSHong Zhang atj[atfill[*aj]] = i-rstart; 1260390132cSHong Zhang atfill[*aj++] += 1; 1270390132cSHong Zhang } 1280390132cSHong Zhang } 1290390132cSHong Zhang 1300390132cSHong Zhang /* Clean up temporary space and complete requests. */ 1310390132cSHong Zhang ierr = PetscFree(atfill);CHKERRQ(ierr); 1320390132cSHong Zhang *Ati = ati; 1330390132cSHong Zhang *Atj = atj; 1340390132cSHong Zhang 1350390132cSHong Zhang ierr = PetscLogEventEnd(logkey_matgetsymtransreduced,A,0,0,0);CHKERRQ(ierr); 1360390132cSHong Zhang PetscFunctionReturn(0); 1370390132cSHong Zhang } 13870f19b1fSKris Buschelman 13970f19b1fSKris Buschelman #undef __FUNCT__ 14070f19b1fSKris Buschelman #define __FUNCT__ "MatTranspose_SeqIJ_FAST" 141dfbe8321SBarry Smith PetscErrorCode MatTranspose_SeqAIJ_FAST(Mat A,Mat *B) 142dfbe8321SBarry Smith { 143dfbe8321SBarry Smith PetscErrorCode ierr; 1442e111b49SBarry Smith PetscInt i,j,anzj; 14570f19b1fSKris Buschelman Mat At; 14670f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data,*at; 1472e111b49SBarry Smith PetscInt an=A->N,am=A->M; 1482e111b49SBarry Smith PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 14970f19b1fSKris Buschelman MatScalar *ata,*aa=a->a; 1502e111b49SBarry Smith 15170f19b1fSKris Buschelman PetscFunctionBegin; 15270f19b1fSKris Buschelman 15370f19b1fSKris Buschelman /* Set up timers */ 15470f19b1fSKris Buschelman if (!logkey_mattranspose) { 15570f19b1fSKris Buschelman ierr = PetscLogEventRegister(&logkey_mattranspose,"MatTranspose_SeqAIJ_FAST",MAT_COOKIE);CHKERRQ(ierr); 15670f19b1fSKris Buschelman } 15770f19b1fSKris Buschelman ierr = PetscLogEventBegin(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr); 15870f19b1fSKris Buschelman 15970f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 1602e111b49SBarry Smith ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr); 1612e111b49SBarry Smith ierr = PetscMalloc(ai[am]*sizeof(PetscInt),&atj);CHKERRQ(ierr); 16270f19b1fSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(MatScalar),&ata);CHKERRQ(ierr); 1632e111b49SBarry Smith ierr = PetscMalloc(an*sizeof(PetscInt),&atfill);CHKERRQ(ierr); 1642e111b49SBarry Smith ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr); 16570f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 16670f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 16770f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 16870f19b1fSKris Buschelman ati[aj[i]+1] += 1; 16970f19b1fSKris Buschelman } 17070f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 17170f19b1fSKris Buschelman for (i=0;i<an;i++) { 17270f19b1fSKris Buschelman ati[i+1] += ati[i]; 17370f19b1fSKris Buschelman } 17470f19b1fSKris Buschelman 17570f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 1762e111b49SBarry Smith ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr); 17770f19b1fSKris Buschelman 17870f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 17970f19b1fSKris Buschelman for (i=0;i<am;i++) { 18070f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 18170f19b1fSKris Buschelman for (j=0;j<anzj;j++) { 18270f19b1fSKris Buschelman atj[atfill[*aj]] = i; 18370f19b1fSKris Buschelman ata[atfill[*aj]] = *aa++; 18470f19b1fSKris Buschelman atfill[*aj++] += 1; 18570f19b1fSKris Buschelman } 18670f19b1fSKris Buschelman } 18770f19b1fSKris Buschelman 18870f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 18970f19b1fSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 19070f19b1fSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,an,am,ati,atj,ata,&At);CHKERRQ(ierr); 19170f19b1fSKris Buschelman at = (Mat_SeqAIJ *)(At->data); 19270f19b1fSKris Buschelman at->freedata = PETSC_TRUE; 19370f19b1fSKris Buschelman at->nonew = 0; 19470f19b1fSKris Buschelman if (B) { 19570f19b1fSKris Buschelman *B = At; 19670f19b1fSKris Buschelman } else { 19770f19b1fSKris Buschelman ierr = MatHeaderCopy(A,At); 19870f19b1fSKris Buschelman } 19970f19b1fSKris Buschelman ierr = PetscLogEventEnd(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr); 20070f19b1fSKris Buschelman PetscFunctionReturn(0); 20170f19b1fSKris Buschelman } 20270f19b1fSKris Buschelman 20370f19b1fSKris Buschelman #undef __FUNCT__ 20470f19b1fSKris Buschelman #define __FUNCT__ "MatRestoreSymbolicTranspose_SeqAIJ" 2052e111b49SBarry Smith PetscErrorCode MatRestoreSymbolicTranspose_SeqAIJ(Mat A,PetscInt *ati[],PetscInt *atj[]) 2062e111b49SBarry Smith { 207dfbe8321SBarry Smith PetscErrorCode ierr; 20870f19b1fSKris Buschelman 20970f19b1fSKris Buschelman PetscFunctionBegin; 21070f19b1fSKris Buschelman ierr = PetscLogInfo(A,"Restoring Symbolic Transpose.\n");CHKERRQ(ierr); 21170f19b1fSKris Buschelman ierr = PetscFree(*ati);CHKERRQ(ierr); 21270f19b1fSKris Buschelman ati = PETSC_NULL; 21370f19b1fSKris Buschelman ierr = PetscFree(*atj);CHKERRQ(ierr); 21470f19b1fSKris Buschelman atj = PETSC_NULL; 21570f19b1fSKris Buschelman PetscFunctionReturn(0); 21670f19b1fSKris Buschelman } 21770f19b1fSKris Buschelman 218