1be1d678aSKris Buschelman 270f19b1fSKris Buschelman /* 370f19b1fSKris Buschelman Defines symbolic transpose routines for SeqAIJ matrices. 470f19b1fSKris Buschelman 570f19b1fSKris Buschelman Currently Get/Restore only allocates/frees memory for holding the 670f19b1fSKris Buschelman (i,j) info for the transpose. Someday, this info could be 770f19b1fSKris Buschelman maintained so successive calls to Get will not recompute the info. 870f19b1fSKris Buschelman 970f19b1fSKris Buschelman Also defined is a "faster" implementation of MatTranspose for SeqAIJ 1070f19b1fSKris Buschelman matrices which avoids calls to MatSetValues. This routine has not 1170f19b1fSKris Buschelman been adopted as the standard yet as it is somewhat untested. 1270f19b1fSKris Buschelman 1370f19b1fSKris Buschelman */ 1470f19b1fSKris Buschelman 15c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 1670f19b1fSKris Buschelman 1770f19b1fSKris Buschelman 1870f19b1fSKris Buschelman #undef __FUNCT__ 1953c77d0aSJed Brown #define __FUNCT__ "MatGetSymbolicTranspose_SeqAIJ" 202e111b49SBarry Smith PetscErrorCode MatGetSymbolicTranspose_SeqAIJ(Mat A,PetscInt *Ati[],PetscInt *Atj[]) 21dfbe8321SBarry Smith { 22dfbe8321SBarry Smith PetscErrorCode ierr; 232e111b49SBarry Smith PetscInt i,j,anzj; 2470f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 25d0f46423SBarry Smith PetscInt an=A->cmap->N,am=A->rmap->N; 262e111b49SBarry Smith PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 2770f19b1fSKris Buschelman 2870f19b1fSKris Buschelman PetscFunctionBegin; 29ae15b995SBarry Smith ierr = PetscInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr); 3070f19b1fSKris Buschelman 3170f19b1fSKris Buschelman /* Set up timers */ 324ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_Getsymtranspose,A,0,0,0);CHKERRQ(ierr); 3370f19b1fSKris Buschelman 3470f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 352e111b49SBarry Smith ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr); 362e111b49SBarry Smith ierr = PetscMalloc(ai[am]*sizeof(PetscInt),&atj);CHKERRQ(ierr); 372e111b49SBarry Smith ierr = PetscMalloc(an*sizeof(PetscInt),&atfill);CHKERRQ(ierr); 382e111b49SBarry Smith ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr); 3970f19b1fSKris Buschelman 4070f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 4170f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 4270f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 4370f19b1fSKris Buschelman ati[aj[i]+1] += 1; 4470f19b1fSKris Buschelman } 4570f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 4670f19b1fSKris Buschelman for (i=0;i<an;i++) { 4770f19b1fSKris Buschelman ati[i+1] += ati[i]; 4870f19b1fSKris Buschelman } 4970f19b1fSKris Buschelman 5070f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 512e111b49SBarry Smith ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr); 5270f19b1fSKris Buschelman 5370f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 5470f19b1fSKris Buschelman for (i=0; i<am; i++) { 5570f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 5670f19b1fSKris Buschelman for (j=0; j<anzj; j++) { 5770f19b1fSKris Buschelman atj[atfill[*aj]] = i; 5870f19b1fSKris Buschelman atfill[*aj++] += 1; 5970f19b1fSKris Buschelman } 6070f19b1fSKris Buschelman } 6170f19b1fSKris Buschelman 6270f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 6370f19b1fSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 6470f19b1fSKris Buschelman *Ati = ati; 6570f19b1fSKris Buschelman *Atj = atj; 6670f19b1fSKris Buschelman 674ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_Getsymtranspose,A,0,0,0);CHKERRQ(ierr); 6870f19b1fSKris Buschelman PetscFunctionReturn(0); 6970f19b1fSKris Buschelman } 700390132cSHong Zhang /* 710390132cSHong Zhang MatGetSymbolicTransposeReduced_SeqAIJ() - Get symbolic matrix structure of submatrix A[rstart:rend,:], 720390132cSHong Zhang modified from MatGetSymbolicTranspose_SeqAIJ() 730390132cSHong Zhang */ 740390132cSHong Zhang #undef __FUNCT__ 7553c77d0aSJed Brown #define __FUNCT__ "MatGetSymbolicTransposeReduced_SeqAIJ" 760390132cSHong Zhang PetscErrorCode MatGetSymbolicTransposeReduced_SeqAIJ(Mat A,PetscInt rstart,PetscInt rend,PetscInt *Ati[],PetscInt *Atj[]) 770390132cSHong Zhang { 780390132cSHong Zhang PetscErrorCode ierr; 790390132cSHong Zhang PetscInt i,j,anzj; 800390132cSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 81d0f46423SBarry Smith PetscInt an=A->cmap->N; 820390132cSHong Zhang PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 830390132cSHong Zhang 840390132cSHong Zhang PetscFunctionBegin; 85ae15b995SBarry Smith ierr = PetscInfo(A,"Getting Symbolic Transpose\n");CHKERRQ(ierr); 864ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_Getsymtransreduced,A,0,0,0);CHKERRQ(ierr); 870390132cSHong Zhang 880390132cSHong Zhang /* Allocate space for symbolic transpose info and work array */ 890390132cSHong Zhang ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr); 900390132cSHong Zhang anzj = ai[rend] - ai[rstart]; 910390132cSHong Zhang ierr = PetscMalloc((anzj+1)*sizeof(PetscInt),&atj);CHKERRQ(ierr); 920390132cSHong Zhang ierr = PetscMalloc((an+1)*sizeof(PetscInt),&atfill);CHKERRQ(ierr); 930390132cSHong Zhang ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr); 940390132cSHong Zhang 950390132cSHong Zhang /* Walk through aj and count ## of non-zeros in each row of A^T. */ 960390132cSHong Zhang /* Note: offset by 1 for fast conversion into csr format. */ 970390132cSHong Zhang for (i=ai[rstart]; i<ai[rend]; i++) { 980390132cSHong Zhang ati[aj[i]+1] += 1; 990390132cSHong Zhang } 1000390132cSHong Zhang /* Form ati for csr format of A^T. */ 1010390132cSHong Zhang for (i=0;i<an;i++) { 1020390132cSHong Zhang ati[i+1] += ati[i]; 1030390132cSHong Zhang } 1040390132cSHong Zhang 1050390132cSHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 1060390132cSHong Zhang ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr); 1070390132cSHong Zhang 1080390132cSHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 1090390132cSHong Zhang aj = aj + ai[rstart]; 1100390132cSHong Zhang for (i=rstart; i<rend; i++) { 1110390132cSHong Zhang anzj = ai[i+1] - ai[i]; 1120390132cSHong Zhang for (j=0; j<anzj; j++) { 1130390132cSHong Zhang atj[atfill[*aj]] = i-rstart; 1140390132cSHong Zhang atfill[*aj++] += 1; 1150390132cSHong Zhang } 1160390132cSHong Zhang } 1170390132cSHong Zhang 1180390132cSHong Zhang /* Clean up temporary space and complete requests. */ 1190390132cSHong Zhang ierr = PetscFree(atfill);CHKERRQ(ierr); 1200390132cSHong Zhang *Ati = ati; 1210390132cSHong Zhang *Atj = atj; 1220390132cSHong Zhang 1234ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_Getsymtransreduced,A,0,0,0);CHKERRQ(ierr); 1240390132cSHong Zhang PetscFunctionReturn(0); 1250390132cSHong Zhang } 12670f19b1fSKris Buschelman 12770f19b1fSKris Buschelman #undef __FUNCT__ 12853c77d0aSJed Brown #define __FUNCT__ "MatTranspose_SeqAIJ_FAST" 129fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqAIJ_FAST(Mat A,MatReuse reuse,Mat *B) 130dfbe8321SBarry Smith { 131dfbe8321SBarry Smith PetscErrorCode ierr; 1322e111b49SBarry Smith PetscInt i,j,anzj; 13370f19b1fSKris Buschelman Mat At; 13470f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*at; 135d0f46423SBarry Smith PetscInt an=A->cmap->N,am=A->rmap->N; 1362e111b49SBarry Smith PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 13770f19b1fSKris Buschelman MatScalar *ata,*aa=a->a; 1382e111b49SBarry Smith 13970f19b1fSKris Buschelman PetscFunctionBegin; 1404ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_Transpose_SeqAIJ,A,0,0,0);CHKERRQ(ierr); 14170f19b1fSKris Buschelman 142fc4dec0aSBarry Smith if (reuse == MAT_INITIAL_MATRIX || *B == A) { 14370f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 1442e111b49SBarry Smith ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr); 1452e111b49SBarry Smith ierr = PetscMalloc(ai[am]*sizeof(PetscInt),&atj);CHKERRQ(ierr); 14670f19b1fSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(MatScalar),&ata);CHKERRQ(ierr); 1472e111b49SBarry Smith ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr); 14870f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 14970f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 15070f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 15170f19b1fSKris Buschelman ati[aj[i]+1] += 1; 15270f19b1fSKris Buschelman } 15370f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 15470f19b1fSKris Buschelman for (i=0;i<an;i++) { 15570f19b1fSKris Buschelman ati[i+1] += ati[i]; 15670f19b1fSKris Buschelman } 157fc4dec0aSBarry Smith } else { 158fc4dec0aSBarry Smith Mat_SeqAIJ *sub_B = (Mat_SeqAIJ*) (*B)->data; 159fc4dec0aSBarry Smith ati = sub_B->i; 160fc4dec0aSBarry Smith atj = sub_B->j; 161fc4dec0aSBarry Smith ata = sub_B->a; 162fc4dec0aSBarry Smith At = *B; 163fc4dec0aSBarry Smith } 16470f19b1fSKris Buschelman 16570f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 166fc4dec0aSBarry Smith ierr = PetscMalloc(an*sizeof(PetscInt),&atfill);CHKERRQ(ierr); 1672e111b49SBarry Smith ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr); 16870f19b1fSKris Buschelman 16970f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 17070f19b1fSKris Buschelman for (i=0;i<am;i++) { 17170f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 17270f19b1fSKris Buschelman for (j=0;j<anzj;j++) { 17370f19b1fSKris Buschelman atj[atfill[*aj]] = i; 17470f19b1fSKris Buschelman ata[atfill[*aj]] = *aa++; 17570f19b1fSKris Buschelman atfill[*aj++] += 1; 17670f19b1fSKris Buschelman } 17770f19b1fSKris Buschelman } 17870f19b1fSKris Buschelman 17970f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 18070f19b1fSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 181fc4dec0aSBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 1827adad957SLisandro Dalcin ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,an,am,ati,atj,ata,&At);CHKERRQ(ierr); 183*2205254eSKarl Rupp 18470f19b1fSKris Buschelman at = (Mat_SeqAIJ*)(At->data); 185e6b907acSBarry Smith at->free_a = PETSC_TRUE; 186e6b907acSBarry Smith at->free_ij = PETSC_TRUE; 18770f19b1fSKris Buschelman at->nonew = 0; 188fc4dec0aSBarry Smith } 189fc4dec0aSBarry Smith 190815cbec1SBarry Smith if (reuse == MAT_INITIAL_MATRIX || *B != A) { 19170f19b1fSKris Buschelman *B = At; 19270f19b1fSKris Buschelman } else { 19322d28d08SBarry Smith ierr = MatHeaderMerge(A,At);CHKERRQ(ierr); 19470f19b1fSKris Buschelman } 1954ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_Transpose_SeqAIJ,A,0,0,0);CHKERRQ(ierr); 19670f19b1fSKris Buschelman PetscFunctionReturn(0); 19770f19b1fSKris Buschelman } 19870f19b1fSKris Buschelman 19970f19b1fSKris Buschelman #undef __FUNCT__ 20070f19b1fSKris Buschelman #define __FUNCT__ "MatRestoreSymbolicTranspose_SeqAIJ" 2012e111b49SBarry Smith PetscErrorCode MatRestoreSymbolicTranspose_SeqAIJ(Mat A,PetscInt *ati[],PetscInt *atj[]) 2022e111b49SBarry Smith { 203dfbe8321SBarry Smith PetscErrorCode ierr; 20470f19b1fSKris Buschelman 20570f19b1fSKris Buschelman PetscFunctionBegin; 206ae15b995SBarry Smith ierr = PetscInfo(A,"Restoring Symbolic Transpose.\n");CHKERRQ(ierr); 20770f19b1fSKris Buschelman ierr = PetscFree(*ati);CHKERRQ(ierr); 20870f19b1fSKris Buschelman ierr = PetscFree(*atj);CHKERRQ(ierr); 20970f19b1fSKris Buschelman PetscFunctionReturn(0); 21070f19b1fSKris Buschelman } 21170f19b1fSKris Buschelman 212