xref: /petsc/src/mat/impls/aij/seq/symtranspose.c (revision be1d678a52e6eff2808b2fa31ae986cdbf03c9fe)
1*be1d678aSKris Buschelman #define PETSCMAT_DLL
2*be1d678aSKris Buschelman 
370f19b1fSKris Buschelman /*
470f19b1fSKris Buschelman   Defines symbolic transpose routines for SeqAIJ matrices.
570f19b1fSKris Buschelman 
670f19b1fSKris Buschelman   Currently Get/Restore only allocates/frees memory for holding the
770f19b1fSKris Buschelman   (i,j) info for the transpose.  Someday, this info could be
870f19b1fSKris Buschelman   maintained so successive calls to Get will not recompute the info.
970f19b1fSKris Buschelman 
1070f19b1fSKris Buschelman   Also defined is a "faster" implementation of MatTranspose for SeqAIJ
1170f19b1fSKris Buschelman   matrices which avoids calls to MatSetValues.  This routine has not
1270f19b1fSKris Buschelman   been adopted as the standard yet as it is somewhat untested.
1370f19b1fSKris Buschelman 
1470f19b1fSKris Buschelman */
1570f19b1fSKris Buschelman 
1670f19b1fSKris Buschelman #include "src/mat/impls/aij/seq/aij.h"
1770f19b1fSKris Buschelman 
186849ba73SBarry Smith static PetscEvent logkey_matgetsymtranspose    = 0;
196849ba73SBarry Smith static PetscEvent logkey_mattranspose          = 0;
2070f19b1fSKris Buschelman 
2170f19b1fSKris Buschelman #undef __FUNCT__
2270f19b1fSKris Buschelman #define __FUNCT__ "MatGetSymbolicTranspose_SeqIJ"
232e111b49SBarry Smith PetscErrorCode MatGetSymbolicTranspose_SeqAIJ(Mat A,PetscInt *Ati[],PetscInt *Atj[])
24dfbe8321SBarry Smith {
25dfbe8321SBarry Smith   PetscErrorCode ierr;
262e111b49SBarry Smith   PetscInt       i,j,anzj;
2770f19b1fSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)A->data;
282e111b49SBarry Smith   PetscInt       an=A->N,am=A->M;
292e111b49SBarry Smith   PetscInt       *ati,*atj,*atfill,*ai=a->i,*aj=a->j;
3070f19b1fSKris Buschelman 
3170f19b1fSKris Buschelman   PetscFunctionBegin;
3270f19b1fSKris Buschelman 
3363ba0a88SBarry Smith   ierr = PetscLogInfo((A,"MatGetSymbolicTranspose_SeqAIJ:Getting Symbolic Transpose.\n"));CHKERRQ(ierr);
3470f19b1fSKris Buschelman 
3570f19b1fSKris Buschelman   /* Set up timers */
3670f19b1fSKris Buschelman   if (!logkey_matgetsymtranspose) {
3770f19b1fSKris Buschelman     ierr = PetscLogEventRegister(&logkey_matgetsymtranspose,"MatGetSymbolicTranspose",MAT_COOKIE);CHKERRQ(ierr);
3870f19b1fSKris Buschelman   }
3970f19b1fSKris Buschelman   ierr = PetscLogEventBegin(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr);
4070f19b1fSKris Buschelman 
4170f19b1fSKris Buschelman   /* Allocate space for symbolic transpose info and work array */
422e111b49SBarry Smith   ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr);
432e111b49SBarry Smith   ierr = PetscMalloc(ai[am]*sizeof(PetscInt),&atj);CHKERRQ(ierr);
442e111b49SBarry Smith   ierr = PetscMalloc(an*sizeof(PetscInt),&atfill);CHKERRQ(ierr);
452e111b49SBarry Smith   ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr);
4670f19b1fSKris Buschelman 
4770f19b1fSKris Buschelman   /* Walk through aj and count ## of non-zeros in each row of A^T. */
4870f19b1fSKris Buschelman   /* Note: offset by 1 for fast conversion into csr format. */
4970f19b1fSKris Buschelman   for (i=0;i<ai[am];i++) {
5070f19b1fSKris Buschelman     ati[aj[i]+1] += 1;
5170f19b1fSKris Buschelman   }
5270f19b1fSKris Buschelman   /* Form ati for csr format of A^T. */
5370f19b1fSKris Buschelman   for (i=0;i<an;i++) {
5470f19b1fSKris Buschelman     ati[i+1] += ati[i];
5570f19b1fSKris Buschelman   }
5670f19b1fSKris Buschelman 
5770f19b1fSKris Buschelman   /* Copy ati into atfill so we have locations of the next free space in atj */
582e111b49SBarry Smith   ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr);
5970f19b1fSKris Buschelman 
6070f19b1fSKris Buschelman   /* Walk through A row-wise and mark nonzero entries of A^T. */
6170f19b1fSKris Buschelman   for (i=0;i<am;i++) {
6270f19b1fSKris Buschelman     anzj = ai[i+1] - ai[i];
6370f19b1fSKris Buschelman     for (j=0;j<anzj;j++) {
6470f19b1fSKris Buschelman       atj[atfill[*aj]] = i;
6570f19b1fSKris Buschelman       atfill[*aj++]   += 1;
6670f19b1fSKris Buschelman     }
6770f19b1fSKris Buschelman   }
6870f19b1fSKris Buschelman 
6970f19b1fSKris Buschelman   /* Clean up temporary space and complete requests. */
7070f19b1fSKris Buschelman   ierr = PetscFree(atfill);CHKERRQ(ierr);
7170f19b1fSKris Buschelman   *Ati = ati;
7270f19b1fSKris Buschelman   *Atj = atj;
7370f19b1fSKris Buschelman 
7470f19b1fSKris Buschelman   ierr = PetscLogEventEnd(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr);
7570f19b1fSKris Buschelman   PetscFunctionReturn(0);
7670f19b1fSKris Buschelman }
770390132cSHong Zhang /*
780390132cSHong Zhang   MatGetSymbolicTransposeReduced_SeqAIJ() - Get symbolic matrix structure of submatrix A[rstart:rend,:],
790390132cSHong Zhang      modified from MatGetSymbolicTranspose_SeqAIJ()
800390132cSHong Zhang */
810390132cSHong Zhang static PetscEvent logkey_matgetsymtransreduced = 0;
820390132cSHong Zhang #undef __FUNCT__
830390132cSHong Zhang #define __FUNCT__ "MatGetSymbolicTransposeReduced_SeqIJ"
840390132cSHong Zhang PetscErrorCode MatGetSymbolicTransposeReduced_SeqAIJ(Mat A,PetscInt rstart,PetscInt rend,PetscInt *Ati[],PetscInt *Atj[])
850390132cSHong Zhang {
860390132cSHong Zhang   PetscErrorCode ierr;
870390132cSHong Zhang   PetscInt       i,j,anzj;
880390132cSHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)A->data;
89429d309bSHong Zhang   PetscInt       an=A->N;
900390132cSHong Zhang   PetscInt       *ati,*atj,*atfill,*ai=a->i,*aj=a->j;
910390132cSHong Zhang 
920390132cSHong Zhang   PetscFunctionBegin;
9363ba0a88SBarry Smith   ierr = PetscLogInfo((A,"MatGetSymbolicTransposeReduced_SeqAIJ:Getting Symbolic Transpose.\n"));CHKERRQ(ierr);
940390132cSHong Zhang 
950390132cSHong Zhang   /* Set up timers */
960390132cSHong Zhang   if (!logkey_matgetsymtransreduced) {
970390132cSHong Zhang     ierr = PetscLogEventRegister(&logkey_matgetsymtransreduced,"MatGetSymbolicTransposeReduced",MAT_COOKIE);CHKERRQ(ierr);
980390132cSHong Zhang   }
990390132cSHong Zhang   ierr = PetscLogEventBegin(logkey_matgetsymtransreduced,A,0,0,0);CHKERRQ(ierr);
1000390132cSHong Zhang 
1010390132cSHong Zhang   /* Allocate space for symbolic transpose info and work array */
1020390132cSHong Zhang   ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr);
1030390132cSHong Zhang   anzj = ai[rend] - ai[rstart];
1040390132cSHong Zhang   ierr = PetscMalloc((anzj+1)*sizeof(PetscInt),&atj);CHKERRQ(ierr);
1050390132cSHong Zhang   ierr = PetscMalloc((an+1)*sizeof(PetscInt),&atfill);CHKERRQ(ierr);
1060390132cSHong Zhang   ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr);
1070390132cSHong Zhang 
1080390132cSHong Zhang   /* Walk through aj and count ## of non-zeros in each row of A^T. */
1090390132cSHong Zhang   /* Note: offset by 1 for fast conversion into csr format. */
1100390132cSHong Zhang   for (i=ai[rstart]; i<ai[rend]; i++) {
1110390132cSHong Zhang     ati[aj[i]+1] += 1;
1120390132cSHong Zhang   }
1130390132cSHong Zhang   /* Form ati for csr format of A^T. */
1140390132cSHong Zhang   for (i=0;i<an;i++) {
1150390132cSHong Zhang     ati[i+1] += ati[i];
1160390132cSHong Zhang   }
1170390132cSHong Zhang 
1180390132cSHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
1190390132cSHong Zhang   ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr);
1200390132cSHong Zhang 
1210390132cSHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
1220390132cSHong Zhang   aj = aj + ai[rstart];
1230390132cSHong Zhang   for (i=rstart; i<rend; i++) {
1240390132cSHong Zhang     anzj = ai[i+1] - ai[i];
1250390132cSHong Zhang     for (j=0;j<anzj;j++) {
1260390132cSHong Zhang       atj[atfill[*aj]] = i-rstart;
1270390132cSHong Zhang       atfill[*aj++]   += 1;
1280390132cSHong Zhang     }
1290390132cSHong Zhang   }
1300390132cSHong Zhang 
1310390132cSHong Zhang   /* Clean up temporary space and complete requests. */
1320390132cSHong Zhang   ierr = PetscFree(atfill);CHKERRQ(ierr);
1330390132cSHong Zhang   *Ati = ati;
1340390132cSHong Zhang   *Atj = atj;
1350390132cSHong Zhang 
1360390132cSHong Zhang   ierr = PetscLogEventEnd(logkey_matgetsymtransreduced,A,0,0,0);CHKERRQ(ierr);
1370390132cSHong Zhang   PetscFunctionReturn(0);
1380390132cSHong Zhang }
13970f19b1fSKris Buschelman 
14070f19b1fSKris Buschelman #undef __FUNCT__
14170f19b1fSKris Buschelman #define __FUNCT__ "MatTranspose_SeqIJ_FAST"
142dfbe8321SBarry Smith PetscErrorCode MatTranspose_SeqAIJ_FAST(Mat A,Mat *B)
143dfbe8321SBarry Smith {
144dfbe8321SBarry Smith   PetscErrorCode ierr;
1452e111b49SBarry Smith   PetscInt       i,j,anzj;
14670f19b1fSKris Buschelman   Mat            At;
14770f19b1fSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)A->data,*at;
1482e111b49SBarry Smith   PetscInt       an=A->N,am=A->M;
1492e111b49SBarry Smith   PetscInt       *ati,*atj,*atfill,*ai=a->i,*aj=a->j;
15070f19b1fSKris Buschelman   MatScalar      *ata,*aa=a->a;
1512e111b49SBarry Smith 
15270f19b1fSKris Buschelman   PetscFunctionBegin;
15370f19b1fSKris Buschelman 
15470f19b1fSKris Buschelman   /* Set up timers */
15570f19b1fSKris Buschelman   if (!logkey_mattranspose) {
15670f19b1fSKris Buschelman     ierr = PetscLogEventRegister(&logkey_mattranspose,"MatTranspose_SeqAIJ_FAST",MAT_COOKIE);CHKERRQ(ierr);
15770f19b1fSKris Buschelman   }
15870f19b1fSKris Buschelman   ierr = PetscLogEventBegin(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr);
15970f19b1fSKris Buschelman 
16070f19b1fSKris Buschelman   /* Allocate space for symbolic transpose info and work array */
1612e111b49SBarry Smith   ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr);
1622e111b49SBarry Smith   ierr = PetscMalloc(ai[am]*sizeof(PetscInt),&atj);CHKERRQ(ierr);
16370f19b1fSKris Buschelman   ierr = PetscMalloc(ai[am]*sizeof(MatScalar),&ata);CHKERRQ(ierr);
1642e111b49SBarry Smith   ierr = PetscMalloc(an*sizeof(PetscInt),&atfill);CHKERRQ(ierr);
1652e111b49SBarry Smith   ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr);
16670f19b1fSKris Buschelman   /* Walk through aj and count ## of non-zeros in each row of A^T. */
16770f19b1fSKris Buschelman   /* Note: offset by 1 for fast conversion into csr format. */
16870f19b1fSKris Buschelman   for (i=0;i<ai[am];i++) {
16970f19b1fSKris Buschelman     ati[aj[i]+1] += 1;
17070f19b1fSKris Buschelman   }
17170f19b1fSKris Buschelman   /* Form ati for csr format of A^T. */
17270f19b1fSKris Buschelman   for (i=0;i<an;i++) {
17370f19b1fSKris Buschelman     ati[i+1] += ati[i];
17470f19b1fSKris Buschelman   }
17570f19b1fSKris Buschelman 
17670f19b1fSKris Buschelman   /* Copy ati into atfill so we have locations of the next free space in atj */
1772e111b49SBarry Smith   ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr);
17870f19b1fSKris Buschelman 
17970f19b1fSKris Buschelman   /* Walk through A row-wise and mark nonzero entries of A^T. */
18070f19b1fSKris Buschelman   for (i=0;i<am;i++) {
18170f19b1fSKris Buschelman     anzj = ai[i+1] - ai[i];
18270f19b1fSKris Buschelman     for (j=0;j<anzj;j++) {
18370f19b1fSKris Buschelman       atj[atfill[*aj]] = i;
18470f19b1fSKris Buschelman       ata[atfill[*aj]] = *aa++;
18570f19b1fSKris Buschelman       atfill[*aj++]   += 1;
18670f19b1fSKris Buschelman     }
18770f19b1fSKris Buschelman   }
18870f19b1fSKris Buschelman 
18970f19b1fSKris Buschelman   /* Clean up temporary space and complete requests. */
19070f19b1fSKris Buschelman   ierr = PetscFree(atfill);CHKERRQ(ierr);
19170f19b1fSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,an,am,ati,atj,ata,&At);CHKERRQ(ierr);
19270f19b1fSKris Buschelman   at   = (Mat_SeqAIJ *)(At->data);
19370f19b1fSKris Buschelman   at->freedata = PETSC_TRUE;
19470f19b1fSKris Buschelman   at->nonew    = 0;
19570f19b1fSKris Buschelman   if (B) {
19670f19b1fSKris Buschelman     *B = At;
19770f19b1fSKris Buschelman   } else {
19870f19b1fSKris Buschelman     ierr = MatHeaderCopy(A,At);
19970f19b1fSKris Buschelman   }
20070f19b1fSKris Buschelman   ierr = PetscLogEventEnd(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr);
20170f19b1fSKris Buschelman   PetscFunctionReturn(0);
20270f19b1fSKris Buschelman }
20370f19b1fSKris Buschelman 
20470f19b1fSKris Buschelman #undef __FUNCT__
20570f19b1fSKris Buschelman #define __FUNCT__ "MatRestoreSymbolicTranspose_SeqAIJ"
2062e111b49SBarry Smith PetscErrorCode MatRestoreSymbolicTranspose_SeqAIJ(Mat A,PetscInt *ati[],PetscInt *atj[])
2072e111b49SBarry Smith {
208dfbe8321SBarry Smith   PetscErrorCode ierr;
20970f19b1fSKris Buschelman 
21070f19b1fSKris Buschelman   PetscFunctionBegin;
21163ba0a88SBarry Smith   ierr = PetscLogInfo((A,"MatRestoreSymbolicTranspose_SeqAIJ:Restoring Symbolic Transpose.\n"));CHKERRQ(ierr);
21270f19b1fSKris Buschelman   ierr = PetscFree(*ati);CHKERRQ(ierr);
21370f19b1fSKris Buschelman   ati  = PETSC_NULL;
21470f19b1fSKris Buschelman   ierr = PetscFree(*atj);CHKERRQ(ierr);
21570f19b1fSKris Buschelman   atj  = PETSC_NULL;
21670f19b1fSKris Buschelman   PetscFunctionReturn(0);
21770f19b1fSKris Buschelman }
21870f19b1fSKris Buschelman 
219