xref: /petsc/src/mat/impls/aij/seq/symtranspose.c (revision 65ac4ee061e56e03a843ccf1413bef032a7e938a)
1be1d678aSKris Buschelman 
270f19b1fSKris Buschelman /*
370f19b1fSKris Buschelman   Defines symbolic transpose routines for SeqAIJ matrices.
470f19b1fSKris Buschelman 
570f19b1fSKris Buschelman   Currently Get/Restore only allocates/frees memory for holding the
670f19b1fSKris Buschelman   (i,j) info for the transpose.  Someday, this info could be
770f19b1fSKris Buschelman   maintained so successive calls to Get will not recompute the info.
870f19b1fSKris Buschelman 
9419ecdd9Sandi selinger   Also defined is a faster implementation of MatTranspose for SeqAIJ
10419ecdd9Sandi selinger   matrices which avoids calls to MatSetValues. This routine is the new
11419ecdd9Sandi selinger   standard since it is much faster than MatTranspose_AIJ.
1270f19b1fSKris Buschelman 
1370f19b1fSKris Buschelman */
1470f19b1fSKris Buschelman 
15c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
1670f19b1fSKris Buschelman 
1770f19b1fSKris Buschelman 
182e111b49SBarry Smith PetscErrorCode MatGetSymbolicTranspose_SeqAIJ(Mat A,PetscInt *Ati[],PetscInt *Atj[])
19dfbe8321SBarry Smith {
20dfbe8321SBarry Smith   PetscErrorCode ierr;
212e111b49SBarry Smith   PetscInt       i,j,anzj;
2270f19b1fSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
23d0f46423SBarry Smith   PetscInt       an=A->cmap->N,am=A->rmap->N;
242e111b49SBarry Smith   PetscInt       *ati,*atj,*atfill,*ai=a->i,*aj=a->j;
2570f19b1fSKris Buschelman 
2670f19b1fSKris Buschelman   PetscFunctionBegin;
27ae15b995SBarry Smith   ierr = PetscInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr);
2870f19b1fSKris Buschelman 
2970f19b1fSKris Buschelman   /* Set up timers */
304ebed01fSBarry Smith   ierr = PetscLogEventBegin(MAT_Getsymtranspose,A,0,0,0);CHKERRQ(ierr);
3170f19b1fSKris Buschelman 
3270f19b1fSKris Buschelman   /* Allocate space for symbolic transpose info and work array */
33854ce69bSBarry Smith   ierr = PetscCalloc1(an+1,&ati);CHKERRQ(ierr);
34785e854fSJed Brown   ierr = PetscMalloc1(ai[am],&atj);CHKERRQ(ierr);
35785e854fSJed Brown   ierr = PetscMalloc1(an,&atfill);CHKERRQ(ierr);
3670f19b1fSKris Buschelman 
3770f19b1fSKris Buschelman   /* Walk through aj and count ## of non-zeros in each row of A^T. */
3870f19b1fSKris Buschelman   /* Note: offset by 1 for fast conversion into csr format. */
3970f19b1fSKris Buschelman   for (i=0;i<ai[am];i++) {
4070f19b1fSKris Buschelman     ati[aj[i]+1] += 1;
4170f19b1fSKris Buschelman   }
4270f19b1fSKris Buschelman   /* Form ati for csr format of A^T. */
4370f19b1fSKris Buschelman   for (i=0;i<an;i++) {
4470f19b1fSKris Buschelman     ati[i+1] += ati[i];
4570f19b1fSKris Buschelman   }
4670f19b1fSKris Buschelman 
4770f19b1fSKris Buschelman   /* Copy ati into atfill so we have locations of the next free space in atj */
482e111b49SBarry Smith   ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr);
4970f19b1fSKris Buschelman 
5070f19b1fSKris Buschelman   /* Walk through A row-wise and mark nonzero entries of A^T. */
5170f19b1fSKris Buschelman   for (i=0; i<am; i++) {
5270f19b1fSKris Buschelman     anzj = ai[i+1] - ai[i];
5370f19b1fSKris Buschelman     for (j=0; j<anzj; j++) {
5470f19b1fSKris Buschelman       atj[atfill[*aj]] = i;
5570f19b1fSKris Buschelman       atfill[*aj++]   += 1;
5670f19b1fSKris Buschelman     }
5770f19b1fSKris Buschelman   }
5870f19b1fSKris Buschelman 
5970f19b1fSKris Buschelman   /* Clean up temporary space and complete requests. */
6070f19b1fSKris Buschelman   ierr = PetscFree(atfill);CHKERRQ(ierr);
6170f19b1fSKris Buschelman   *Ati = ati;
6270f19b1fSKris Buschelman   *Atj = atj;
6370f19b1fSKris Buschelman 
644ebed01fSBarry Smith   ierr = PetscLogEventEnd(MAT_Getsymtranspose,A,0,0,0);CHKERRQ(ierr);
6570f19b1fSKris Buschelman   PetscFunctionReturn(0);
6670f19b1fSKris Buschelman }
670390132cSHong Zhang /*
680390132cSHong Zhang   MatGetSymbolicTransposeReduced_SeqAIJ() - Get symbolic matrix structure of submatrix A[rstart:rend,:],
690390132cSHong Zhang      modified from MatGetSymbolicTranspose_SeqAIJ()
700390132cSHong Zhang */
710390132cSHong Zhang PetscErrorCode MatGetSymbolicTransposeReduced_SeqAIJ(Mat A,PetscInt rstart,PetscInt rend,PetscInt *Ati[],PetscInt *Atj[])
720390132cSHong Zhang {
730390132cSHong Zhang   PetscErrorCode ierr;
740390132cSHong Zhang   PetscInt       i,j,anzj;
750390132cSHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
76d0f46423SBarry Smith   PetscInt       an=A->cmap->N;
770390132cSHong Zhang   PetscInt       *ati,*atj,*atfill,*ai=a->i,*aj=a->j;
780390132cSHong Zhang 
790390132cSHong Zhang   PetscFunctionBegin;
80ae15b995SBarry Smith   ierr = PetscInfo(A,"Getting Symbolic Transpose\n");CHKERRQ(ierr);
814ebed01fSBarry Smith   ierr = PetscLogEventBegin(MAT_Getsymtransreduced,A,0,0,0);CHKERRQ(ierr);
820390132cSHong Zhang 
830390132cSHong Zhang   /* Allocate space for symbolic transpose info and work array */
84854ce69bSBarry Smith   ierr = PetscCalloc1(an+1,&ati);CHKERRQ(ierr);
850390132cSHong Zhang   anzj = ai[rend] - ai[rstart];
86854ce69bSBarry Smith   ierr = PetscMalloc1(anzj+1,&atj);CHKERRQ(ierr);
87854ce69bSBarry Smith   ierr = PetscMalloc1(an+1,&atfill);CHKERRQ(ierr);
880390132cSHong Zhang 
890390132cSHong Zhang   /* Walk through aj and count ## of non-zeros in each row of A^T. */
900390132cSHong Zhang   /* Note: offset by 1 for fast conversion into csr format. */
910390132cSHong Zhang   for (i=ai[rstart]; i<ai[rend]; i++) {
920390132cSHong Zhang     ati[aj[i]+1] += 1;
930390132cSHong Zhang   }
940390132cSHong Zhang   /* Form ati for csr format of A^T. */
950390132cSHong Zhang   for (i=0;i<an;i++) {
960390132cSHong Zhang     ati[i+1] += ati[i];
970390132cSHong Zhang   }
980390132cSHong Zhang 
990390132cSHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
1000390132cSHong Zhang   ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr);
1010390132cSHong Zhang 
1020390132cSHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
1030390132cSHong Zhang   aj = aj + ai[rstart];
1040390132cSHong Zhang   for (i=rstart; i<rend; i++) {
1050390132cSHong Zhang     anzj = ai[i+1] - ai[i];
1060390132cSHong Zhang     for (j=0; j<anzj; j++) {
1070390132cSHong Zhang       atj[atfill[*aj]] = i-rstart;
1080390132cSHong Zhang       atfill[*aj++]   += 1;
1090390132cSHong Zhang     }
1100390132cSHong Zhang   }
1110390132cSHong Zhang 
1120390132cSHong Zhang   /* Clean up temporary space and complete requests. */
1130390132cSHong Zhang   ierr = PetscFree(atfill);CHKERRQ(ierr);
1140390132cSHong Zhang   *Ati = ati;
1150390132cSHong Zhang   *Atj = atj;
1160390132cSHong Zhang 
1174ebed01fSBarry Smith   ierr = PetscLogEventEnd(MAT_Getsymtransreduced,A,0,0,0);CHKERRQ(ierr);
1180390132cSHong Zhang   PetscFunctionReturn(0);
1190390132cSHong Zhang }
12070f19b1fSKris Buschelman 
121fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqAIJ_FAST(Mat A,MatReuse reuse,Mat *B)
122dfbe8321SBarry Smith {
123dfbe8321SBarry Smith   PetscErrorCode ierr;
1242e111b49SBarry Smith   PetscInt       i,j,anzj;
12570f19b1fSKris Buschelman   Mat            At;
12670f19b1fSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*at;
127d0f46423SBarry Smith   PetscInt       an=A->cmap->N,am=A->rmap->N;
1282e111b49SBarry Smith   PetscInt       *ati,*atj,*atfill,*ai=a->i,*aj=a->j;
12970f19b1fSKris Buschelman   MatScalar      *ata,*aa=a->a;
1302e111b49SBarry Smith 
13170f19b1fSKris Buschelman   PetscFunctionBegin;
1324ebed01fSBarry Smith   ierr = PetscLogEventBegin(MAT_Transpose_SeqAIJ,A,0,0,0);CHKERRQ(ierr);
13370f19b1fSKris Buschelman 
134cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
13570f19b1fSKris Buschelman     /* Allocate space for symbolic transpose info and work array */
136854ce69bSBarry Smith     ierr = PetscCalloc1(an+1,&ati);CHKERRQ(ierr);
137785e854fSJed Brown     ierr = PetscMalloc1(ai[am],&atj);CHKERRQ(ierr);
138785e854fSJed Brown     ierr = PetscMalloc1(ai[am],&ata);CHKERRQ(ierr);
13970f19b1fSKris Buschelman     /* Walk through aj and count ## of non-zeros in each row of A^T. */
14070f19b1fSKris Buschelman     /* Note: offset by 1 for fast conversion into csr format. */
14170f19b1fSKris Buschelman     for (i=0;i<ai[am];i++) {
14270f19b1fSKris Buschelman       ati[aj[i]+1] += 1;
14370f19b1fSKris Buschelman     }
14470f19b1fSKris Buschelman     /* Form ati for csr format of A^T. */
14570f19b1fSKris Buschelman     for (i=0;i<an;i++) {
14670f19b1fSKris Buschelman       ati[i+1] += ati[i];
14770f19b1fSKris Buschelman     }
148fc4dec0aSBarry Smith   } else {
149fc4dec0aSBarry Smith     Mat_SeqAIJ *sub_B = (Mat_SeqAIJ*) (*B)->data;
150fc4dec0aSBarry Smith     ati = sub_B->i;
151fc4dec0aSBarry Smith     atj = sub_B->j;
152fc4dec0aSBarry Smith     ata = sub_B->a;
153fc4dec0aSBarry Smith     At  = *B;
154fc4dec0aSBarry Smith   }
15570f19b1fSKris Buschelman 
15670f19b1fSKris Buschelman   /* Copy ati into atfill so we have locations of the next free space in atj */
157785e854fSJed Brown   ierr = PetscMalloc1(an,&atfill);CHKERRQ(ierr);
1582e111b49SBarry Smith   ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr);
15970f19b1fSKris Buschelman 
16070f19b1fSKris Buschelman   /* Walk through A row-wise and mark nonzero entries of A^T. */
16170f19b1fSKris Buschelman   for (i=0;i<am;i++) {
16270f19b1fSKris Buschelman     anzj = ai[i+1] - ai[i];
16370f19b1fSKris Buschelman     for (j=0;j<anzj;j++) {
16470f19b1fSKris Buschelman       atj[atfill[*aj]] = i;
16570f19b1fSKris Buschelman       ata[atfill[*aj]] = *aa++;
16670f19b1fSKris Buschelman       atfill[*aj++]   += 1;
16770f19b1fSKris Buschelman     }
16870f19b1fSKris Buschelman   }
16970f19b1fSKris Buschelman 
17070f19b1fSKris Buschelman   /* Clean up temporary space and complete requests. */
17170f19b1fSKris Buschelman   ierr = PetscFree(atfill);CHKERRQ(ierr);
172419ecdd9Sandi selinger   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
173ce94432eSBarry Smith     ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),an,am,ati,atj,ata,&At);CHKERRQ(ierr);
1742205254eSKarl Rupp 
17570f19b1fSKris Buschelman     at          = (Mat_SeqAIJ*)(At->data);
176e6b907acSBarry Smith     at->free_a  = PETSC_TRUE;
177e6b907acSBarry Smith     at->free_ij = PETSC_TRUE;
17870f19b1fSKris Buschelman     at->nonew   = 0;
179*65ac4ee0Sandi selinger     at->maxnz   = ati[an];
180fc4dec0aSBarry Smith   }
181fc4dec0aSBarry Smith 
182cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
18370f19b1fSKris Buschelman     *B = At;
18470f19b1fSKris Buschelman   } else {
18528be2f97SBarry Smith     ierr = MatHeaderMerge(A,&At);CHKERRQ(ierr);
18670f19b1fSKris Buschelman   }
1874ebed01fSBarry Smith   ierr = PetscLogEventEnd(MAT_Transpose_SeqAIJ,A,0,0,0);CHKERRQ(ierr);
18870f19b1fSKris Buschelman   PetscFunctionReturn(0);
18970f19b1fSKris Buschelman }
19070f19b1fSKris Buschelman 
1912e111b49SBarry Smith PetscErrorCode MatRestoreSymbolicTranspose_SeqAIJ(Mat A,PetscInt *ati[],PetscInt *atj[])
1922e111b49SBarry Smith {
193dfbe8321SBarry Smith   PetscErrorCode ierr;
19470f19b1fSKris Buschelman 
19570f19b1fSKris Buschelman   PetscFunctionBegin;
196ae15b995SBarry Smith   ierr = PetscInfo(A,"Restoring Symbolic Transpose.\n");CHKERRQ(ierr);
19770f19b1fSKris Buschelman   ierr = PetscFree(*ati);CHKERRQ(ierr);
19870f19b1fSKris Buschelman   ierr = PetscFree(*atj);CHKERRQ(ierr);
19970f19b1fSKris Buschelman   PetscFunctionReturn(0);
20070f19b1fSKris Buschelman }
201