1be1d678aSKris Buschelman 270f19b1fSKris Buschelman /* 370f19b1fSKris Buschelman Defines symbolic transpose routines for SeqAIJ matrices. 470f19b1fSKris Buschelman 570f19b1fSKris Buschelman Currently Get/Restore only allocates/frees memory for holding the 670f19b1fSKris Buschelman (i,j) info for the transpose. Someday, this info could be 770f19b1fSKris Buschelman maintained so successive calls to Get will not recompute the info. 870f19b1fSKris Buschelman 9419ecdd9Sandi selinger Also defined is a faster implementation of MatTranspose for SeqAIJ 10419ecdd9Sandi selinger matrices which avoids calls to MatSetValues. This routine is the new 11419ecdd9Sandi selinger standard since it is much faster than MatTranspose_AIJ. 1270f19b1fSKris Buschelman 1370f19b1fSKris Buschelman */ 1470f19b1fSKris Buschelman 15c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 1670f19b1fSKris Buschelman 172e111b49SBarry Smith PetscErrorCode MatGetSymbolicTranspose_SeqAIJ(Mat A,PetscInt *Ati[],PetscInt *Atj[]) 18dfbe8321SBarry Smith { 192e111b49SBarry Smith PetscInt i,j,anzj; 2070f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 21d0f46423SBarry Smith PetscInt an=A->cmap->N,am=A->rmap->N; 222e111b49SBarry Smith PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 2370f19b1fSKris Buschelman 2470f19b1fSKris Buschelman PetscFunctionBegin; 25*9566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Getting Symbolic Transpose.\n")); 2670f19b1fSKris Buschelman 2770f19b1fSKris Buschelman /* Set up timers */ 28*9566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Getsymtranspose,A,0,0,0)); 2970f19b1fSKris Buschelman 3070f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 31*9566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(an+1,&ati)); 32*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ai[am],&atj)); 33*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(an,&atfill)); 3470f19b1fSKris Buschelman 3570f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 3670f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 3770f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 3870f19b1fSKris Buschelman ati[aj[i]+1] += 1; 3970f19b1fSKris Buschelman } 4070f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 4170f19b1fSKris Buschelman for (i=0;i<an;i++) { 4270f19b1fSKris Buschelman ati[i+1] += ati[i]; 4370f19b1fSKris Buschelman } 4470f19b1fSKris Buschelman 4570f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 46*9566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(atfill,ati,an)); 4770f19b1fSKris Buschelman 4870f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 4970f19b1fSKris Buschelman for (i=0; i<am; i++) { 5070f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 5170f19b1fSKris Buschelman for (j=0; j<anzj; j++) { 5270f19b1fSKris Buschelman atj[atfill[*aj]] = i; 5370f19b1fSKris Buschelman atfill[*aj++] += 1; 5470f19b1fSKris Buschelman } 5570f19b1fSKris Buschelman } 5670f19b1fSKris Buschelman 5770f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 58*9566063dSJacob Faibussowitsch PetscCall(PetscFree(atfill)); 5970f19b1fSKris Buschelman *Ati = ati; 6070f19b1fSKris Buschelman *Atj = atj; 6170f19b1fSKris Buschelman 62*9566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Getsymtranspose,A,0,0,0)); 6370f19b1fSKris Buschelman PetscFunctionReturn(0); 6470f19b1fSKris Buschelman } 650390132cSHong Zhang /* 660390132cSHong Zhang MatGetSymbolicTransposeReduced_SeqAIJ() - Get symbolic matrix structure of submatrix A[rstart:rend,:], 670390132cSHong Zhang modified from MatGetSymbolicTranspose_SeqAIJ() 680390132cSHong Zhang */ 690390132cSHong Zhang PetscErrorCode MatGetSymbolicTransposeReduced_SeqAIJ(Mat A,PetscInt rstart,PetscInt rend,PetscInt *Ati[],PetscInt *Atj[]) 700390132cSHong Zhang { 710390132cSHong Zhang PetscInt i,j,anzj; 720390132cSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 73d0f46423SBarry Smith PetscInt an=A->cmap->N; 740390132cSHong Zhang PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 750390132cSHong Zhang 760390132cSHong Zhang PetscFunctionBegin; 77*9566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Getting Symbolic Transpose\n")); 78*9566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Getsymtransreduced,A,0,0,0)); 790390132cSHong Zhang 800390132cSHong Zhang /* Allocate space for symbolic transpose info and work array */ 81*9566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(an+1,&ati)); 820390132cSHong Zhang anzj = ai[rend] - ai[rstart]; 83*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(anzj+1,&atj)); 84*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(an+1,&atfill)); 850390132cSHong Zhang 860390132cSHong Zhang /* Walk through aj and count ## of non-zeros in each row of A^T. */ 870390132cSHong Zhang /* Note: offset by 1 for fast conversion into csr format. */ 880390132cSHong Zhang for (i=ai[rstart]; i<ai[rend]; i++) { 890390132cSHong Zhang ati[aj[i]+1] += 1; 900390132cSHong Zhang } 910390132cSHong Zhang /* Form ati for csr format of A^T. */ 920390132cSHong Zhang for (i=0;i<an;i++) { 930390132cSHong Zhang ati[i+1] += ati[i]; 940390132cSHong Zhang } 950390132cSHong Zhang 960390132cSHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 97*9566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(atfill,ati,an)); 980390132cSHong Zhang 990390132cSHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 1000390132cSHong Zhang aj = aj + ai[rstart]; 1010390132cSHong Zhang for (i=rstart; i<rend; i++) { 1020390132cSHong Zhang anzj = ai[i+1] - ai[i]; 1030390132cSHong Zhang for (j=0; j<anzj; j++) { 1040390132cSHong Zhang atj[atfill[*aj]] = i-rstart; 1050390132cSHong Zhang atfill[*aj++] += 1; 1060390132cSHong Zhang } 1070390132cSHong Zhang } 1080390132cSHong Zhang 1090390132cSHong Zhang /* Clean up temporary space and complete requests. */ 110*9566063dSJacob Faibussowitsch PetscCall(PetscFree(atfill)); 1110390132cSHong Zhang *Ati = ati; 1120390132cSHong Zhang *Atj = atj; 1130390132cSHong Zhang 114*9566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Getsymtransreduced,A,0,0,0)); 1150390132cSHong Zhang PetscFunctionReturn(0); 1160390132cSHong Zhang } 11770f19b1fSKris Buschelman 11891e9d3e2SHong Zhang PetscErrorCode MatTranspose_SeqAIJ(Mat A,MatReuse reuse,Mat *B) 119dfbe8321SBarry Smith { 1202e111b49SBarry Smith PetscInt i,j,anzj; 12170f19b1fSKris Buschelman Mat At; 12270f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*at; 123d0f46423SBarry Smith PetscInt an=A->cmap->N,am=A->rmap->N; 1242e111b49SBarry Smith PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 125ce496241SStefano Zampini MatScalar *ata; 126ce496241SStefano Zampini const MatScalar *aa,*av; 1272e111b49SBarry Smith 12870f19b1fSKris Buschelman PetscFunctionBegin; 129*9566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A,&av)); 130ce496241SStefano Zampini aa = av; 131cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 13270f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 133*9566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(an+1,&ati)); 134*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ai[am],&atj)); 135*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ai[am],&ata)); 13670f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 13770f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 13870f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 139e1c026e1SHong Zhang ati[aj[i]+1] += 1; /* count ## of non-zeros for row aj[i] of A^T */ 14070f19b1fSKris Buschelman } 14170f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 14270f19b1fSKris Buschelman for (i=0;i<an;i++) { 14370f19b1fSKris Buschelman ati[i+1] += ati[i]; 14470f19b1fSKris Buschelman } 145317cfdd9SHong Zhang } else { /* This segment is called by MatTranspose_MPIAIJ(...,MAT_INITIAL_MATRIX,..) directly! */ 146fc4dec0aSBarry Smith Mat_SeqAIJ *sub_B = (Mat_SeqAIJ*) (*B)->data; 147fc4dec0aSBarry Smith ati = sub_B->i; 148fc4dec0aSBarry Smith atj = sub_B->j; 149fc4dec0aSBarry Smith ata = sub_B->a; 150fc4dec0aSBarry Smith At = *B; 151fc4dec0aSBarry Smith } 15270f19b1fSKris Buschelman 15370f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 154*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(an,&atfill)); 155*9566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(atfill,ati,an)); 15670f19b1fSKris Buschelman 15770f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 15870f19b1fSKris Buschelman for (i=0;i<am;i++) { 15970f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 16070f19b1fSKris Buschelman for (j=0;j<anzj;j++) { 16170f19b1fSKris Buschelman atj[atfill[*aj]] = i; 16270f19b1fSKris Buschelman ata[atfill[*aj]] = *aa++; 16370f19b1fSKris Buschelman atfill[*aj++] += 1; 16470f19b1fSKris Buschelman } 16570f19b1fSKris Buschelman } 166*9566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A,&av)); 16770f19b1fSKris Buschelman 16870f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 169*9566063dSJacob Faibussowitsch PetscCall(PetscFree(atfill)); 170419ecdd9Sandi selinger if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 171*9566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),an,am,ati,atj,ata,&At)); 172*9566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1732205254eSKarl Rupp 17470f19b1fSKris Buschelman at = (Mat_SeqAIJ*)(At->data); 175e6b907acSBarry Smith at->free_a = PETSC_TRUE; 176e6b907acSBarry Smith at->free_ij = PETSC_TRUE; 17770f19b1fSKris Buschelman at->nonew = 0; 17865ac4ee0Sandi selinger at->maxnz = ati[an]; 1795e8f4152SKarl Rupp 180*9566063dSJacob Faibussowitsch PetscCall(MatSetType(At,((PetscObject)A)->type_name)); 181fc4dec0aSBarry Smith } 182fc4dec0aSBarry Smith 183cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 18470f19b1fSKris Buschelman *B = At; 18570f19b1fSKris Buschelman } else { 186*9566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A,&At)); 18770f19b1fSKris Buschelman } 18870f19b1fSKris Buschelman PetscFunctionReturn(0); 18970f19b1fSKris Buschelman } 19070f19b1fSKris Buschelman 1912e111b49SBarry Smith PetscErrorCode MatRestoreSymbolicTranspose_SeqAIJ(Mat A,PetscInt *ati[],PetscInt *atj[]) 1922e111b49SBarry Smith { 19370f19b1fSKris Buschelman PetscFunctionBegin; 194*9566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Restoring Symbolic Transpose.\n")); 195*9566063dSJacob Faibussowitsch PetscCall(PetscFree(*ati)); 196*9566063dSJacob Faibussowitsch PetscCall(PetscFree(*atj)); 19770f19b1fSKris Buschelman PetscFunctionReturn(0); 19870f19b1fSKris Buschelman } 199