1be1d678aSKris Buschelman 270f19b1fSKris Buschelman /* 370f19b1fSKris Buschelman Defines symbolic transpose routines for SeqAIJ matrices. 470f19b1fSKris Buschelman 570f19b1fSKris Buschelman Currently Get/Restore only allocates/frees memory for holding the 670f19b1fSKris Buschelman (i,j) info for the transpose. Someday, this info could be 770f19b1fSKris Buschelman maintained so successive calls to Get will not recompute the info. 870f19b1fSKris Buschelman 9419ecdd9Sandi selinger Also defined is a faster implementation of MatTranspose for SeqAIJ 10419ecdd9Sandi selinger matrices which avoids calls to MatSetValues. This routine is the new 11419ecdd9Sandi selinger standard since it is much faster than MatTranspose_AIJ. 1270f19b1fSKris Buschelman 1370f19b1fSKris Buschelman */ 1470f19b1fSKris Buschelman 15c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 1670f19b1fSKris Buschelman 1770f19b1fSKris Buschelman 182e111b49SBarry Smith PetscErrorCode MatGetSymbolicTranspose_SeqAIJ(Mat A,PetscInt *Ati[],PetscInt *Atj[]) 19dfbe8321SBarry Smith { 20dfbe8321SBarry Smith PetscErrorCode ierr; 212e111b49SBarry Smith PetscInt i,j,anzj; 2270f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 23d0f46423SBarry Smith PetscInt an=A->cmap->N,am=A->rmap->N; 242e111b49SBarry Smith PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 2570f19b1fSKris Buschelman 2670f19b1fSKris Buschelman PetscFunctionBegin; 27ae15b995SBarry Smith ierr = PetscInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr); 2870f19b1fSKris Buschelman 2970f19b1fSKris Buschelman /* Set up timers */ 304ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_Getsymtranspose,A,0,0,0);CHKERRQ(ierr); 3170f19b1fSKris Buschelman 3270f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 33854ce69bSBarry Smith ierr = PetscCalloc1(an+1,&ati);CHKERRQ(ierr); 34785e854fSJed Brown ierr = PetscMalloc1(ai[am],&atj);CHKERRQ(ierr); 35785e854fSJed Brown ierr = PetscMalloc1(an,&atfill);CHKERRQ(ierr); 3670f19b1fSKris Buschelman 3770f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 3870f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 3970f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 4070f19b1fSKris Buschelman ati[aj[i]+1] += 1; 4170f19b1fSKris Buschelman } 4270f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 4370f19b1fSKris Buschelman for (i=0;i<an;i++) { 4470f19b1fSKris Buschelman ati[i+1] += ati[i]; 4570f19b1fSKris Buschelman } 4670f19b1fSKris Buschelman 4770f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 48580bdb30SBarry Smith ierr = PetscArraycpy(atfill,ati,an);CHKERRQ(ierr); 4970f19b1fSKris Buschelman 5070f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 5170f19b1fSKris Buschelman for (i=0; i<am; i++) { 5270f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 5370f19b1fSKris Buschelman for (j=0; j<anzj; j++) { 5470f19b1fSKris Buschelman atj[atfill[*aj]] = i; 5570f19b1fSKris Buschelman atfill[*aj++] += 1; 5670f19b1fSKris Buschelman } 5770f19b1fSKris Buschelman } 5870f19b1fSKris Buschelman 5970f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 6070f19b1fSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 6170f19b1fSKris Buschelman *Ati = ati; 6270f19b1fSKris Buschelman *Atj = atj; 6370f19b1fSKris Buschelman 644ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_Getsymtranspose,A,0,0,0);CHKERRQ(ierr); 6570f19b1fSKris Buschelman PetscFunctionReturn(0); 6670f19b1fSKris Buschelman } 670390132cSHong Zhang /* 680390132cSHong Zhang MatGetSymbolicTransposeReduced_SeqAIJ() - Get symbolic matrix structure of submatrix A[rstart:rend,:], 690390132cSHong Zhang modified from MatGetSymbolicTranspose_SeqAIJ() 700390132cSHong Zhang */ 710390132cSHong Zhang PetscErrorCode MatGetSymbolicTransposeReduced_SeqAIJ(Mat A,PetscInt rstart,PetscInt rend,PetscInt *Ati[],PetscInt *Atj[]) 720390132cSHong Zhang { 730390132cSHong Zhang PetscErrorCode ierr; 740390132cSHong Zhang PetscInt i,j,anzj; 750390132cSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 76d0f46423SBarry Smith PetscInt an=A->cmap->N; 770390132cSHong Zhang PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 780390132cSHong Zhang 790390132cSHong Zhang PetscFunctionBegin; 80ae15b995SBarry Smith ierr = PetscInfo(A,"Getting Symbolic Transpose\n");CHKERRQ(ierr); 814ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_Getsymtransreduced,A,0,0,0);CHKERRQ(ierr); 820390132cSHong Zhang 830390132cSHong Zhang /* Allocate space for symbolic transpose info and work array */ 84854ce69bSBarry Smith ierr = PetscCalloc1(an+1,&ati);CHKERRQ(ierr); 850390132cSHong Zhang anzj = ai[rend] - ai[rstart]; 86854ce69bSBarry Smith ierr = PetscMalloc1(anzj+1,&atj);CHKERRQ(ierr); 87854ce69bSBarry Smith ierr = PetscMalloc1(an+1,&atfill);CHKERRQ(ierr); 880390132cSHong Zhang 890390132cSHong Zhang /* Walk through aj and count ## of non-zeros in each row of A^T. */ 900390132cSHong Zhang /* Note: offset by 1 for fast conversion into csr format. */ 910390132cSHong Zhang for (i=ai[rstart]; i<ai[rend]; i++) { 920390132cSHong Zhang ati[aj[i]+1] += 1; 930390132cSHong Zhang } 940390132cSHong Zhang /* Form ati for csr format of A^T. */ 950390132cSHong Zhang for (i=0;i<an;i++) { 960390132cSHong Zhang ati[i+1] += ati[i]; 970390132cSHong Zhang } 980390132cSHong Zhang 990390132cSHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 100580bdb30SBarry Smith ierr = PetscArraycpy(atfill,ati,an);CHKERRQ(ierr); 1010390132cSHong Zhang 1020390132cSHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 1030390132cSHong Zhang aj = aj + ai[rstart]; 1040390132cSHong Zhang for (i=rstart; i<rend; i++) { 1050390132cSHong Zhang anzj = ai[i+1] - ai[i]; 1060390132cSHong Zhang for (j=0; j<anzj; j++) { 1070390132cSHong Zhang atj[atfill[*aj]] = i-rstart; 1080390132cSHong Zhang atfill[*aj++] += 1; 1090390132cSHong Zhang } 1100390132cSHong Zhang } 1110390132cSHong Zhang 1120390132cSHong Zhang /* Clean up temporary space and complete requests. */ 1130390132cSHong Zhang ierr = PetscFree(atfill);CHKERRQ(ierr); 1140390132cSHong Zhang *Ati = ati; 1150390132cSHong Zhang *Atj = atj; 1160390132cSHong Zhang 1174ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_Getsymtransreduced,A,0,0,0);CHKERRQ(ierr); 1180390132cSHong Zhang PetscFunctionReturn(0); 1190390132cSHong Zhang } 12070f19b1fSKris Buschelman 12191e9d3e2SHong Zhang PetscErrorCode MatTranspose_SeqAIJ(Mat A,MatReuse reuse,Mat *B) 122dfbe8321SBarry Smith { 123dfbe8321SBarry Smith PetscErrorCode ierr; 1242e111b49SBarry Smith PetscInt i,j,anzj; 12570f19b1fSKris Buschelman Mat At; 12670f19b1fSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*at; 127d0f46423SBarry Smith PetscInt an=A->cmap->N,am=A->rmap->N; 1282e111b49SBarry Smith PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 129*ce496241SStefano Zampini MatScalar *ata; 130*ce496241SStefano Zampini const MatScalar *aa,*av; 1312e111b49SBarry Smith 13270f19b1fSKris Buschelman PetscFunctionBegin; 133*ce496241SStefano Zampini ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 134*ce496241SStefano Zampini aa = av; 135cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 13670f19b1fSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 137854ce69bSBarry Smith ierr = PetscCalloc1(an+1,&ati);CHKERRQ(ierr); 138785e854fSJed Brown ierr = PetscMalloc1(ai[am],&atj);CHKERRQ(ierr); 139785e854fSJed Brown ierr = PetscMalloc1(ai[am],&ata);CHKERRQ(ierr); 14070f19b1fSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 14170f19b1fSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 14270f19b1fSKris Buschelman for (i=0;i<ai[am];i++) { 143e1c026e1SHong Zhang ati[aj[i]+1] += 1; /* count ## of non-zeros for row aj[i] of A^T */ 14470f19b1fSKris Buschelman } 14570f19b1fSKris Buschelman /* Form ati for csr format of A^T. */ 14670f19b1fSKris Buschelman for (i=0;i<an;i++) { 14770f19b1fSKris Buschelman ati[i+1] += ati[i]; 14870f19b1fSKris Buschelman } 149317cfdd9SHong Zhang } else { /* This segment is called by MatTranspose_MPIAIJ(...,MAT_INITIAL_MATRIX,..) directly! */ 150fc4dec0aSBarry Smith Mat_SeqAIJ *sub_B = (Mat_SeqAIJ*) (*B)->data; 151fc4dec0aSBarry Smith ati = sub_B->i; 152fc4dec0aSBarry Smith atj = sub_B->j; 153fc4dec0aSBarry Smith ata = sub_B->a; 154fc4dec0aSBarry Smith At = *B; 155fc4dec0aSBarry Smith } 15670f19b1fSKris Buschelman 15770f19b1fSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 158785e854fSJed Brown ierr = PetscMalloc1(an,&atfill);CHKERRQ(ierr); 159580bdb30SBarry Smith ierr = PetscArraycpy(atfill,ati,an);CHKERRQ(ierr); 16070f19b1fSKris Buschelman 16170f19b1fSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 16270f19b1fSKris Buschelman for (i=0;i<am;i++) { 16370f19b1fSKris Buschelman anzj = ai[i+1] - ai[i]; 16470f19b1fSKris Buschelman for (j=0;j<anzj;j++) { 16570f19b1fSKris Buschelman atj[atfill[*aj]] = i; 16670f19b1fSKris Buschelman ata[atfill[*aj]] = *aa++; 16770f19b1fSKris Buschelman atfill[*aj++] += 1; 16870f19b1fSKris Buschelman } 16970f19b1fSKris Buschelman } 170*ce496241SStefano Zampini ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 17170f19b1fSKris Buschelman 17270f19b1fSKris Buschelman /* Clean up temporary space and complete requests. */ 17370f19b1fSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 174419ecdd9Sandi selinger if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 175ce94432eSBarry Smith ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),an,am,ati,atj,ata,&At);CHKERRQ(ierr); 176317cfdd9SHong Zhang ierr = MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1772205254eSKarl Rupp 17870f19b1fSKris Buschelman at = (Mat_SeqAIJ*)(At->data); 179e6b907acSBarry Smith at->free_a = PETSC_TRUE; 180e6b907acSBarry Smith at->free_ij = PETSC_TRUE; 18170f19b1fSKris Buschelman at->nonew = 0; 18265ac4ee0Sandi selinger at->maxnz = ati[an]; 1835e8f4152SKarl Rupp 1845e8f4152SKarl Rupp ierr = MatSetType(At,((PetscObject)A)->type_name);CHKERRQ(ierr); 185fc4dec0aSBarry Smith } 186fc4dec0aSBarry Smith 187cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 18870f19b1fSKris Buschelman *B = At; 18970f19b1fSKris Buschelman } else { 19028be2f97SBarry Smith ierr = MatHeaderMerge(A,&At);CHKERRQ(ierr); 19170f19b1fSKris Buschelman } 192*ce496241SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 193*ce496241SStefano Zampini (*B)->offloadmask = PETSC_OFFLOAD_CPU; 194*ce496241SStefano Zampini #endif 19570f19b1fSKris Buschelman PetscFunctionReturn(0); 19670f19b1fSKris Buschelman } 19770f19b1fSKris Buschelman 1982e111b49SBarry Smith PetscErrorCode MatRestoreSymbolicTranspose_SeqAIJ(Mat A,PetscInt *ati[],PetscInt *atj[]) 1992e111b49SBarry Smith { 200dfbe8321SBarry Smith PetscErrorCode ierr; 20170f19b1fSKris Buschelman 20270f19b1fSKris Buschelman PetscFunctionBegin; 203ae15b995SBarry Smith ierr = PetscInfo(A,"Restoring Symbolic Transpose.\n");CHKERRQ(ierr); 20470f19b1fSKris Buschelman ierr = PetscFree(*ati);CHKERRQ(ierr); 20570f19b1fSKris Buschelman ierr = PetscFree(*atj);CHKERRQ(ierr); 20670f19b1fSKris Buschelman PetscFunctionReturn(0); 20770f19b1fSKris Buschelman } 208