1c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2af0996ceSBarry Smith #include <petsc/private/vecimpl.h> 397929ea7SJunchao Zhang #include <petsc/private/sfimpl.h> 4af0996ceSBarry Smith #include <petsc/private/isimpl.h> 5c6db04a5SJed Brown #include <petscblaslapack.h> 60c312b8eSJed Brown #include <petscsf.h> 7bc8e477aSFande Kong #include <petsc/private/hashmapi.h> 88a729477SBarry Smith 901bebe75SBarry Smith /*MC 1001bebe75SBarry Smith MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1101bebe75SBarry Smith 1201bebe75SBarry Smith This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1301bebe75SBarry Smith and MATMPIAIJ otherwise. As a result, for single process communicators, 14a323099bSStefano Zampini MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 1501bebe75SBarry Smith for communicators controlling multiple processes. It is recommended that you call both of 1601bebe75SBarry Smith the above preallocation routines for simplicity. 1701bebe75SBarry Smith 1801bebe75SBarry Smith Options Database Keys: 1901bebe75SBarry Smith . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 2001bebe75SBarry Smith 2195452b02SPatrick Sanan Developer Notes: 22f719121fSJed Brown Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 2301bebe75SBarry Smith enough exist. 2401bebe75SBarry Smith 2501bebe75SBarry Smith Level: beginner 2601bebe75SBarry Smith 2769b1f4b7SBarry Smith .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 2801bebe75SBarry Smith M*/ 2901bebe75SBarry Smith 3001bebe75SBarry Smith /*MC 3101bebe75SBarry Smith MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 
3201bebe75SBarry Smith 3301bebe75SBarry Smith This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 3401bebe75SBarry Smith and MATMPIAIJCRL otherwise. As a result, for single process communicators, 3501bebe75SBarry Smith MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 3601bebe75SBarry Smith for communicators controlling multiple processes. It is recommended that you call both of 3701bebe75SBarry Smith the above preallocation routines for simplicity. 3801bebe75SBarry Smith 3901bebe75SBarry Smith Options Database Keys: 4001bebe75SBarry Smith . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 4101bebe75SBarry Smith 4201bebe75SBarry Smith Level: beginner 4301bebe75SBarry Smith 4401bebe75SBarry Smith .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 4501bebe75SBarry Smith M*/ 4601bebe75SBarry Smith 47b470e4b4SRichard Tran Mills static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48f74ef234SStefano Zampini { 49f74ef234SStefano Zampini Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50f74ef234SStefano Zampini 51f74ef234SStefano Zampini PetscFunctionBegin; 52f74ef234SStefano Zampini #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 53b470e4b4SRichard Tran Mills A->boundtocpu = flg; 54f74ef234SStefano Zampini #endif 55f74ef234SStefano Zampini if (a->A) { 569566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(a->A,flg)); 57f74ef234SStefano Zampini } 58f74ef234SStefano Zampini if (a->B) { 599566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(a->B,flg)); 60f74ef234SStefano Zampini } 613120d049SRichard Tran Mills 623120d049SRichard Tran Mills /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 
633120d049SRichard Tran Mills * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 643120d049SRichard Tran Mills * to differ from the parent matrix. */ 653120d049SRichard Tran Mills if (a->lvec) { 669566063dSJacob Faibussowitsch PetscCall(VecBindToCPU(a->lvec,flg)); 673120d049SRichard Tran Mills } 683120d049SRichard Tran Mills if (a->diag) { 699566063dSJacob Faibussowitsch PetscCall(VecBindToCPU(a->diag,flg)); 703120d049SRichard Tran Mills } 713120d049SRichard Tran Mills 72f74ef234SStefano Zampini PetscFunctionReturn(0); 73f74ef234SStefano Zampini } 74f74ef234SStefano Zampini 7546533700Sstefano_zampini PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 7626bda2c4Sstefano_zampini { 7726bda2c4Sstefano_zampini Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 7826bda2c4Sstefano_zampini 7926bda2c4Sstefano_zampini PetscFunctionBegin; 8046533700Sstefano_zampini if (mat->A) { 819566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 829566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 8346533700Sstefano_zampini } 8426bda2c4Sstefano_zampini PetscFunctionReturn(0); 8526bda2c4Sstefano_zampini } 8626bda2c4Sstefano_zampini 87f2c98031SJed Brown PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 8827d4218bSShri Abhyankar { 8927d4218bSShri Abhyankar Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 9027d4218bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 9127d4218bSShri Abhyankar Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 9227d4218bSShri Abhyankar const PetscInt *ia,*ib; 93ce496241SStefano Zampini const MatScalar *aa,*bb,*aav,*bav; 9427d4218bSShri Abhyankar PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 9527d4218bSShri Abhyankar PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 9627d4218bSShri Abhyankar 9727d4218bSShri Abhyankar PetscFunctionBegin; 98f4259b30SLisandro Dalcin *keptrows = NULL; 99ce496241SStefano Zampini 10027d4218bSShri Abhyankar ia = a->i; 
10127d4218bSShri Abhyankar ib = b->i; 1029566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 1039566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 10427d4218bSShri Abhyankar for (i=0; i<m; i++) { 10527d4218bSShri Abhyankar na = ia[i+1] - ia[i]; 10627d4218bSShri Abhyankar nb = ib[i+1] - ib[i]; 10727d4218bSShri Abhyankar if (!na && !nb) { 10827d4218bSShri Abhyankar cnt++; 10927d4218bSShri Abhyankar goto ok1; 11027d4218bSShri Abhyankar } 111ce496241SStefano Zampini aa = aav + ia[i]; 11227d4218bSShri Abhyankar for (j=0; j<na; j++) { 11327d4218bSShri Abhyankar if (aa[j] != 0.0) goto ok1; 11427d4218bSShri Abhyankar } 115ce496241SStefano Zampini bb = bav + ib[i]; 11627d4218bSShri Abhyankar for (j=0; j <nb; j++) { 11727d4218bSShri Abhyankar if (bb[j] != 0.0) goto ok1; 11827d4218bSShri Abhyankar } 11927d4218bSShri Abhyankar cnt++; 12027d4218bSShri Abhyankar ok1:; 12127d4218bSShri Abhyankar } 1221c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 123ce496241SStefano Zampini if (!n0rows) { 1249566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 1259566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 126ce496241SStefano Zampini PetscFunctionReturn(0); 127ce496241SStefano Zampini } 1289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 12927d4218bSShri Abhyankar cnt = 0; 13027d4218bSShri Abhyankar for (i=0; i<m; i++) { 13127d4218bSShri Abhyankar na = ia[i+1] - ia[i]; 13227d4218bSShri Abhyankar nb = ib[i+1] - ib[i]; 13327d4218bSShri Abhyankar if (!na && !nb) continue; 134ce496241SStefano Zampini aa = aav + ia[i]; 13527d4218bSShri Abhyankar for (j=0; j<na;j++) { 13627d4218bSShri Abhyankar if (aa[j] != 0.0) { 13727d4218bSShri Abhyankar rows[cnt++] = rstart + i; 13827d4218bSShri Abhyankar goto ok2; 13927d4218bSShri Abhyankar } 14027d4218bSShri Abhyankar } 141ce496241SStefano Zampini bb = bav 
+ ib[i]; 14227d4218bSShri Abhyankar for (j=0; j<nb; j++) { 14327d4218bSShri Abhyankar if (bb[j] != 0.0) { 14427d4218bSShri Abhyankar rows[cnt++] = rstart + i; 14527d4218bSShri Abhyankar goto ok2; 14627d4218bSShri Abhyankar } 14727d4218bSShri Abhyankar } 14827d4218bSShri Abhyankar ok2:; 14927d4218bSShri Abhyankar } 1509566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 1519566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 1529566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 15327d4218bSShri Abhyankar PetscFunctionReturn(0); 15427d4218bSShri Abhyankar } 15527d4218bSShri Abhyankar 15699e65526SBarry Smith PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 15799e65526SBarry Smith { 15899e65526SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 15994342113SStefano Zampini PetscBool cong; 16099e65526SBarry Smith 16199e65526SBarry Smith PetscFunctionBegin; 1629566063dSJacob Faibussowitsch PetscCall(MatHasCongruentLayouts(Y,&cong)); 16394342113SStefano Zampini if (Y->assembled && cong) { 1649566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet(aij->A,D,is)); 16599e65526SBarry Smith } else { 1669566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet_Default(Y,D,is)); 16799e65526SBarry Smith } 16899e65526SBarry Smith PetscFunctionReturn(0); 16999e65526SBarry Smith } 17099e65526SBarry Smith 171f1f41ecbSJed Brown PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 172f1f41ecbSJed Brown { 173f1f41ecbSJed Brown Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 174f1f41ecbSJed Brown PetscInt i,rstart,nrows,*rows; 175f1f41ecbSJed Brown 176f1f41ecbSJed Brown PetscFunctionBegin; 1770298fd71SBarry Smith *zrows = NULL; 1789566063dSJacob Faibussowitsch PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 1799566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 180f1f41ecbSJed Brown for (i=0; i<nrows; 
i++) rows[i] += rstart; 1819566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 182f1f41ecbSJed Brown PetscFunctionReturn(0); 183f1f41ecbSJed Brown } 184f1f41ecbSJed Brown 185857cbf51SRichard Tran Mills PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 1860716a85fSBarry Smith { 1870716a85fSBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 188a873a8cdSSam Reynolds PetscInt i,m,n,*garray = aij->garray; 1890716a85fSBarry Smith Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 1900716a85fSBarry Smith Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 1910716a85fSBarry Smith PetscReal *work; 192ce496241SStefano Zampini const PetscScalar *dummy; 1930716a85fSBarry Smith 1940716a85fSBarry Smith PetscFunctionBegin; 1959566063dSJacob Faibussowitsch PetscCall(MatGetSize(A,&m,&n)); 1969566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n,&work)); 1979566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 1989566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 1999566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 2009566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 201857cbf51SRichard Tran Mills if (type == NORM_2) { 2020716a85fSBarry Smith for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 2030716a85fSBarry Smith work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 2040716a85fSBarry Smith } 2050716a85fSBarry Smith for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 2060716a85fSBarry Smith work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 2070716a85fSBarry Smith } 208857cbf51SRichard Tran Mills } else if (type == NORM_1) { 2090716a85fSBarry Smith for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 2100716a85fSBarry Smith work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 2110716a85fSBarry Smith } 
2120716a85fSBarry Smith for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 2130716a85fSBarry Smith work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 2140716a85fSBarry Smith } 215857cbf51SRichard Tran Mills } else if (type == NORM_INFINITY) { 2160716a85fSBarry Smith for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 2170716a85fSBarry Smith work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 2180716a85fSBarry Smith } 2190716a85fSBarry Smith for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 2200716a85fSBarry Smith work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 2210716a85fSBarry Smith } 222857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 223a873a8cdSSam Reynolds for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 224857cbf51SRichard Tran Mills work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 225a873a8cdSSam Reynolds } 226a873a8cdSSam Reynolds for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 227857cbf51SRichard Tran Mills work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 228a873a8cdSSam Reynolds } 229857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 230857cbf51SRichard Tran Mills for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 231857cbf51SRichard Tran Mills work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 232857cbf51SRichard Tran Mills } 233857cbf51SRichard Tran Mills for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 234857cbf51SRichard Tran Mills work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 235857cbf51SRichard Tran Mills } 236857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 237857cbf51SRichard Tran Mills if (type == NORM_INFINITY) { 2381c2dc1cbSBarry Smith 
PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 2390716a85fSBarry Smith } else { 2401c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 2410716a85fSBarry Smith } 2429566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 243857cbf51SRichard Tran Mills if (type == NORM_2) { 244a873a8cdSSam Reynolds for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 245857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 246a873a8cdSSam Reynolds for (i=0; i<n; i++) reductions[i] /= m; 2470716a85fSBarry Smith } 2480716a85fSBarry Smith PetscFunctionReturn(0); 2490716a85fSBarry Smith } 2500716a85fSBarry Smith 251e52d2c62SBarry Smith PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 252e52d2c62SBarry Smith { 253e52d2c62SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 254e52d2c62SBarry Smith IS sis,gis; 255e52d2c62SBarry Smith const PetscInt *isis,*igis; 256e52d2c62SBarry Smith PetscInt n,*iis,nsis,ngis,rstart,i; 257e52d2c62SBarry Smith 258e52d2c62SBarry Smith PetscFunctionBegin; 2599566063dSJacob Faibussowitsch PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 2609566063dSJacob Faibussowitsch PetscCall(MatFindNonzeroRows(a->B,&gis)); 2619566063dSJacob Faibussowitsch PetscCall(ISGetSize(gis,&ngis)); 2629566063dSJacob Faibussowitsch PetscCall(ISGetSize(sis,&nsis)); 2639566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sis,&isis)); 2649566063dSJacob Faibussowitsch PetscCall(ISGetIndices(gis,&igis)); 265e52d2c62SBarry Smith 2669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ngis+nsis,&iis)); 2679566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(iis,igis,ngis)); 2689566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 269e52d2c62SBarry Smith n = ngis + nsis; 2709566063dSJacob Faibussowitsch PetscCall(PetscSortRemoveDupsInt(&n,iis)); 
2719566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 272e52d2c62SBarry Smith for (i=0; i<n; i++) iis[i] += rstart; 2739566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 274e52d2c62SBarry Smith 2759566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sis,&isis)); 2769566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(gis,&igis)); 2779566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sis)); 2789566063dSJacob Faibussowitsch PetscCall(ISDestroy(&gis)); 279e52d2c62SBarry Smith PetscFunctionReturn(0); 280e52d2c62SBarry Smith } 281e52d2c62SBarry Smith 282dd6ea824SBarry Smith /* 2830f5bd95cSBarry Smith Local utility routine that creates a mapping from the global column 2849e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 2850f5bd95cSBarry Smith storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 2860f5bd95cSBarry Smith a slightly higher hash table cost; without it it is not scalable (each processor 28772fa4726SStefano Zampini has an order N integer array but is fast to access. 
2889e25ed09SBarry Smith */ 289ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 2909e25ed09SBarry Smith { 29144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 292d0f46423SBarry Smith PetscInt n = aij->B->cmap->n,i; 293dbb450caSBarry Smith 2943a40ed3dSBarry Smith PetscFunctionBegin; 29508401ef6SPierre Jolivet PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 296aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 2979566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 298b1fc9764SSatish Balay for (i=0; i<n; i++) { 2999566063dSJacob Faibussowitsch PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 300b1fc9764SSatish Balay } 301b1fc9764SSatish Balay #else 3029566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 3039566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 304905e6a2fSBarry Smith for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 305b1fc9764SSatish Balay #endif 3063a40ed3dSBarry Smith PetscFunctionReturn(0); 3079e25ed09SBarry Smith } 3089e25ed09SBarry Smith 309d40312a9SBarry Smith #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 3100520107fSSatish Balay { \ 311db4deed7SKarl Rupp if (col <= lastcol1) low1 = 0; \ 312db4deed7SKarl Rupp else high1 = nrow1; \ 313fd3458f5SBarry Smith lastcol1 = col;\ 314fd3458f5SBarry Smith while (high1-low1 > 5) { \ 315fd3458f5SBarry Smith t = (low1+high1)/2; \ 316fd3458f5SBarry Smith if (rp1[t] > col) high1 = t; \ 317fd3458f5SBarry Smith else low1 = t; \ 318ba4e3ef2SSatish Balay } \ 319fd3458f5SBarry Smith for (_i=low1; _i<high1; _i++) { \ 320fd3458f5SBarry Smith if (rp1[_i] > col) break; \ 321fd3458f5SBarry Smith if (rp1[_i] == col) { \ 3220c0d7e18SFande Kong if (addv == ADD_VALUES) { \ 3230c0d7e18SFande Kong ap1[_i] += value; \ 3240c0d7e18SFande 
Kong /* Not sure LogFlops will slow dow the code or not */ \ 3250c0d7e18SFande Kong (void)PetscLogFlops(1.0); \ 3260c0d7e18SFande Kong } \ 327fd3458f5SBarry Smith else ap1[_i] = value; \ 32830770e4dSSatish Balay goto a_noinsert; \ 3290520107fSSatish Balay } \ 3300520107fSSatish Balay } \ 331dcd36c23SBarry Smith if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 332e44c0bd4SBarry Smith if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 33308401ef6SPierre Jolivet PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 334fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 335669a8dbcSSatish Balay N = nrow1++ - 1; a->nz++; high1++; \ 3360520107fSSatish Balay /* shift up all the later entries in this row */ \ 3379566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 3389566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 339fd3458f5SBarry Smith rp1[_i] = col; \ 340fd3458f5SBarry Smith ap1[_i] = value; \ 341e56f5c9eSBarry Smith A->nonzerostate++;\ 34230770e4dSSatish Balay a_noinsert: ; \ 343fd3458f5SBarry Smith ailen[row] = nrow1; \ 3440520107fSSatish Balay } 3450a198c4cSBarry Smith 346d40312a9SBarry Smith #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 34730770e4dSSatish Balay { \ 348db4deed7SKarl Rupp if (col <= lastcol2) low2 = 0; \ 349db4deed7SKarl Rupp else high2 = nrow2; \ 350fd3458f5SBarry Smith lastcol2 = col; \ 351fd3458f5SBarry Smith while (high2-low2 > 5) { \ 352fd3458f5SBarry Smith t = (low2+high2)/2; \ 353fd3458f5SBarry Smith if (rp2[t] > col) high2 = t; \ 354fd3458f5SBarry Smith else low2 = t; \ 355ba4e3ef2SSatish Balay } \ 356fd3458f5SBarry Smith for (_i=low2; _i<high2; _i++) { \ 357fd3458f5SBarry Smith if (rp2[_i] > col) break; \ 
358fd3458f5SBarry Smith if (rp2[_i] == col) { \ 3590c0d7e18SFande Kong if (addv == ADD_VALUES) { \ 3600c0d7e18SFande Kong ap2[_i] += value; \ 3610c0d7e18SFande Kong (void)PetscLogFlops(1.0); \ 3620c0d7e18SFande Kong } \ 363fd3458f5SBarry Smith else ap2[_i] = value; \ 36430770e4dSSatish Balay goto b_noinsert; \ 36530770e4dSSatish Balay } \ 36630770e4dSSatish Balay } \ 367e44c0bd4SBarry Smith if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 368e44c0bd4SBarry Smith if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 36908401ef6SPierre Jolivet PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 370fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 371669a8dbcSSatish Balay N = nrow2++ - 1; b->nz++; high2++; \ 37230770e4dSSatish Balay /* shift up all the later entries in this row */ \ 3739566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 3749566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 375fd3458f5SBarry Smith rp2[_i] = col; \ 376fd3458f5SBarry Smith ap2[_i] = value; \ 377e56f5c9eSBarry Smith B->nonzerostate++; \ 37830770e4dSSatish Balay b_noinsert: ; \ 379fd3458f5SBarry Smith bilen[row] = nrow2; \ 38030770e4dSSatish Balay } 38130770e4dSSatish Balay 3822fd7e33dSBarry Smith PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 3832fd7e33dSBarry Smith { 3842fd7e33dSBarry Smith Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 3852fd7e33dSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 3862fd7e33dSBarry Smith PetscInt l,*garray = mat->garray,diag; 387fff043a9SJunchao Zhang PetscScalar *aa,*ba; 3882fd7e33dSBarry Smith 3892fd7e33dSBarry Smith PetscFunctionBegin; 3902fd7e33dSBarry Smith /* code only works for square matrices A 
*/ 3912fd7e33dSBarry Smith 3922fd7e33dSBarry Smith /* find size of row to the left of the diagonal part */ 3939566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 3942fd7e33dSBarry Smith row = row - diag; 3952fd7e33dSBarry Smith for (l=0; l<b->i[row+1]-b->i[row]; l++) { 3962fd7e33dSBarry Smith if (garray[b->j[b->i[row]+l]] > diag) break; 3972fd7e33dSBarry Smith } 398fff043a9SJunchao Zhang if (l) { 3999566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 4009566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 4019566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 402fff043a9SJunchao Zhang } 4032fd7e33dSBarry Smith 4042fd7e33dSBarry Smith /* diagonal part */ 405fff043a9SJunchao Zhang if (a->i[row+1]-a->i[row]) { 4069566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 4079566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 4089566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 409fff043a9SJunchao Zhang } 4102fd7e33dSBarry Smith 4112fd7e33dSBarry Smith /* right of diagonal part */ 412fff043a9SJunchao Zhang if (b->i[row+1]-b->i[row]-l) { 4139566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 4149566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 4159566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416fff043a9SJunchao Zhang } 4172fd7e33dSBarry Smith PetscFunctionReturn(0); 4182fd7e33dSBarry Smith } 4192fd7e33dSBarry Smith 420b1d57f15SBarry Smith PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 4218a729477SBarry Smith { 42244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 423071fcb05SBarry Smith PetscScalar value = 0.0; 424d0f46423SBarry Smith PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 425d0f46423SBarry Smith PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 426ace3abfcSBarry Smith PetscBool roworiented = aij->roworiented; 4278a729477SBarry Smith 4280520107fSSatish Balay /* Some Variables required in the macro */ 4294ee7247eSSatish Balay Mat A = aij->A; 4304ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 43157809a77SBarry Smith PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 432ace3abfcSBarry Smith PetscBool ignorezeroentries = a->ignorezeroentries; 43330770e4dSSatish Balay Mat B = aij->B; 43430770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 435d0f46423SBarry Smith PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 436ce496241SStefano Zampini MatScalar *aa,*ba; 437fd3458f5SBarry Smith PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4388d76821aSHong Zhang PetscInt nonew; 439a77337e4SBarry Smith MatScalar *ap1,*ap2; 4404ee7247eSSatish Balay 4413a40ed3dSBarry Smith PetscFunctionBegin; 4429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(A,&aa)); 4439566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(B,&ba)); 4448a729477SBarry Smith for (i=0; i<m; i++) { 4455ef9f2a5SBarry Smith if (im[i] < 0) continue; 44608401ef6SPierre Jolivet PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 4474b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 4484b0e389bSBarry Smith row = im[i] - rstart; 449fd3458f5SBarry Smith lastcol1 = -1; 450fd3458f5SBarry Smith rp1 = aj + ai[row]; 451fd3458f5SBarry Smith ap1 = aa + ai[row]; 452fd3458f5SBarry Smith rmax1 = aimax[row]; 453fd3458f5SBarry Smith nrow1 = ailen[row]; 454fd3458f5SBarry Smith low1 = 0; 455fd3458f5SBarry Smith high1 = nrow1; 456fd3458f5SBarry Smith lastcol2 = -1; 457fd3458f5SBarry Smith rp2 = 
bj + bi[row]; 458d498b1e9SBarry Smith ap2 = ba + bi[row]; 459fd3458f5SBarry Smith rmax2 = bimax[row]; 460d498b1e9SBarry Smith nrow2 = bilen[row]; 461fd3458f5SBarry Smith low2 = 0; 462fd3458f5SBarry Smith high2 = nrow2; 463fd3458f5SBarry Smith 4641eb62cbbSBarry Smith for (j=0; j<n; j++) { 465071fcb05SBarry Smith if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 466c80a64e6SBarry Smith if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 467fd3458f5SBarry Smith if (in[j] >= cstart && in[j] < cend) { 468fd3458f5SBarry Smith col = in[j] - cstart; 4698d76821aSHong Zhang nonew = a->nonew; 470d40312a9SBarry Smith MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 471273d9f13SBarry Smith } else if (in[j] < 0) continue; 47208401ef6SPierre Jolivet else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 4731eb62cbbSBarry Smith else { 474227d817aSBarry Smith if (mat->was_assembled) { 475905e6a2fSBarry Smith if (!aij->colmap) { 4769566063dSJacob Faibussowitsch PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 477905e6a2fSBarry Smith } 478aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 4799566063dSJacob Faibussowitsch PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 480fa46199cSSatish Balay col--; 481b1fc9764SSatish Balay #else 482905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 483b1fc9764SSatish Balay #endif 484fff043a9SJunchao Zhang if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 4859566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 4864b0e389bSBarry Smith col = in[j]; 4879bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 488f9508a3cSSatish Balay B = aij->B; 489f9508a3cSSatish Balay b = 
(Mat_SeqAIJ*)B->data; 490e44c0bd4SBarry Smith bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 491d498b1e9SBarry Smith rp2 = bj + bi[row]; 492d498b1e9SBarry Smith ap2 = ba + bi[row]; 493d498b1e9SBarry Smith rmax2 = bimax[row]; 494d498b1e9SBarry Smith nrow2 = bilen[row]; 495d498b1e9SBarry Smith low2 = 0; 496d498b1e9SBarry Smith high2 = nrow2; 497d0f46423SBarry Smith bm = aij->B->rmap->n; 498f9508a3cSSatish Balay ba = b->a; 499d707bf6cSMatthew Knepley } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 5000587a0fcSBarry Smith if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 5019566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 50298921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 5030587a0fcSBarry Smith } 504c48de900SBarry Smith } else col = in[j]; 5058d76821aSHong Zhang nonew = b->nonew; 506d40312a9SBarry Smith MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5071eb62cbbSBarry Smith } 5081eb62cbbSBarry Smith } 5095ef9f2a5SBarry Smith } else { 51028b400f6SJacob Faibussowitsch PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 51190f02eecSBarry Smith if (!aij->donotstash) { 5125080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 513d36fbae8SSatish Balay if (roworiented) { 5149566063dSJacob Faibussowitsch PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 515d36fbae8SSatish Balay } else { 5169566063dSJacob Faibussowitsch 
PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 5174b0e389bSBarry Smith } 5181eb62cbbSBarry Smith } 5198a729477SBarry Smith } 52090f02eecSBarry Smith } 5219566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(A,&aa)); 5229566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(B,&ba)); 5233a40ed3dSBarry Smith PetscFunctionReturn(0); 5248a729477SBarry Smith } 5258a729477SBarry Smith 5262b08fdbeSandi selinger /* 527904d1e70Sandi selinger This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 5282b08fdbeSandi selinger The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 529904d1e70Sandi selinger No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 5302b08fdbeSandi selinger */ 531904d1e70Sandi selinger PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 532904d1e70Sandi selinger { 533904d1e70Sandi selinger Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 534904d1e70Sandi selinger Mat A = aij->A; /* diagonal part of the matrix */ 535904d1e70Sandi selinger Mat B = aij->B; /* offdiagonal part of the matrix */ 536904d1e70Sandi selinger Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537904d1e70Sandi selinger Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 538904d1e70Sandi selinger PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 539904d1e70Sandi selinger PetscInt *ailen = a->ilen,*aj = a->j; 540904d1e70Sandi selinger PetscInt *bilen = b->ilen,*bj = b->j; 5416dc1ffa3Sandi selinger PetscInt am = aij->A->rmap->n,j; 542904d1e70Sandi selinger PetscInt diag_so_far = 0,dnz; 543904d1e70Sandi selinger PetscInt offd_so_far = 0,onz; 544904d1e70Sandi selinger 545904d1e70Sandi selinger PetscFunctionBegin; 546904d1e70Sandi selinger /* Iterate over all rows of the matrix */ 547904d1e70Sandi selinger for 
(j=0; j<am; j++) { 548904d1e70Sandi selinger dnz = onz = 0; 549904d1e70Sandi selinger /* Iterate over all non-zero columns of the current row */ 5506dc1ffa3Sandi selinger for (col=mat_i[j]; col<mat_i[j+1]; col++) { 551904d1e70Sandi selinger /* If column is in the diagonal */ 552904d1e70Sandi selinger if (mat_j[col] >= cstart && mat_j[col] < cend) { 553904d1e70Sandi selinger aj[diag_so_far++] = mat_j[col] - cstart; 554904d1e70Sandi selinger dnz++; 555904d1e70Sandi selinger } else { /* off-diagonal entries */ 556904d1e70Sandi selinger bj[offd_so_far++] = mat_j[col]; 557904d1e70Sandi selinger onz++; 558904d1e70Sandi selinger } 559904d1e70Sandi selinger } 560904d1e70Sandi selinger ailen[j] = dnz; 561904d1e70Sandi selinger bilen[j] = onz; 562904d1e70Sandi selinger } 563904d1e70Sandi selinger PetscFunctionReturn(0); 564904d1e70Sandi selinger } 565904d1e70Sandi selinger 566904d1e70Sandi selinger /* 567904d1e70Sandi selinger This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 568904d1e70Sandi selinger The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 5691de21080Sandi selinger No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 5701de21080Sandi selinger Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 5711de21080Sandi selinger would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
572904d1e70Sandi selinger */ 573e9ede7d0Sandi selinger PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 5743a063d27Sandi selinger { 5753a063d27Sandi selinger Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5763a063d27Sandi selinger Mat A = aij->A; /* diagonal part of the matrix */ 5773a063d27Sandi selinger Mat B = aij->B; /* offdiagonal part of the matrix */ 578e9ede7d0Sandi selinger Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 5793a063d27Sandi selinger Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5803a063d27Sandi selinger Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5813a063d27Sandi selinger PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 5823a063d27Sandi selinger PetscInt *ailen = a->ilen,*aj = a->j; 5833a063d27Sandi selinger PetscInt *bilen = b->ilen,*bj = b->j; 5846dc1ffa3Sandi selinger PetscInt am = aij->A->rmap->n,j; 5851de21080Sandi selinger PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 586904d1e70Sandi selinger PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 587904d1e70Sandi selinger PetscScalar *aa = a->a,*ba = b->a; 5883a063d27Sandi selinger 5893a063d27Sandi selinger PetscFunctionBegin; 5903a063d27Sandi selinger /* Iterate over all rows of the matrix */ 5913a063d27Sandi selinger for (j=0; j<am; j++) { 592904d1e70Sandi selinger dnz_row = onz_row = 0; 593904d1e70Sandi selinger rowstart_offd = full_offd_i[j]; 594904d1e70Sandi selinger rowstart_diag = full_diag_i[j]; 595e9ede7d0Sandi selinger /* Iterate over all non-zero columns of the current row */ 596e9ede7d0Sandi selinger for (col=mat_i[j]; col<mat_i[j+1]; col++) { 597ae8e66a0Sandi selinger /* If column is in the diagonal */ 5983a063d27Sandi selinger if (mat_j[col] >= cstart && mat_j[col] < cend) { 599904d1e70Sandi selinger aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 600904d1e70Sandi selinger aa[rowstart_diag+dnz_row] = mat_a[col]; 601904d1e70Sandi selinger dnz_row++; 602ae8e66a0Sandi selinger } else { /* off-diagonal entries */ 603904d1e70Sandi selinger bj[rowstart_offd+onz_row] = mat_j[col]; 604904d1e70Sandi selinger ba[rowstart_offd+onz_row] = mat_a[col]; 605904d1e70Sandi selinger onz_row++; 6063a063d27Sandi selinger } 6073a063d27Sandi selinger } 608904d1e70Sandi selinger ailen[j] = dnz_row; 609904d1e70Sandi selinger bilen[j] = onz_row; 6103a063d27Sandi selinger } 6113a063d27Sandi selinger PetscFunctionReturn(0); 6123a063d27Sandi selinger } 6133a063d27Sandi selinger 614b1d57f15SBarry Smith PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 615b49de8d1SLois Curfman McInnes { 616b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 617d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 618d0f46423SBarry Smith PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 619b49de8d1SLois Curfman McInnes 6203a40ed3dSBarry Smith PetscFunctionBegin; 
621b49de8d1SLois Curfman McInnes for (i=0; i<m; i++) { 62254c59aa7SJacob Faibussowitsch if (idxm[i] < 0) continue; /* negative row */ 62354c59aa7SJacob Faibussowitsch PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 624b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 625b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 626b49de8d1SLois Curfman McInnes for (j=0; j<n; j++) { 62754c59aa7SJacob Faibussowitsch if (idxn[j] < 0) continue; /* negative column */ 62854c59aa7SJacob Faibussowitsch PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 629b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend) { 630b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 6319566063dSJacob Faibussowitsch PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 632fa852ad4SSatish Balay } else { 633905e6a2fSBarry Smith if (!aij->colmap) { 6349566063dSJacob Faibussowitsch PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 635905e6a2fSBarry Smith } 636aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 6379566063dSJacob Faibussowitsch PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 638fa46199cSSatish Balay col--; 639b1fc9764SSatish Balay #else 640905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 641b1fc9764SSatish Balay #endif 642e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 643d9d09a02SSatish Balay else { 6449566063dSJacob Faibussowitsch PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 645b49de8d1SLois Curfman McInnes } 646b49de8d1SLois Curfman McInnes } 647b49de8d1SLois Curfman McInnes } 648f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 649b49de8d1SLois Curfman McInnes } 6503a40ed3dSBarry Smith PetscFunctionReturn(0); 
651b49de8d1SLois Curfman McInnes } 652bc5ccf88SSatish Balay 653dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 654bc5ccf88SSatish Balay { 655bc5ccf88SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 656b1d57f15SBarry Smith PetscInt nstash,reallocs; 657bc5ccf88SSatish Balay 658bc5ccf88SSatish Balay PetscFunctionBegin; 6592205254eSKarl Rupp if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 660bc5ccf88SSatish Balay 6619566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 6629566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 6639566063dSJacob Faibussowitsch PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 664bc5ccf88SSatish Balay PetscFunctionReturn(0); 665bc5ccf88SSatish Balay } 666bc5ccf88SSatish Balay 667dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 668bc5ccf88SSatish Balay { 669bc5ccf88SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 670b1d57f15SBarry Smith PetscMPIInt n; 671b1d57f15SBarry Smith PetscInt i,j,rstart,ncols,flg; 672e44c0bd4SBarry Smith PetscInt *row,*col; 673ace3abfcSBarry Smith PetscBool other_disassembled; 67487828ca2SBarry Smith PetscScalar *val; 675bc5ccf88SSatish Balay 67691c97fd4SSatish Balay /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 6776e111a19SKarl Rupp 678bc5ccf88SSatish Balay PetscFunctionBegin; 6794cb17eb5SBarry Smith if (!aij->donotstash && !mat->nooffprocentries) { 680a2d1c673SSatish Balay while (1) { 6819566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 682a2d1c673SSatish Balay if (!flg) break; 683a2d1c673SSatish Balay 684bc5ccf88SSatish Balay for (i=0; i<n;) { 685bc5ccf88SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 
6862205254eSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 6872205254eSKarl Rupp if (row[j] != rstart) break; 6882205254eSKarl Rupp } 689bc5ccf88SSatish Balay if (j < n) ncols = j-i; 690bc5ccf88SSatish Balay else ncols = n-i; 691bc5ccf88SSatish Balay /* Now assemble all these values with a single function call */ 6929566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 693bc5ccf88SSatish Balay i = j; 694bc5ccf88SSatish Balay } 695bc5ccf88SSatish Balay } 6969566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->stash)); 697bc5ccf88SSatish Balay } 6988c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 699c70f7ee4SJunchao Zhang if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 7009ecce9b1SRichard Tran Mills /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 7019ecce9b1SRichard Tran Mills if (mat->boundtocpu) { 7029566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(aij->A,PETSC_TRUE)); 7039566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(aij->B,PETSC_TRUE)); 7049ecce9b1SRichard Tran Mills } 705e2cf4d64SStefano Zampini #endif 7069566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(aij->A,mode)); 7079566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(aij->A,mode)); 708bc5ccf88SSatish Balay 709bc5ccf88SSatish Balay /* determine if any processor has disassembled, if so we must 710071fcb05SBarry Smith also disassemble ourself, in order that we may reassemble. 
*/ 711bc5ccf88SSatish Balay /* 712bc5ccf88SSatish Balay if nonzero structure of submatrix B cannot change then we know that 713bc5ccf88SSatish Balay no processor disassembled thus we can skip this stuff 714bc5ccf88SSatish Balay */ 715bc5ccf88SSatish Balay if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 7161c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 717fff043a9SJunchao Zhang if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 7189566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPIAIJ(mat)); 719ad59fb31SSatish Balay } 720ad59fb31SSatish Balay } 721bc5ccf88SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 7229566063dSJacob Faibussowitsch PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 723bc5ccf88SSatish Balay } 7249566063dSJacob Faibussowitsch PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE)); 7258c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 726c70f7ee4SJunchao Zhang if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 727e2cf4d64SStefano Zampini #endif 7289566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(aij->B,mode)); 7299566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(aij->B,mode)); 730bc5ccf88SSatish Balay 7319566063dSJacob Faibussowitsch PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 7322205254eSKarl Rupp 733f4259b30SLisandro Dalcin aij->rowvalues = NULL; 734a30b2313SHong Zhang 7359566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aij->diag)); 736e56f5c9eSBarry Smith 7374f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 7384f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 739e56f5c9eSBarry Smith 
PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 7401c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 741e56f5c9eSBarry Smith } 7428c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 743c70f7ee4SJunchao Zhang mat->offloadmask = PETSC_OFFLOAD_BOTH; 744e2cf4d64SStefano Zampini #endif 745bc5ccf88SSatish Balay PetscFunctionReturn(0); 746bc5ccf88SSatish Balay } 747bc5ccf88SSatish Balay 748dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 7491eb62cbbSBarry Smith { 75044a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 7513a40ed3dSBarry Smith 7523a40ed3dSBarry Smith PetscFunctionBegin; 7539566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->A)); 7549566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->B)); 7553a40ed3dSBarry Smith PetscFunctionReturn(0); 7561eb62cbbSBarry Smith } 7571eb62cbbSBarry Smith 7582b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 7591eb62cbbSBarry Smith { 7601b1dd7adSMatthew G. Knepley Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 761a92ad425SStefano Zampini PetscObjectState sA, sB; 7621b1dd7adSMatthew G. Knepley PetscInt *lrows; 7636e520ac8SStefano Zampini PetscInt r, len; 764a92ad425SStefano Zampini PetscBool cong, lch, gch; 7651eb62cbbSBarry Smith 7663a40ed3dSBarry Smith PetscFunctionBegin; 7676e520ac8SStefano Zampini /* get locally owned rows */ 7689566063dSJacob Faibussowitsch PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 7699566063dSJacob Faibussowitsch PetscCall(MatHasCongruentLayouts(A,&cong)); 77097b48c8fSBarry Smith /* fix right hand side if needed */ 77197b48c8fSBarry Smith if (x && b) { 7721b1dd7adSMatthew G. Knepley const PetscScalar *xx; 7731b1dd7adSMatthew G. Knepley PetscScalar *bb; 7741b1dd7adSMatthew G. 
Knepley 77528b400f6SJacob Faibussowitsch PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 7769566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 7779566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 7781b1dd7adSMatthew G. Knepley for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 7799566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 7809566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 78197b48c8fSBarry Smith } 782a92ad425SStefano Zampini 783a92ad425SStefano Zampini sA = mat->A->nonzerostate; 784a92ad425SStefano Zampini sB = mat->B->nonzerostate; 785a92ad425SStefano Zampini 786a92ad425SStefano Zampini if (diag != 0.0 && cong) { 7879566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 7889566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 789a92ad425SStefano Zampini } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 790a92ad425SStefano Zampini Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 791a92ad425SStefano Zampini Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 792a92ad425SStefano Zampini PetscInt nnwA, nnwB; 793a92ad425SStefano Zampini PetscBool nnzA, nnzB; 794a92ad425SStefano Zampini 795a92ad425SStefano Zampini nnwA = aijA->nonew; 796a92ad425SStefano Zampini nnwB = aijB->nonew; 797a92ad425SStefano Zampini nnzA = aijA->keepnonzeropattern; 798a92ad425SStefano Zampini nnzB = aijB->keepnonzeropattern; 799a92ad425SStefano Zampini if (!nnzA) { 8009566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 801a92ad425SStefano Zampini aijA->nonew = 0; 802a92ad425SStefano Zampini } 803a92ad425SStefano Zampini if (!nnzB) { 8049566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat->B,"Requested to 
not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 805a92ad425SStefano Zampini aijB->nonew = 0; 806a92ad425SStefano Zampini } 807a92ad425SStefano Zampini /* Must zero here before the next loop */ 8089566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 8099566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 8101b1dd7adSMatthew G. Knepley for (r = 0; r < len; ++r) { 8111b1dd7adSMatthew G. Knepley const PetscInt row = lrows[r] + A->rmap->rstart; 812a92ad425SStefano Zampini if (row >= A->cmap->N) continue; 8139566063dSJacob Faibussowitsch PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 814e2d53e46SBarry Smith } 815a92ad425SStefano Zampini aijA->nonew = nnwA; 816a92ad425SStefano Zampini aijB->nonew = nnwB; 8176eb55b6aSBarry Smith } else { 8189566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 8199566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 8206eb55b6aSBarry Smith } 8219566063dSJacob Faibussowitsch PetscCall(PetscFree(lrows)); 8229566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 8239566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 8244f9cfa9eSBarry Smith 825a92ad425SStefano Zampini /* reduce nonzerostate */ 826a92ad425SStefano Zampini lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 8271c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 828a92ad425SStefano Zampini if (gch) A->nonzerostate++; 8293a40ed3dSBarry Smith PetscFunctionReturn(0); 8301eb62cbbSBarry Smith } 8311eb62cbbSBarry Smith 8329c7c4993SBarry Smith PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 8339c7c4993SBarry Smith { 8349c7c4993SBarry Smith Mat_MPIAIJ *l = 
(Mat_MPIAIJ*)A->data; 8355ba17502SJed Brown PetscMPIInt n = A->rmap->n; 836131c27b5Sprj- PetscInt i,j,r,m,len = 0; 83754bd4135SMatthew G. Knepley PetscInt *lrows,*owners = A->rmap->range; 838131c27b5Sprj- PetscMPIInt p = 0; 83954bd4135SMatthew G. Knepley PetscSFNode *rrows; 84054bd4135SMatthew G. Knepley PetscSF sf; 8419c7c4993SBarry Smith const PetscScalar *xx; 842fff043a9SJunchao Zhang PetscScalar *bb,*mask,*aij_a; 843564f14d6SBarry Smith Vec xmask,lmask; 844564f14d6SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 845564f14d6SBarry Smith const PetscInt *aj, *ii,*ridx; 846564f14d6SBarry Smith PetscScalar *aa; 8479c7c4993SBarry Smith 8489c7c4993SBarry Smith PetscFunctionBegin; 84954bd4135SMatthew G. Knepley /* Create SF where leaves are input rows and roots are owned rows */ 8509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &lrows)); 85154bd4135SMatthew G. Knepley for (r = 0; r < n; ++r) lrows[r] = -1; 8529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(N, &rrows)); 85354bd4135SMatthew G. Knepley for (r = 0; r < N; ++r) { 85454bd4135SMatthew G. Knepley const PetscInt idx = rows[r]; 8552c71b3e2SJacob Faibussowitsch PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 8565ba17502SJed Brown if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 8579566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 8585ba17502SJed Brown } 85954bd4135SMatthew G. Knepley rrows[r].rank = p; 86054bd4135SMatthew G. Knepley rrows[r].index = rows[r] - owners[p]; 8619c7c4993SBarry Smith } 8629566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 8639566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 86454bd4135SMatthew G. 
Knepley /* Collect flags for rows to be zeroed */ 8659566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 8669566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 8679566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 86854bd4135SMatthew G. Knepley /* Compress and put in row numbers */ 86954bd4135SMatthew G. Knepley for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 870564f14d6SBarry Smith /* zero diagonal part of matrix */ 8719566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 872564f14d6SBarry Smith /* handle off diagonal part of matrix */ 8739566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A,&xmask,NULL)); 8749566063dSJacob Faibussowitsch PetscCall(VecDuplicate(l->lvec,&lmask)); 8759566063dSJacob Faibussowitsch PetscCall(VecGetArray(xmask,&bb)); 87654bd4135SMatthew G. Knepley for (i=0; i<len; i++) bb[lrows[i]] = 1; 8779566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xmask,&bb)); 8789566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 8799566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 8809566063dSJacob Faibussowitsch PetscCall(VecDestroy(&xmask)); 881a92ad425SStefano Zampini if (x && b) { /* this code is buggy when the row and column layout don't match */ 882a92ad425SStefano Zampini PetscBool cong; 883a92ad425SStefano Zampini 8849566063dSJacob Faibussowitsch PetscCall(MatHasCongruentLayouts(A,&cong)); 88528b400f6SJacob Faibussowitsch PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 8869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 8879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 8889566063dSJacob Faibussowitsch 
PetscCall(VecGetArrayRead(l->lvec,&xx)); 8899566063dSJacob Faibussowitsch PetscCall(VecGetArray(b,&bb)); 890377aa5a1SBarry Smith } 8919566063dSJacob Faibussowitsch PetscCall(VecGetArray(lmask,&mask)); 892564f14d6SBarry Smith /* remove zeroed rows of off diagonal matrix */ 8939566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(l->B,&aij_a)); 894564f14d6SBarry Smith ii = aij->i; 89554bd4135SMatthew G. Knepley for (i=0; i<len; i++) { 8969566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]])); 8979c7c4993SBarry Smith } 898564f14d6SBarry Smith /* loop over all elements of off process part of matrix zeroing removed columns*/ 899564f14d6SBarry Smith if (aij->compressedrow.use) { 900564f14d6SBarry Smith m = aij->compressedrow.nrows; 901564f14d6SBarry Smith ii = aij->compressedrow.i; 902564f14d6SBarry Smith ridx = aij->compressedrow.rindex; 903564f14d6SBarry Smith for (i=0; i<m; i++) { 904564f14d6SBarry Smith n = ii[i+1] - ii[i]; 905564f14d6SBarry Smith aj = aij->j + ii[i]; 906fff043a9SJunchao Zhang aa = aij_a + ii[i]; 907564f14d6SBarry Smith 908564f14d6SBarry Smith for (j=0; j<n; j++) { 90925266a92SSatish Balay if (PetscAbsScalar(mask[*aj])) { 910377aa5a1SBarry Smith if (b) bb[*ridx] -= *aa*xx[*aj]; 911564f14d6SBarry Smith *aa = 0.0; 912564f14d6SBarry Smith } 913564f14d6SBarry Smith aa++; 914564f14d6SBarry Smith aj++; 915564f14d6SBarry Smith } 916564f14d6SBarry Smith ridx++; 917564f14d6SBarry Smith } 918564f14d6SBarry Smith } else { /* do not use compressed row format */ 919564f14d6SBarry Smith m = l->B->rmap->n; 920564f14d6SBarry Smith for (i=0; i<m; i++) { 921564f14d6SBarry Smith n = ii[i+1] - ii[i]; 922564f14d6SBarry Smith aj = aij->j + ii[i]; 923fff043a9SJunchao Zhang aa = aij_a + ii[i]; 924564f14d6SBarry Smith for (j=0; j<n; j++) { 92525266a92SSatish Balay if (PetscAbsScalar(mask[*aj])) { 926377aa5a1SBarry Smith if (b) bb[i] -= *aa*xx[*aj]; 927564f14d6SBarry Smith *aa = 0.0; 928564f14d6SBarry Smith } 
929564f14d6SBarry Smith aa++; 930564f14d6SBarry Smith aj++; 931564f14d6SBarry Smith } 932564f14d6SBarry Smith } 933564f14d6SBarry Smith } 934a92ad425SStefano Zampini if (x && b) { 9359566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b,&bb)); 9369566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 937377aa5a1SBarry Smith } 9389566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a)); 9399566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(lmask,&mask)); 9409566063dSJacob Faibussowitsch PetscCall(VecDestroy(&lmask)); 9419566063dSJacob Faibussowitsch PetscCall(PetscFree(lrows)); 9424f9cfa9eSBarry Smith 9434f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 9444f9cfa9eSBarry Smith if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 9454f9cfa9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 9461c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 9474f9cfa9eSBarry Smith } 9489c7c4993SBarry Smith PetscFunctionReturn(0); 9499c7c4993SBarry Smith } 9509c7c4993SBarry Smith 951dfbe8321SBarry Smith PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 9521eb62cbbSBarry Smith { 953416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 954b1d57f15SBarry Smith PetscInt nt; 95519b3b6edSHong Zhang VecScatter Mvctx = a->Mvctx; 956416022c9SBarry Smith 9573a40ed3dSBarry Smith PetscFunctionBegin; 9589566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(xx,&nt)); 95908401ef6SPierre Jolivet PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 9609566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 9619566063dSJacob Faibussowitsch PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 9629566063dSJacob Faibussowitsch 
PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 9639566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 9643a40ed3dSBarry Smith PetscFunctionReturn(0); 9651eb62cbbSBarry Smith } 9661eb62cbbSBarry Smith 967bd0c2dcbSBarry Smith PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 968bd0c2dcbSBarry Smith { 969bd0c2dcbSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 970bd0c2dcbSBarry Smith 971bd0c2dcbSBarry Smith PetscFunctionBegin; 9729566063dSJacob Faibussowitsch PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 973bd0c2dcbSBarry Smith PetscFunctionReturn(0); 974bd0c2dcbSBarry Smith } 975bd0c2dcbSBarry Smith 976dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 977da3a660dSBarry Smith { 978416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 97901ad2aeeSHong Zhang VecScatter Mvctx = a->Mvctx; 9803a40ed3dSBarry Smith 9813a40ed3dSBarry Smith PetscFunctionBegin; 9829566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 9839566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 9849566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 9859566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 9863a40ed3dSBarry Smith PetscFunctionReturn(0); 987da3a660dSBarry Smith } 988da3a660dSBarry Smith 989dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 990da3a660dSBarry Smith { 991416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 992da3a660dSBarry Smith 9933a40ed3dSBarry Smith PetscFunctionBegin; 994da3a660dSBarry Smith /* do nondiagonal part */ 9959566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 996da3a660dSBarry Smith /* do local part */ 9979566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 9989613dc34SJunchao Zhang /* add 
partial results together */ 9999566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 10009566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 10013a40ed3dSBarry Smith PetscFunctionReturn(0); 1002da3a660dSBarry Smith } 1003da3a660dSBarry Smith 10047087cfbeSBarry Smith PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1005cd0d46ebSvictorle { 10064f423910Svictorle MPI_Comm comm; 1007cd0d46ebSvictorle Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 100866501d38Svictorle Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1009cd0d46ebSvictorle IS Me,Notme; 1010b1d57f15SBarry Smith PetscInt M,N,first,last,*notme,i; 101154d735aeSStefano Zampini PetscBool lf; 1012b1d57f15SBarry Smith PetscMPIInt size; 1013cd0d46ebSvictorle 1014cd0d46ebSvictorle PetscFunctionBegin; 101542e5f5b4Svictorle /* Easy test: symmetric diagonal block */ 101666501d38Svictorle Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 10179566063dSJacob Faibussowitsch PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 10181c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1019cd0d46ebSvictorle if (!*f) PetscFunctionReturn(0); 10209566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 10219566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 1022b1d57f15SBarry Smith if (size == 1) PetscFunctionReturn(0); 102342e5f5b4Svictorle 10247dae84e0SHong Zhang /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 10259566063dSJacob Faibussowitsch PetscCall(MatGetSize(Amat,&M,&N)); 10269566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 10279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(N-last+first,¬me)); 1028cd0d46ebSvictorle for (i=0; i<first; i++) notme[i] = i; 1029cd0d46ebSvictorle for (i=last; i<M; i++) notme[i-last+first] = i; 10309566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 10319566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 10329566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 103366501d38Svictorle Aoff = Aoffs[0]; 10349566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 103566501d38Svictorle Boff = Boffs[0]; 10369566063dSJacob Faibussowitsch PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 10379566063dSJacob Faibussowitsch PetscCall(MatDestroyMatrices(1,&Aoffs)); 10389566063dSJacob Faibussowitsch PetscCall(MatDestroyMatrices(1,&Boffs)); 10399566063dSJacob Faibussowitsch PetscCall(ISDestroy(&Me)); 10409566063dSJacob Faibussowitsch PetscCall(ISDestroy(&Notme)); 10419566063dSJacob Faibussowitsch PetscCall(PetscFree(notme)); 1042cd0d46ebSvictorle PetscFunctionReturn(0); 1043cd0d46ebSvictorle } 1044cd0d46ebSvictorle 1045a3bbdb47SHong Zhang PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1046a3bbdb47SHong Zhang { 1047a3bbdb47SHong Zhang PetscFunctionBegin; 10489566063dSJacob Faibussowitsch PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1049a3bbdb47SHong Zhang PetscFunctionReturn(0); 1050a3bbdb47SHong Zhang } 1051a3bbdb47SHong Zhang 1052dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1053da3a660dSBarry Smith { 1054416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055da3a660dSBarry Smith 10563a40ed3dSBarry Smith PetscFunctionBegin; 
1057da3a660dSBarry Smith /* do nondiagonal part */ 10589566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1059da3a660dSBarry Smith /* do local part */ 10609566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 10619613dc34SJunchao Zhang /* add partial results together */ 10629566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 10639566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 10643a40ed3dSBarry Smith PetscFunctionReturn(0); 1065da3a660dSBarry Smith } 1066da3a660dSBarry Smith 10671eb62cbbSBarry Smith /* 10681eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 10691eb62cbbSBarry Smith diagonal block 10701eb62cbbSBarry Smith */ 1071dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 10721eb62cbbSBarry Smith { 1073416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 10743a40ed3dSBarry Smith 10753a40ed3dSBarry Smith PetscFunctionBegin; 107608401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 10772c71b3e2SJacob Faibussowitsch PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 10789566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(a->A,v)); 10793a40ed3dSBarry Smith PetscFunctionReturn(0); 10801eb62cbbSBarry Smith } 10811eb62cbbSBarry Smith 1082f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1083052efed2SBarry Smith { 1084052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 10853a40ed3dSBarry Smith 10863a40ed3dSBarry Smith PetscFunctionBegin; 10879566063dSJacob Faibussowitsch PetscCall(MatScale(a->A,aa)); 10889566063dSJacob Faibussowitsch PetscCall(MatScale(a->B,aa)); 
10893a40ed3dSBarry Smith PetscFunctionReturn(0); 1090052efed2SBarry Smith } 1091052efed2SBarry Smith 1092cbc6b225SStefano Zampini /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1093cbc6b225SStefano Zampini PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1094cbc6b225SStefano Zampini { 1095cbc6b225SStefano Zampini Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1096cbc6b225SStefano Zampini 1097cbc6b225SStefano Zampini PetscFunctionBegin; 10989566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&aij->coo_sf)); 1099*158ec288SJunchao Zhang PetscCall(PetscFree(aij->Aperm1)); 1100*158ec288SJunchao Zhang PetscCall(PetscFree(aij->Bperm1)); 1101*158ec288SJunchao Zhang PetscCall(PetscFree(aij->Ajmap1)); 1102*158ec288SJunchao Zhang PetscCall(PetscFree(aij->Bjmap1)); 1103*158ec288SJunchao Zhang 1104*158ec288SJunchao Zhang PetscCall(PetscFree(aij->Aimap2)); 1105*158ec288SJunchao Zhang PetscCall(PetscFree(aij->Bimap2)); 1106*158ec288SJunchao Zhang PetscCall(PetscFree(aij->Aperm2)); 1107*158ec288SJunchao Zhang PetscCall(PetscFree(aij->Bperm2)); 1108*158ec288SJunchao Zhang PetscCall(PetscFree(aij->Ajmap2)); 1109*158ec288SJunchao Zhang PetscCall(PetscFree(aij->Bjmap2)); 1110*158ec288SJunchao Zhang 11119566063dSJacob Faibussowitsch PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf)); 11129566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->Cperm1)); 1113cbc6b225SStefano Zampini PetscFunctionReturn(0); 1114cbc6b225SStefano Zampini } 1115cbc6b225SStefano Zampini 1116dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 11171eb62cbbSBarry Smith { 111844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 111983e2fdc7SBarry Smith 11203a40ed3dSBarry Smith PetscFunctionBegin; 1121aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1122c0aa6a63SJacob Faibussowitsch PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1123a5a9c739SBarry Smith #endif 
11249566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&mat->stash)); 11259566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aij->diag)); 11269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&aij->A)); 11279566063dSJacob Faibussowitsch PetscCall(MatDestroy(&aij->B)); 1128aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 11299566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&aij->colmap)); 1130b1fc9764SSatish Balay #else 11319566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->colmap)); 1132b1fc9764SSatish Balay #endif 11339566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->garray)); 11349566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aij->lvec)); 11359566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&aij->Mvctx)); 11369566063dSJacob Faibussowitsch PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 11379566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->ld)); 1138394ed5ebSJunchao Zhang 1139cbc6b225SStefano Zampini /* Free COO */ 11409566063dSJacob Faibussowitsch PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1141394ed5ebSJunchao Zhang 11429566063dSJacob Faibussowitsch PetscCall(PetscFree(mat->data)); 1143901853e0SKris Buschelman 11446718818eSStefano Zampini /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 11459566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL)); 11466718818eSStefano Zampini 11479566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 11489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 11499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 11509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL)); 11519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL)); 
11529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL)); 11539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL)); 11549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 11559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL)); 11569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL)); 11573d0639e7SStefano Zampini #if defined(PETSC_HAVE_CUDA) 11589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL)); 11593d0639e7SStefano Zampini #endif 11603d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS) 11619566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL)); 11623d0639e7SStefano Zampini #endif 11639566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 11645d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 11659566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL)); 11665d7652ecSHong Zhang #endif 1167d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 11689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL)); 1169d24d4204SJose E. 
Roman #endif 117063c07aadSStefano Zampini #if defined(PETSC_HAVE_HYPRE) 11719566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL)); 11729566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL)); 117363c07aadSStefano Zampini #endif 11749566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 11759566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL)); 11769566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL)); 11779566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL)); 11789566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL)); 11799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL)); 11803d0639e7SStefano Zampini #if defined(PETSC_HAVE_MKL_SPARSE) 11819566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 11823d0639e7SStefano Zampini #endif 11839566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 11849566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 11859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 11869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 11879566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 11883a40ed3dSBarry Smith PetscFunctionReturn(0); 11891eb62cbbSBarry Smith } 1190ee50ffe9SBarry Smith 1191dfbe8321SBarry Smith PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 11928e2fed03SBarry Smith { 11938e2fed03SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 11948e2fed03SBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 11958e2fed03SBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 11963ea6fe3dSLisandro Dalcin const PetscInt *garray = aij->garray; 11972e5835c6SStefano Zampini const PetscScalar *aa,*ba; 11983ea6fe3dSLisandro Dalcin PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 11993ea6fe3dSLisandro Dalcin PetscInt *rowlens; 12003ea6fe3dSLisandro Dalcin PetscInt *colidxs; 12013ea6fe3dSLisandro Dalcin PetscScalar *matvals; 12028e2fed03SBarry Smith 12038e2fed03SBarry Smith PetscFunctionBegin; 12049566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 12053ea6fe3dSLisandro Dalcin 12063ea6fe3dSLisandro Dalcin M = mat->rmap->N; 12073ea6fe3dSLisandro Dalcin N = mat->cmap->N; 12083ea6fe3dSLisandro Dalcin m = mat->rmap->n; 12093ea6fe3dSLisandro Dalcin rs = mat->rmap->rstart; 12103ea6fe3dSLisandro Dalcin cs = mat->cmap->rstart; 12118e2fed03SBarry Smith nz = A->nz + B->nz; 12123ea6fe3dSLisandro Dalcin 12133ea6fe3dSLisandro Dalcin /* write matrix header */ 12140700a824SBarry Smith header[0] = MAT_FILE_CLASSID; 12153ea6fe3dSLisandro Dalcin header[1] = M; header[2] = N; header[3] = nz; 12169566063dSJacob Faibussowitsch PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 12179566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 12188e2fed03SBarry Smith 12193ea6fe3dSLisandro Dalcin /* fill in and store row lengths */ 12209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m,&rowlens)); 12213ea6fe3dSLisandro Dalcin for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + 
B->i[i+1] - B->i[i]; 12229566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 12239566063dSJacob Faibussowitsch PetscCall(PetscFree(rowlens)); 12248e2fed03SBarry Smith 12253ea6fe3dSLisandro Dalcin /* fill in and store column indices */ 12269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz,&colidxs)); 12273ea6fe3dSLisandro Dalcin for (cnt=0, i=0; i<m; i++) { 12283ea6fe3dSLisandro Dalcin for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 12293ea6fe3dSLisandro Dalcin if (garray[B->j[jb]] > cs) break; 12303ea6fe3dSLisandro Dalcin colidxs[cnt++] = garray[B->j[jb]]; 12318e2fed03SBarry Smith } 12323ea6fe3dSLisandro Dalcin for (ja=A->i[i]; ja<A->i[i+1]; ja++) 12333ea6fe3dSLisandro Dalcin colidxs[cnt++] = A->j[ja] + cs; 12343ea6fe3dSLisandro Dalcin for (; jb<B->i[i+1]; jb++) 12353ea6fe3dSLisandro Dalcin colidxs[cnt++] = garray[B->j[jb]]; 12368e2fed03SBarry Smith } 123708401ef6SPierre Jolivet PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 12389566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 12399566063dSJacob Faibussowitsch PetscCall(PetscFree(colidxs)); 12408e2fed03SBarry Smith 12413ea6fe3dSLisandro Dalcin /* fill in and store nonzero values */ 12429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 12439566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 12449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz,&matvals)); 12453ea6fe3dSLisandro Dalcin for (cnt=0, i=0; i<m; i++) { 12463ea6fe3dSLisandro Dalcin for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 12473ea6fe3dSLisandro Dalcin if (garray[B->j[jb]] > cs) break; 12482e5835c6SStefano Zampini matvals[cnt++] = ba[jb]; 12498e2fed03SBarry Smith } 12503ea6fe3dSLisandro Dalcin for (ja=A->i[i]; ja<A->i[i+1]; ja++) 12512e5835c6SStefano Zampini matvals[cnt++] = aa[ja]; 
12523ea6fe3dSLisandro Dalcin for (; jb<B->i[i+1]; jb++) 12532e5835c6SStefano Zampini matvals[cnt++] = ba[jb]; 12548e2fed03SBarry Smith } 12559566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 12569566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 125708401ef6SPierre Jolivet PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 12589566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 12599566063dSJacob Faibussowitsch PetscCall(PetscFree(matvals)); 12608e2fed03SBarry Smith 12613ea6fe3dSLisandro Dalcin /* write block size option to the viewer's .info file */ 12629566063dSJacob Faibussowitsch PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 12638e2fed03SBarry Smith PetscFunctionReturn(0); 12648e2fed03SBarry Smith } 12658e2fed03SBarry Smith 12669804daf3SBarry Smith #include <petscdraw.h> 1267dfbe8321SBarry Smith PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1268416022c9SBarry Smith { 126944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 127032dcc486SBarry Smith PetscMPIInt rank = aij->rank,size = aij->size; 1271ace3abfcSBarry Smith PetscBool isdraw,iascii,isbinary; 1272b0a32e0cSBarry Smith PetscViewer sviewer; 1273f3ef73ceSBarry Smith PetscViewerFormat format; 1274416022c9SBarry Smith 12753a40ed3dSBarry Smith PetscFunctionBegin; 12769566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 12779566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 12789566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 127932077d6dSBarry Smith if (iascii) { 12809566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer,&format)); 1281ef5fdb51SBarry 
Smith if (format == PETSC_VIEWER_LOAD_BALANCE) { 1282ef5fdb51SBarry Smith PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 12839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size,&nz)); 12849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1285ef5fdb51SBarry Smith for (i=0; i<(PetscInt)size; i++) { 1286ef5fdb51SBarry Smith nmax = PetscMax(nmax,nz[i]); 1287ef5fdb51SBarry Smith nmin = PetscMin(nmin,nz[i]); 1288ef5fdb51SBarry Smith navg += nz[i]; 1289ef5fdb51SBarry Smith } 12909566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 1291ef5fdb51SBarry Smith navg = navg/size; 12929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax)); 1293ef5fdb51SBarry Smith PetscFunctionReturn(0); 1294ef5fdb51SBarry Smith } 12959566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer,&format)); 1296456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 12974e220ebcSLois Curfman McInnes MatInfo info; 12986335e310SSatish Balay PetscInt *inodes=NULL; 1299923f20ffSKris Buschelman 13009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 13019566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 13029566063dSJacob Faibussowitsch PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 13039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1304923f20ffSKris Buschelman if (!inodes) { 13059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 13065f80ce2aSJacob Faibussowitsch 
rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 13076831982aSBarry Smith } else { 13089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 13095f80ce2aSJacob Faibussowitsch rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 13106831982aSBarry Smith } 13119566063dSJacob Faibussowitsch PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 13129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 13139566063dSJacob Faibussowitsch PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 13149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 13159566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 13169566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 13179566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 13189566063dSJacob Faibussowitsch PetscCall(VecScatterView(aij->Mvctx,viewer)); 13193a40ed3dSBarry Smith PetscFunctionReturn(0); 1320fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 1321923f20ffSKris Buschelman PetscInt inodecount,inodelimit,*inodes; 13229566063dSJacob Faibussowitsch PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1323923f20ffSKris Buschelman if (inodes) { 13249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1325d38fa0fbSBarry Smith } else { 13269566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1327d38fa0fbSBarry Smith } 13283a40ed3dSBarry Smith PetscFunctionReturn(0); 13294aedb280SBarry Smith } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 13304aedb280SBarry Smith PetscFunctionReturn(0); 133108480c60SBarry Smith } 13328e2fed03SBarry Smith } else if (isbinary) { 13338e2fed03SBarry Smith if (size == 1) { 13349566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 13359566063dSJacob Faibussowitsch PetscCall(MatView(aij->A,viewer)); 13368e2fed03SBarry Smith } else { 13379566063dSJacob Faibussowitsch PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 13388e2fed03SBarry Smith } 13398e2fed03SBarry Smith PetscFunctionReturn(0); 134071e56450SStefano Zampini } else if (iascii && size == 1) { 13419566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 13429566063dSJacob Faibussowitsch PetscCall(MatView(aij->A,viewer)); 134371e56450SStefano Zampini PetscFunctionReturn(0); 13440f5bd95cSBarry Smith } else if (isdraw) { 1345b0a32e0cSBarry Smith PetscDraw draw; 1346ace3abfcSBarry Smith PetscBool isnull; 13479566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 13489566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw,&isnull)); 1349383922c3SLisandro Dalcin if (isnull) PetscFunctionReturn(0); 135019bcc07fSBarry Smith } 135119bcc07fSBarry Smith 135271e56450SStefano Zampini { /* assemble the entire matrix onto first processor */ 135371e56450SStefano Zampini Mat A = NULL, Av; 135471e56450SStefano Zampini IS isrow,iscol; 13552ee70a88SLois Curfman McInnes 13569566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 13579566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 13589566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 13599566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 136071e56450SStefano Zampini /* The commented code uses MatCreateSubMatrices instead */ 136171e56450SStefano Zampini /* 136271e56450SStefano Zampini Mat *AA, A = NULL, Av; 136371e56450SStefano Zampini IS isrow,iscol; 136471e56450SStefano Zampini 13659566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 13669566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 13679566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1368dd400576SPatrick Sanan if (rank == 0) { 13699566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)AA[0])); 137071e56450SStefano Zampini A = AA[0]; 137171e56450SStefano Zampini Av = AA[0]; 137295373324SBarry Smith } 13739566063dSJacob Faibussowitsch PetscCall(MatDestroySubMatrices(1,&AA)); 137471e56450SStefano Zampini */ 13759566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol)); 13769566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isrow)); 137755843e3eSBarry Smith /* 137855843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1379b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 138055843e3eSBarry Smith */ 13819566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1382dd400576SPatrick Sanan if (rank == 0) { 138371e56450SStefano Zampini if (((PetscObject)mat)->name) { 13849566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 138571e56450SStefano Zampini } 13869566063dSJacob Faibussowitsch PetscCall(MatView_SeqAIJ(Av,sviewer)); 138795373324SBarry 
Smith } 13889566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 13899566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 13909566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 139195373324SBarry Smith } 13923a40ed3dSBarry Smith PetscFunctionReturn(0); 13931eb62cbbSBarry Smith } 13941eb62cbbSBarry Smith 1395dfbe8321SBarry Smith PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1396416022c9SBarry Smith { 1397ace3abfcSBarry Smith PetscBool iascii,isdraw,issocket,isbinary; 1398416022c9SBarry Smith 13993a40ed3dSBarry Smith PetscFunctionBegin; 14009566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 14019566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 14029566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 14039566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 140432077d6dSBarry Smith if (iascii || isdraw || isbinary || issocket) { 14059566063dSJacob Faibussowitsch PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1406416022c9SBarry Smith } 14073a40ed3dSBarry Smith PetscFunctionReturn(0); 1408416022c9SBarry Smith } 1409416022c9SBarry Smith 141041f059aeSBarry Smith PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 14118a729477SBarry Smith { 141244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1413f4259b30SLisandro Dalcin Vec bb1 = NULL; 1414ace3abfcSBarry Smith PetscBool hasop; 14158a729477SBarry Smith 14163a40ed3dSBarry Smith PetscFunctionBegin; 1417a2b30743SBarry Smith if (flag == SOR_APPLY_UPPER) { 14189566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1419a2b30743SBarry Smith 
PetscFunctionReturn(0); 1420a2b30743SBarry Smith } 1421a2b30743SBarry Smith 14224e980039SJed Brown if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 14239566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb,&bb1)); 14244e980039SJed Brown } 14254e980039SJed Brown 1426c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1427da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 14289566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 14292798e883SHong Zhang its--; 1430da3a660dSBarry Smith } 14312798e883SHong Zhang 14322798e883SHong Zhang while (its--) { 14339566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 14349566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 14352798e883SHong Zhang 1436c14dc6b6SHong Zhang /* update rhs: bb1 = bb - B*x */ 14379566063dSJacob Faibussowitsch PetscCall(VecScale(mat->lvec,-1.0)); 14389566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 14392798e883SHong Zhang 1440c14dc6b6SHong Zhang /* local sweep */ 14419566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 14422798e883SHong Zhang } 14433a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1444da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 14459566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 14462798e883SHong Zhang its--; 1447da3a660dSBarry Smith } 14482798e883SHong Zhang while (its--) { 14499566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 14509566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 14512798e883SHong Zhang 1452c14dc6b6SHong Zhang /* update rhs: bb1 
= bb - B*x */ 14539566063dSJacob Faibussowitsch PetscCall(VecScale(mat->lvec,-1.0)); 14549566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1455c14dc6b6SHong Zhang 1456c14dc6b6SHong Zhang /* local sweep */ 14579566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 14582798e883SHong Zhang } 14593a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1460da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 14619566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 14622798e883SHong Zhang its--; 1463da3a660dSBarry Smith } 14642798e883SHong Zhang while (its--) { 14659566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 14669566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 14672798e883SHong Zhang 1468c14dc6b6SHong Zhang /* update rhs: bb1 = bb - B*x */ 14699566063dSJacob Faibussowitsch PetscCall(VecScale(mat->lvec,-1.0)); 14709566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 14712798e883SHong Zhang 1472c14dc6b6SHong Zhang /* local sweep */ 14739566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 14742798e883SHong Zhang } 1475a7420bb7SBarry Smith } else if (flag & SOR_EISENSTAT) { 1476a7420bb7SBarry Smith Vec xx1; 1477a7420bb7SBarry Smith 14789566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb,&xx1)); 14799566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1480a7420bb7SBarry Smith 14819566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 14829566063dSJacob Faibussowitsch 
PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1483a7420bb7SBarry Smith if (!mat->diag) { 14849566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 14859566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(matin,mat->diag)); 1486a7420bb7SBarry Smith } 14879566063dSJacob Faibussowitsch PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1488bd0c2dcbSBarry Smith if (hasop) { 14899566063dSJacob Faibussowitsch PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1490bd0c2dcbSBarry Smith } else { 14919566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1492bd0c2dcbSBarry Smith } 14939566063dSJacob Faibussowitsch PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1494887ee2caSBarry Smith 14959566063dSJacob Faibussowitsch PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1496a7420bb7SBarry Smith 1497a7420bb7SBarry Smith /* local sweep */ 14989566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 14999566063dSJacob Faibussowitsch PetscCall(VecAXPY(xx,1.0,xx1)); 15009566063dSJacob Faibussowitsch PetscCall(VecDestroy(&xx1)); 1501ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1502c14dc6b6SHong Zhang 15039566063dSJacob Faibussowitsch PetscCall(VecDestroy(&bb1)); 1504a0808db4SHong Zhang 15057b6c816cSBarry Smith matin->factorerrortype = mat->A->factorerrortype; 15063a40ed3dSBarry Smith PetscFunctionReturn(0); 15078a729477SBarry Smith } 1508a66be287SLois Curfman McInnes 150942e855d1Svictor PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 151042e855d1Svictor { 151172e6a0cfSJed Brown Mat aA,aB,Aperm; 151272e6a0cfSJed Brown const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 151372e6a0cfSJed Brown PetscScalar *aa,*ba; 151472e6a0cfSJed Brown PetscInt 
i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 151572e6a0cfSJed Brown PetscSF rowsf,sf; 15160298fd71SBarry Smith IS parcolp = NULL; 151772e6a0cfSJed Brown PetscBool done; 151842e855d1Svictor 151942e855d1Svictor PetscFunctionBegin; 15209566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A,&m,&n)); 15219566063dSJacob Faibussowitsch PetscCall(ISGetIndices(rowp,&rwant)); 15229566063dSJacob Faibussowitsch PetscCall(ISGetIndices(colp,&cwant)); 15239566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 152472e6a0cfSJed Brown 152572e6a0cfSJed Brown /* Invert row permutation to find out where my rows should go */ 15269566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 15279566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 15289566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(rowsf)); 152972e6a0cfSJed Brown for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 15309566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 15319566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 153272e6a0cfSJed Brown 153372e6a0cfSJed Brown /* Invert column permutation to find out where my columns should go */ 15349566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 15359566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 15369566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf)); 153772e6a0cfSJed Brown for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 15389566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 15399566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 15409566063dSJacob Faibussowitsch 
PetscCall(PetscSFDestroy(&sf)); 154172e6a0cfSJed Brown 15429566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(rowp,&rwant)); 15439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(colp,&cwant)); 15449566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 154572e6a0cfSJed Brown 154672e6a0cfSJed Brown /* Find out where my gcols should go */ 15479566063dSJacob Faibussowitsch PetscCall(MatGetSize(aB,NULL,&ng)); 15489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ng,&gcdest)); 15499566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 15509566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 15519566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf)); 15529566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 15539566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 15549566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 155572e6a0cfSJed Brown 15569566063dSJacob Faibussowitsch PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 15579566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 15589566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 155972e6a0cfSJed Brown for (i=0; i<m; i++) { 1560131c27b5Sprj- PetscInt row = rdest[i]; 1561131c27b5Sprj- PetscMPIInt rowner; 15629566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 156372e6a0cfSJed Brown for (j=ai[i]; j<ai[i+1]; j++) { 1564131c27b5Sprj- PetscInt col = cdest[aj[j]]; 1565131c27b5Sprj- PetscMPIInt cowner; 15669566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 156772e6a0cfSJed Brown if (rowner == cowner) dnnz[i]++; 156872e6a0cfSJed Brown else 
onnz[i]++; 156972e6a0cfSJed Brown } 157072e6a0cfSJed Brown for (j=bi[i]; j<bi[i+1]; j++) { 1571131c27b5Sprj- PetscInt col = gcdest[bj[j]]; 1572131c27b5Sprj- PetscMPIInt cowner; 15739566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 157472e6a0cfSJed Brown if (rowner == cowner) dnnz[i]++; 157572e6a0cfSJed Brown else onnz[i]++; 157672e6a0cfSJed Brown } 157772e6a0cfSJed Brown } 15789566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 15799566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 15809566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 15819566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 15829566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&rowsf)); 158372e6a0cfSJed Brown 15849566063dSJacob Faibussowitsch PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 15859566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(aA,&aa)); 15869566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(aB,&ba)); 158772e6a0cfSJed Brown for (i=0; i<m; i++) { 158872e6a0cfSJed Brown PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1589970468b0SJed Brown PetscInt j0,rowlen; 159072e6a0cfSJed Brown rowlen = ai[i+1] - ai[i]; 1591970468b0SJed Brown for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1592970468b0SJed Brown for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 15939566063dSJacob Faibussowitsch PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1594970468b0SJed Brown } 159572e6a0cfSJed Brown rowlen = bi[i+1] - bi[i]; 1596970468b0SJed Brown for (j0=j=0; j<rowlen; j0=j) { 1597970468b0SJed Brown for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = 
gcdest[bj[bi[i]+j]]; 15989566063dSJacob Faibussowitsch PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1599970468b0SJed Brown } 160072e6a0cfSJed Brown } 16019566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 16029566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 16039566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 16049566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 16059566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 16069566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 16079566063dSJacob Faibussowitsch PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 16089566063dSJacob Faibussowitsch PetscCall(PetscFree3(work,rdest,cdest)); 16099566063dSJacob Faibussowitsch PetscCall(PetscFree(gcdest)); 16109566063dSJacob Faibussowitsch if (parcolp) PetscCall(ISDestroy(&colp)); 161172e6a0cfSJed Brown *B = Aperm; 161242e855d1Svictor PetscFunctionReturn(0); 161342e855d1Svictor } 161442e855d1Svictor 1615c5e4d11fSDmitry Karpeev PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1616c5e4d11fSDmitry Karpeev { 1617c5e4d11fSDmitry Karpeev Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1618c5e4d11fSDmitry Karpeev 1619c5e4d11fSDmitry Karpeev PetscFunctionBegin; 16209566063dSJacob Faibussowitsch PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1621c5e4d11fSDmitry Karpeev if (ghosts) *ghosts = aij->garray; 1622c5e4d11fSDmitry Karpeev PetscFunctionReturn(0); 1623c5e4d11fSDmitry Karpeev } 1624c5e4d11fSDmitry Karpeev 1625dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1626a66be287SLois Curfman McInnes { 1627a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1628a66be287SLois Curfman McInnes Mat A = mat->A,B = mat->B; 16293966268fSBarry Smith 
PetscLogDouble isend[5],irecv[5]; 1630a66be287SLois Curfman McInnes 16313a40ed3dSBarry Smith PetscFunctionBegin; 16324e220ebcSLois Curfman McInnes info->block_size = 1.0; 16339566063dSJacob Faibussowitsch PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 16342205254eSKarl Rupp 16354e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 16364e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 16372205254eSKarl Rupp 16389566063dSJacob Faibussowitsch PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 16392205254eSKarl Rupp 16404e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 16414e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 1642a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 16434e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 16444e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 16454e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 16464e220ebcSLois Curfman McInnes info->memory = isend[3]; 16474e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 1648a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 16491c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 16502205254eSKarl Rupp 16514e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 16524e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 16534e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 16544e220ebcSLois Curfman McInnes info->memory = irecv[3]; 16554e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1656a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 16571c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 16582205254eSKarl Rupp 16594e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 
16604e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 16614e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 16624e220ebcSLois Curfman McInnes info->memory = irecv[3]; 16634e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1664a66be287SLois Curfman McInnes } 16654e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 16664e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 16674e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 16683a40ed3dSBarry Smith PetscFunctionReturn(0); 1669a66be287SLois Curfman McInnes } 1670a66be287SLois Curfman McInnes 1671ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1672c74985f6SBarry Smith { 1673c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1674c74985f6SBarry Smith 16753a40ed3dSBarry Smith PetscFunctionBegin; 167612c028f9SKris Buschelman switch (op) { 1677512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 167812c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 167928b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1680a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 168112c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 16820ad02fcaSStefano Zampini case MAT_USE_INODES: 168312c028f9SKris Buschelman case MAT_IGNORE_ZERO_ENTRIES: 16841a2c6b5cSJunchao Zhang case MAT_FORM_EXPLICIT_TRANSPOSE: 1685fa1f0d2cSMatthew G Knepley MatCheckPreallocated(A,1); 16869566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A,op,flg)); 16879566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B,op,flg)); 168812c028f9SKris Buschelman break; 168912c028f9SKris Buschelman case MAT_ROW_ORIENTED: 169043674050SBarry Smith MatCheckPreallocated(A,1); 16914e0d8c25SBarry Smith a->roworiented = flg; 16922205254eSKarl Rupp 16939566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A,op,flg)); 16949566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B,op,flg)); 169512c028f9SKris Buschelman break; 
16968c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1697071fcb05SBarry Smith case MAT_SORTED_FULL: 16989566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 169912c028f9SKris Buschelman break; 170012c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 17015c0f0b64SBarry Smith a->donotstash = flg; 170212c028f9SKris Buschelman break; 1703c8ca1fbcSVaclav Hapla /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1704ffa07934SHong Zhang case MAT_SPD: 170577e54ba9SKris Buschelman case MAT_SYMMETRIC: 170677e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 1707bf108f30SBarry Smith case MAT_HERMITIAN: 1708bf108f30SBarry Smith case MAT_SYMMETRY_ETERNAL: 170977e54ba9SKris Buschelman break; 1710c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1711c10200c1SHong Zhang A->submat_singleis = flg; 1712c10200c1SHong Zhang break; 1713957cac9fSHong Zhang case MAT_STRUCTURE_ONLY: 1714957cac9fSHong Zhang /* The option is handled directly by MatSetOption() */ 1715957cac9fSHong Zhang break; 171612c028f9SKris Buschelman default: 171798921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 17183a40ed3dSBarry Smith } 17193a40ed3dSBarry Smith PetscFunctionReturn(0); 1720c74985f6SBarry Smith } 1721c74985f6SBarry Smith 1722b1d57f15SBarry Smith PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 172339e00950SLois Curfman McInnes { 1724154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 172587828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1726d0f46423SBarry Smith PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1727d0f46423SBarry Smith PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1728b1d57f15SBarry Smith PetscInt *cmap,*idx_p; 172939e00950SLois Curfman McInnes 17303a40ed3dSBarry Smith PetscFunctionBegin; 173128b400f6SJacob 
Faibussowitsch PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 17327a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 17337a0afa10SBarry Smith 173470f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 17357a0afa10SBarry Smith /* 17367a0afa10SBarry Smith allocate enough space to hold information from the longest row. 17377a0afa10SBarry Smith */ 17387a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1739b1d57f15SBarry Smith PetscInt max = 1,tmp; 1740d0f46423SBarry Smith for (i=0; i<matin->rmap->n; i++) { 17417a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 17422205254eSKarl Rupp if (max < tmp) max = tmp; 17437a0afa10SBarry Smith } 17449566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 17457a0afa10SBarry Smith } 17467a0afa10SBarry Smith 17472c71b3e2SJacob Faibussowitsch PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1748abc0e9e4SLois Curfman McInnes lrow = row - rstart; 174939e00950SLois Curfman McInnes 1750154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1751f4259b30SLisandro Dalcin if (!v) {pvA = NULL; pvB = NULL;} 1752f4259b30SLisandro Dalcin if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 17539566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 17549566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1755154123eaSLois Curfman McInnes nztot = nzA + nzB; 1756154123eaSLois Curfman McInnes 175770f0671dSBarry Smith cmap = mat->garray; 1758154123eaSLois Curfman McInnes if (v || idx) { 1759154123eaSLois Curfman McInnes if (nztot) { 1760154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1761b1d57f15SBarry Smith PetscInt imark = -1; 1762154123eaSLois Curfman McInnes if (v) { 
176370f0671dSBarry Smith *v = v_p = mat->rowvalues; 176439e00950SLois Curfman McInnes for (i=0; i<nzB; i++) { 176570f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1766154123eaSLois Curfman McInnes else break; 1767154123eaSLois Curfman McInnes } 1768154123eaSLois Curfman McInnes imark = i; 176970f0671dSBarry Smith for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 177070f0671dSBarry Smith for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1771154123eaSLois Curfman McInnes } 1772154123eaSLois Curfman McInnes if (idx) { 177370f0671dSBarry Smith *idx = idx_p = mat->rowindices; 177470f0671dSBarry Smith if (imark > -1) { 177570f0671dSBarry Smith for (i=0; i<imark; i++) { 177670f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 177770f0671dSBarry Smith } 177870f0671dSBarry Smith } else { 1779154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) { 178070f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1781154123eaSLois Curfman McInnes else break; 1782154123eaSLois Curfman McInnes } 1783154123eaSLois Curfman McInnes imark = i; 178470f0671dSBarry Smith } 178570f0671dSBarry Smith for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 178670f0671dSBarry Smith for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 178739e00950SLois Curfman McInnes } 17883f97c4b0SBarry Smith } else { 1789f4259b30SLisandro Dalcin if (idx) *idx = NULL; 1790f4259b30SLisandro Dalcin if (v) *v = NULL; 17911ca473b0SSatish Balay } 1792154123eaSLois Curfman McInnes } 179339e00950SLois Curfman McInnes *nz = nztot; 17949566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 17959566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 17963a40ed3dSBarry Smith PetscFunctionReturn(0); 179739e00950SLois Curfman McInnes } 179839e00950SLois Curfman McInnes 1799b1d57f15SBarry Smith PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 
180039e00950SLois Curfman McInnes { 18017a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18023a40ed3dSBarry Smith 18033a40ed3dSBarry Smith PetscFunctionBegin; 180428b400f6SJacob Faibussowitsch PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 18057a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 18063a40ed3dSBarry Smith PetscFunctionReturn(0); 180739e00950SLois Curfman McInnes } 180839e00950SLois Curfman McInnes 1809dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1810855ac2c5SLois Curfman McInnes { 1811855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1812ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1813d0f46423SBarry Smith PetscInt i,j,cstart = mat->cmap->rstart; 1814329f5518SBarry Smith PetscReal sum = 0.0; 1815fff043a9SJunchao Zhang const MatScalar *v,*amata,*bmata; 181604ca555eSLois Curfman McInnes 18173a40ed3dSBarry Smith PetscFunctionBegin; 181817699dbbSLois Curfman McInnes if (aij->size == 1) { 18199566063dSJacob Faibussowitsch PetscCall(MatNorm(aij->A,type,norm)); 182037fa93a5SLois Curfman McInnes } else { 18219566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 18229566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 182304ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 1824fff043a9SJunchao Zhang v = amata; 182504ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++) { 1826329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 182704ca555eSLois Curfman McInnes } 1828fff043a9SJunchao Zhang v = bmata; 182904ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++) { 1830329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 183104ca555eSLois Curfman McInnes } 18321c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 
18338f1a2a5eSBarry Smith *norm = PetscSqrtReal(*norm); 18349566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 18353a40ed3dSBarry Smith } else if (type == NORM_1) { /* max column norm */ 1836329f5518SBarry Smith PetscReal *tmp,*tmp2; 1837b1d57f15SBarry Smith PetscInt *jj,*garray = aij->garray; 18389566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 18399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 184004ca555eSLois Curfman McInnes *norm = 0.0; 1841fff043a9SJunchao Zhang v = amata; jj = amat->j; 184204ca555eSLois Curfman McInnes for (j=0; j<amat->nz; j++) { 1843bfec09a0SHong Zhang tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 184404ca555eSLois Curfman McInnes } 1845fff043a9SJunchao Zhang v = bmata; jj = bmat->j; 184604ca555eSLois Curfman McInnes for (j=0; j<bmat->nz; j++) { 1847bfec09a0SHong Zhang tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 184804ca555eSLois Curfman McInnes } 18491c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1850d0f46423SBarry Smith for (j=0; j<mat->cmap->N; j++) { 185104ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 185204ca555eSLois Curfman McInnes } 18539566063dSJacob Faibussowitsch PetscCall(PetscFree(tmp)); 18549566063dSJacob Faibussowitsch PetscCall(PetscFree(tmp2)); 18559566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 18563a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 1857329f5518SBarry Smith PetscReal ntemp = 0.0; 1858d0f46423SBarry Smith for (j=0; j<aij->A->rmap->n; j++) { 1859fff043a9SJunchao Zhang v = amata + amat->i[j]; 186004ca555eSLois Curfman McInnes sum = 0.0; 186104ca555eSLois Curfman McInnes for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1862cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 186304ca555eSLois Curfman McInnes } 1864fff043a9SJunchao Zhang v = bmata + bmat->i[j]; 
186504ca555eSLois Curfman McInnes for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1866cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 186704ca555eSLois Curfman McInnes } 1868515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 186904ca555eSLois Curfman McInnes } 18701c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 18719566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1872ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 18739566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 18749566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 187537fa93a5SLois Curfman McInnes } 18763a40ed3dSBarry Smith PetscFunctionReturn(0); 1877855ac2c5SLois Curfman McInnes } 1878855ac2c5SLois Curfman McInnes 1879fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1880b7c46309SBarry Smith { 1881a8661f62Sandi selinger Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1882a8661f62Sandi selinger Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1883071fcb05SBarry Smith PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1884071fcb05SBarry Smith const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1885a8661f62Sandi selinger Mat B,A_diag,*B_diag; 1886ce496241SStefano Zampini const MatScalar *pbv,*bv; 1887b7c46309SBarry Smith 18883a40ed3dSBarry Smith PetscFunctionBegin; 188980bcc5a1SJed Brown ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1890da668accSHong Zhang ai = Aloc->i; aj = Aloc->j; 1891da668accSHong Zhang bi = Bloc->i; bj = Bloc->j; 1892fc73b1b3SBarry Smith if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 189380bcc5a1SJed Brown PetscInt *d_nnz,*g_nnz,*o_nnz; 189480bcc5a1SJed Brown PetscSFNode *oloc; 1895713c93b4SJed Brown 
PETSC_UNUSED PetscSF sf; 189680bcc5a1SJed Brown 18979566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 189880bcc5a1SJed Brown /* compute d_nnz for preallocation */ 18999566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(d_nnz,na)); 1900cbc6b225SStefano Zampini for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 190180bcc5a1SJed Brown /* compute local off-diagonal contributions */ 19029566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(g_nnz,nb)); 190380bcc5a1SJed Brown for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 190480bcc5a1SJed Brown /* map those to global */ 19059566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 19069566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 19079566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf)); 19089566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(o_nnz,na)); 19099566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 19109566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 19119566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 1912d4bb536fSBarry Smith 19139566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 19149566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 19159566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 19169566063dSJacob Faibussowitsch PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 19179566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 19189566063dSJacob Faibussowitsch PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1919fc4dec0aSBarry Smith } else { 1920fc4dec0aSBarry Smith B = *matout; 19219566063dSJacob Faibussowitsch PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1922fc4dec0aSBarry Smith } 
1923b7c46309SBarry Smith 1924f79cb1a0Sandi selinger b = (Mat_MPIAIJ*)B->data; 1925a8661f62Sandi selinger A_diag = a->A; 1926a8661f62Sandi selinger B_diag = &b->A; 1927a8661f62Sandi selinger sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1928a8661f62Sandi selinger A_diag_ncol = A_diag->cmap->N; 1929a8661f62Sandi selinger B_diag_ilen = sub_B_diag->ilen; 1930a8661f62Sandi selinger B_diag_i = sub_B_diag->i; 1931f79cb1a0Sandi selinger 1932f79cb1a0Sandi selinger /* Set ilen for diagonal of B */ 1933a8661f62Sandi selinger for (i=0; i<A_diag_ncol; i++) { 1934a8661f62Sandi selinger B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1935b7c46309SBarry Smith } 1936f79cb1a0Sandi selinger 1937a8661f62Sandi selinger /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1938a8661f62Sandi selinger very quickly (=without using MatSetValues), because all writes are local. */ 19399566063dSJacob Faibussowitsch PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1940f79cb1a0Sandi selinger 1941b7c46309SBarry Smith /* copy over the B part */ 19429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[mb],&cols)); 19439566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1944ce496241SStefano Zampini pbv = bv; 1945d0f46423SBarry Smith row = A->rmap->rstart; 19462205254eSKarl Rupp for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 194761a2fbbaSHong Zhang cols_tmp = cols; 1948da668accSHong Zhang for (i=0; i<mb; i++) { 1949da668accSHong Zhang ncol = bi[i+1]-bi[i]; 19509566063dSJacob Faibussowitsch PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 19512205254eSKarl Rupp row++; 1952ce496241SStefano Zampini pbv += ncol; cols_tmp += ncol; 1953b7c46309SBarry Smith } 19549566063dSJacob Faibussowitsch PetscCall(PetscFree(cols)); 19559566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1956fc73b1b3SBarry Smith 19579566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 
19589566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1959cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 19600de55854SLois Curfman McInnes *matout = B; 19610de55854SLois Curfman McInnes } else { 19629566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A,&B)); 19630de55854SLois Curfman McInnes } 19643a40ed3dSBarry Smith PetscFunctionReturn(0); 1965b7c46309SBarry Smith } 1966b7c46309SBarry Smith 1967dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1968a008b906SSatish Balay { 19694b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 19704b967eb1SSatish Balay Mat a = aij->A,b = aij->B; 1971b1d57f15SBarry Smith PetscInt s1,s2,s3; 1972a008b906SSatish Balay 19733a40ed3dSBarry Smith PetscFunctionBegin; 19749566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat,&s2,&s3)); 19754b967eb1SSatish Balay if (rr) { 19769566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rr,&s1)); 197708401ef6SPierre Jolivet PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 19784b967eb1SSatish Balay /* Overlap communication with computation. 
*/ 19799566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1980a008b906SSatish Balay } 19814b967eb1SSatish Balay if (ll) { 19829566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(ll,&s1)); 198308401ef6SPierre Jolivet PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 19849566063dSJacob Faibussowitsch PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 19854b967eb1SSatish Balay } 19864b967eb1SSatish Balay /* scale the diagonal block */ 19879566063dSJacob Faibussowitsch PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 19884b967eb1SSatish Balay 19894b967eb1SSatish Balay if (rr) { 19904b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 19919566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 19929566063dSJacob Faibussowitsch PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 19934b967eb1SSatish Balay } 19943a40ed3dSBarry Smith PetscFunctionReturn(0); 1995a008b906SSatish Balay } 1996a008b906SSatish Balay 1997dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1998bb5a7306SBarry Smith { 1999bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 20003a40ed3dSBarry Smith 20013a40ed3dSBarry Smith PetscFunctionBegin; 20029566063dSJacob Faibussowitsch PetscCall(MatSetUnfactored(a->A)); 20033a40ed3dSBarry Smith PetscFunctionReturn(0); 2004bb5a7306SBarry Smith } 2005bb5a7306SBarry Smith 2006ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2007d4bb536fSBarry Smith { 2008d4bb536fSBarry Smith Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2009d4bb536fSBarry Smith Mat a,b,c,d; 2010ace3abfcSBarry Smith PetscBool flg; 2011d4bb536fSBarry Smith 20123a40ed3dSBarry Smith PetscFunctionBegin; 2013d4bb536fSBarry Smith a = matA->A; b = matA->B; 2014d4bb536fSBarry Smith c = matB->A; d = matB->B; 2015d4bb536fSBarry Smith 
20169566063dSJacob Faibussowitsch PetscCall(MatEqual(a,c,&flg)); 2017abc0a331SBarry Smith if (flg) { 20189566063dSJacob Faibussowitsch PetscCall(MatEqual(b,d,&flg)); 2019d4bb536fSBarry Smith } 20201c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 20213a40ed3dSBarry Smith PetscFunctionReturn(0); 2022d4bb536fSBarry Smith } 2023d4bb536fSBarry Smith 2024dfbe8321SBarry Smith PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2025cb5b572fSBarry Smith { 2026cb5b572fSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2027cb5b572fSBarry Smith Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2028cb5b572fSBarry Smith 2029cb5b572fSBarry Smith PetscFunctionBegin; 203033f4a19fSKris Buschelman /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 203133f4a19fSKris Buschelman if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2032cb5b572fSBarry Smith /* because of the column compression in the off-processor part of the matrix a->B, 2033cb5b572fSBarry Smith the number of columns in a->B and b->B may be different, hence we cannot call 2034cb5b572fSBarry Smith the MatCopy() directly on the two parts. 
If need be, we can provide a more 2035cb5b572fSBarry Smith efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2036cb5b572fSBarry Smith then copying the submatrices */ 20379566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A,B,str)); 2038cb5b572fSBarry Smith } else { 20399566063dSJacob Faibussowitsch PetscCall(MatCopy(a->A,b->A,str)); 20409566063dSJacob Faibussowitsch PetscCall(MatCopy(a->B,b->B,str)); 2041cb5b572fSBarry Smith } 20429566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2043cb5b572fSBarry Smith PetscFunctionReturn(0); 2044cb5b572fSBarry Smith } 2045cb5b572fSBarry Smith 20464994cf47SJed Brown PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2047273d9f13SBarry Smith { 2048273d9f13SBarry Smith PetscFunctionBegin; 20499566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2050273d9f13SBarry Smith PetscFunctionReturn(0); 2051273d9f13SBarry Smith } 2052273d9f13SBarry Smith 2053001ddc4fSHong Zhang /* 2054001ddc4fSHong Zhang Computes the number of nonzeros per row needed for preallocation when X and Y 2055001ddc4fSHong Zhang have different nonzero structure. 
2056001ddc4fSHong Zhang */ 2057001ddc4fSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 205895b7e79eSJed Brown { 2059001ddc4fSHong Zhang PetscInt i,j,k,nzx,nzy; 206095b7e79eSJed Brown 206195b7e79eSJed Brown PetscFunctionBegin; 206295b7e79eSJed Brown /* Set the number of nonzeros in the new matrix */ 206395b7e79eSJed Brown for (i=0; i<m; i++) { 2064001ddc4fSHong Zhang const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2065001ddc4fSHong Zhang nzx = xi[i+1] - xi[i]; 2066001ddc4fSHong Zhang nzy = yi[i+1] - yi[i]; 206795b7e79eSJed Brown nnz[i] = 0; 206895b7e79eSJed Brown for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2069001ddc4fSHong Zhang for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2070001ddc4fSHong Zhang if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 207195b7e79eSJed Brown nnz[i]++; 207295b7e79eSJed Brown } 207395b7e79eSJed Brown for (; k<nzy; k++) nnz[i]++; 207495b7e79eSJed Brown } 207595b7e79eSJed Brown PetscFunctionReturn(0); 207695b7e79eSJed Brown } 207795b7e79eSJed Brown 2078001ddc4fSHong Zhang /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2079001ddc4fSHong Zhang static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2080001ddc4fSHong Zhang { 2081001ddc4fSHong Zhang PetscInt m = Y->rmap->N; 2082001ddc4fSHong Zhang Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2083001ddc4fSHong Zhang Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2084001ddc4fSHong Zhang 2085001ddc4fSHong Zhang PetscFunctionBegin; 20869566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2087001ddc4fSHong Zhang PetscFunctionReturn(0); 2088001ddc4fSHong Zhang } 2089001ddc4fSHong Zhang 2090f4df32b1SMatthew Knepley 
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2091ac90fabeSBarry Smith { 2092ac90fabeSBarry Smith Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2093ac90fabeSBarry Smith 2094ac90fabeSBarry Smith PetscFunctionBegin; 2095ac90fabeSBarry Smith if (str == SAME_NONZERO_PATTERN) { 20969566063dSJacob Faibussowitsch PetscCall(MatAXPY(yy->A,a,xx->A,str)); 20979566063dSJacob Faibussowitsch PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2098ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 20999566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y,a,X,str)); 2100ac90fabeSBarry Smith } else { 21019f5f6813SShri Abhyankar Mat B; 21029f5f6813SShri Abhyankar PetscInt *nnz_d,*nnz_o; 2103d9d719b4SStefano Zampini 21049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 21059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 21069566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 21079566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 21089566063dSJacob Faibussowitsch PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 21099566063dSJacob Faibussowitsch PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 21109566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 21119566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 21129566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 21139566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 21149566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y,&B)); 21159566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_d)); 21169566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_o)); 2117ac90fabeSBarry Smith } 2118ac90fabeSBarry Smith PetscFunctionReturn(0); 
2119ac90fabeSBarry Smith } 2120ac90fabeSBarry Smith 21212726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2122354c94deSBarry Smith 21237087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2124354c94deSBarry Smith { 21255f80ce2aSJacob Faibussowitsch PetscFunctionBegin; 21265f80ce2aSJacob Faibussowitsch if (PetscDefined(USE_COMPLEX)) { 2127354c94deSBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2128354c94deSBarry Smith 21299566063dSJacob Faibussowitsch PetscCall(MatConjugate_SeqAIJ(aij->A)); 21309566063dSJacob Faibussowitsch PetscCall(MatConjugate_SeqAIJ(aij->B)); 21315f80ce2aSJacob Faibussowitsch } 2132354c94deSBarry Smith PetscFunctionReturn(0); 2133354c94deSBarry Smith } 2134354c94deSBarry Smith 213599cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIAIJ(Mat A) 213699cafbc1SBarry Smith { 213799cafbc1SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 213899cafbc1SBarry Smith 213999cafbc1SBarry Smith PetscFunctionBegin; 21409566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->A)); 21419566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->B)); 214299cafbc1SBarry Smith PetscFunctionReturn(0); 214399cafbc1SBarry Smith } 214499cafbc1SBarry Smith 214599cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 214699cafbc1SBarry Smith { 214799cafbc1SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 214899cafbc1SBarry Smith 214999cafbc1SBarry Smith PetscFunctionBegin; 21509566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->A)); 21519566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->B)); 215299cafbc1SBarry Smith PetscFunctionReturn(0); 215399cafbc1SBarry Smith } 215499cafbc1SBarry Smith 2155c91732d9SHong Zhang PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2156c91732d9SHong Zhang { 2157c91732d9SHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2158475b8b61SHong Zhang PetscInt i,*idxb = NULL,m = A->rmap->n; 2159475b8b61SHong Zhang PetscScalar *va,*vv; 2160475b8b61SHong Zhang Vec vB,vA; 
2161475b8b61SHong Zhang const PetscScalar *vb; 2162c91732d9SHong Zhang 2163c91732d9SHong Zhang PetscFunctionBegin; 21649566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 21659566063dSJacob Faibussowitsch PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2166475b8b61SHong Zhang 21679566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(vA,&va)); 2168c91732d9SHong Zhang if (idx) { 2169475b8b61SHong Zhang for (i=0; i<m; i++) { 2170d0f46423SBarry Smith if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2171c91732d9SHong Zhang } 2172c91732d9SHong Zhang } 2173c91732d9SHong Zhang 21749566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 21759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m,&idxb)); 21769566063dSJacob Faibussowitsch PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2177c91732d9SHong Zhang 21789566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v,&vv)); 21799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(vB,&vb)); 2180475b8b61SHong Zhang for (i=0; i<m; i++) { 2181c91732d9SHong Zhang if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2182475b8b61SHong Zhang vv[i] = vb[i]; 2183c91732d9SHong Zhang if (idx) idx[i] = a->garray[idxb[i]]; 2184475b8b61SHong Zhang } else { 2185475b8b61SHong Zhang vv[i] = va[i]; 21864e879edeSHong Zhang if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2187475b8b61SHong Zhang idx[i] = a->garray[idxb[i]]; 2188c91732d9SHong Zhang } 2189c91732d9SHong Zhang } 21909566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(vA,&vv)); 21919566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(vA,&va)); 21929566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(vB,&vb)); 21939566063dSJacob Faibussowitsch PetscCall(PetscFree(idxb)); 21949566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vA)); 21959566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vB)); 2196c91732d9SHong Zhang PetscFunctionReturn(0); 
2197c91732d9SHong Zhang } 2198c91732d9SHong Zhang 2199c87e5d42SMatthew Knepley PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2200c87e5d42SMatthew Knepley { 2201f07e67edSHong Zhang Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2202f07e67edSHong Zhang PetscInt m = A->rmap->n,n = A->cmap->n; 2203f07e67edSHong Zhang PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2204f07e67edSHong Zhang PetscInt *cmap = mat->garray; 2205f07e67edSHong Zhang PetscInt *diagIdx, *offdiagIdx; 2206f07e67edSHong Zhang Vec diagV, offdiagV; 2207ce496241SStefano Zampini PetscScalar *a, *diagA, *offdiagA; 2208ce496241SStefano Zampini const PetscScalar *ba,*bav; 2209f07e67edSHong Zhang PetscInt r,j,col,ncols,*bi,*bj; 2210f07e67edSHong Zhang Mat B = mat->B; 2211f07e67edSHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2212c87e5d42SMatthew Knepley 2213c87e5d42SMatthew Knepley PetscFunctionBegin; 2214f07e67edSHong Zhang /* When a process holds entire A and other processes have no entry */ 2215f07e67edSHong Zhang if (A->cmap->N == n) { 22169566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v,&diagA)); 22179566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 22189566063dSJacob Faibussowitsch PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 22199566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 22209566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v,&diagA)); 2221f07e67edSHong Zhang PetscFunctionReturn(0); 2222f07e67edSHong Zhang } else if (n == 0) { 2223f07e67edSHong Zhang if (m) { 22249566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v,&a)); 2225f07e67edSHong Zhang for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 22269566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v,&a)); 2227f07e67edSHong Zhang } 2228f07e67edSHong Zhang PetscFunctionReturn(0); 2229f07e67edSHong Zhang } 2230f07e67edSHong Zhang 22319566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 
22329566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 22339566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 22349566063dSJacob Faibussowitsch PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2235f07e67edSHong Zhang 2236f07e67edSHong Zhang /* Get offdiagIdx[] for implicit 0.0 */ 22379566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2238ce496241SStefano Zampini ba = bav; 2239f07e67edSHong Zhang bi = b->i; 2240f07e67edSHong Zhang bj = b->j; 22419566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2242f07e67edSHong Zhang for (r = 0; r < m; r++) { 2243f07e67edSHong Zhang ncols = bi[r+1] - bi[r]; 2244f07e67edSHong Zhang if (ncols == A->cmap->N - n) { /* Brow is dense */ 2245f07e67edSHong Zhang offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2246f07e67edSHong Zhang } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2247f07e67edSHong Zhang offdiagA[r] = 0.0; 2248f07e67edSHong Zhang 2249f07e67edSHong Zhang /* Find first hole in the cmap */ 2250f07e67edSHong Zhang for (j=0; j<ncols; j++) { 2251f07e67edSHong Zhang col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2252f07e67edSHong Zhang if (col > j && j < cstart) { 2253f07e67edSHong Zhang offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2254f07e67edSHong Zhang break; 2255f07e67edSHong Zhang } else if (col > j + n && j >= cstart) { 2256f07e67edSHong Zhang offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2257f07e67edSHong Zhang break; 2258f07e67edSHong Zhang } 2259f07e67edSHong Zhang } 22604e879edeSHong Zhang if (j == ncols && ncols < A->cmap->N - n) { 2261f07e67edSHong Zhang /* a hole is outside compressed Bcols */ 2262f07e67edSHong Zhang if (ncols == 0) { 2263f07e67edSHong Zhang if (cstart) { 2264f07e67edSHong Zhang offdiagIdx[r] = 0; 2265f07e67edSHong Zhang } else offdiagIdx[r] = cend; 2266f07e67edSHong Zhang } else { /* ncols > 
0 */ 2267f07e67edSHong Zhang offdiagIdx[r] = cmap[ncols-1] + 1; 2268f07e67edSHong Zhang if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2269f07e67edSHong Zhang } 2270f07e67edSHong Zhang } 2271f07e67edSHong Zhang } 2272f07e67edSHong Zhang 2273f07e67edSHong Zhang for (j=0; j<ncols; j++) { 2274f07e67edSHong Zhang if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2275f07e67edSHong Zhang ba++; bj++; 2276f07e67edSHong Zhang } 2277f07e67edSHong Zhang } 2278f07e67edSHong Zhang 22799566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &a)); 22809566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2281f07e67edSHong Zhang for (r = 0; r < m; ++r) { 2282f07e67edSHong Zhang if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2283f07e67edSHong Zhang a[r] = diagA[r]; 2284f07e67edSHong Zhang if (idx) idx[r] = cstart + diagIdx[r]; 2285f07e67edSHong Zhang } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2286f07e67edSHong Zhang a[r] = diagA[r]; 2287c87e5d42SMatthew Knepley if (idx) { 2288f07e67edSHong Zhang if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2289f07e67edSHong Zhang idx[r] = cstart + diagIdx[r]; 2290f07e67edSHong Zhang } else idx[r] = offdiagIdx[r]; 2291f07e67edSHong Zhang } 2292f07e67edSHong Zhang } else { 2293f07e67edSHong Zhang a[r] = offdiagA[r]; 2294f07e67edSHong Zhang if (idx) idx[r] = offdiagIdx[r]; 2295c87e5d42SMatthew Knepley } 2296c87e5d42SMatthew Knepley } 22979566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 22989566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &a)); 22999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 23009566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 23019566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 23029566063dSJacob Faibussowitsch PetscCall(VecDestroy(&offdiagV)); 
23039566063dSJacob Faibussowitsch PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2304c87e5d42SMatthew Knepley PetscFunctionReturn(0); 2305c87e5d42SMatthew Knepley } 2306c87e5d42SMatthew Knepley 230703bc72f1SMatthew Knepley PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 230803bc72f1SMatthew Knepley { 230903bc72f1SMatthew Knepley Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2310fa213d2fSHong Zhang PetscInt m = A->rmap->n,n = A->cmap->n; 2311fa213d2fSHong Zhang PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 231203bc72f1SMatthew Knepley PetscInt *cmap = mat->garray; 231303bc72f1SMatthew Knepley PetscInt *diagIdx, *offdiagIdx; 231403bc72f1SMatthew Knepley Vec diagV, offdiagV; 2315ce496241SStefano Zampini PetscScalar *a, *diagA, *offdiagA; 2316ce496241SStefano Zampini const PetscScalar *ba,*bav; 2317fa213d2fSHong Zhang PetscInt r,j,col,ncols,*bi,*bj; 2318fa213d2fSHong Zhang Mat B = mat->B; 2319fa213d2fSHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 232003bc72f1SMatthew Knepley 232103bc72f1SMatthew Knepley PetscFunctionBegin; 2322fa213d2fSHong Zhang /* When a process holds entire A and other processes have no entry */ 2323fa213d2fSHong Zhang if (A->cmap->N == n) { 23249566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v,&diagA)); 23259566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 23269566063dSJacob Faibussowitsch PetscCall(MatGetRowMin(mat->A,diagV,idx)); 23279566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 23289566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v,&diagA)); 2329fa213d2fSHong Zhang PetscFunctionReturn(0); 2330fa213d2fSHong Zhang } else if (n == 0) { 2331fa213d2fSHong Zhang if (m) { 23329566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v,&a)); 2333fa213d2fSHong Zhang for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 23349566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v,&a)); 2335fa213d2fSHong Zhang } 2336fa213d2fSHong 
Zhang PetscFunctionReturn(0); 2337fa213d2fSHong Zhang } 2338fa213d2fSHong Zhang 23399566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 23409566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 23419566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 23429566063dSJacob Faibussowitsch PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2343fa213d2fSHong Zhang 2344fa213d2fSHong Zhang /* Get offdiagIdx[] for implicit 0.0 */ 23459566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2346ce496241SStefano Zampini ba = bav; 2347fa213d2fSHong Zhang bi = b->i; 2348fa213d2fSHong Zhang bj = b->j; 23499566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2350fa213d2fSHong Zhang for (r = 0; r < m; r++) { 2351fa213d2fSHong Zhang ncols = bi[r+1] - bi[r]; 2352fa213d2fSHong Zhang if (ncols == A->cmap->N - n) { /* Brow is dense */ 2353fa213d2fSHong Zhang offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2354fa213d2fSHong Zhang } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2355fa213d2fSHong Zhang offdiagA[r] = 0.0; 2356fa213d2fSHong Zhang 2357fa213d2fSHong Zhang /* Find first hole in the cmap */ 2358fa213d2fSHong Zhang for (j=0; j<ncols; j++) { 2359fa213d2fSHong Zhang col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2360fa213d2fSHong Zhang if (col > j && j < cstart) { 2361fa213d2fSHong Zhang offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2362fa213d2fSHong Zhang break; 2363fa213d2fSHong Zhang } else if (col > j + n && j >= cstart) { 2364fa213d2fSHong Zhang offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2365fa213d2fSHong Zhang break; 2366fa213d2fSHong Zhang } 2367fa213d2fSHong Zhang } 23684e879edeSHong Zhang if (j == ncols && ncols < A->cmap->N - n) { 2369fa213d2fSHong Zhang /* a hole is outside compressed Bcols */ 2370fa213d2fSHong Zhang if (ncols == 0) { 
2371fa213d2fSHong Zhang if (cstart) { 2372fa213d2fSHong Zhang offdiagIdx[r] = 0; 2373fa213d2fSHong Zhang } else offdiagIdx[r] = cend; 2374fa213d2fSHong Zhang } else { /* ncols > 0 */ 2375fa213d2fSHong Zhang offdiagIdx[r] = cmap[ncols-1] + 1; 2376fa213d2fSHong Zhang if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2377fa213d2fSHong Zhang } 2378fa213d2fSHong Zhang } 2379fa213d2fSHong Zhang } 2380fa213d2fSHong Zhang 2381fa213d2fSHong Zhang for (j=0; j<ncols; j++) { 2382fa213d2fSHong Zhang if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2383fa213d2fSHong Zhang ba++; bj++; 2384fa213d2fSHong Zhang } 2385fa213d2fSHong Zhang } 2386fa213d2fSHong Zhang 23879566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &a)); 23889566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2389fa213d2fSHong Zhang for (r = 0; r < m; ++r) { 2390fa213d2fSHong Zhang if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 239103bc72f1SMatthew Knepley a[r] = diagA[r]; 2392fa213d2fSHong Zhang if (idx) idx[r] = cstart + diagIdx[r]; 2393fa213d2fSHong Zhang } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2394fa213d2fSHong Zhang a[r] = diagA[r]; 2395fa213d2fSHong Zhang if (idx) { 2396fa213d2fSHong Zhang if (cstart + diagIdx[r] <= offdiagIdx[r]) { 239703bc72f1SMatthew Knepley idx[r] = cstart + diagIdx[r]; 2398fa213d2fSHong Zhang } else idx[r] = offdiagIdx[r]; 2399fa213d2fSHong Zhang } 240003bc72f1SMatthew Knepley } else { 240103bc72f1SMatthew Knepley a[r] = offdiagA[r]; 2402fa213d2fSHong Zhang if (idx) idx[r] = offdiagIdx[r]; 240303bc72f1SMatthew Knepley } 240403bc72f1SMatthew Knepley } 24059566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 24069566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &a)); 24079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 24089566063dSJacob Faibussowitsch 
PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 24099566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 24109566063dSJacob Faibussowitsch PetscCall(VecDestroy(&offdiagV)); 24119566063dSJacob Faibussowitsch PetscCall(PetscFree2(diagIdx, offdiagIdx)); 241203bc72f1SMatthew Knepley PetscFunctionReturn(0); 241303bc72f1SMatthew Knepley } 241403bc72f1SMatthew Knepley 2415c87e5d42SMatthew Knepley PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2416c87e5d42SMatthew Knepley { 2417c87e5d42SMatthew Knepley Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 24181a254869SHong Zhang PetscInt m = A->rmap->n,n = A->cmap->n; 24191a254869SHong Zhang PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2420c87e5d42SMatthew Knepley PetscInt *cmap = mat->garray; 2421c87e5d42SMatthew Knepley PetscInt *diagIdx, *offdiagIdx; 2422c87e5d42SMatthew Knepley Vec diagV, offdiagV; 2423ce496241SStefano Zampini PetscScalar *a, *diagA, *offdiagA; 2424ce496241SStefano Zampini const PetscScalar *ba,*bav; 24251a254869SHong Zhang PetscInt r,j,col,ncols,*bi,*bj; 24261a254869SHong Zhang Mat B = mat->B; 24271a254869SHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2428c87e5d42SMatthew Knepley 2429c87e5d42SMatthew Knepley PetscFunctionBegin; 24301a254869SHong Zhang /* When a process holds entire A and other processes have no entry */ 24311a254869SHong Zhang if (A->cmap->N == n) { 24329566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v,&diagA)); 24339566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 24349566063dSJacob Faibussowitsch PetscCall(MatGetRowMax(mat->A,diagV,idx)); 24359566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 24369566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v,&diagA)); 24371a254869SHong Zhang PetscFunctionReturn(0); 24381a254869SHong Zhang } else if (n == 0) { 24391a254869SHong Zhang if (m) { 24409566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v,&a)); 24411a254869SHong Zhang for 
(r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 24429566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v,&a)); 24431a254869SHong Zhang } 24441a254869SHong Zhang PetscFunctionReturn(0); 24451a254869SHong Zhang } 24461a254869SHong Zhang 24479566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 24489566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 24499566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 24509566063dSJacob Faibussowitsch PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 24511a254869SHong Zhang 24521a254869SHong Zhang /* Get offdiagIdx[] for implicit 0.0 */ 24539566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2454ce496241SStefano Zampini ba = bav; 24551a254869SHong Zhang bi = b->i; 24561a254869SHong Zhang bj = b->j; 24579566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 24581a254869SHong Zhang for (r = 0; r < m; r++) { 24591a254869SHong Zhang ncols = bi[r+1] - bi[r]; 24601a254869SHong Zhang if (ncols == A->cmap->N - n) { /* Brow is dense */ 24611a254869SHong Zhang offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 24621a254869SHong Zhang } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 24631a254869SHong Zhang offdiagA[r] = 0.0; 24641a254869SHong Zhang 24651a254869SHong Zhang /* Find first hole in the cmap */ 24661a254869SHong Zhang for (j=0; j<ncols; j++) { 24671a254869SHong Zhang col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 24681a254869SHong Zhang if (col > j && j < cstart) { 24691a254869SHong Zhang offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 24701a254869SHong Zhang break; 24711a254869SHong Zhang } else if (col > j + n && j >= cstart) { 24721a254869SHong Zhang offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 24731a254869SHong Zhang break; 24741a254869SHong Zhang } 24751a254869SHong Zhang } 
24764e879edeSHong Zhang if (j == ncols && ncols < A->cmap->N - n) { 24771a254869SHong Zhang /* a hole is outside compressed Bcols */ 24781a254869SHong Zhang if (ncols == 0) { 24791a254869SHong Zhang if (cstart) { 24801a254869SHong Zhang offdiagIdx[r] = 0; 24811a254869SHong Zhang } else offdiagIdx[r] = cend; 24821a254869SHong Zhang } else { /* ncols > 0 */ 24831a254869SHong Zhang offdiagIdx[r] = cmap[ncols-1] + 1; 24841a254869SHong Zhang if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 24851a254869SHong Zhang } 24861a254869SHong Zhang } 24871a254869SHong Zhang } 24881a254869SHong Zhang 24891a254869SHong Zhang for (j=0; j<ncols; j++) { 24901a254869SHong Zhang if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 24911a254869SHong Zhang ba++; bj++; 24921a254869SHong Zhang } 24931a254869SHong Zhang } 24941a254869SHong Zhang 24959566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &a)); 24969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 24971a254869SHong Zhang for (r = 0; r < m; ++r) { 24981a254869SHong Zhang if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2499c87e5d42SMatthew Knepley a[r] = diagA[r]; 25001a254869SHong Zhang if (idx) idx[r] = cstart + diagIdx[r]; 25011a254869SHong Zhang } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 25021a254869SHong Zhang a[r] = diagA[r]; 25031a254869SHong Zhang if (idx) { 25041a254869SHong Zhang if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2505c87e5d42SMatthew Knepley idx[r] = cstart + diagIdx[r]; 25061a254869SHong Zhang } else idx[r] = offdiagIdx[r]; 25071a254869SHong Zhang } 2508c87e5d42SMatthew Knepley } else { 2509c87e5d42SMatthew Knepley a[r] = offdiagA[r]; 25101a254869SHong Zhang if (idx) idx[r] = offdiagIdx[r]; 2511c87e5d42SMatthew Knepley } 2512c87e5d42SMatthew Knepley } 25139566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 25149566063dSJacob Faibussowitsch 
PetscCall(VecRestoreArrayWrite(v, &a)); 25159566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 25169566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 25179566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 25189566063dSJacob Faibussowitsch PetscCall(VecDestroy(&offdiagV)); 25199566063dSJacob Faibussowitsch PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2520c87e5d42SMatthew Knepley PetscFunctionReturn(0); 2521c87e5d42SMatthew Knepley } 2522c87e5d42SMatthew Knepley 2523d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 25245494a064SHong Zhang { 2525f6d58c54SBarry Smith Mat *dummy; 25265494a064SHong Zhang 25275494a064SHong Zhang PetscFunctionBegin; 25289566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2529f6d58c54SBarry Smith *newmat = *dummy; 25309566063dSJacob Faibussowitsch PetscCall(PetscFree(dummy)); 25315494a064SHong Zhang PetscFunctionReturn(0); 25325494a064SHong Zhang } 25335494a064SHong Zhang 2534713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2535bbead8a2SBarry Smith { 2536bbead8a2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2537bbead8a2SBarry Smith 2538bbead8a2SBarry Smith PetscFunctionBegin; 25399566063dSJacob Faibussowitsch PetscCall(MatInvertBlockDiagonal(a->A,values)); 25407b6c816cSBarry Smith A->factorerrortype = a->A->factorerrortype; 2541bbead8a2SBarry Smith PetscFunctionReturn(0); 2542bbead8a2SBarry Smith } 2543bbead8a2SBarry Smith 254473a71a0fSBarry Smith static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 254573a71a0fSBarry Smith { 254673a71a0fSBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 254773a71a0fSBarry Smith 254873a71a0fSBarry Smith PetscFunctionBegin; 254908401ef6SPierre Jolivet PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), 
PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 25509566063dSJacob Faibussowitsch PetscCall(MatSetRandom(aij->A,rctx)); 2551679944adSJunchao Zhang if (x->assembled) { 25529566063dSJacob Faibussowitsch PetscCall(MatSetRandom(aij->B,rctx)); 2553679944adSJunchao Zhang } else { 25549566063dSJacob Faibussowitsch PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2555679944adSJunchao Zhang } 25569566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 25579566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 255873a71a0fSBarry Smith PetscFunctionReturn(0); 255973a71a0fSBarry Smith } 2560bbead8a2SBarry Smith 2561b1b1104fSBarry Smith PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2562b1b1104fSBarry Smith { 2563b1b1104fSBarry Smith PetscFunctionBegin; 2564b1b1104fSBarry Smith if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2565b1b1104fSBarry Smith else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2566b1b1104fSBarry Smith PetscFunctionReturn(0); 2567b1b1104fSBarry Smith } 2568b1b1104fSBarry Smith 2569b1b1104fSBarry Smith /*@ 2570b1b1104fSBarry Smith MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2571b1b1104fSBarry Smith 2572b1b1104fSBarry Smith Collective on Mat 2573b1b1104fSBarry Smith 2574b1b1104fSBarry Smith Input Parameters: 2575b1b1104fSBarry Smith + A - the matrix 2576b1b1104fSBarry Smith - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2577b1b1104fSBarry Smith 257896a0c994SBarry Smith Level: advanced 257996a0c994SBarry Smith 2580b1b1104fSBarry Smith @*/ 2581b1b1104fSBarry Smith PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2582b1b1104fSBarry Smith { 2583b1b1104fSBarry Smith PetscFunctionBegin; 
2584cac4c232SBarry Smith PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2585b1b1104fSBarry Smith PetscFunctionReturn(0); 2586b1b1104fSBarry Smith } 2587b1b1104fSBarry Smith 25884416b707SBarry Smith PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2589b1b1104fSBarry Smith { 2590b1b1104fSBarry Smith PetscBool sc = PETSC_FALSE,flg; 2591b1b1104fSBarry Smith 2592b1b1104fSBarry Smith PetscFunctionBegin; 2593d0609cedSBarry Smith PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2594b1b1104fSBarry Smith if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 25959566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2596b1b1104fSBarry Smith if (flg) { 25979566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2598b1b1104fSBarry Smith } 2599d0609cedSBarry Smith PetscOptionsHeadEnd(); 2600b1b1104fSBarry Smith PetscFunctionReturn(0); 2601b1b1104fSBarry Smith } 2602b1b1104fSBarry Smith 26037d68702bSBarry Smith PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 26047d68702bSBarry Smith { 26057d68702bSBarry Smith Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2606c5e4d11fSDmitry Karpeev Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 26077d68702bSBarry Smith 26087d68702bSBarry Smith PetscFunctionBegin; 2609c5e4d11fSDmitry Karpeev if (!Y->preallocated) { 26109566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2611c5e4d11fSDmitry Karpeev } else if (!aij->nz) { 2612b83222d8SBarry Smith PetscInt nonew = aij->nonew; 26139566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2614b83222d8SBarry Smith aij->nonew = nonew; 26157d68702bSBarry Smith } 26169566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y,a)); 26177d68702bSBarry Smith PetscFunctionReturn(0); 
26187d68702bSBarry Smith } 26197d68702bSBarry Smith 26203b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 26213b49f96aSBarry Smith { 26223b49f96aSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 26233b49f96aSBarry Smith 26243b49f96aSBarry Smith PetscFunctionBegin; 262508401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 26269566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(a->A,missing,d)); 26273b49f96aSBarry Smith if (d) { 26283b49f96aSBarry Smith PetscInt rstart; 26299566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 26303b49f96aSBarry Smith *d += rstart; 26313b49f96aSBarry Smith 26323b49f96aSBarry Smith } 26333b49f96aSBarry Smith PetscFunctionReturn(0); 26343b49f96aSBarry Smith } 26353b49f96aSBarry Smith 2636a8ee9fb5SBarry Smith PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2637a8ee9fb5SBarry Smith { 2638a8ee9fb5SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2639a8ee9fb5SBarry Smith 2640a8ee9fb5SBarry Smith PetscFunctionBegin; 26419566063dSJacob Faibussowitsch PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2642a8ee9fb5SBarry Smith PetscFunctionReturn(0); 2643a8ee9fb5SBarry Smith } 26443b49f96aSBarry Smith 26458a729477SBarry Smith /* -------------------------------------------------------------------*/ 2646cda55fadSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2647cda55fadSBarry Smith MatGetRow_MPIAIJ, 2648cda55fadSBarry Smith MatRestoreRow_MPIAIJ, 2649cda55fadSBarry Smith MatMult_MPIAIJ, 265097304618SKris Buschelman /* 4*/ MatMultAdd_MPIAIJ, 26517c922b88SBarry Smith MatMultTranspose_MPIAIJ, 26527c922b88SBarry Smith MatMultTransposeAdd_MPIAIJ, 2653f4259b30SLisandro Dalcin NULL, 2654f4259b30SLisandro Dalcin NULL, 2655f4259b30SLisandro Dalcin NULL, 2656f4259b30SLisandro Dalcin /*10*/ 
NULL, 2657f4259b30SLisandro Dalcin NULL, 2658f4259b30SLisandro Dalcin NULL, 265941f059aeSBarry Smith MatSOR_MPIAIJ, 2660b7c46309SBarry Smith MatTranspose_MPIAIJ, 266197304618SKris Buschelman /*15*/ MatGetInfo_MPIAIJ, 2662cda55fadSBarry Smith MatEqual_MPIAIJ, 2663cda55fadSBarry Smith MatGetDiagonal_MPIAIJ, 2664cda55fadSBarry Smith MatDiagonalScale_MPIAIJ, 2665cda55fadSBarry Smith MatNorm_MPIAIJ, 266697304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIAIJ, 2667cda55fadSBarry Smith MatAssemblyEnd_MPIAIJ, 2668cda55fadSBarry Smith MatSetOption_MPIAIJ, 2669cda55fadSBarry Smith MatZeroEntries_MPIAIJ, 2670d519adbfSMatthew Knepley /*24*/ MatZeroRows_MPIAIJ, 2671f4259b30SLisandro Dalcin NULL, 2672f4259b30SLisandro Dalcin NULL, 2673f4259b30SLisandro Dalcin NULL, 2674f4259b30SLisandro Dalcin NULL, 26754994cf47SJed Brown /*29*/ MatSetUp_MPIAIJ, 2676f4259b30SLisandro Dalcin NULL, 2677f4259b30SLisandro Dalcin NULL, 2678a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPIAIJ, 2679f4259b30SLisandro Dalcin NULL, 2680d519adbfSMatthew Knepley /*34*/ MatDuplicate_MPIAIJ, 2681f4259b30SLisandro Dalcin NULL, 2682f4259b30SLisandro Dalcin NULL, 2683f4259b30SLisandro Dalcin NULL, 2684f4259b30SLisandro Dalcin NULL, 2685d519adbfSMatthew Knepley /*39*/ MatAXPY_MPIAIJ, 26867dae84e0SHong Zhang MatCreateSubMatrices_MPIAIJ, 2687cda55fadSBarry Smith MatIncreaseOverlap_MPIAIJ, 2688cda55fadSBarry Smith MatGetValues_MPIAIJ, 2689cb5b572fSBarry Smith MatCopy_MPIAIJ, 2690d519adbfSMatthew Knepley /*44*/ MatGetRowMax_MPIAIJ, 2691cda55fadSBarry Smith MatScale_MPIAIJ, 26927d68702bSBarry Smith MatShift_MPIAIJ, 269399e65526SBarry Smith MatDiagonalSet_MPIAIJ, 2694564f14d6SBarry Smith MatZeroRowsColumns_MPIAIJ, 269573a71a0fSBarry Smith /*49*/ MatSetRandom_MPIAIJ, 2696f4259b30SLisandro Dalcin NULL, 2697f4259b30SLisandro Dalcin NULL, 2698f4259b30SLisandro Dalcin NULL, 2699f4259b30SLisandro Dalcin NULL, 270093dfae19SHong Zhang /*54*/ MatFDColoringCreate_MPIXAIJ, 2701f4259b30SLisandro Dalcin NULL, 2702cda55fadSBarry Smith 
MatSetUnfactored_MPIAIJ, 270372e6a0cfSJed Brown MatPermute_MPIAIJ, 2704f4259b30SLisandro Dalcin NULL, 27057dae84e0SHong Zhang /*59*/ MatCreateSubMatrix_MPIAIJ, 2706e03a110bSBarry Smith MatDestroy_MPIAIJ, 2707e03a110bSBarry Smith MatView_MPIAIJ, 2708f4259b30SLisandro Dalcin NULL, 2709f4259b30SLisandro Dalcin NULL, 2710f4259b30SLisandro Dalcin /*64*/ NULL, 2711f996eeb8SHong Zhang MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2712f4259b30SLisandro Dalcin NULL, 2713f4259b30SLisandro Dalcin NULL, 2714f4259b30SLisandro Dalcin NULL, 2715d519adbfSMatthew Knepley /*69*/ MatGetRowMaxAbs_MPIAIJ, 2716c87e5d42SMatthew Knepley MatGetRowMinAbs_MPIAIJ, 2717f4259b30SLisandro Dalcin NULL, 2718f4259b30SLisandro Dalcin NULL, 2719f4259b30SLisandro Dalcin NULL, 2720f4259b30SLisandro Dalcin NULL, 27213acb8795SBarry Smith /*75*/ MatFDColoringApply_AIJ, 2722b1b1104fSBarry Smith MatSetFromOptions_MPIAIJ, 2723f4259b30SLisandro Dalcin NULL, 2724f4259b30SLisandro Dalcin NULL, 2725f1f41ecbSJed Brown MatFindZeroDiagonals_MPIAIJ, 2726f4259b30SLisandro Dalcin /*80*/ NULL, 2727f4259b30SLisandro Dalcin NULL, 2728f4259b30SLisandro Dalcin NULL, 27295bba2384SShri Abhyankar /*83*/ MatLoad_MPIAIJ, 2730a3bbdb47SHong Zhang MatIsSymmetric_MPIAIJ, 2731f4259b30SLisandro Dalcin NULL, 2732f4259b30SLisandro Dalcin NULL, 2733f4259b30SLisandro Dalcin NULL, 2734f4259b30SLisandro Dalcin NULL, 2735f4259b30SLisandro Dalcin /*89*/ NULL, 2736f4259b30SLisandro Dalcin NULL, 273726be0446SHong Zhang MatMatMultNumeric_MPIAIJ_MPIAIJ, 2738f4259b30SLisandro Dalcin NULL, 2739f4259b30SLisandro Dalcin NULL, 2740cf3ca8ceSHong Zhang /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2741f4259b30SLisandro Dalcin NULL, 2742f4259b30SLisandro Dalcin NULL, 2743f4259b30SLisandro Dalcin NULL, 2744b470e4b4SRichard Tran Mills MatBindToCPU_MPIAIJ, 27454222ddf1SHong Zhang /*99*/ MatProductSetFromOptions_MPIAIJ, 2746f4259b30SLisandro Dalcin NULL, 2747f4259b30SLisandro Dalcin NULL, 27482fd7e33dSBarry Smith MatConjugate_MPIAIJ, 2749f4259b30SLisandro Dalcin NULL, 
2750d519adbfSMatthew Knepley /*104*/MatSetValuesRow_MPIAIJ, 275199cafbc1SBarry Smith MatRealPart_MPIAIJ, 275269db28dcSHong Zhang MatImaginaryPart_MPIAIJ, 2753f4259b30SLisandro Dalcin NULL, 2754f4259b30SLisandro Dalcin NULL, 2755f4259b30SLisandro Dalcin /*109*/NULL, 2756f4259b30SLisandro Dalcin NULL, 27575494a064SHong Zhang MatGetRowMin_MPIAIJ, 2758f4259b30SLisandro Dalcin NULL, 27593b49f96aSBarry Smith MatMissingDiagonal_MPIAIJ, 2760d1adec66SJed Brown /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2761f4259b30SLisandro Dalcin NULL, 2762c5e4d11fSDmitry Karpeev MatGetGhosts_MPIAIJ, 2763f4259b30SLisandro Dalcin NULL, 2764f4259b30SLisandro Dalcin NULL, 2765b215bc84SStefano Zampini /*119*/MatMultDiagonalBlock_MPIAIJ, 2766f4259b30SLisandro Dalcin NULL, 2767f4259b30SLisandro Dalcin NULL, 2768f4259b30SLisandro Dalcin NULL, 2769b9614d88SDmitry Karpeev MatGetMultiProcBlock_MPIAIJ, 2770f2c98031SJed Brown /*124*/MatFindNonzeroRows_MPIAIJ, 2771a873a8cdSSam Reynolds MatGetColumnReductions_MPIAIJ, 2772bbead8a2SBarry Smith MatInvertBlockDiagonal_MPIAIJ, 2773a8ee9fb5SBarry Smith MatInvertVariableBlockDiagonal_MPIAIJ, 27747dae84e0SHong Zhang MatCreateSubMatricesMPI_MPIAIJ, 2775f4259b30SLisandro Dalcin /*129*/NULL, 2776f4259b30SLisandro Dalcin NULL, 2777f4259b30SLisandro Dalcin NULL, 2778187b3c17SHong Zhang MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2779f4259b30SLisandro Dalcin NULL, 2780f4259b30SLisandro Dalcin /*134*/NULL, 2781f4259b30SLisandro Dalcin NULL, 2782f4259b30SLisandro Dalcin NULL, 2783f4259b30SLisandro Dalcin NULL, 2784f4259b30SLisandro Dalcin NULL, 278546533700Sstefano_zampini /*139*/MatSetBlockSizes_MPIAIJ, 2786f4259b30SLisandro Dalcin NULL, 2787f4259b30SLisandro Dalcin NULL, 27889c8f2541SHong Zhang MatFDColoringSetUp_MPIXAIJ, 2789a0b6529bSBarry Smith MatFindOffBlockDiagonalEntries_MPIAIJ, 27904222ddf1SHong Zhang MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2791f4259b30SLisandro Dalcin /*145*/NULL, 2792f4259b30SLisandro Dalcin NULL, 2793f4259b30SLisandro Dalcin NULL 
2794bd0c2dcbSBarry Smith }; 279536ce4990SBarry Smith 27962e8a6d31SBarry Smith /* ----------------------------------------------------------------------------------------*/ 27972e8a6d31SBarry Smith 27987087cfbeSBarry Smith PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 27992e8a6d31SBarry Smith { 28002e8a6d31SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 28012e8a6d31SBarry Smith 28022e8a6d31SBarry Smith PetscFunctionBegin; 28039566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->A)); 28049566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->B)); 28052e8a6d31SBarry Smith PetscFunctionReturn(0); 28062e8a6d31SBarry Smith } 28072e8a6d31SBarry Smith 28087087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 28092e8a6d31SBarry Smith { 28102e8a6d31SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 28112e8a6d31SBarry Smith 28122e8a6d31SBarry Smith PetscFunctionBegin; 28139566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->A)); 28149566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->B)); 28152e8a6d31SBarry Smith PetscFunctionReturn(0); 28162e8a6d31SBarry Smith } 28178a729477SBarry Smith 28187087cfbeSBarry Smith PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2819a23d5eceSKris Buschelman { 2820a23d5eceSKris Buschelman Mat_MPIAIJ *b; 28215d2a9ed1SStefano Zampini PetscMPIInt size; 2822a23d5eceSKris Buschelman 2823a23d5eceSKris Buschelman PetscFunctionBegin; 28249566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 28259566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 2826a23d5eceSKris Buschelman b = (Mat_MPIAIJ*)B->data; 2827899cda47SBarry Smith 2828cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 28299566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&b->colmap)); 2830cb7b82ddSBarry Smith #else 28319566063dSJacob Faibussowitsch PetscCall(PetscFree(b->colmap)); 2832cb7b82ddSBarry Smith #endif 
28339566063dSJacob Faibussowitsch PetscCall(PetscFree(b->garray)); 28349566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 28359566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->Mvctx)); 2836cb7b82ddSBarry Smith 2837cb7b82ddSBarry Smith /* Because the B will have been resized we simply destroy it and create a new one each time */ 28389566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 28399566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->B)); 28409566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 28419566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 28429566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 28439566063dSJacob Faibussowitsch PetscCall(MatSetType(b->B,MATSEQAIJ)); 28449566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2845cb7b82ddSBarry Smith 2846cb7b82ddSBarry Smith if (!B->preallocated) { 28479566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 28489566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 28499566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 28509566063dSJacob Faibussowitsch PetscCall(MatSetType(b->A,MATSEQAIJ)); 28519566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2852526dfc15SBarry Smith } 2853899cda47SBarry Smith 28549566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 28559566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2856526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2857cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 285815001458SStefano Zampini B->assembled = PETSC_FALSE; 2859a23d5eceSKris Buschelman PetscFunctionReturn(0); 2860a23d5eceSKris Buschelman } 
2861a23d5eceSKris Buschelman 2862846b4da1SFande Kong PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2863846b4da1SFande Kong { 2864846b4da1SFande Kong Mat_MPIAIJ *b; 2865846b4da1SFande Kong 2866846b4da1SFande Kong PetscFunctionBegin; 2867846b4da1SFande Kong PetscValidHeaderSpecific(B,MAT_CLASSID,1); 28689566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 28699566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 2870846b4da1SFande Kong b = (Mat_MPIAIJ*)B->data; 2871846b4da1SFande Kong 2872846b4da1SFande Kong #if defined(PETSC_USE_CTABLE) 28739566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&b->colmap)); 2874846b4da1SFande Kong #else 28759566063dSJacob Faibussowitsch PetscCall(PetscFree(b->colmap)); 2876846b4da1SFande Kong #endif 28779566063dSJacob Faibussowitsch PetscCall(PetscFree(b->garray)); 28789566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 28799566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->Mvctx)); 2880846b4da1SFande Kong 28819566063dSJacob Faibussowitsch PetscCall(MatResetPreallocation(b->A)); 28829566063dSJacob Faibussowitsch PetscCall(MatResetPreallocation(b->B)); 2883846b4da1SFande Kong B->preallocated = PETSC_TRUE; 2884846b4da1SFande Kong B->was_assembled = PETSC_FALSE; 2885846b4da1SFande Kong B->assembled = PETSC_FALSE; 2886846b4da1SFande Kong PetscFunctionReturn(0); 2887846b4da1SFande Kong } 2888846b4da1SFande Kong 2889dfbe8321SBarry Smith PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2890d6dfbf8fSBarry Smith { 2891d6dfbf8fSBarry Smith Mat mat; 2892416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2893d6dfbf8fSBarry Smith 28943a40ed3dSBarry Smith PetscFunctionBegin; 2895f4259b30SLisandro Dalcin *newmat = NULL; 28969566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 28979566063dSJacob Faibussowitsch 
PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 28989566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 28999566063dSJacob Faibussowitsch PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2900273d9f13SBarry Smith a = (Mat_MPIAIJ*)mat->data; 2901e1b6402fSHong Zhang 2902d5f3da31SBarry Smith mat->factortype = matin->factortype; 2903501880eeSStefano Zampini mat->assembled = matin->assembled; 2904e7641de0SSatish Balay mat->insertmode = NOT_SET_VALUES; 2905501880eeSStefano Zampini mat->preallocated = matin->preallocated; 2906d6dfbf8fSBarry Smith 290717699dbbSLois Curfman McInnes a->size = oldmat->size; 290817699dbbSLois Curfman McInnes a->rank = oldmat->rank; 2909e7641de0SSatish Balay a->donotstash = oldmat->donotstash; 2910e7641de0SSatish Balay a->roworiented = oldmat->roworiented; 2911501880eeSStefano Zampini a->rowindices = NULL; 2912501880eeSStefano Zampini a->rowvalues = NULL; 2913bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 2914d6dfbf8fSBarry Smith 29159566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 29169566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2917899cda47SBarry Smith 29182ee70a88SLois Curfman McInnes if (oldmat->colmap) { 2919aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 29209566063dSJacob Faibussowitsch PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2921b1fc9764SSatish Balay #else 29229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 29239566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 29249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2925b1fc9764SSatish Balay #endif 2926501880eeSStefano Zampini } else a->colmap = NULL; 29273f41c07dSBarry Smith if (oldmat->garray) { 2928b1d57f15SBarry Smith PetscInt len; 2929d0f46423SBarry Smith len 
= oldmat->B->cmap->n; 29309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len+1,&a->garray)); 29319566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 29329566063dSJacob Faibussowitsch if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2933501880eeSStefano Zampini } else a->garray = NULL; 2934d6dfbf8fSBarry Smith 29350de76c62SStefano Zampini /* It may happen MatDuplicate is called with a non-assembled matrix 29360de76c62SStefano Zampini In fact, MatDuplicate only requires the matrix to be preallocated 29370de76c62SStefano Zampini This may happen inside a DMCreateMatrix_Shell */ 29380de76c62SStefano Zampini if (oldmat->lvec) { 29399566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 29409566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 29410de76c62SStefano Zampini } 29420de76c62SStefano Zampini if (oldmat->Mvctx) { 29439566063dSJacob Faibussowitsch PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 29449566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 29450de76c62SStefano Zampini } 29469566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 29479566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 29489566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 29499566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 29509566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 29518a729477SBarry Smith *newmat = mat; 29523a40ed3dSBarry Smith PetscFunctionReturn(0); 29538a729477SBarry Smith } 2954416022c9SBarry Smith 2955112444f4SShri Abhyankar PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 29568fb81238SShri Abhyankar { 295752f91c60SVaclav Hapla PetscBool 
isbinary, ishdf5; 295852f91c60SVaclav Hapla 295952f91c60SVaclav Hapla PetscFunctionBegin; 296052f91c60SVaclav Hapla PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 296152f91c60SVaclav Hapla PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2962c27b3999SVaclav Hapla /* force binary viewer to load .info file if it has not yet done so */ 29639566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 29649566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 29659566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 296652f91c60SVaclav Hapla if (isbinary) { 29679566063dSJacob Faibussowitsch PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 296852f91c60SVaclav Hapla } else if (ishdf5) { 296952f91c60SVaclav Hapla #if defined(PETSC_HAVE_HDF5) 29709566063dSJacob Faibussowitsch PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 297152f91c60SVaclav Hapla #else 297252f91c60SVaclav Hapla SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 297352f91c60SVaclav Hapla #endif 297452f91c60SVaclav Hapla } else { 297598921bdaSJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 297652f91c60SVaclav Hapla } 297752f91c60SVaclav Hapla PetscFunctionReturn(0); 297852f91c60SVaclav Hapla } 297952f91c60SVaclav Hapla 29803ea6fe3dSLisandro Dalcin PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 298152f91c60SVaclav Hapla { 29823ea6fe3dSLisandro Dalcin PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 29833ea6fe3dSLisandro Dalcin PetscInt *rowidxs,*colidxs; 29843ea6fe3dSLisandro Dalcin PetscScalar *matvals; 29858fb81238SShri Abhyankar 29868fb81238SShri Abhyankar PetscFunctionBegin; 29879566063dSJacob Faibussowitsch 
PetscCall(PetscViewerSetUp(viewer)); 29888fb81238SShri Abhyankar 29893ea6fe3dSLisandro Dalcin /* read in matrix header */ 29909566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 299108401ef6SPierre Jolivet PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 29923ea6fe3dSLisandro Dalcin M = header[1]; N = header[2]; nz = header[3]; 299308401ef6SPierre Jolivet PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 299408401ef6SPierre Jolivet PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 299508401ef6SPierre Jolivet PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 299608ea439dSMark F. Adams 29973ea6fe3dSLisandro Dalcin /* set block sizes from the viewer's .info file */ 29989566063dSJacob Faibussowitsch PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 29993ea6fe3dSLisandro Dalcin /* set global sizes if not set already */ 30003ea6fe3dSLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 30013ea6fe3dSLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 30029566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 30039566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 30048fb81238SShri Abhyankar 30053ea6fe3dSLisandro Dalcin /* check if the matrix sizes are correct */ 30069566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat,&rows,&cols)); 30072c71b3e2SJacob Faibussowitsch PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 30088fb81238SShri Abhyankar 
30093ea6fe3dSLisandro Dalcin /* read in row lengths and build row indices */ 30109566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat,&m,NULL)); 30119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m+1,&rowidxs)); 30129566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 30133ea6fe3dSLisandro Dalcin rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 30141c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 301508401ef6SPierre Jolivet PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 30163ea6fe3dSLisandro Dalcin /* read in column indices and matrix values */ 30179566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 30189566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 30199566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 30203ea6fe3dSLisandro Dalcin /* store matrix indices and values */ 30219566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 30229566063dSJacob Faibussowitsch PetscCall(PetscFree(rowidxs)); 30239566063dSJacob Faibussowitsch PetscCall(PetscFree2(colidxs,matvals)); 30248fb81238SShri Abhyankar PetscFunctionReturn(0); 30258fb81238SShri Abhyankar } 30268fb81238SShri Abhyankar 30273782ecc7SHong Zhang /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 30288b3fa1f7SHong Zhang PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 30294aa3045dSJed Brown { 30304aa3045dSJed Brown IS iscol_local; 3031c5e4d11fSDmitry Karpeev PetscBool isstride; 3032c5e4d11fSDmitry Karpeev PetscMPIInt lisstride=0,gisstride; 30333782ecc7SHong Zhang 30343782ecc7SHong Zhang PetscFunctionBegin; 30353782ecc7SHong Zhang /* check if we are grabbing all columns*/ 30369566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 30373782ecc7SHong Zhang 3038c5e4d11fSDmitry Karpeev if (isstride) { 3039c5e4d11fSDmitry Karpeev PetscInt start,len,mstart,mlen; 30409566063dSJacob Faibussowitsch PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 30419566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol,&len)); 30429566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3043c5e4d11fSDmitry Karpeev if (mstart == start && mlen-mstart == len) lisstride = 1; 3044c5e4d11fSDmitry Karpeev } 30453782ecc7SHong Zhang 30461c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3047c5e4d11fSDmitry Karpeev if (gisstride) { 3048c5e4d11fSDmitry Karpeev PetscInt N; 30499566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat,NULL,&N)); 30509566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 30519566063dSJacob Faibussowitsch PetscCall(ISSetIdentity(iscol_local)); 30529566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3053c5e4d11fSDmitry Karpeev } else { 3054c5bfad50SMark F. 
Adams PetscInt cbs; 30559566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(iscol,&cbs)); 30569566063dSJacob Faibussowitsch PetscCall(ISAllGather(iscol,&iscol_local)); 30579566063dSJacob Faibussowitsch PetscCall(ISSetBlockSize(iscol_local,cbs)); 3058b79d0421SJed Brown } 30593782ecc7SHong Zhang 30603782ecc7SHong Zhang *isseq = iscol_local; 30613782ecc7SHong Zhang PetscFunctionReturn(0); 3062c5e4d11fSDmitry Karpeev } 30638d2139bdSHong Zhang 3064ddfdf956SHong Zhang /* 30659c988bcaSHong Zhang Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 30669c988bcaSHong Zhang (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3067ddfdf956SHong Zhang 3068ddfdf956SHong Zhang Input Parameters: 3069ddfdf956SHong Zhang mat - matrix 30709c988bcaSHong Zhang isrow - parallel row index set; its local indices are a subset of local columns of mat, 30719c988bcaSHong Zhang i.e., mat->rstart <= isrow[i] < mat->rend 3072ddfdf956SHong Zhang iscol - parallel column index set; its local indices are a subset of local columns of mat, 3073ddfdf956SHong Zhang i.e., mat->cstart <= iscol[i] < mat->cend 3074ddfdf956SHong Zhang Output Parameter: 30759c988bcaSHong Zhang isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 30769c988bcaSHong Zhang iscol_o - sequential column index set for retrieving mat->B 30779c988bcaSHong Zhang garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3078ddfdf956SHong Zhang */ 30799c988bcaSHong Zhang PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 30803782ecc7SHong Zhang { 3081040216a4SHong Zhang Vec x,cmap; 3082040216a4SHong Zhang const PetscInt *is_idx; 3083040216a4SHong Zhang PetscScalar *xarray,*cmaparray; 30849c988bcaSHong Zhang PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3085040216a4SHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3086040216a4SHong Zhang Mat B=a->B; 
3087040216a4SHong Zhang Vec lvec=a->lvec,lcmap; 3088a31a438cSHong Zhang PetscInt i,cstart,cend,Bn=B->cmap->N; 30898b3fa1f7SHong Zhang MPI_Comm comm; 30903a8d973cSHong Zhang VecScatter Mvctx=a->Mvctx; 30913782ecc7SHong Zhang 30923782ecc7SHong Zhang PetscFunctionBegin; 30939566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 30949566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol,&ncols)); 30958b3fa1f7SHong Zhang 3096ddfdf956SHong Zhang /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 30979566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(mat,&x,NULL)); 30989566063dSJacob Faibussowitsch PetscCall(VecSet(x,-1.0)); 30999566063dSJacob Faibussowitsch PetscCall(VecDuplicate(x,&cmap)); 31009566063dSJacob Faibussowitsch PetscCall(VecSet(cmap,-1.0)); 31010a351717SHong Zhang 31029c988bcaSHong Zhang /* Get start indices */ 31039566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm)); 3104ddfdf956SHong Zhang isstart -= ncols; 31059566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 3106040216a4SHong Zhang 31079566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol,&is_idx)); 31089566063dSJacob Faibussowitsch PetscCall(VecGetArray(x,&xarray)); 31099566063dSJacob Faibussowitsch PetscCall(VecGetArray(cmap,&cmaparray)); 31109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ncols,&idx)); 3111ddfdf956SHong Zhang for (i=0; i<ncols; i++) { 31128b3fa1f7SHong Zhang xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3113ddfdf956SHong Zhang cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 31149c988bcaSHong Zhang idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 31158b3fa1f7SHong Zhang } 31169566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(x,&xarray)); 31179566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(cmap,&cmaparray)); 31189566063dSJacob Faibussowitsch 
PetscCall(ISRestoreIndices(iscol,&is_idx)); 31198b3fa1f7SHong Zhang 31209c988bcaSHong Zhang /* Get iscol_d */ 31219566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); 31229566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(iscol,&i)); 31239566063dSJacob Faibussowitsch PetscCall(ISSetBlockSize(*iscol_d,i)); 3124feb78a15SHong Zhang 31259c988bcaSHong Zhang /* Get isrow_d */ 31269566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow,&m)); 3127feb78a15SHong Zhang rstart = mat->rmap->rstart; 31289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m,&idx)); 31299566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow,&is_idx)); 31309c988bcaSHong Zhang for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 31319566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow,&is_idx)); 3132feb78a15SHong Zhang 31339566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d)); 31349566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(isrow,&i)); 31359566063dSJacob Faibussowitsch PetscCall(ISSetBlockSize(*isrow_d,i)); 3136feb78a15SHong Zhang 31379c988bcaSHong Zhang /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 31389566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 31399566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3140ddfdf956SHong Zhang 31419566063dSJacob Faibussowitsch PetscCall(VecDuplicate(lvec,&lcmap)); 314207250d77SHong Zhang 31439566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 31449566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 314564efcef9SHong Zhang 31469c988bcaSHong Zhang /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3147ddfdf956SHong Zhang /* off-process column indices */ 
31489c988bcaSHong Zhang count = 0; 31499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Bn,&idx)); 31509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Bn,&cmap1)); 3151feb78a15SHong Zhang 31529566063dSJacob Faibussowitsch PetscCall(VecGetArray(lvec,&xarray)); 31539566063dSJacob Faibussowitsch PetscCall(VecGetArray(lcmap,&cmaparray)); 31548b3fa1f7SHong Zhang for (i=0; i<Bn; i++) { 3155f73421bfSHong Zhang if (PetscRealPart(xarray[i]) > -1.0) { 31569c988bcaSHong Zhang idx[count] = i; /* local column index in off-diagonal part B */ 31571c645242SHong Zhang cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 31581c645242SHong Zhang count++; 31598b3fa1f7SHong Zhang } 31608b3fa1f7SHong Zhang } 31619566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(lvec,&xarray)); 31629566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(lcmap,&cmaparray)); 316307250d77SHong Zhang 31649566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o)); 3165b6d9b4e0SHong Zhang /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3166b6d9b4e0SHong Zhang 31679566063dSJacob Faibussowitsch PetscCall(PetscFree(idx)); 31689c988bcaSHong Zhang *garray = cmap1; 31699c988bcaSHong Zhang 31709566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 31719566063dSJacob Faibussowitsch PetscCall(VecDestroy(&cmap)); 31729566063dSJacob Faibussowitsch PetscCall(VecDestroy(&lcmap)); 3173040216a4SHong Zhang PetscFunctionReturn(0); 3174040216a4SHong Zhang } 3175040216a4SHong Zhang 3176b20e2604SHong Zhang /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 31773b00a383SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 31783b00a383SHong Zhang { 3179b20e2604SHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 31801fd43edeSHong Zhang Mat M = NULL; 31813b00a383SHong Zhang MPI_Comm comm; 3182b20e2604SHong Zhang IS iscol_d,isrow_d,iscol_o; 31833b00a383SHong Zhang Mat Asub = NULL,Bsub = NULL; 3184b20e2604SHong Zhang PetscInt n; 31853b00a383SHong Zhang 31863b00a383SHong Zhang PetscFunctionBegin; 31879566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 31883b00a383SHong Zhang 31893b00a383SHong Zhang if (call == MAT_REUSE_MATRIX) { 3190b20e2604SHong Zhang /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 31919566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d)); 319228b400f6SJacob Faibussowitsch PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 31933b00a383SHong Zhang 31949566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 319528b400f6SJacob Faibussowitsch PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 31963b00a383SHong Zhang 31979566063dSJacob Faibussowitsch 
PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 319828b400f6SJacob Faibussowitsch PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 31993b00a383SHong Zhang 3200b20e2604SHong Zhang /* Update diagonal and off-diagonal portions of submat */ 3201b20e2604SHong Zhang asub = (Mat_MPIAIJ*)(*submat)->data; 32029566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 32039566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_o,&n)); 32047cfce09cSHong Zhang if (n) { 32059566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 32067cfce09cSHong Zhang } 32079566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 32089566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 32093b00a383SHong Zhang 32103b00a383SHong Zhang } else { /* call == MAT_INITIAL_MATRIX) */ 32119c988bcaSHong Zhang const PetscInt *garray; 3212b20e2604SHong Zhang PetscInt BsubN; 32133b00a383SHong Zhang 3214b20e2604SHong Zhang /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 32159566063dSJacob Faibussowitsch PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 32163b00a383SHong Zhang 3217b20e2604SHong Zhang /* Create local submatrices Asub and Bsub */ 32189566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 32199566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 32203b00a383SHong Zhang 32219c988bcaSHong Zhang /* Create submatrix M */ 32229566063dSJacob Faibussowitsch PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 32233b00a383SHong Zhang 3224b20e2604SHong Zhang /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3225b20e2604SHong Zhang asub = (Mat_MPIAIJ*)M->data; 32267cfce09cSHong Zhang 32279566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3228b20e2604SHong Zhang n = asub->B->cmap->N; 3229b20e2604SHong Zhang if (BsubN > n) { 3230c4762a1bSJed Brown /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 32317cfce09cSHong Zhang const PetscInt *idx; 32329c988bcaSHong Zhang PetscInt i,j,*idx_new,*subgarray = asub->garray; 32339566063dSJacob Faibussowitsch PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 32347cfce09cSHong Zhang 32359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n,&idx_new)); 32367cfce09cSHong Zhang j = 0; 32379566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_o,&idx)); 3238b20e2604SHong Zhang for (i=0; i<n; i++) { 32397cfce09cSHong Zhang if (j >= BsubN) break; 32409c988bcaSHong Zhang while (subgarray[i] > garray[j]) j++; 32417cfce09cSHong Zhang 32429c988bcaSHong Zhang if (subgarray[i] == garray[j]) { 32437cfce09cSHong Zhang idx_new[i] = idx[j++]; 324498921bdaSJacob Faibussowitsch } else 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 32457cfce09cSHong Zhang } 32469566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_o,&idx)); 32477cfce09cSHong Zhang 32489566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_o)); 32499566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 32507cfce09cSHong Zhang 3251b20e2604SHong Zhang } else if (BsubN < n) { 325298921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3253b20e2604SHong Zhang } 32547cfce09cSHong Zhang 32559566063dSJacob Faibussowitsch PetscCall(PetscFree(garray)); 3256b20e2604SHong Zhang *submat = M; 32573b00a383SHong Zhang 3258e489de8fSHong Zhang /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 32599566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d)); 32609566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isrow_d)); 32613b00a383SHong Zhang 32629566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d)); 32639566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_d)); 32643b00a383SHong Zhang 32659566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o)); 32669566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_o)); 32673b00a383SHong Zhang } 32683b00a383SHong Zhang PetscFunctionReturn(0); 32693b00a383SHong Zhang } 32703b00a383SHong Zhang 32713782ecc7SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 32723782ecc7SHong Zhang { 32731358a193SHong Zhang IS iscol_local=NULL,isrow_d; 32743782ecc7SHong Zhang PetscInt csize; 327518e627e3SHong Zhang 
PetscInt n,i,j,start,end; 32764a3daf6eSHong Zhang PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 32773782ecc7SHong Zhang MPI_Comm comm; 32783782ecc7SHong Zhang 32793782ecc7SHong Zhang PetscFunctionBegin; 3280bcae8d28SHong Zhang /* If isrow has same processor distribution as mat, 3281a31a438cSHong Zhang call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 32828f69fa7bSHong Zhang if (call == MAT_REUSE_MATRIX) { 32839566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d)); 3284d5761cdaSHong Zhang if (isrow_d) { 3285d5761cdaSHong Zhang sameRowDist = PETSC_TRUE; 3286d5761cdaSHong Zhang tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3287d5761cdaSHong Zhang } else { 32889566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local)); 3289d5761cdaSHong Zhang if (iscol_local) { 3290d5761cdaSHong Zhang sameRowDist = PETSC_TRUE; 3291d5761cdaSHong Zhang tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3292d5761cdaSHong Zhang } 3293d5761cdaSHong Zhang } 32948f69fa7bSHong Zhang } else { 3295e489de8fSHong Zhang /* Check if isrow has same processor distribution as mat */ 329618e627e3SHong Zhang sameDist[0] = PETSC_FALSE; 32979566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow,&n)); 32983782ecc7SHong Zhang if (!n) { 329918e627e3SHong Zhang sameDist[0] = PETSC_TRUE; 33003782ecc7SHong Zhang } else { 33019566063dSJacob Faibussowitsch PetscCall(ISGetMinMax(isrow,&i,&j)); 33029566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(mat,&start,&end)); 330318e627e3SHong Zhang if (i >= start && j < end) { 330418e627e3SHong Zhang sameDist[0] = PETSC_TRUE; 33053782ecc7SHong Zhang } 33068f69fa7bSHong Zhang } 33073782ecc7SHong Zhang 3308e489de8fSHong Zhang /* Check if iscol has same processor distribution as mat */ 330918e627e3SHong Zhang sameDist[1] = PETSC_FALSE; 33109566063dSJacob Faibussowitsch 
PetscCall(ISGetLocalSize(iscol,&n)); 331118e627e3SHong Zhang if (!n) { 331218e627e3SHong Zhang sameDist[1] = PETSC_TRUE; 331318e627e3SHong Zhang } else { 33149566063dSJacob Faibussowitsch PetscCall(ISGetMinMax(iscol,&i,&j)); 33159566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end)); 331618e627e3SHong Zhang if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 331718e627e3SHong Zhang } 331818e627e3SHong Zhang 33199566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 33201c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm)); 332118e627e3SHong Zhang sameRowDist = tsameDist[0]; 332218e627e3SHong Zhang } 332318e627e3SHong Zhang 332418e627e3SHong Zhang if (sameRowDist) { 3325b20e2604SHong Zhang if (tsameDist[1]) { /* sameRowDist & sameColDist */ 33263b00a383SHong Zhang /* isrow and iscol have same processor distribution as mat */ 33279566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat)); 33281358a193SHong Zhang PetscFunctionReturn(0); 3329b20e2604SHong Zhang } else { /* sameRowDist */ 33303b00a383SHong Zhang /* isrow has same processor distribution as mat */ 33311358a193SHong Zhang if (call == MAT_INITIAL_MATRIX) { 33321358a193SHong Zhang PetscBool sorted; 33339566063dSJacob Faibussowitsch PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 33349566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */ 33359566063dSJacob Faibussowitsch PetscCall(ISGetSize(iscol,&i)); 333608401ef6SPierre Jolivet PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 33371358a193SHong Zhang 33389566063dSJacob Faibussowitsch PetscCall(ISSorted(iscol_local,&sorted)); 33391358a193SHong Zhang if (sorted) { 33401358a193SHong Zhang /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it 
can have duplicate indices */ 33419566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat)); 33423782ecc7SHong Zhang PetscFunctionReturn(0); 33433782ecc7SHong Zhang } 33441358a193SHong Zhang } else { /* call == MAT_REUSE_MATRIX */ 334548c0d076SHong Zhang IS iscol_sub; 33469566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 334748c0d076SHong Zhang if (iscol_sub) { 33489566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat)); 334948c0d076SHong Zhang PetscFunctionReturn(0); 335048c0d076SHong Zhang } 33511358a193SHong Zhang } 33521358a193SHong Zhang } 33531358a193SHong Zhang } 33543782ecc7SHong Zhang 3355bcae8d28SHong Zhang /* General case: iscol -> iscol_local which has global size of iscol */ 33563782ecc7SHong Zhang if (call == MAT_REUSE_MATRIX) { 33579566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 335828b400f6SJacob Faibussowitsch PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 33593782ecc7SHong Zhang } else { 33601358a193SHong Zhang if (!iscol_local) { 33619566063dSJacob Faibussowitsch PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 33623782ecc7SHong Zhang } 33631358a193SHong Zhang } 33643782ecc7SHong Zhang 33659566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol,&csize)); 33669566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat)); 33678f69fa7bSHong Zhang 3368b79d0421SJed Brown if (call == MAT_INITIAL_MATRIX) { 33699566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 33709566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 3371b79d0421SJed Brown } 33724aa3045dSJed 
Brown PetscFunctionReturn(0); 33734aa3045dSJed Brown } 33744aa3045dSJed Brown 3375feb78a15SHong Zhang /*@C 3376feb78a15SHong Zhang MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3377feb78a15SHong Zhang and "off-diagonal" part of the matrix in CSR format. 3378feb78a15SHong Zhang 3379d083f849SBarry Smith Collective 3380feb78a15SHong Zhang 3381feb78a15SHong Zhang Input Parameters: 3382feb78a15SHong Zhang + comm - MPI communicator 3383feb78a15SHong Zhang . A - "diagonal" portion of matrix 3384b20e2604SHong Zhang . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3385feb78a15SHong Zhang - garray - global index of B columns 3386feb78a15SHong Zhang 3387feb78a15SHong Zhang Output Parameter: 3388d5761cdaSHong Zhang . mat - the matrix, with input A as its local diagonal matrix 3389feb78a15SHong Zhang Level: advanced 3390feb78a15SHong Zhang 3391feb78a15SHong Zhang Notes: 3392d5761cdaSHong Zhang See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3393d5761cdaSHong Zhang A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 
3394feb78a15SHong Zhang 3395feb78a15SHong Zhang .seealso: MatCreateMPIAIJWithSplitArrays() 3396feb78a15SHong Zhang @*/ 3397feb78a15SHong Zhang PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3398feb78a15SHong Zhang { 3399feb78a15SHong Zhang Mat_MPIAIJ *maij; 3400e489de8fSHong Zhang Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3401a5348796SHong Zhang PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3402ce496241SStefano Zampini const PetscScalar *oa; 3403e489de8fSHong Zhang Mat Bnew; 3404feb78a15SHong Zhang PetscInt m,n,N; 3405feb78a15SHong Zhang 3406feb78a15SHong Zhang PetscFunctionBegin; 34079566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,mat)); 34089566063dSJacob Faibussowitsch PetscCall(MatGetSize(A,&m,&n)); 340908401ef6SPierre Jolivet PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 341008401ef6SPierre Jolivet PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3411b6d9b4e0SHong Zhang /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 341208401ef6SPierre Jolivet /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3413feb78a15SHong Zhang 3414e489de8fSHong Zhang /* Get global columns of mat */ 34151c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3416feb78a15SHong Zhang 34179566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 34189566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat,MATMPIAIJ)); 34199566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3420feb78a15SHong Zhang maij = (Mat_MPIAIJ*)(*mat)->data; 3421feb78a15SHong Zhang 3422feb78a15SHong 
Zhang (*mat)->preallocated = PETSC_TRUE; 3423feb78a15SHong Zhang 34249566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp((*mat)->rmap)); 34259566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3426feb78a15SHong Zhang 3427e489de8fSHong Zhang /* Set A as diagonal portion of *mat */ 3428feb78a15SHong Zhang maij->A = A; 3429feb78a15SHong Zhang 3430a5348796SHong Zhang nz = oi[m]; 3431a5348796SHong Zhang for (i=0; i<nz; i++) { 3432a5348796SHong Zhang col = oj[i]; 3433a5348796SHong Zhang oj[i] = garray[col]; 3434feb78a15SHong Zhang } 3435feb78a15SHong Zhang 3436e489de8fSHong Zhang /* Set Bnew as off-diagonal portion of *mat */ 34379566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 34389566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 34399566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3440e489de8fSHong Zhang bnew = (Mat_SeqAIJ*)Bnew->data; 3441e489de8fSHong Zhang bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3442e489de8fSHong Zhang maij->B = Bnew; 3443d5761cdaSHong Zhang 344408401ef6SPierre Jolivet PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3445d5761cdaSHong Zhang 3446e489de8fSHong Zhang b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3447d5761cdaSHong Zhang b->free_a = PETSC_FALSE; 3448d5761cdaSHong Zhang b->free_ij = PETSC_FALSE; 34499566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 3450d5761cdaSHong Zhang 3451e489de8fSHong Zhang bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3452e489de8fSHong Zhang bnew->free_a = PETSC_TRUE; 3453e489de8fSHong Zhang bnew->free_ij = PETSC_TRUE; 3454feb78a15SHong Zhang 3455a5348796SHong Zhang /* condense columns of maij->B */ 34569566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 
34579566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 34589566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 34599566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 34609566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3461feb78a15SHong Zhang PetscFunctionReturn(0); 3462feb78a15SHong Zhang } 3463feb78a15SHong Zhang 3464ef514586SHong Zhang extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 34654aa3045dSJed Brown 34661358a193SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3467a0ff6018SBarry Smith { 346898b658c4SHong Zhang PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 346985f27616SHong Zhang PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 347098b658c4SHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 34711fd43edeSHong Zhang Mat M,Msub,B=a->B; 347298b658c4SHong Zhang MatScalar *aa; 347300e6dbe6SBarry Smith Mat_SeqAIJ *aij; 3474a31a438cSHong Zhang PetscInt *garray = a->garray,*colsub,Ncols; 347598b658c4SHong Zhang PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 347698b658c4SHong Zhang IS iscol_sub,iscmap; 347798b658c4SHong Zhang const PetscInt *is_idx,*cmap; 347818e627e3SHong Zhang PetscBool allcolumns=PETSC_FALSE; 3479a31a438cSHong Zhang MPI_Comm comm; 34807e2c5f70SBarry Smith 3481a0ff6018SBarry Smith PetscFunctionBegin; 34829566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3483d5761cdaSHong Zhang if (call == MAT_REUSE_MATRIX) { 34849566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 348528b400f6SJacob Faibussowitsch PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, 
cannot reuse"); 34869566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_sub,&count)); 3487d5761cdaSHong Zhang 34889566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 348928b400f6SJacob Faibussowitsch PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3490d5761cdaSHong Zhang 34919566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 349228b400f6SJacob Faibussowitsch PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3493d5761cdaSHong Zhang 34949566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3495d5761cdaSHong Zhang 3496d5761cdaSHong Zhang } else { /* call == MAT_INITIAL_MATRIX) */ 34973b00a383SHong Zhang PetscBool flg; 34983b00a383SHong Zhang 34999566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol,&n)); 35009566063dSJacob Faibussowitsch PetscCall(ISGetSize(iscol,&Ncols)); 3501bcae8d28SHong Zhang 35023b00a383SHong Zhang /* (1) iscol -> nonscalable iscol_local */ 3503366a327dSHong Zhang /* Check for special case: each processor gets entire matrix columns */ 35049566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol_local,&flg)); 3505366a327dSHong Zhang if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 35061c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3507366a327dSHong Zhang if (allcolumns) { 3508366a327dSHong Zhang iscol_sub = iscol_local; 35099566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol_local)); 35109566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3511366a327dSHong Zhang 35123b00a383SHong Zhang } else { 35131358a193SHong Zhang /* (2) iscol_local -> 
iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3514244c7f15SHong Zhang PetscInt *idx,*cmap1,k; 35159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Ncols,&idx)); 35169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Ncols,&cmap1)); 35179566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_local,&is_idx)); 35188d2139bdSHong Zhang count = 0; 3519a31a438cSHong Zhang k = 0; 3520a31a438cSHong Zhang for (i=0; i<Ncols; i++) { 3521a31a438cSHong Zhang j = is_idx[i]; 3522a31a438cSHong Zhang if (j >= cstart && j < cend) { 3523a31a438cSHong Zhang /* diagonal part of mat */ 35248d2139bdSHong Zhang idx[count] = j; 3525366a327dSHong Zhang cmap1[count++] = i; /* column index in submat */ 35264a3daf6eSHong Zhang } else if (Bn) { 3527a31a438cSHong Zhang /* off-diagonal part of mat */ 3528a31a438cSHong Zhang if (j == garray[k]) { 35298d2139bdSHong Zhang idx[count] = j; 3530a31a438cSHong Zhang cmap1[count++] = i; /* column index in submat */ 3531a31a438cSHong Zhang } else if (j > garray[k]) { 3532a31a438cSHong Zhang while (j > garray[k] && k < Bn-1) k++; 3533a31a438cSHong Zhang if (j == garray[k]) { 3534a31a438cSHong Zhang idx[count] = j; 3535a31a438cSHong Zhang cmap1[count++] = i; /* column index in submat */ 35368d2139bdSHong Zhang } 35378d2139bdSHong Zhang } 35388d2139bdSHong Zhang } 35398d2139bdSHong Zhang } 35409566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 35418d2139bdSHong Zhang 35429566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 35439566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(iscol,&cbs)); 35449566063dSJacob Faibussowitsch PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3545b6d9b4e0SHong Zhang 35469566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3547a31a438cSHong Zhang } 35488b3fa1f7SHong Zhang 35493b00a383SHong 
Zhang /* (3) Create sequential Msub */ 35509566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3551d5761cdaSHong Zhang } 35528d2139bdSHong Zhang 35539566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_sub,&count)); 355498b658c4SHong Zhang aij = (Mat_SeqAIJ*)(Msub)->data; 355598b658c4SHong Zhang ii = aij->i; 35569566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscmap,&cmap)); 3557a0ff6018SBarry Smith 3558a0ff6018SBarry Smith /* 3559a0ff6018SBarry Smith m - number of local rows 3560a31a438cSHong Zhang Ncols - number of columns (same on all processors) 3561a0ff6018SBarry Smith rstart - first row in new global matrix generated 3562a0ff6018SBarry Smith */ 35639566063dSJacob Faibussowitsch PetscCall(MatGetSize(Msub,&m,NULL)); 356498b658c4SHong Zhang 35653b00a383SHong Zhang if (call == MAT_INITIAL_MATRIX) { 35663b00a383SHong Zhang /* (4) Create parallel newmat */ 356798b658c4SHong Zhang PetscMPIInt rank,size; 3568bcae8d28SHong Zhang PetscInt csize; 356998b658c4SHong Zhang 35709566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 35719566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm,&rank)); 357200e6dbe6SBarry Smith 3573a0ff6018SBarry Smith /* 357400e6dbe6SBarry Smith Determine the number of non-zeros in the diagonal and off-diagonal 357500e6dbe6SBarry Smith portions of the matrix in order to do correct preallocation 3576a0ff6018SBarry Smith */ 357700e6dbe6SBarry Smith 357800e6dbe6SBarry Smith /* first get start and end of "diagonal" columns */ 35799566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol,&csize)); 35806a6a5d1dSBarry Smith if (csize == PETSC_DECIDE) { 35819566063dSJacob Faibussowitsch PetscCall(ISGetSize(isrow,&mglobal)); 3582a31a438cSHong Zhang if (mglobal == Ncols) { /* square matrix */ 3583e2c4fddaSBarry Smith nlocal = m; 35846a6a5d1dSBarry Smith } else { 3585a31a438cSHong Zhang nlocal = Ncols/size + ((Ncols % size) 
> rank); 3586ab50ec6bSBarry Smith } 3587ab50ec6bSBarry Smith } else { 35886a6a5d1dSBarry Smith nlocal = csize; 35896a6a5d1dSBarry Smith } 35909566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 359100e6dbe6SBarry Smith rstart = rend - nlocal; 35922c71b3e2SJacob Faibussowitsch PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 359300e6dbe6SBarry Smith 359400e6dbe6SBarry Smith /* next, compute all the lengths */ 359598b658c4SHong Zhang jj = aij->j; 35969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2*m+1,&dlens)); 359700e6dbe6SBarry Smith olens = dlens + m; 359800e6dbe6SBarry Smith for (i=0; i<m; i++) { 359900e6dbe6SBarry Smith jend = ii[i+1] - ii[i]; 360000e6dbe6SBarry Smith olen = 0; 360100e6dbe6SBarry Smith dlen = 0; 360200e6dbe6SBarry Smith for (j=0; j<jend; j++) { 360315b2185cSHong Zhang if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 360400e6dbe6SBarry Smith else dlen++; 360500e6dbe6SBarry Smith jj++; 360600e6dbe6SBarry Smith } 360700e6dbe6SBarry Smith olens[i] = olen; 360800e6dbe6SBarry Smith dlens[i] = dlen; 360900e6dbe6SBarry Smith } 3610b6d9b4e0SHong Zhang 36119566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(isrow,&bs)); 36129566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(iscol,&cbs)); 361398b658c4SHong Zhang 36149566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&M)); 36159566063dSJacob Faibussowitsch PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 36169566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(M,bs,cbs)); 36179566063dSJacob Faibussowitsch PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 36189566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 36199566063dSJacob Faibussowitsch PetscCall(PetscFree(dlens)); 3620d5761cdaSHong Zhang 3621d5761cdaSHong Zhang } else { /* call == MAT_REUSE_MATRIX 
*/ 3622a0ff6018SBarry Smith M = *newmat; 36239566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(M,&i,NULL)); 362408401ef6SPierre Jolivet PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 36259566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 3626c48de900SBarry Smith /* 3627c48de900SBarry Smith The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3628c48de900SBarry Smith rather than the slower MatSetValues(). 3629c48de900SBarry Smith */ 3630c48de900SBarry Smith M->was_assembled = PETSC_TRUE; 3631c48de900SBarry Smith M->assembled = PETSC_FALSE; 3632a0ff6018SBarry Smith } 3633548ecf4dSHong Zhang 36343b00a383SHong Zhang /* (5) Set values of Msub to *newmat */ 36359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(count,&colsub)); 36369566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 363798b658c4SHong Zhang 363898b658c4SHong Zhang jj = aij->j; 36399566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3640a0ff6018SBarry Smith for (i=0; i<m; i++) { 3641a0ff6018SBarry Smith row = rstart + i; 364200e6dbe6SBarry Smith nz = ii[i+1] - ii[i]; 364315b2185cSHong Zhang for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 36449566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 364515b2185cSHong Zhang jj += nz; aa += nz; 3646a0ff6018SBarry Smith } 36479566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 36489566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscmap,&cmap)); 3649a0ff6018SBarry Smith 36509566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 36519566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3652fee21e36SBarry Smith 36539566063dSJacob Faibussowitsch PetscCall(PetscFree(colsub)); 365498b658c4SHong Zhang 365598b658c4SHong Zhang /* save Msub, iscol_sub 
and iscmap used in processor for next request */ 3656fee21e36SBarry Smith if (call == MAT_INITIAL_MATRIX) { 36573b00a383SHong Zhang *newmat = M; 36589566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 36599566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Msub)); 366098b658c4SHong Zhang 36619566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 36629566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_sub)); 366398b658c4SHong Zhang 36649566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 36659566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscmap)); 3666bcae8d28SHong Zhang 3667bcae8d28SHong Zhang if (iscol_local) { 36689566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 36699566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 3670bcae8d28SHong Zhang } 367198b658c4SHong Zhang } 3672a0ff6018SBarry Smith PetscFunctionReturn(0); 3673a0ff6018SBarry Smith } 3674273d9f13SBarry Smith 3675df40acb1SHong Zhang /* 3676df40acb1SHong Zhang Not great since it makes two copies of the submatrix, first an SeqAIJ 3677df40acb1SHong Zhang in local and then by concatenating the local matrices the end result. 3678df40acb1SHong Zhang Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3679df40acb1SHong Zhang 3680df40acb1SHong Zhang Note: This requires a sequential iscol with all indices. 
3681df40acb1SHong Zhang */ 3682618cbb4aSHong Zhang PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3683df40acb1SHong Zhang { 3684df40acb1SHong Zhang PetscMPIInt rank,size; 3685df40acb1SHong Zhang PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3686df40acb1SHong Zhang PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3687df40acb1SHong Zhang Mat M,Mreuse; 368898b658c4SHong Zhang MatScalar *aa,*vwork; 3689df40acb1SHong Zhang MPI_Comm comm; 3690df40acb1SHong Zhang Mat_SeqAIJ *aij; 36910b27a90eSHong Zhang PetscBool colflag,allcolumns=PETSC_FALSE; 3692df40acb1SHong Zhang 3693df40acb1SHong Zhang PetscFunctionBegin; 36949566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 36959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm,&rank)); 36969566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 3697df40acb1SHong Zhang 36980b27a90eSHong Zhang /* Check for special case: each processor gets entire matrix columns */ 36999566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol,&colflag)); 37009566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol,&n)); 37010b27a90eSHong Zhang if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 37021c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 37030b27a90eSHong Zhang 3704df40acb1SHong Zhang if (call == MAT_REUSE_MATRIX) { 37059566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 370628b400f6SJacob Faibussowitsch PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 37079566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3708df40acb1SHong Zhang } else { 37099566063dSJacob Faibussowitsch 
PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3710df40acb1SHong Zhang } 3711df40acb1SHong Zhang 3712df40acb1SHong Zhang /* 3713df40acb1SHong Zhang m - number of local rows 3714df40acb1SHong Zhang n - number of columns (same on all processors) 3715df40acb1SHong Zhang rstart - first row in new global matrix generated 3716df40acb1SHong Zhang */ 37179566063dSJacob Faibussowitsch PetscCall(MatGetSize(Mreuse,&m,&n)); 37189566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3719df40acb1SHong Zhang if (call == MAT_INITIAL_MATRIX) { 3720df40acb1SHong Zhang aij = (Mat_SeqAIJ*)(Mreuse)->data; 3721df40acb1SHong Zhang ii = aij->i; 3722df40acb1SHong Zhang jj = aij->j; 3723df40acb1SHong Zhang 3724df40acb1SHong Zhang /* 3725df40acb1SHong Zhang Determine the number of non-zeros in the diagonal and off-diagonal 3726df40acb1SHong Zhang portions of the matrix in order to do correct preallocation 3727df40acb1SHong Zhang */ 3728df40acb1SHong Zhang 3729df40acb1SHong Zhang /* first get start and end of "diagonal" columns */ 3730df40acb1SHong Zhang if (csize == PETSC_DECIDE) { 37319566063dSJacob Faibussowitsch PetscCall(ISGetSize(isrow,&mglobal)); 3732df40acb1SHong Zhang if (mglobal == n) { /* square matrix */ 3733df40acb1SHong Zhang nlocal = m; 3734df40acb1SHong Zhang } else { 3735df40acb1SHong Zhang nlocal = n/size + ((n % size) > rank); 3736df40acb1SHong Zhang } 3737df40acb1SHong Zhang } else { 3738df40acb1SHong Zhang nlocal = csize; 3739df40acb1SHong Zhang } 37409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3741df40acb1SHong Zhang rstart = rend - nlocal; 37422c71b3e2SJacob Faibussowitsch PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3743df40acb1SHong Zhang 3744df40acb1SHong Zhang /* next, compute all the lengths */ 
37459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2*m+1,&dlens)); 3746df40acb1SHong Zhang olens = dlens + m; 3747df40acb1SHong Zhang for (i=0; i<m; i++) { 3748df40acb1SHong Zhang jend = ii[i+1] - ii[i]; 3749df40acb1SHong Zhang olen = 0; 3750df40acb1SHong Zhang dlen = 0; 3751df40acb1SHong Zhang for (j=0; j<jend; j++) { 3752df40acb1SHong Zhang if (*jj < rstart || *jj >= rend) olen++; 3753df40acb1SHong Zhang else dlen++; 3754df40acb1SHong Zhang jj++; 3755df40acb1SHong Zhang } 3756df40acb1SHong Zhang olens[i] = olen; 3757df40acb1SHong Zhang dlens[i] = dlen; 3758df40acb1SHong Zhang } 37599566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&M)); 37609566063dSJacob Faibussowitsch PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 37619566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(M,bs,cbs)); 37629566063dSJacob Faibussowitsch PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 37639566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 37649566063dSJacob Faibussowitsch PetscCall(PetscFree(dlens)); 3765df40acb1SHong Zhang } else { 3766df40acb1SHong Zhang PetscInt ml,nl; 3767df40acb1SHong Zhang 3768df40acb1SHong Zhang M = *newmat; 37699566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(M,&ml,&nl)); 377008401ef6SPierre Jolivet PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 37719566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 3772df40acb1SHong Zhang /* 3773df40acb1SHong Zhang The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3774df40acb1SHong Zhang rather than the slower MatSetValues(). 
3775df40acb1SHong Zhang */ 3776df40acb1SHong Zhang M->was_assembled = PETSC_TRUE; 3777df40acb1SHong Zhang M->assembled = PETSC_FALSE; 3778df40acb1SHong Zhang } 37799566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3780df40acb1SHong Zhang aij = (Mat_SeqAIJ*)(Mreuse)->data; 3781df40acb1SHong Zhang ii = aij->i; 3782df40acb1SHong Zhang jj = aij->j; 37832e5835c6SStefano Zampini 37842e5835c6SStefano Zampini /* trigger copy to CPU if needed */ 37859566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3786df40acb1SHong Zhang for (i=0; i<m; i++) { 3787df40acb1SHong Zhang row = rstart + i; 3788df40acb1SHong Zhang nz = ii[i+1] - ii[i]; 3789df40acb1SHong Zhang cwork = jj; jj += nz; 3790df40acb1SHong Zhang vwork = aa; aa += nz; 37919566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3792df40acb1SHong Zhang } 37939566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3794df40acb1SHong Zhang 37959566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 37969566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3797df40acb1SHong Zhang *newmat = M; 3798df40acb1SHong Zhang 3799df40acb1SHong Zhang /* save submatrix used in processor for next request */ 3800df40acb1SHong Zhang if (call == MAT_INITIAL_MATRIX) { 38019566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 38029566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Mreuse)); 3803df40acb1SHong Zhang } 3804df40acb1SHong Zhang PetscFunctionReturn(0); 3805df40acb1SHong Zhang } 3806df40acb1SHong Zhang 38077087cfbeSBarry Smith PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3808ccd8e176SBarry Smith { 3809899cda47SBarry Smith PetscInt m,cstart, cend,j,nnz,i,d; 3810899cda47SBarry Smith PetscInt 
*d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3811ccd8e176SBarry Smith const PetscInt *JJ; 3812eeb24464SBarry Smith PetscBool nooffprocentries; 3813ccd8e176SBarry Smith 3814ccd8e176SBarry Smith PetscFunctionBegin; 38152c71b3e2SJacob Faibussowitsch PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3816899cda47SBarry Smith 38179566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 38189566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 3819d0f46423SBarry Smith m = B->rmap->n; 3820d0f46423SBarry Smith cstart = B->cmap->rstart; 3821d0f46423SBarry Smith cend = B->cmap->rend; 3822d0f46423SBarry Smith rstart = B->rmap->rstart; 3823899cda47SBarry Smith 38249566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3825ccd8e176SBarry Smith 382676bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 38278f8f2f0dSBarry Smith for (i=0; i<m; i++) { 3828ecc77c7aSBarry Smith nnz = Ii[i+1]- Ii[i]; 3829ecc77c7aSBarry Smith JJ = J + Ii[i]; 383008401ef6SPierre Jolivet PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 383108401ef6SPierre Jolivet PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 383208401ef6SPierre Jolivet PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3833ecc77c7aSBarry Smith } 383476bd3646SJed Brown } 3835ecc77c7aSBarry Smith 38368f8f2f0dSBarry Smith for (i=0; i<m; i++) { 3837b7940d39SSatish Balay nnz = Ii[i+1]- Ii[i]; 3838b7940d39SSatish Balay JJ = J + Ii[i]; 3839ccd8e176SBarry Smith nnz_max = PetscMax(nnz_max,nnz); 3840ccd8e176SBarry Smith d = 0; 38410daa03b5SJed Brown for (j=0; j<nnz; j++) { 
38420daa03b5SJed Brown if (cstart <= JJ[j] && JJ[j] < cend) d++; 3843ccd8e176SBarry Smith } 3844ccd8e176SBarry Smith d_nnz[i] = d; 3845ccd8e176SBarry Smith o_nnz[i] = nnz - d; 3846ccd8e176SBarry Smith } 38479566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 38489566063dSJacob Faibussowitsch PetscCall(PetscFree2(d_nnz,o_nnz)); 3849ccd8e176SBarry Smith 38508f8f2f0dSBarry Smith for (i=0; i<m; i++) { 3851ccd8e176SBarry Smith ii = i + rstart; 38529566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES)); 3853ccd8e176SBarry Smith } 3854eeb24464SBarry Smith nooffprocentries = B->nooffprocentries; 3855eeb24464SBarry Smith B->nooffprocentries = PETSC_TRUE; 38569566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 38579566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3858eeb24464SBarry Smith B->nooffprocentries = nooffprocentries; 3859ccd8e176SBarry Smith 38609566063dSJacob Faibussowitsch PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3861ccd8e176SBarry Smith PetscFunctionReturn(0); 3862ccd8e176SBarry Smith } 3863ccd8e176SBarry Smith 38641eea217eSSatish Balay /*@ 3865ccd8e176SBarry Smith MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3866ccd8e176SBarry Smith (the default parallel PETSc format). 3867ccd8e176SBarry Smith 3868d083f849SBarry Smith Collective 3869ccd8e176SBarry Smith 3870ccd8e176SBarry Smith Input Parameters: 3871a1661176SMatthew Knepley + B - the matrix 3872ccd8e176SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 38730daa03b5SJed Brown . 
j - the column indices for each local row (starts with zero) 3874ccd8e176SBarry Smith - v - optional values in the matrix 3875ccd8e176SBarry Smith 3876ccd8e176SBarry Smith Level: developer 3877ccd8e176SBarry Smith 387812251496SSatish Balay Notes: 3879c1c1d628SHong Zhang The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3880c1c1d628SHong Zhang thus you CANNOT change the matrix entries by changing the values of v[] after you have 388112251496SSatish Balay called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 388212251496SSatish Balay 388312251496SSatish Balay The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 388412251496SSatish Balay 388512251496SSatish Balay The format which is used for the sparse matrix input, is equivalent to a 388612251496SSatish Balay row-major ordering.. i.e for the following matrix, the input data expected is 3887c5e4d11fSDmitry Karpeev as shown 388812251496SSatish Balay 3889c5e4d11fSDmitry Karpeev $ 1 0 0 3890c5e4d11fSDmitry Karpeev $ 2 0 3 P0 3891c5e4d11fSDmitry Karpeev $ ------- 3892c5e4d11fSDmitry Karpeev $ 4 5 6 P1 3893c5e4d11fSDmitry Karpeev $ 3894c5e4d11fSDmitry Karpeev $ Process0 [P0]: rows_owned=[0,1] 3895c5e4d11fSDmitry Karpeev $ i = {0,1,3} [size = nrow+1 = 2+1] 3896c5e4d11fSDmitry Karpeev $ j = {0,0,2} [size = 3] 3897c5e4d11fSDmitry Karpeev $ v = {1,2,3} [size = 3] 3898c5e4d11fSDmitry Karpeev $ 3899c5e4d11fSDmitry Karpeev $ Process1 [P1]: rows_owned=[2] 3900c5e4d11fSDmitry Karpeev $ i = {0,3} [size = nrow+1 = 1+1] 3901c5e4d11fSDmitry Karpeev $ j = {0,1,2} [size = 3] 3902c5e4d11fSDmitry Karpeev $ v = {4,5,6} [size = 3] 390312251496SSatish Balay 39045f4d30c4SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 39058d7a6e47SBarry Smith MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3906ccd8e176SBarry Smith @*/ 
39077087cfbeSBarry Smith PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3908ccd8e176SBarry Smith { 3909ccd8e176SBarry Smith PetscFunctionBegin; 3910cac4c232SBarry Smith PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3911ccd8e176SBarry Smith PetscFunctionReturn(0); 3912ccd8e176SBarry Smith } 3913ccd8e176SBarry Smith 3914273d9f13SBarry Smith /*@C 3915ccd8e176SBarry Smith MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3916273d9f13SBarry Smith (the default parallel PETSc format). For good matrix assembly performance 3917273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 3918273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3919273d9f13SBarry Smith performance can be increased by more than a factor of 50. 3920273d9f13SBarry Smith 3921d083f849SBarry Smith Collective 3922273d9f13SBarry Smith 3923273d9f13SBarry Smith Input Parameters: 39241c4f3114SJed Brown + B - the matrix 3925273d9f13SBarry Smith . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3926273d9f13SBarry Smith (same value is used for all local rows) 3927273d9f13SBarry Smith . d_nnz - array containing the number of nonzeros in the various rows of the 3928273d9f13SBarry Smith DIAGONAL portion of the local submatrix (possibly different for each row) 392920fa73abSMatthew G. Knepley or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3930273d9f13SBarry Smith The size of this array is equal to the number of local rows, i.e 'm'. 39313287b5eaSJed Brown For matrices that will be factored, you must leave room for (and set) 39323287b5eaSJed Brown the diagonal entry even if it is zero. 3933273d9f13SBarry Smith . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3934273d9f13SBarry Smith submatrix (same value is used for all local rows). 3935273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various rows of the 3936273d9f13SBarry Smith OFF-DIAGONAL portion of the local submatrix (possibly different for 393720fa73abSMatthew G. Knepley each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3938273d9f13SBarry Smith structure. The size of this array is equal to the number 3939273d9f13SBarry Smith of local rows, i.e 'm'. 3940273d9f13SBarry Smith 394149a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 394249a6f317SBarry Smith 3943273d9f13SBarry Smith The AIJ format (also called the Yale sparse matrix format or 3944ccd8e176SBarry Smith compressed row storage (CSR)), is fully compatible with standard Fortran 77 39450598bfebSBarry Smith storage. The stored row and column indices begin with zero. 3946a7f22e61SSatish Balay See Users-Manual: ch_mat for details. 3947273d9f13SBarry Smith 3948273d9f13SBarry Smith The parallel matrix is partitioned such that the first m0 rows belong to 3949273d9f13SBarry Smith process 0, the next m1 rows belong to process 1, the next m2 rows belong 3950273d9f13SBarry Smith to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3951273d9f13SBarry Smith 3952273d9f13SBarry Smith The DIAGONAL portion of the local submatrix of a processor can be defined 3953a05b864aSJed Brown as the submatrix which is obtained by extraction the part corresponding to 3954a05b864aSJed Brown the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3955a05b864aSJed Brown first row that belongs to the processor, r2 is the last row belonging to 3956a05b864aSJed Brown the this processor, and c1-c2 is range of indices of the local part of a 3957a05b864aSJed Brown vector suitable for applying the matrix to. This is an mxn matrix. 
In the 3958a05b864aSJed Brown common case of a square matrix, the row and column ranges are the same and 3959a05b864aSJed Brown the DIAGONAL part is also square. The remaining portion of the local 3960a05b864aSJed Brown submatrix (mxN) constitute the OFF-DIAGONAL portion. 3961273d9f13SBarry Smith 3962273d9f13SBarry Smith If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3963273d9f13SBarry Smith 3964aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3965aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3966aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3967aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3968aa95bbe8SBarry Smith 3969273d9f13SBarry Smith Example usage: 3970273d9f13SBarry Smith 3971273d9f13SBarry Smith Consider the following 8x8 matrix with 34 non-zero values, that is 3972273d9f13SBarry Smith assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3973273d9f13SBarry Smith proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3974273d9f13SBarry Smith as follows: 3975273d9f13SBarry Smith 3976273d9f13SBarry Smith .vb 3977273d9f13SBarry Smith 1 2 0 | 0 3 0 | 0 4 3978273d9f13SBarry Smith Proc0 0 5 6 | 7 0 0 | 8 0 3979273d9f13SBarry Smith 9 0 10 | 11 0 0 | 12 0 3980273d9f13SBarry Smith ------------------------------------- 3981273d9f13SBarry Smith 13 0 14 | 15 16 17 | 0 0 3982273d9f13SBarry Smith Proc1 0 18 0 | 19 20 21 | 0 0 3983273d9f13SBarry Smith 0 0 0 | 22 23 0 | 24 0 3984273d9f13SBarry Smith ------------------------------------- 3985273d9f13SBarry Smith Proc2 25 26 27 | 0 0 28 | 29 0 3986273d9f13SBarry Smith 30 0 0 | 31 32 33 | 0 34 3987273d9f13SBarry Smith .ve 3988273d9f13SBarry Smith 3989273d9f13SBarry Smith This can be represented as a collection of submatrices as: 3990273d9f13SBarry Smith 3991273d9f13SBarry Smith .vb 3992273d9f13SBarry Smith A B C 3993273d9f13SBarry Smith D E F 3994273d9f13SBarry Smith G H I 3995273d9f13SBarry Smith .ve 3996273d9f13SBarry Smith 3997273d9f13SBarry Smith Where the submatrices A,B,C are owned by proc0, D,E,F are 3998273d9f13SBarry Smith owned by proc1, G,H,I are owned by proc2. 3999273d9f13SBarry Smith 4000273d9f13SBarry Smith The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4001273d9f13SBarry Smith The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4002273d9f13SBarry Smith The 'M','N' parameters are 8,8, and have the same values on all procs. 4003273d9f13SBarry Smith 4004273d9f13SBarry Smith The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4005273d9f13SBarry Smith submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4006273d9f13SBarry Smith corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4007273d9f13SBarry Smith Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4008273d9f13SBarry Smith part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4009273d9f13SBarry Smith matrix, ans [DF] as another SeqAIJ matrix. 

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and  o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and  o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and  o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4037273d9f13SBarry Smith 4038273d9f13SBarry Smith Level: intermediate 4039273d9f13SBarry Smith 404069b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 40415f4d30c4SBarry Smith MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4042273d9f13SBarry Smith @*/ 40437087cfbeSBarry Smith PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4044273d9f13SBarry Smith { 4045273d9f13SBarry Smith PetscFunctionBegin; 40466ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 40476ba663aaSJed Brown PetscValidType(B,1); 4048cac4c232SBarry Smith PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz)); 4049273d9f13SBarry Smith PetscFunctionReturn(0); 4050273d9f13SBarry Smith } 4051273d9f13SBarry Smith 405258d36128SBarry Smith /*@ 40532fb0ec9aSBarry Smith MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 40548f8f2f0dSBarry Smith CSR format for the local rows. 40552fb0ec9aSBarry Smith 4056d083f849SBarry Smith Collective 40572fb0ec9aSBarry Smith 40582fb0ec9aSBarry Smith Input Parameters: 40592fb0ec9aSBarry Smith + comm - MPI communicator 40602fb0ec9aSBarry Smith . m - number of local rows (Cannot be PETSC_DECIDE) 40612fb0ec9aSBarry Smith . n - This value should be the same as the local size used in creating the 40622fb0ec9aSBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 40632fb0ec9aSBarry Smith calculated if N is given) For square matrices n is almost always m. 40642fb0ec9aSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 40652fb0ec9aSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4066483a2f95SBarry Smith . 
i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 40672fb0ec9aSBarry Smith . j - column indices 40682fb0ec9aSBarry Smith - a - matrix values 40692fb0ec9aSBarry Smith 40702fb0ec9aSBarry Smith Output Parameter: 40712fb0ec9aSBarry Smith . mat - the matrix 407203bfb495SBarry Smith 40732fb0ec9aSBarry Smith Level: intermediate 40742fb0ec9aSBarry Smith 40752fb0ec9aSBarry Smith Notes: 40762fb0ec9aSBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 40772fb0ec9aSBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 40788d7a6e47SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 40792fb0ec9aSBarry Smith 408012251496SSatish Balay The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 408112251496SSatish Balay 408212251496SSatish Balay The format which is used for the sparse matrix input, is equivalent to a 408312251496SSatish Balay row-major ordering.. 
i.e for the following matrix, the input data expected is 4084c5e4d11fSDmitry Karpeev as shown 408512251496SSatish Balay 40868f8f2f0dSBarry Smith Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 40878f8f2f0dSBarry Smith 4088c5e4d11fSDmitry Karpeev $ 1 0 0 4089c5e4d11fSDmitry Karpeev $ 2 0 3 P0 4090c5e4d11fSDmitry Karpeev $ ------- 4091c5e4d11fSDmitry Karpeev $ 4 5 6 P1 4092c5e4d11fSDmitry Karpeev $ 4093c5e4d11fSDmitry Karpeev $ Process0 [P0]: rows_owned=[0,1] 4094c5e4d11fSDmitry Karpeev $ i = {0,1,3} [size = nrow+1 = 2+1] 4095c5e4d11fSDmitry Karpeev $ j = {0,0,2} [size = 3] 4096c5e4d11fSDmitry Karpeev $ v = {1,2,3} [size = 3] 4097c5e4d11fSDmitry Karpeev $ 4098c5e4d11fSDmitry Karpeev $ Process1 [P1]: rows_owned=[2] 4099c5e4d11fSDmitry Karpeev $ i = {0,3} [size = nrow+1 = 1+1] 4100c5e4d11fSDmitry Karpeev $ j = {0,1,2} [size = 3] 4101c5e4d11fSDmitry Karpeev $ v = {4,5,6} [size = 3] 41022fb0ec9aSBarry Smith 41032fb0ec9aSBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 41048f8f2f0dSBarry Smith MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 41052fb0ec9aSBarry Smith @*/ 41067087cfbeSBarry Smith PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 41072fb0ec9aSBarry Smith { 41082fb0ec9aSBarry Smith PetscFunctionBegin; 410908401ef6SPierre Jolivet PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 411008401ef6SPierre Jolivet PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 41119566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,mat)); 41129566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat,m,n,M,N)); 41139566063dSJacob Faibussowitsch /* 
PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 41149566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat,MATMPIAIJ)); 41159566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a)); 41162fb0ec9aSBarry Smith PetscFunctionReturn(0); 41172fb0ec9aSBarry Smith } 41182fb0ec9aSBarry Smith 41198f8f2f0dSBarry Smith /*@ 41208f8f2f0dSBarry Smith MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 41218f8f2f0dSBarry Smith CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 41228f8f2f0dSBarry Smith 41238f8f2f0dSBarry Smith Collective 41248f8f2f0dSBarry Smith 41258f8f2f0dSBarry Smith Input Parameters: 41268f8f2f0dSBarry Smith + mat - the matrix 41278f8f2f0dSBarry Smith . m - number of local rows (Cannot be PETSC_DECIDE) 41288f8f2f0dSBarry Smith . n - This value should be the same as the local size used in creating the 41298f8f2f0dSBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 41308f8f2f0dSBarry Smith calculated if N is given) For square matrices n is almost always m. 41318f8f2f0dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 41328f8f2f0dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 41338f8f2f0dSBarry Smith . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 41348f8f2f0dSBarry Smith . 
J - column indices 41358f8f2f0dSBarry Smith - v - matrix values 41368f8f2f0dSBarry Smith 41378f8f2f0dSBarry Smith Level: intermediate 41388f8f2f0dSBarry Smith 41398f8f2f0dSBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 41408f8f2f0dSBarry Smith MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 41418f8f2f0dSBarry Smith @*/ 41428f8f2f0dSBarry Smith PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 41438f8f2f0dSBarry Smith { 414470990e77SSatish Balay PetscInt cstart,nnz,i,j; 41458f8f2f0dSBarry Smith PetscInt *ld; 41468f8f2f0dSBarry Smith PetscBool nooffprocentries; 41478f8f2f0dSBarry Smith Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4148fff043a9SJunchao Zhang Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4149fff043a9SJunchao Zhang PetscScalar *ad,*ao; 41508f8f2f0dSBarry Smith const PetscInt *Adi = Ad->i; 41518f8f2f0dSBarry Smith PetscInt ldi,Iii,md; 41528f8f2f0dSBarry Smith 41538f8f2f0dSBarry Smith PetscFunctionBegin; 41542c71b3e2SJacob Faibussowitsch PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 415508401ef6SPierre Jolivet PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 415608401ef6SPierre Jolivet PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 415708401ef6SPierre Jolivet PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 41588f8f2f0dSBarry Smith 41599566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 41609566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 41618f8f2f0dSBarry Smith 
cstart = mat->cmap->rstart; 41628f8f2f0dSBarry Smith if (!Aij->ld) { 41638f8f2f0dSBarry Smith /* count number of entries below block diagonal */ 41649566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(m,&ld)); 41658f8f2f0dSBarry Smith Aij->ld = ld; 41668f8f2f0dSBarry Smith for (i=0; i<m; i++) { 41678f8f2f0dSBarry Smith nnz = Ii[i+1]- Ii[i]; 41688f8f2f0dSBarry Smith j = 0; 41698f8f2f0dSBarry Smith while (J[j] < cstart && j < nnz) {j++;} 41708f8f2f0dSBarry Smith J += nnz; 41718f8f2f0dSBarry Smith ld[i] = j; 41728f8f2f0dSBarry Smith } 41738f8f2f0dSBarry Smith } else { 41748f8f2f0dSBarry Smith ld = Aij->ld; 41758f8f2f0dSBarry Smith } 41768f8f2f0dSBarry Smith 41778f8f2f0dSBarry Smith for (i=0; i<m; i++) { 41788f8f2f0dSBarry Smith nnz = Ii[i+1]- Ii[i]; 41798f8f2f0dSBarry Smith Iii = Ii[i]; 41808f8f2f0dSBarry Smith ldi = ld[i]; 41818f8f2f0dSBarry Smith md = Adi[i+1]-Adi[i]; 41829566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 41839566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 41849566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 41858f8f2f0dSBarry Smith ad += md; 41868f8f2f0dSBarry Smith ao += nnz - md; 41878f8f2f0dSBarry Smith } 41888f8f2f0dSBarry Smith nooffprocentries = mat->nooffprocentries; 41898f8f2f0dSBarry Smith mat->nooffprocentries = PETSC_TRUE; 41909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 41919566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 41929566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 41939566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 41949566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 41959566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 41969566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 
41978f8f2f0dSBarry Smith mat->nooffprocentries = nooffprocentries; 41988f8f2f0dSBarry Smith PetscFunctionReturn(0); 41998f8f2f0dSBarry Smith } 42008f8f2f0dSBarry Smith 4201273d9f13SBarry Smith /*@C 420269b1f4b7SBarry Smith MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4203273d9f13SBarry Smith (the default parallel PETSc format). For good matrix assembly performance 4204273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 4205273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4206273d9f13SBarry Smith performance can be increased by more than a factor of 50. 4207273d9f13SBarry Smith 4208d083f849SBarry Smith Collective 4209273d9f13SBarry Smith 4210273d9f13SBarry Smith Input Parameters: 4211273d9f13SBarry Smith + comm - MPI communicator 4212273d9f13SBarry Smith . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4213273d9f13SBarry Smith This value should be the same as the local size used in creating the 4214273d9f13SBarry Smith y vector for the matrix-vector product y = Ax. 4215273d9f13SBarry Smith . n - This value should be the same as the local size used in creating the 4216273d9f13SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4217273d9f13SBarry Smith calculated if N is given) For square matrices n is almost always m. 4218273d9f13SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4219273d9f13SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4220273d9f13SBarry Smith . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4221273d9f13SBarry Smith (same value is used for all local rows) 4222273d9f13SBarry Smith . 
d_nnz - array containing the number of nonzeros in the various rows of the 4223273d9f13SBarry Smith DIAGONAL portion of the local submatrix (possibly different for each row) 42240298fd71SBarry Smith or NULL, if d_nz is used to specify the nonzero structure. 4225273d9f13SBarry Smith The size of this array is equal to the number of local rows, i.e 'm'. 4226273d9f13SBarry Smith . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4227273d9f13SBarry Smith submatrix (same value is used for all local rows). 4228273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various rows of the 4229273d9f13SBarry Smith OFF-DIAGONAL portion of the local submatrix (possibly different for 42300298fd71SBarry Smith each row) or NULL, if o_nz is used to specify the nonzero 4231273d9f13SBarry Smith structure. The size of this array is equal to the number 4232273d9f13SBarry Smith of local rows, i.e 'm'. 4233273d9f13SBarry Smith 4234273d9f13SBarry Smith Output Parameter: 4235273d9f13SBarry Smith . A - the matrix 4236273d9f13SBarry Smith 4237175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4238f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 4239175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4240175b88e8SBarry Smith 4241273d9f13SBarry Smith Notes: 424249a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 424349a6f317SBarry Smith 4244273d9f13SBarry Smith m,n,M,N parameters specify the size of the matrix, and its partitioning across 4245273d9f13SBarry Smith processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4246273d9f13SBarry Smith storage requirements for this matrix. 
4247273d9f13SBarry Smith 4248273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4249273d9f13SBarry Smith processor than it must be used on all processors that share the object for 4250273d9f13SBarry Smith that argument. 4251273d9f13SBarry Smith 4252273d9f13SBarry Smith The user MUST specify either the local or global matrix dimensions 4253273d9f13SBarry Smith (possibly both). 4254273d9f13SBarry Smith 425533a7c187SSatish Balay The parallel matrix is partitioned across processors such that the 425633a7c187SSatish Balay first m0 rows belong to process 0, the next m1 rows belong to 425733a7c187SSatish Balay process 1, the next m2 rows belong to process 2 etc.. where 425833a7c187SSatish Balay m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 425933a7c187SSatish Balay values corresponding to [m x N] submatrix. 4260273d9f13SBarry Smith 426133a7c187SSatish Balay The columns are logically partitioned with the n0 columns belonging 426233a7c187SSatish Balay to 0th partition, the next n1 columns belonging to the next 4263df3898eeSBarry Smith partition etc.. where n0,n1,n2... are the input parameter 'n'. 426433a7c187SSatish Balay 426533a7c187SSatish Balay The DIAGONAL portion of the local submatrix on any given processor 426633a7c187SSatish Balay is the submatrix corresponding to the rows and columns m,n 426733a7c187SSatish Balay corresponding to the given processor. i.e diagonal matrix on 426833a7c187SSatish Balay process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 426933a7c187SSatish Balay etc. The remaining portion of the local submatrix [m x (N-n)] 427033a7c187SSatish Balay constitute the OFF-DIAGONAL portion. The example below better 427133a7c187SSatish Balay illustrates this concept. 
427233a7c187SSatish Balay 427333a7c187SSatish Balay For a square global matrix we define each processor's diagonal portion 427433a7c187SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 427533a7c187SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 427633a7c187SSatish Balay local matrix (a rectangular submatrix). 4277273d9f13SBarry Smith 4278273d9f13SBarry Smith If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4279273d9f13SBarry Smith 428097d05335SKris Buschelman When calling this routine with a single process communicator, a matrix of 428197d05335SKris Buschelman type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4282da57b5cdSKarl Rupp type of communicator, use the construction mechanism 4283da57b5cdSKarl Rupp .vb 428478102f6cSMatthew Knepley MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4285da57b5cdSKarl Rupp .ve 428697d05335SKris Buschelman 4287f1058c0fSBarry Smith $ MatCreate(...,&A); 4288f1058c0fSBarry Smith $ MatSetType(A,MATMPIAIJ); 4289f1058c0fSBarry Smith $ MatSetSizes(A, m,n,M,N); 4290f1058c0fSBarry Smith $ MatMPIAIJSetPreallocation(A,...); 4291f1058c0fSBarry Smith 4292273d9f13SBarry Smith By default, this format uses inodes (identical nodes) when possible. 4293273d9f13SBarry Smith We search for consecutive rows with the same nonzero structure, thereby 4294273d9f13SBarry Smith reusing matrix information to achieve increased efficiency. 4295273d9f13SBarry Smith 4296273d9f13SBarry Smith Options Database Keys: 4297923f20ffSKris Buschelman + -mat_no_inode - Do not use inodes 42982f3b2168SJunchao Zhang . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 42992f3b2168SJunchao Zhang - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 43002f3b2168SJunchao Zhang See viewer types in manual of MatView(). 
Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 43012f3b2168SJunchao Zhang Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 430247b2e64bSBarry Smith 4303273d9f13SBarry Smith Example usage: 4304273d9f13SBarry Smith 4305273d9f13SBarry Smith Consider the following 8x8 matrix with 34 non-zero values, that is 4306273d9f13SBarry Smith assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4307273d9f13SBarry Smith proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4308efc377ccSKarl Rupp as follows 4309273d9f13SBarry Smith 4310273d9f13SBarry Smith .vb 4311273d9f13SBarry Smith 1 2 0 | 0 3 0 | 0 4 4312273d9f13SBarry Smith Proc0 0 5 6 | 7 0 0 | 8 0 4313273d9f13SBarry Smith 9 0 10 | 11 0 0 | 12 0 4314273d9f13SBarry Smith ------------------------------------- 4315273d9f13SBarry Smith 13 0 14 | 15 16 17 | 0 0 4316273d9f13SBarry Smith Proc1 0 18 0 | 19 20 21 | 0 0 4317273d9f13SBarry Smith 0 0 0 | 22 23 0 | 24 0 4318273d9f13SBarry Smith ------------------------------------- 4319273d9f13SBarry Smith Proc2 25 26 27 | 0 0 28 | 29 0 4320273d9f13SBarry Smith 30 0 0 | 31 32 33 | 0 34 4321273d9f13SBarry Smith .ve 4322273d9f13SBarry Smith 4323da57b5cdSKarl Rupp This can be represented as a collection of submatrices as 4324273d9f13SBarry Smith 4325273d9f13SBarry Smith .vb 4326273d9f13SBarry Smith A B C 4327273d9f13SBarry Smith D E F 4328273d9f13SBarry Smith G H I 4329273d9f13SBarry Smith .ve 4330273d9f13SBarry Smith 4331273d9f13SBarry Smith Where the submatrices A,B,C are owned by proc0, D,E,F are 4332273d9f13SBarry Smith owned by proc1, G,H,I are owned by proc2. 4333273d9f13SBarry Smith 4334273d9f13SBarry Smith The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4335273d9f13SBarry Smith The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4336273d9f13SBarry Smith The 'M','N' parameters are 8,8, and have the same values on all procs. 
4337273d9f13SBarry Smith 4338273d9f13SBarry Smith The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4339273d9f13SBarry Smith submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4340273d9f13SBarry Smith corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4341273d9f13SBarry Smith Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4342273d9f13SBarry Smith part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4343273d9f13SBarry Smith matrix, and [DF] as another SeqAIJ matrix. 4344273d9f13SBarry Smith 4345273d9f13SBarry Smith When d_nz, o_nz parameters are specified, d_nz storage elements are 4346273d9f13SBarry Smith allocated for every row of the local diagonal submatrix, and o_nz 4347273d9f13SBarry Smith storage locations are allocated for every row of the OFF-DIAGONAL submat. 4348273d9f13SBarry Smith One way to choose d_nz and o_nz is to use the max nonzeros per local 4349273d9f13SBarry Smith rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4350da57b5cdSKarl Rupp In this case, the values of d_nz,o_nz are 4351273d9f13SBarry Smith .vb 4352273d9f13SBarry Smith proc0 : d_nz = 2, o_nz = 2 4353273d9f13SBarry Smith proc1 : d_nz = 3, o_nz = 2 4354273d9f13SBarry Smith proc2 : d_nz = 1, o_nz = 4 4355273d9f13SBarry Smith .ve 4356273d9f13SBarry Smith We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4357273d9f13SBarry Smith translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4358273d9f13SBarry Smith for proc2. i.e we are using 12+15+10=37 storage locations to store 4359273d9f13SBarry Smith 34 values. 4360273d9f13SBarry Smith 4361273d9f13SBarry Smith When d_nnz, o_nnz parameters are specified, the storage is specified 4362a5b23f4aSJose E. Roman for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
4363da57b5cdSKarl Rupp In the above case the values for d_nnz,o_nnz are 4364273d9f13SBarry Smith .vb 4365273d9f13SBarry Smith proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4366273d9f13SBarry Smith proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4367273d9f13SBarry Smith proc2: d_nnz = [1,1] and o_nnz = [4,4] 4368273d9f13SBarry Smith .ve 4369273d9f13SBarry Smith Here the space allocated is sum of all the above values i.e 34, and 4370273d9f13SBarry Smith hence pre-allocation is perfect. 4371273d9f13SBarry Smith 4372273d9f13SBarry Smith Level: intermediate 4373273d9f13SBarry Smith 4374ccd8e176SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 43755f4d30c4SBarry Smith MATMPIAIJ, MatCreateMPIAIJWithArrays() 4376273d9f13SBarry Smith @*/ 437769b1f4b7SBarry Smith PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4378273d9f13SBarry Smith { 4379b1d57f15SBarry Smith PetscMPIInt size; 4380273d9f13SBarry Smith 4381273d9f13SBarry Smith PetscFunctionBegin; 43829566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,A)); 43839566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A,m,n,M,N)); 43849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 4385273d9f13SBarry Smith if (size > 1) { 43869566063dSJacob Faibussowitsch PetscCall(MatSetType(*A,MATMPIAIJ)); 43879566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4388273d9f13SBarry Smith } else { 43899566063dSJacob Faibussowitsch PetscCall(MatSetType(*A,MATSEQAIJ)); 43909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4391273d9f13SBarry Smith } 4392273d9f13SBarry Smith PetscFunctionReturn(0); 4393273d9f13SBarry Smith } 4394195d93cdSBarry Smith 4395127ca0efSMatthew Knepley /*@C 4396127ca0efSMatthew Knepley MatMPIAIJGetSeqAIJ - Returns the local piece of 
this distributed matrix 4397127ca0efSMatthew Knepley 4398127ca0efSMatthew Knepley Not collective 4399127ca0efSMatthew Knepley 4400127ca0efSMatthew Knepley Input Parameter: 4401127ca0efSMatthew Knepley . A - The MPIAIJ matrix 4402127ca0efSMatthew Knepley 4403127ca0efSMatthew Knepley Output Parameters: 4404127ca0efSMatthew Knepley + Ad - The local diagonal block as a SeqAIJ matrix 4405127ca0efSMatthew Knepley . Ao - The local off-diagonal block as a SeqAIJ matrix 4406127ca0efSMatthew Knepley - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4407127ca0efSMatthew Knepley 4408127ca0efSMatthew Knepley Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4409127ca0efSMatthew Knepley in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is 4410127ca0efSMatthew Knepley the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4411127ca0efSMatthew Knepley local column numbers to global column numbers in the original matrix. 
4412127ca0efSMatthew Knepley 4413127ca0efSMatthew Knepley Level: intermediate 4414127ca0efSMatthew Knepley 4415c3ca5d0dSPierre Jolivet .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4416127ca0efSMatthew Knepley @*/ 44179230625dSJed Brown PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4418195d93cdSBarry Smith { 4419195d93cdSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 442004cf37c7SBarry Smith PetscBool flg; 4421b1d57f15SBarry Smith 4422195d93cdSBarry Smith PetscFunctionBegin; 44239566063dSJacob Faibussowitsch PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 442428b400f6SJacob Faibussowitsch PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 442521e72a00SBarry Smith if (Ad) *Ad = a->A; 442621e72a00SBarry Smith if (Ao) *Ao = a->B; 442721e72a00SBarry Smith if (colmap) *colmap = a->garray; 4428195d93cdSBarry Smith PetscFunctionReturn(0); 4429195d93cdSBarry Smith } 4430a2243be0SBarry Smith 4431110bb6e1SHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 44329b8102ccSHong Zhang { 4433110bb6e1SHong Zhang PetscInt m,N,i,rstart,nnz,Ii; 44349b8102ccSHong Zhang PetscInt *indx; 4435110bb6e1SHong Zhang PetscScalar *values; 4436421ddf4dSJunchao Zhang MatType rootType; 44379b8102ccSHong Zhang 44389b8102ccSHong Zhang PetscFunctionBegin; 44399566063dSJacob Faibussowitsch PetscCall(MatGetSize(inmat,&m,&N)); 4440110bb6e1SHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4441110bb6e1SHong Zhang PetscInt *dnz,*onz,sum,bs,cbs; 4442110bb6e1SHong Zhang 44439b8102ccSHong Zhang if (n == PETSC_DECIDE) { 44449566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnership(comm,&n,&N)); 44459b8102ccSHong Zhang } 4446a22543b6SHong Zhang /* Check sum(n) = N */ 44471c2dc1cbSBarry Smith 
PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 444808401ef6SPierre Jolivet PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4449a22543b6SHong Zhang 44509566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 44519b8102ccSHong Zhang rstart -= m; 44529b8102ccSHong Zhang 4453d0609cedSBarry Smith MatPreallocateBegin(comm,m,n,dnz,onz); 44549b8102ccSHong Zhang for (i=0; i<m; i++) { 44559566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 44569566063dSJacob Faibussowitsch PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 44579566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 44589b8102ccSHong Zhang } 44599b8102ccSHong Zhang 44609566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,outmat)); 44619566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 44629566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 44639566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 44649566063dSJacob Faibussowitsch PetscCall(MatGetRootType_Private(inmat,&rootType)); 44659566063dSJacob Faibussowitsch PetscCall(MatSetType(*outmat,rootType)); 44669566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 44679566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4468d0609cedSBarry Smith MatPreallocateEnd(dnz,onz); 44699566063dSJacob Faibussowitsch PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 44709b8102ccSHong Zhang } 44719b8102ccSHong Zhang 4472110bb6e1SHong Zhang /* numeric phase */ 44739566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 44749b8102ccSHong Zhang for (i=0; i<m; i++) { 44759566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 
44769b8102ccSHong Zhang Ii = i + rstart; 44779566063dSJacob Faibussowitsch PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 44789566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 44799b8102ccSHong Zhang } 44809566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 44819566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4482c5d6d63eSBarry Smith PetscFunctionReturn(0); 4483c5d6d63eSBarry Smith } 4484c5d6d63eSBarry Smith 4485dfbe8321SBarry Smith PetscErrorCode MatFileSplit(Mat A,char *outfile) 4486c5d6d63eSBarry Smith { 448732dcc486SBarry Smith PetscMPIInt rank; 4488b1d57f15SBarry Smith PetscInt m,N,i,rstart,nnz; 4489de4209c5SBarry Smith size_t len; 4490b1d57f15SBarry Smith const PetscInt *indx; 4491c5d6d63eSBarry Smith PetscViewer out; 4492c5d6d63eSBarry Smith char *name; 4493c5d6d63eSBarry Smith Mat B; 4494b3cc6726SBarry Smith const PetscScalar *values; 4495c5d6d63eSBarry Smith 4496c5d6d63eSBarry Smith PetscFunctionBegin; 44979566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A,&m,NULL)); 44989566063dSJacob Faibussowitsch PetscCall(MatGetSize(A,NULL,&N)); 4499f204ca49SKris Buschelman /* Should this be the type of the diagonal block of A? 
*/ 45009566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 45019566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B,m,N,m,N)); 45029566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B,A,A)); 45039566063dSJacob Faibussowitsch PetscCall(MatSetType(B,MATSEQAIJ)); 45049566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 45059566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4506c5d6d63eSBarry Smith for (i=0; i<m; i++) { 45079566063dSJacob Faibussowitsch PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 45089566063dSJacob Faibussowitsch PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 45099566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4510c5d6d63eSBarry Smith } 45119566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 45129566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4513c5d6d63eSBarry Smith 45149566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 45159566063dSJacob Faibussowitsch PetscCall(PetscStrlen(outfile,&len)); 45169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len+6,&name)); 45179566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 45189566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 45199566063dSJacob Faibussowitsch PetscCall(PetscFree(name)); 45209566063dSJacob Faibussowitsch PetscCall(MatView(B,out)); 45219566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&out)); 45229566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 4523c5d6d63eSBarry Smith PetscFunctionReturn(0); 4524c5d6d63eSBarry Smith } 4525e5f2cdd8SHong Zhang 45266718818eSStefano Zampini static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 452751a7d1a8SHong Zhang { 45286718818eSStefano Zampini Mat_Merge_SeqsToMPI *merge = 
(Mat_Merge_SeqsToMPI *)data; 452951a7d1a8SHong Zhang 453051a7d1a8SHong Zhang PetscFunctionBegin; 45316718818eSStefano Zampini if (!merge) PetscFunctionReturn(0); 45329566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->id_r)); 45339566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->len_s)); 45349566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->len_r)); 45359566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->bi)); 45369566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->bj)); 45379566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->buf_ri[0])); 45389566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->buf_ri)); 45399566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->buf_rj[0])); 45409566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->buf_rj)); 45419566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->coi)); 45429566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->coj)); 45439566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->owners_co)); 45449566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&merge->rowmap)); 45459566063dSJacob Faibussowitsch PetscCall(PetscFree(merge)); 454651a7d1a8SHong Zhang PetscFunctionReturn(0); 454751a7d1a8SHong Zhang } 454851a7d1a8SHong Zhang 4549c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 4550c6db04a5SJed Brown #include <petscbt.h> 45514ebed01fSBarry Smith 455290431a8fSHong Zhang PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 455355d1abb9SHong Zhang { 4554ce94432eSBarry Smith MPI_Comm comm; 455555d1abb9SHong Zhang Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4556b1d57f15SBarry Smith PetscMPIInt size,rank,taga,*len_s; 4557a2ea699eSBarry Smith PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4558b1d57f15SBarry Smith PetscInt proc,m; 4559b1d57f15SBarry Smith PetscInt **buf_ri,**buf_rj; 4560b1d57f15SBarry Smith PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4561b1d57f15SBarry Smith PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 456255d1abb9SHong 
Zhang MPI_Request *s_waits,*r_waits; 456355d1abb9SHong Zhang MPI_Status *status; 4564fff043a9SJunchao Zhang const MatScalar *aa,*a_a; 4565dd6ea824SBarry Smith MatScalar **abuf_r,*ba_i; 456655d1abb9SHong Zhang Mat_Merge_SeqsToMPI *merge; 4567776b82aeSLisandro Dalcin PetscContainer container; 456855d1abb9SHong Zhang 456955d1abb9SHong Zhang PetscFunctionBegin; 45709566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 45719566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 45723c2c1871SHong Zhang 45739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 45749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm,&rank)); 457555d1abb9SHong Zhang 45769566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 457728b400f6SJacob Faibussowitsch PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 45789566063dSJacob Faibussowitsch PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 45799566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4580fff043a9SJunchao Zhang aa = a_a; 4581bf0cc555SLisandro Dalcin 458255d1abb9SHong Zhang bi = merge->bi; 458355d1abb9SHong Zhang bj = merge->bj; 458455d1abb9SHong Zhang buf_ri = merge->buf_ri; 458555d1abb9SHong Zhang buf_rj = merge->buf_rj; 458655d1abb9SHong Zhang 45879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size,&status)); 45887a2fc3feSBarry Smith owners = merge->rowmap->range; 458955d1abb9SHong Zhang len_s = merge->len_s; 459055d1abb9SHong Zhang 459155d1abb9SHong Zhang /* send and recv matrix values */ 459255d1abb9SHong Zhang /*-----------------------------*/ 45939566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 45949566063dSJacob Faibussowitsch 
PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 459555d1abb9SHong Zhang 45969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 459755d1abb9SHong Zhang for (proc=0,k=0; proc<size; proc++) { 459855d1abb9SHong Zhang if (!len_s[proc]) continue; 459955d1abb9SHong Zhang i = owners[proc]; 46009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 460155d1abb9SHong Zhang k++; 460255d1abb9SHong Zhang } 460355d1abb9SHong Zhang 46049566063dSJacob Faibussowitsch if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 46059566063dSJacob Faibussowitsch if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 46069566063dSJacob Faibussowitsch PetscCall(PetscFree(status)); 460755d1abb9SHong Zhang 46089566063dSJacob Faibussowitsch PetscCall(PetscFree(s_waits)); 46099566063dSJacob Faibussowitsch PetscCall(PetscFree(r_waits)); 461055d1abb9SHong Zhang 461155d1abb9SHong Zhang /* insert mat values of mpimat */ 461255d1abb9SHong Zhang /*----------------------------*/ 46139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(N,&ba_i)); 46149566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 461555d1abb9SHong Zhang 461655d1abb9SHong Zhang for (k=0; k<merge->nrecv; k++) { 461755d1abb9SHong Zhang buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 461855d1abb9SHong Zhang nrows = *(buf_ri_k[k]); 461955d1abb9SHong Zhang nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4620a5b23f4aSJose E. 
Roman nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 462155d1abb9SHong Zhang } 462255d1abb9SHong Zhang 462355d1abb9SHong Zhang /* set values of ba */ 46247a2fc3feSBarry Smith m = merge->rowmap->n; 462555d1abb9SHong Zhang for (i=0; i<m; i++) { 462655d1abb9SHong Zhang arow = owners[rank] + i; 462755d1abb9SHong Zhang bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 462855d1abb9SHong Zhang bnzi = bi[i+1] - bi[i]; 46299566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ba_i,bnzi)); 463055d1abb9SHong Zhang 463155d1abb9SHong Zhang /* add local non-zero vals of this proc's seqmat into ba */ 463255d1abb9SHong Zhang anzi = ai[arow+1] - ai[arow]; 463355d1abb9SHong Zhang aj = a->j + ai[arow]; 4634fff043a9SJunchao Zhang aa = a_a + ai[arow]; 463555d1abb9SHong Zhang nextaj = 0; 463655d1abb9SHong Zhang for (j=0; nextaj<anzi; j++) { 463755d1abb9SHong Zhang if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 463855d1abb9SHong Zhang ba_i[j] += aa[nextaj++]; 463955d1abb9SHong Zhang } 464055d1abb9SHong Zhang } 464155d1abb9SHong Zhang 464255d1abb9SHong Zhang /* add received vals into ba */ 464355d1abb9SHong Zhang for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 464455d1abb9SHong Zhang /* i-th row */ 464555d1abb9SHong Zhang if (i == *nextrow[k]) { 464655d1abb9SHong Zhang anzi = *(nextai[k]+1) - *nextai[k]; 464755d1abb9SHong Zhang aj = buf_rj[k] + *(nextai[k]); 464855d1abb9SHong Zhang aa = abuf_r[k] + *(nextai[k]); 464955d1abb9SHong Zhang nextaj = 0; 465055d1abb9SHong Zhang for (j=0; nextaj<anzi; j++) { 465155d1abb9SHong Zhang if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 465255d1abb9SHong Zhang ba_i[j] += aa[nextaj++]; 465355d1abb9SHong Zhang } 465455d1abb9SHong Zhang } 465555d1abb9SHong Zhang nextrow[k]++; nextai[k]++; 465655d1abb9SHong Zhang } 465755d1abb9SHong Zhang } 46589566063dSJacob Faibussowitsch PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 465955d1abb9SHong Zhang } 
46609566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 46619566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 46629566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 466355d1abb9SHong Zhang 46649566063dSJacob Faibussowitsch PetscCall(PetscFree(abuf_r[0])); 46659566063dSJacob Faibussowitsch PetscCall(PetscFree(abuf_r)); 46669566063dSJacob Faibussowitsch PetscCall(PetscFree(ba_i)); 46679566063dSJacob Faibussowitsch PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 46689566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 466955d1abb9SHong Zhang PetscFunctionReturn(0); 467055d1abb9SHong Zhang } 467138f152feSBarry Smith 467290431a8fSHong Zhang PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4673e5f2cdd8SHong Zhang { 467455a3bba9SHong Zhang Mat B_mpi; 4675c2234fe3SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4676b1d57f15SBarry Smith PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4677b1d57f15SBarry Smith PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4678d0f46423SBarry Smith PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4679a2f3521dSMark F. 
Adams PetscInt len,proc,*dnz,*onz,bs,cbs; 4680b1d57f15SBarry Smith PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4681b1d57f15SBarry Smith PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 468255d1abb9SHong Zhang MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 468358cb9c82SHong Zhang MPI_Status *status; 46840298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 4685be0fcf8dSHong Zhang PetscBT lnkbt; 468651a7d1a8SHong Zhang Mat_Merge_SeqsToMPI *merge; 4687776b82aeSLisandro Dalcin PetscContainer container; 468802c68681SHong Zhang 4689e5f2cdd8SHong Zhang PetscFunctionBegin; 46909566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 46913c2c1871SHong Zhang 469238f152feSBarry Smith /* make sure it is a PETSc comm */ 46939566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 46949566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 46959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm,&rank)); 469655d1abb9SHong Zhang 46979566063dSJacob Faibussowitsch PetscCall(PetscNew(&merge)); 46989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size,&status)); 4699e5f2cdd8SHong Zhang 47006abd8857SHong Zhang /* determine row ownership */ 4701f08fae4eSHong Zhang /*---------------------------------------------------------*/ 47029566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 47039566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 47049566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 47059566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 47069566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(merge->rowmap)); 47079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size,&len_si)); 47089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size,&merge->len_s)); 470955d1abb9SHong Zhang 47107a2fc3feSBarry Smith m = 
merge->rowmap->n; 47117a2fc3feSBarry Smith owners = merge->rowmap->range; 47126abd8857SHong Zhang 47136abd8857SHong Zhang /* determine the number of messages to send, their lengths */ 47146abd8857SHong Zhang /*---------------------------------------------------------*/ 47153e06a4e6SHong Zhang len_s = merge->len_s; 471651a7d1a8SHong Zhang 47172257cef7SHong Zhang len = 0; /* length of buf_si[] */ 4718c2234fe3SHong Zhang merge->nsend = 0; 4719409913e3SHong Zhang for (proc=0; proc<size; proc++) { 47202257cef7SHong Zhang len_si[proc] = 0; 47213e06a4e6SHong Zhang if (proc == rank) { 47226abd8857SHong Zhang len_s[proc] = 0; 47233e06a4e6SHong Zhang } else { 472402c68681SHong Zhang len_si[proc] = owners[proc+1] - owners[proc] + 1; 47253e06a4e6SHong Zhang len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 47263e06a4e6SHong Zhang } 47273e06a4e6SHong Zhang if (len_s[proc]) { 4728c2234fe3SHong Zhang merge->nsend++; 47292257cef7SHong Zhang nrows = 0; 47302257cef7SHong Zhang for (i=owners[proc]; i<owners[proc+1]; i++) { 47312257cef7SHong Zhang if (ai[i+1] > ai[i]) nrows++; 47322257cef7SHong Zhang } 47332257cef7SHong Zhang len_si[proc] = 2*(nrows+1); 47342257cef7SHong Zhang len += len_si[proc]; 4735409913e3SHong Zhang } 473658cb9c82SHong Zhang } 4737409913e3SHong Zhang 47382257cef7SHong Zhang /* determine the number and length of messages to receive for ij-structure */ 47392257cef7SHong Zhang /*-------------------------------------------------------------------------*/ 47409566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 47419566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4742671beff6SHong Zhang 47433e06a4e6SHong Zhang /* post the Irecv of j-structure */ 47443e06a4e6SHong Zhang /*-------------------------------*/ 47459566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(comm,&tagj)); 
47469566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 474702c68681SHong Zhang 47483e06a4e6SHong Zhang /* post the Isend of j-structure */ 4749affca5deSHong Zhang /*--------------------------------*/ 47509566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 47513e06a4e6SHong Zhang 47522257cef7SHong Zhang for (proc=0, k=0; proc<size; proc++) { 4753409913e3SHong Zhang if (!len_s[proc]) continue; 475402c68681SHong Zhang i = owners[proc]; 47559566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 475651a7d1a8SHong Zhang k++; 475751a7d1a8SHong Zhang } 475851a7d1a8SHong Zhang 47593e06a4e6SHong Zhang /* receives and sends of j-structure are complete */ 47603e06a4e6SHong Zhang /*------------------------------------------------*/ 47619566063dSJacob Faibussowitsch if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 47629566063dSJacob Faibussowitsch if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 476302c68681SHong Zhang 476402c68681SHong Zhang /* send and recv i-structure */ 476502c68681SHong Zhang /*---------------------------*/ 47669566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(comm,&tagi)); 47679566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 476802c68681SHong Zhang 47699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len+1,&buf_s)); 47703e06a4e6SHong Zhang buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 47712257cef7SHong Zhang for (proc=0,k=0; proc<size; proc++) { 477202c68681SHong Zhang if (!len_s[proc]) continue; 47733e06a4e6SHong Zhang /* form outgoing message for i-structure: 47743e06a4e6SHong Zhang buf_si[0]: nrows to be sent 47753e06a4e6SHong Zhang [1:nrows]: row index (global) 47763e06a4e6SHong Zhang [nrows+1:2*nrows+1]: 
i-structure index 47773e06a4e6SHong Zhang */ 47783e06a4e6SHong Zhang /*-------------------------------------------*/ 47792257cef7SHong Zhang nrows = len_si[proc]/2 - 1; 47803e06a4e6SHong Zhang buf_si_i = buf_si + nrows+1; 47813e06a4e6SHong Zhang buf_si[0] = nrows; 47823e06a4e6SHong Zhang buf_si_i[0] = 0; 47833e06a4e6SHong Zhang nrows = 0; 47843e06a4e6SHong Zhang for (i=owners[proc]; i<owners[proc+1]; i++) { 47853e06a4e6SHong Zhang anzi = ai[i+1] - ai[i]; 47863e06a4e6SHong Zhang if (anzi) { 47873e06a4e6SHong Zhang buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 47883e06a4e6SHong Zhang buf_si[nrows+1] = i-owners[proc]; /* local row index */ 47893e06a4e6SHong Zhang nrows++; 47903e06a4e6SHong Zhang } 47913e06a4e6SHong Zhang } 47929566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 479302c68681SHong Zhang k++; 47942257cef7SHong Zhang buf_si += len_si[proc]; 479502c68681SHong Zhang } 47962257cef7SHong Zhang 47979566063dSJacob Faibussowitsch if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 47989566063dSJacob Faibussowitsch if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 479902c68681SHong Zhang 48009566063dSJacob Faibussowitsch PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 48013e06a4e6SHong Zhang for (i=0; i<merge->nrecv; i++) { 48029566063dSJacob Faibussowitsch PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 48033e06a4e6SHong Zhang } 48043e06a4e6SHong Zhang 48059566063dSJacob Faibussowitsch PetscCall(PetscFree(len_si)); 48069566063dSJacob Faibussowitsch PetscCall(PetscFree(len_ri)); 48079566063dSJacob Faibussowitsch PetscCall(PetscFree(rj_waits)); 48089566063dSJacob Faibussowitsch PetscCall(PetscFree2(si_waits,sj_waits)); 48099566063dSJacob Faibussowitsch PetscCall(PetscFree(ri_waits)); 48109566063dSJacob Faibussowitsch PetscCall(PetscFree(buf_s)); 
48119566063dSJacob Faibussowitsch PetscCall(PetscFree(status)); 481258cb9c82SHong Zhang 4813bcc1bcd5SHong Zhang /* compute a local seq matrix in each processor */ 4814bcc1bcd5SHong Zhang /*----------------------------------------------*/ 481558cb9c82SHong Zhang /* allocate bi array and free space for accumulating nonzero column info */ 48169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m+1,&bi)); 481758cb9c82SHong Zhang bi[0] = 0; 481858cb9c82SHong Zhang 4819be0fcf8dSHong Zhang /* create and initialize a linked list */ 4820be0fcf8dSHong Zhang nlnk = N+1; 48219566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 482258cb9c82SHong Zhang 4823bcc1bcd5SHong Zhang /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4824bcc1bcd5SHong Zhang len = ai[owners[rank+1]] - ai[owners[rank]]; 48259566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 48262205254eSKarl Rupp 482758cb9c82SHong Zhang current_space = free_space; 482858cb9c82SHong Zhang 4829bcc1bcd5SHong Zhang /* determine symbolic info for each local row */ 48309566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 48311d79065fSBarry Smith 48323e06a4e6SHong Zhang for (k=0; k<merge->nrecv; k++) { 48332257cef7SHong Zhang buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 48343e06a4e6SHong Zhang nrows = *buf_ri_k[k]; 48353e06a4e6SHong Zhang nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4836a5b23f4aSJose E. 
Roman nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 48373e06a4e6SHong Zhang } 48382257cef7SHong Zhang 4839d0609cedSBarry Smith MatPreallocateBegin(comm,m,n,dnz,onz); 4840bcc1bcd5SHong Zhang len = 0; 484158cb9c82SHong Zhang for (i=0; i<m; i++) { 484258cb9c82SHong Zhang bnzi = 0; 484358cb9c82SHong Zhang /* add local non-zero cols of this proc's seqmat into lnk */ 484458cb9c82SHong Zhang arow = owners[rank] + i; 484558cb9c82SHong Zhang anzi = ai[arow+1] - ai[arow]; 484658cb9c82SHong Zhang aj = a->j + ai[arow]; 48479566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 484858cb9c82SHong Zhang bnzi += nlnk; 484958cb9c82SHong Zhang /* add received col data into lnk */ 485051a7d1a8SHong Zhang for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 485155d1abb9SHong Zhang if (i == *nextrow[k]) { /* i-th row */ 48523e06a4e6SHong Zhang anzi = *(nextai[k]+1) - *nextai[k]; 48533e06a4e6SHong Zhang aj = buf_rj[k] + *nextai[k]; 48549566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 48553e06a4e6SHong Zhang bnzi += nlnk; 48563e06a4e6SHong Zhang nextrow[k]++; nextai[k]++; 48573e06a4e6SHong Zhang } 485858cb9c82SHong Zhang } 4859bcc1bcd5SHong Zhang if (len < bnzi) len = bnzi; /* =max(bnzi) */ 486058cb9c82SHong Zhang 486158cb9c82SHong Zhang /* if free space is not available, make more free space */ 486258cb9c82SHong Zhang if (current_space->local_remaining<bnzi) { 48639566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space)); 486458cb9c82SHong Zhang nspacedouble++; 486558cb9c82SHong Zhang } 486658cb9c82SHong Zhang /* copy data into free space, then initialize lnk */ 48679566063dSJacob Faibussowitsch PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 48689566063dSJacob Faibussowitsch PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 
4869bcc1bcd5SHong Zhang 487058cb9c82SHong Zhang current_space->array += bnzi; 487158cb9c82SHong Zhang current_space->local_used += bnzi; 487258cb9c82SHong Zhang current_space->local_remaining -= bnzi; 487358cb9c82SHong Zhang 487458cb9c82SHong Zhang bi[i+1] = bi[i] + bnzi; 487558cb9c82SHong Zhang } 4876bcc1bcd5SHong Zhang 48779566063dSJacob Faibussowitsch PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4878bcc1bcd5SHong Zhang 48799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[m]+1,&bj)); 48809566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 48819566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk,lnkbt)); 4882409913e3SHong Zhang 4883bcc1bcd5SHong Zhang /* create symbolic parallel matrix B_mpi */ 4884bcc1bcd5SHong Zhang /*---------------------------------------*/ 48859566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 48869566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&B_mpi)); 488754b84b50SHong Zhang if (n==PETSC_DECIDE) { 48889566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 488954b84b50SHong Zhang } else { 48909566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 489154b84b50SHong Zhang } 48929566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 48939566063dSJacob Faibussowitsch PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 48949566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4895d0609cedSBarry Smith MatPreallocateEnd(dnz,onz); 48969566063dSJacob Faibussowitsch PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 489758cb9c82SHong Zhang 489890431a8fSHong Zhang /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 48996abd8857SHong Zhang B_mpi->assembled = PETSC_FALSE; 4900affca5deSHong Zhang merge->bi = bi; 4901affca5deSHong Zhang merge->bj = bj; 490202c68681SHong Zhang merge->buf_ri = buf_ri; 
490302c68681SHong Zhang merge->buf_rj = buf_rj; 49040298fd71SBarry Smith merge->coi = NULL; 49050298fd71SBarry Smith merge->coj = NULL; 49060298fd71SBarry Smith merge->owners_co = NULL; 4907affca5deSHong Zhang 49089566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm)); 4909bf0cc555SLisandro Dalcin 4910affca5deSHong Zhang /* attach the supporting struct to B_mpi for reuse */ 49119566063dSJacob Faibussowitsch PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 49129566063dSJacob Faibussowitsch PetscCall(PetscContainerSetPointer(container,merge)); 49139566063dSJacob Faibussowitsch PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 49149566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 49159566063dSJacob Faibussowitsch PetscCall(PetscContainerDestroy(&container)); 4916affca5deSHong Zhang *mpimat = B_mpi; 491738f152feSBarry Smith 49189566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 4919e5f2cdd8SHong Zhang PetscFunctionReturn(0); 4920e5f2cdd8SHong Zhang } 492125616d81SHong Zhang 4922d4036a1aSHong Zhang /*@C 49235f4d30c4SBarry Smith MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4924d4036a1aSHong Zhang matrices from each processor 4925d4036a1aSHong Zhang 4926d083f849SBarry Smith Collective 4927d4036a1aSHong Zhang 4928d4036a1aSHong Zhang Input Parameters: 4929d4036a1aSHong Zhang + comm - the communicators the parallel matrix will live on 4930d4036a1aSHong Zhang . seqmat - the input sequential matrices 4931d4036a1aSHong Zhang . m - number of local rows (or PETSC_DECIDE) 4932d4036a1aSHong Zhang . n - number of local columns (or PETSC_DECIDE) 4933d4036a1aSHong Zhang - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4934d4036a1aSHong Zhang 4935d4036a1aSHong Zhang Output Parameter: 4936d4036a1aSHong Zhang . 
mpimat - the parallel matrix generated 4937d4036a1aSHong Zhang 4938d4036a1aSHong Zhang Level: advanced 4939d4036a1aSHong Zhang 4940d4036a1aSHong Zhang Notes: 4941d4036a1aSHong Zhang The dimensions of the sequential matrix in each processor MUST be the same. 4942d4036a1aSHong Zhang The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4943d4036a1aSHong Zhang destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4944d4036a1aSHong Zhang @*/ 494590431a8fSHong Zhang PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 494655d1abb9SHong Zhang { 49477e63b356SHong Zhang PetscMPIInt size; 494855d1abb9SHong Zhang 494955d1abb9SHong Zhang PetscFunctionBegin; 49509566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 49517e63b356SHong Zhang if (size == 1) { 49529566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 49537e63b356SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 49549566063dSJacob Faibussowitsch PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 49557e63b356SHong Zhang } else { 49569566063dSJacob Faibussowitsch PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 49577e63b356SHong Zhang } 49589566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 49597e63b356SHong Zhang PetscFunctionReturn(0); 49607e63b356SHong Zhang } 49619566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 496255d1abb9SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 49639566063dSJacob Faibussowitsch PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 496455d1abb9SHong Zhang } 49659566063dSJacob Faibussowitsch PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 49669566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 496755d1abb9SHong Zhang PetscFunctionReturn(0); 496855d1abb9SHong Zhang } 
49694ebed01fSBarry Smith 4970bc08b0f1SBarry Smith /*@ 4971ef76dfe8SJed Brown MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 49728661ff28SBarry Smith mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 49738661ff28SBarry Smith with MatGetSize() 497425616d81SHong Zhang 497532fba14fSHong Zhang Not Collective 497625616d81SHong Zhang 497725616d81SHong Zhang Input Parameters: 497825616d81SHong Zhang + A - the matrix 4979a2b725a8SWilliam Gropp - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 498025616d81SHong Zhang 498125616d81SHong Zhang Output Parameter: 498225616d81SHong Zhang . A_loc - the local sequential matrix generated 498325616d81SHong Zhang 498425616d81SHong Zhang Level: developer 498525616d81SHong Zhang 498677c65a98SStefano Zampini Notes: 498777c65a98SStefano Zampini When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 498877c65a98SStefano Zampini If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 498977c65a98SStefano Zampini This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 499077c65a98SStefano Zampini modify the values of the returned A_loc. 499177c65a98SStefano Zampini 4992ed502f03SStefano Zampini .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 499325616d81SHong Zhang @*/ 49944a2b5492SBarry Smith PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 499525616d81SHong Zhang { 499601b7ae99SHong Zhang Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4997b78526a6SJose E. Roman Mat_SeqAIJ *mat,*a,*b; 4998b78526a6SJose E. 
Roman PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4999ce496241SStefano Zampini const PetscScalar *aa,*ba,*aav,*bav; 5000ce496241SStefano Zampini PetscScalar *ca,*cam; 500177c65a98SStefano Zampini PetscMPIInt size; 5002d0f46423SBarry Smith PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 50035a7d977cSHong Zhang PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 50048661ff28SBarry Smith PetscBool match; 500525616d81SHong Zhang 500625616d81SHong Zhang PetscFunctionBegin; 50079566063dSJacob Faibussowitsch PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 500828b400f6SJacob Faibussowitsch PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 50099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 501077c65a98SStefano Zampini if (size == 1) { 501177c65a98SStefano Zampini if (scall == MAT_INITIAL_MATRIX) { 50129566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 501377c65a98SStefano Zampini *A_loc = mpimat->A; 501477c65a98SStefano Zampini } else if (scall == MAT_REUSE_MATRIX) { 50159566063dSJacob Faibussowitsch PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 501677c65a98SStefano Zampini } 501777c65a98SStefano Zampini PetscFunctionReturn(0); 501877c65a98SStefano Zampini } 501970a9ba44SHong Zhang 50209566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5021b78526a6SJose E. Roman a = (Mat_SeqAIJ*)(mpimat->A)->data; 5022b78526a6SJose E. Roman b = (Mat_SeqAIJ*)(mpimat->B)->data; 5023b78526a6SJose E. 
Roman ai = a->i; aj = a->j; bi = b->i; bj = b->j; 50249566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 50259566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5026ce496241SStefano Zampini aa = aav; 5027ce496241SStefano Zampini ba = bav; 502801b7ae99SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 50299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1+am,&ci)); 5030dea91ad1SHong Zhang ci[0] = 0; 503101b7ae99SHong Zhang for (i=0; i<am; i++) { 5032dea91ad1SHong Zhang ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 503301b7ae99SHong Zhang } 50349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1+ci[am],&cj)); 50359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1+ci[am],&ca)); 5036dea91ad1SHong Zhang k = 0; 503701b7ae99SHong Zhang for (i=0; i<am; i++) { 50385a7d977cSHong Zhang ncols_o = bi[i+1] - bi[i]; 50395a7d977cSHong Zhang ncols_d = ai[i+1] - ai[i]; 504001b7ae99SHong Zhang /* off-diagonal portion of A */ 50415a7d977cSHong Zhang for (jo=0; jo<ncols_o; jo++) { 50425a7d977cSHong Zhang col = cmap[*bj]; 50435a7d977cSHong Zhang if (col >= cstart) break; 50445a7d977cSHong Zhang cj[k] = col; bj++; 50455a7d977cSHong Zhang ca[k++] = *ba++; 50465a7d977cSHong Zhang } 50475a7d977cSHong Zhang /* diagonal portion of A */ 50485a7d977cSHong Zhang for (j=0; j<ncols_d; j++) { 50495a7d977cSHong Zhang cj[k] = cstart + *aj++; 50505a7d977cSHong Zhang ca[k++] = *aa++; 50515a7d977cSHong Zhang } 50525a7d977cSHong Zhang /* off-diagonal portion of A */ 50535a7d977cSHong Zhang for (j=jo; j<ncols_o; j++) { 50545a7d977cSHong Zhang cj[k] = cmap[*bj++]; 50555a7d977cSHong Zhang ca[k++] = *ba++; 50565a7d977cSHong Zhang } 505725616d81SHong Zhang } 5058dea91ad1SHong Zhang /* put together the new matrix */ 50599566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5060dea91ad1SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 5061dea91ad1SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 5062dea91ad1SHong Zhang mat = (Mat_SeqAIJ*)(*A_loc)->data; 5063e6b907acSBarry Smith mat->free_a = PETSC_TRUE; 5064e6b907acSBarry Smith mat->free_ij = PETSC_TRUE; 5065dea91ad1SHong Zhang mat->nonew = 0; 50665a7d977cSHong Zhang } else if (scall == MAT_REUSE_MATRIX) { 50675a7d977cSHong Zhang mat =(Mat_SeqAIJ*)(*A_loc)->data; 5068fff043a9SJunchao Zhang ci = mat->i; 5069fff043a9SJunchao Zhang cj = mat->j; 50709566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 50715a7d977cSHong Zhang for (i=0; i<am; i++) { 50725a7d977cSHong Zhang /* off-diagonal portion of A */ 50735a7d977cSHong Zhang ncols_o = bi[i+1] - bi[i]; 50745a7d977cSHong Zhang for (jo=0; jo<ncols_o; jo++) { 50755a7d977cSHong Zhang col = cmap[*bj]; 50765a7d977cSHong Zhang if (col >= cstart) break; 5077a77337e4SBarry Smith *cam++ = *ba++; bj++; 50785a7d977cSHong Zhang } 50795a7d977cSHong Zhang /* diagonal portion of A */ 5080ecc9b87dSHong Zhang ncols_d = ai[i+1] - ai[i]; 5081a77337e4SBarry Smith for (j=0; j<ncols_d; j++) *cam++ = *aa++; 50825a7d977cSHong Zhang /* off-diagonal portion of A */ 5083f33d1a9aSHong Zhang for (j=jo; j<ncols_o; j++) { 5084a77337e4SBarry Smith *cam++ = *ba++; bj++; 5085f33d1a9aSHong Zhang } 50865a7d977cSHong Zhang } 50879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 508898921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 50899566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 50909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 50919566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 509225616d81SHong Zhang PetscFunctionReturn(0); 509325616d81SHong Zhang } 509425616d81SHong Zhang 5095ed502f03SStefano Zampini /*@ 5096ed502f03SStefano Zampini MatMPIAIJGetLocalMatMerge - 
Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5097ed502f03SStefano Zampini mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5098ed502f03SStefano Zampini 5099ed502f03SStefano Zampini Not Collective 5100ed502f03SStefano Zampini 5101ed502f03SStefano Zampini Input Parameters: 5102ed502f03SStefano Zampini + A - the matrix 5103ed502f03SStefano Zampini - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104ed502f03SStefano Zampini 5105d8d19677SJose E. Roman Output Parameters: 5106ed502f03SStefano Zampini + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5107ed502f03SStefano Zampini - A_loc - the local sequential matrix generated 5108ed502f03SStefano Zampini 5109ed502f03SStefano Zampini Level: developer 5110ed502f03SStefano Zampini 5111ed502f03SStefano Zampini Notes: 5112ec446438SStefano Zampini This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5113ed502f03SStefano Zampini 5114ed502f03SStefano Zampini .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5115ed502f03SStefano Zampini 5116ed502f03SStefano Zampini @*/ 5117ed502f03SStefano Zampini PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5118ed502f03SStefano Zampini { 5119ed502f03SStefano Zampini Mat Ao,Ad; 5120ed502f03SStefano Zampini const PetscInt *cmap; 5121ed502f03SStefano Zampini PetscMPIInt size; 5122ed502f03SStefano Zampini PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5123ed502f03SStefano Zampini 5124ed502f03SStefano Zampini PetscFunctionBegin; 51259566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 51269566063dSJacob Faibussowitsch 
PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5127ed502f03SStefano Zampini if (size == 1) { 5128ed502f03SStefano Zampini if (scall == MAT_INITIAL_MATRIX) { 51299566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)Ad)); 5130ed502f03SStefano Zampini *A_loc = Ad; 5131ed502f03SStefano Zampini } else if (scall == MAT_REUSE_MATRIX) { 51329566063dSJacob Faibussowitsch PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5133ed502f03SStefano Zampini } 51349566063dSJacob Faibussowitsch if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5135ed502f03SStefano Zampini PetscFunctionReturn(0); 5136ed502f03SStefano Zampini } 51379566063dSJacob Faibussowitsch PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 51389566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5139ed502f03SStefano Zampini if (f) { 51409566063dSJacob Faibussowitsch PetscCall((*f)(A,scall,glob,A_loc)); 5141ed502f03SStefano Zampini } else { 5142ed502f03SStefano Zampini Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5143ed502f03SStefano Zampini Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5144ed502f03SStefano Zampini Mat_SeqAIJ *c; 5145ed502f03SStefano Zampini PetscInt *ai = a->i, *aj = a->j; 5146ed502f03SStefano Zampini PetscInt *bi = b->i, *bj = b->j; 5147ed502f03SStefano Zampini PetscInt *ci,*cj; 5148ed502f03SStefano Zampini const PetscScalar *aa,*ba; 5149ed502f03SStefano Zampini PetscScalar *ca; 5150ed502f03SStefano Zampini PetscInt i,j,am,dn,on; 5151ed502f03SStefano Zampini 51529566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(Ad,&am,&dn)); 51539566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(Ao,NULL,&on)); 51549566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 51559566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5156ed502f03SStefano Zampini if (scall == MAT_INITIAL_MATRIX) { 5157ed502f03SStefano Zampini 
PetscInt k; 51589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1+am,&ci)); 51599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 51609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5161ed502f03SStefano Zampini ci[0] = 0; 5162ed502f03SStefano Zampini for (i=0,k=0; i<am; i++) { 5163ed502f03SStefano Zampini const PetscInt ncols_o = bi[i+1] - bi[i]; 5164ed502f03SStefano Zampini const PetscInt ncols_d = ai[i+1] - ai[i]; 5165ed502f03SStefano Zampini ci[i+1] = ci[i] + ncols_o + ncols_d; 5166ed502f03SStefano Zampini /* diagonal portion of A */ 5167ed502f03SStefano Zampini for (j=0; j<ncols_d; j++,k++) { 5168ed502f03SStefano Zampini cj[k] = *aj++; 5169ed502f03SStefano Zampini ca[k] = *aa++; 5170ed502f03SStefano Zampini } 5171ed502f03SStefano Zampini /* off-diagonal portion of A */ 5172ed502f03SStefano Zampini for (j=0; j<ncols_o; j++,k++) { 5173ed502f03SStefano Zampini cj[k] = dn + *bj++; 5174ed502f03SStefano Zampini ca[k] = *ba++; 5175ed502f03SStefano Zampini } 5176ed502f03SStefano Zampini } 5177ed502f03SStefano Zampini /* put together the new matrix */ 51789566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5179ed502f03SStefano Zampini /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5180ed502f03SStefano Zampini /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5181ed502f03SStefano Zampini c = (Mat_SeqAIJ*)(*A_loc)->data; 5182ed502f03SStefano Zampini c->free_a = PETSC_TRUE; 5183ed502f03SStefano Zampini c->free_ij = PETSC_TRUE; 5184ed502f03SStefano Zampini c->nonew = 0; 51859566063dSJacob Faibussowitsch PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5186ed502f03SStefano Zampini } else if (scall == MAT_REUSE_MATRIX) { 51879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5188ed502f03SStefano Zampini for (i=0; i<am; i++) { 5189ed502f03SStefano Zampini const PetscInt ncols_d = ai[i+1] - ai[i]; 5190ed502f03SStefano Zampini const PetscInt ncols_o = bi[i+1] - bi[i]; 5191ed502f03SStefano Zampini /* diagonal portion of A */ 5192ed502f03SStefano Zampini for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5193ed502f03SStefano Zampini /* off-diagonal portion of A */ 5194ed502f03SStefano Zampini for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5195ed502f03SStefano Zampini } 51969566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 519798921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 51989566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 51999566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5200ed502f03SStefano Zampini if (glob) { 5201ed502f03SStefano Zampini PetscInt cst, *gidx; 5202ed502f03SStefano Zampini 52039566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 52049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dn+on,&gidx)); 5205ed502f03SStefano Zampini for (i=0; i<dn; i++) gidx[i] = cst + i; 5206ed502f03SStefano Zampini for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 52079566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5208ed502f03SStefano Zampini } 5209ed502f03SStefano Zampini } 52109566063dSJacob Faibussowitsch 
PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5211ed502f03SStefano Zampini PetscFunctionReturn(0); 5212ed502f03SStefano Zampini } 5213ed502f03SStefano Zampini 521432fba14fSHong Zhang /*@C 52155f4d30c4SBarry Smith MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 521632fba14fSHong Zhang 521732fba14fSHong Zhang Not Collective 521832fba14fSHong Zhang 521932fba14fSHong Zhang Input Parameters: 522032fba14fSHong Zhang + A - the matrix 522132fba14fSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 52220298fd71SBarry Smith - row, col - index sets of rows and columns to extract (or NULL) 522332fba14fSHong Zhang 522432fba14fSHong Zhang Output Parameter: 522532fba14fSHong Zhang . A_loc - the local sequential matrix generated 522632fba14fSHong Zhang 522732fba14fSHong Zhang Level: developer 522832fba14fSHong Zhang 5229ba264940SBarry Smith .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5230ba264940SBarry Smith 523132fba14fSHong Zhang @*/ 52324a2b5492SBarry Smith PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 523332fba14fSHong Zhang { 523432fba14fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 523532fba14fSHong Zhang PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 523632fba14fSHong Zhang IS isrowa,iscola; 523732fba14fSHong Zhang Mat *aloc; 52384a2b5492SBarry Smith PetscBool match; 523932fba14fSHong Zhang 524032fba14fSHong Zhang PetscFunctionBegin; 52419566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 524228b400f6SJacob Faibussowitsch PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 52439566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 524432fba14fSHong Zhang if (!row) { 5245d0f46423SBarry Smith start = A->rmap->rstart; end = A->rmap->rend; 52469566063dSJacob 
Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 524732fba14fSHong Zhang } else { 524832fba14fSHong Zhang isrowa = *row; 524932fba14fSHong Zhang } 525032fba14fSHong Zhang if (!col) { 5251d0f46423SBarry Smith start = A->cmap->rstart; 525232fba14fSHong Zhang cmap = a->garray; 5253d0f46423SBarry Smith nzA = a->A->cmap->n; 5254d0f46423SBarry Smith nzB = a->B->cmap->n; 52559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nzA+nzB, &idx)); 525632fba14fSHong Zhang ncols = 0; 525732fba14fSHong Zhang for (i=0; i<nzB; i++) { 525832fba14fSHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 525932fba14fSHong Zhang else break; 526032fba14fSHong Zhang } 526132fba14fSHong Zhang imark = i; 526232fba14fSHong Zhang for (i=0; i<nzA; i++) idx[ncols++] = start + i; 526332fba14fSHong Zhang for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 52649566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 526532fba14fSHong Zhang } else { 526632fba14fSHong Zhang iscola = *col; 526732fba14fSHong Zhang } 526832fba14fSHong Zhang if (scall != MAT_INITIAL_MATRIX) { 52699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1,&aloc)); 527032fba14fSHong Zhang aloc[0] = *A_loc; 527132fba14fSHong Zhang } 52729566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5273109e0772SStefano Zampini if (!col) { /* attach global id of condensed columns */ 52749566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5275109e0772SStefano Zampini } 527632fba14fSHong Zhang *A_loc = aloc[0]; 52779566063dSJacob Faibussowitsch PetscCall(PetscFree(aloc)); 527832fba14fSHong Zhang if (!row) { 52799566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isrowa)); 528032fba14fSHong Zhang } 528132fba14fSHong Zhang if (!col) { 52829566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscola)); 528332fba14fSHong Zhang } 
52849566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 528532fba14fSHong Zhang PetscFunctionReturn(0); 528632fba14fSHong Zhang } 528732fba14fSHong Zhang 52885c65b9ecSFande Kong /* 52895c65b9ecSFande Kong * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 52905c65b9ecSFande Kong * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 52915c65b9ecSFande Kong * on a global size. 52925c65b9ecSFande Kong * */ 52935c65b9ecSFande Kong PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 52945c65b9ecSFande Kong { 52955c65b9ecSFande Kong Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 52965c65b9ecSFande Kong Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5297131c27b5Sprj- PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5298131c27b5Sprj- PetscMPIInt owner; 52995c65b9ecSFande Kong PetscSFNode *iremote,*oiremote; 53005c65b9ecSFande Kong const PetscInt *lrowindices; 53015c65b9ecSFande Kong PetscSF sf,osf; 53025c65b9ecSFande Kong PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 53035c65b9ecSFande Kong PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 53045c65b9ecSFande Kong MPI_Comm comm; 53055c65b9ecSFande Kong ISLocalToGlobalMapping mapping; 5306fff043a9SJunchao Zhang const PetscScalar *pd_a,*po_a; 53075c65b9ecSFande Kong 53085c65b9ecSFande Kong PetscFunctionBegin; 53099566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 53105c65b9ecSFande Kong /* plocalsize is the number of roots 53115c65b9ecSFande Kong * nrows is the number of leaves 53125c65b9ecSFande Kong * */ 53139566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 53149566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(rows,&nrows)); 53159566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(nrows,&iremote)); 53169566063dSJacob Faibussowitsch 
PetscCall(ISGetIndices(rows,&lrowindices)); 53175c65b9ecSFande Kong for (i=0;i<nrows;i++) { 53185c65b9ecSFande Kong /* Find a remote index and an owner for a row 53195c65b9ecSFande Kong * The row could be local or remote 53205c65b9ecSFande Kong * */ 532134bcad68SFande Kong owner = 0; 532234bcad68SFande Kong lidx = 0; 53239566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 53245c65b9ecSFande Kong iremote[i].index = lidx; 53255c65b9ecSFande Kong iremote[i].rank = owner; 53265c65b9ecSFande Kong } 53275c65b9ecSFande Kong /* Create SF to communicate how many nonzero columns for each row */ 53289566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm,&sf)); 53295c65b9ecSFande Kong /* SF will figure out the number of nonzero colunms for each row, and their 53305c65b9ecSFande Kong * offsets 53315c65b9ecSFande Kong * */ 53329566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 53339566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf)); 53349566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 5335bc8e477aSFande Kong 53369566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 53379566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 53389566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(nrows,&pnnz)); 53395c65b9ecSFande Kong roffsets[0] = 0; 53405c65b9ecSFande Kong roffsets[1] = 0; 53415c65b9ecSFande Kong for (i=0;i<plocalsize;i++) { 53425c65b9ecSFande Kong /* diag */ 53435c65b9ecSFande Kong nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 53445c65b9ecSFande Kong /* off diag */ 53455c65b9ecSFande Kong nrcols[i*2+1] = po->i[i+1] - po->i[i]; 53465c65b9ecSFande Kong /* compute offsets so that we relative location for each row */ 53475c65b9ecSFande Kong roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 53485c65b9ecSFande Kong roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 
53495c65b9ecSFande Kong } 53509566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(2*nrows,&nlcols)); 53519566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(2*nrows,&loffsets)); 53525c65b9ecSFande Kong /* 'r' means root, and 'l' means leaf */ 53539566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 53549566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 53559566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 53569566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 53579566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 53589566063dSJacob Faibussowitsch PetscCall(PetscFree(roffsets)); 53599566063dSJacob Faibussowitsch PetscCall(PetscFree(nrcols)); 53605c65b9ecSFande Kong dntotalcols = 0; 53615c65b9ecSFande Kong ontotalcols = 0; 5362bc8e477aSFande Kong ncol = 0; 53635c65b9ecSFande Kong for (i=0;i<nrows;i++) { 53645c65b9ecSFande Kong pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5365bc8e477aSFande Kong ncol = PetscMax(pnnz[i],ncol); 53665c65b9ecSFande Kong /* diag */ 53675c65b9ecSFande Kong dntotalcols += nlcols[i*2+0]; 53685c65b9ecSFande Kong /* off diag */ 53695c65b9ecSFande Kong ontotalcols += nlcols[i*2+1]; 53705c65b9ecSFande Kong } 53715c65b9ecSFande Kong /* We do not need to figure the right number of columns 53725c65b9ecSFande Kong * since all the calculations will be done by going through the raw data 53735c65b9ecSFande Kong * */ 53749566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 53759566063dSJacob Faibussowitsch PetscCall(MatSetUp(*P_oth)); 53769566063dSJacob Faibussowitsch PetscCall(PetscFree(pnnz)); 53775c65b9ecSFande Kong p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 53785c65b9ecSFande Kong /* diag */ 53799566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(dntotalcols,&iremote)); 53805c65b9ecSFande Kong /* off diag 
*/ 53819566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 53825c65b9ecSFande Kong /* diag */ 53839566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 53845c65b9ecSFande Kong /* off diag */ 53859566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 53865c65b9ecSFande Kong dntotalcols = 0; 53875c65b9ecSFande Kong ontotalcols = 0; 53885c65b9ecSFande Kong ntotalcols = 0; 53895c65b9ecSFande Kong for (i=0;i<nrows;i++) { 539034bcad68SFande Kong owner = 0; 53919566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 53925c65b9ecSFande Kong /* Set iremote for diag matrix */ 53935c65b9ecSFande Kong for (j=0;j<nlcols[i*2+0];j++) { 53945c65b9ecSFande Kong iremote[dntotalcols].index = loffsets[i*2+0] + j; 53955c65b9ecSFande Kong iremote[dntotalcols].rank = owner; 53965c65b9ecSFande Kong /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 53975c65b9ecSFande Kong ilocal[dntotalcols++] = ntotalcols++; 53985c65b9ecSFande Kong } 53995c65b9ecSFande Kong /* off diag */ 54005c65b9ecSFande Kong for (j=0;j<nlcols[i*2+1];j++) { 54015c65b9ecSFande Kong oiremote[ontotalcols].index = loffsets[i*2+1] + j; 54025c65b9ecSFande Kong oiremote[ontotalcols].rank = owner; 54035c65b9ecSFande Kong oilocal[ontotalcols++] = ntotalcols++; 54045c65b9ecSFande Kong } 54055c65b9ecSFande Kong } 54069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(rows,&lrowindices)); 54079566063dSJacob Faibussowitsch PetscCall(PetscFree(loffsets)); 54089566063dSJacob Faibussowitsch PetscCall(PetscFree(nlcols)); 54099566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm,&sf)); 54105c65b9ecSFande Kong /* P serves as roots and P_oth is leaves 54115c65b9ecSFande Kong * Diag matrix 54125c65b9ecSFande Kong * */ 54139566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 54149566063dSJacob 
Faibussowitsch PetscCall(PetscSFSetFromOptions(sf)); 54159566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 54165c65b9ecSFande Kong 54179566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm,&osf)); 54185c65b9ecSFande Kong /* Off diag */ 54199566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 54209566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(osf)); 54219566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(osf)); 54229566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 54239566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 54245c65b9ecSFande Kong /* We operate on the matrix internal data for saving memory */ 54259566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 54269566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 54279566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 54285c65b9ecSFande Kong /* Convert to global indices for diag matrix */ 54295c65b9ecSFande Kong for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 54309566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 54315c65b9ecSFande Kong /* We want P_oth store global indices */ 54329566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 54335c65b9ecSFande Kong /* Use memory scalable approach */ 54349566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 54359566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 54369566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 54379566063dSJacob Faibussowitsch 
PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 54385c65b9ecSFande Kong /* Convert back to local indices */ 54395c65b9ecSFande Kong for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 54409566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 54415c65b9ecSFande Kong nout = 0; 54429566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 544308401ef6SPierre Jolivet PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 54449566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 54455c65b9ecSFande Kong /* Exchange values */ 54469566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 54479566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 54489566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 54499566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 54505c65b9ecSFande Kong /* Stop PETSc from shrinking memory */ 54515c65b9ecSFande Kong for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 54529566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 54539566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 54545c65b9ecSFande Kong /* Attach PetscSF objects to P_oth so that we can reuse it later */ 54559566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 54569566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 54579566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 54589566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&osf)); 54595c65b9ecSFande Kong PetscFunctionReturn(0); 
54605c65b9ecSFande Kong } 54615c65b9ecSFande Kong 54625c65b9ecSFande Kong /* 54635c65b9ecSFande Kong * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 54645c65b9ecSFande Kong * This supports MPIAIJ and MAIJ 54655c65b9ecSFande Kong * */ 5466bc8e477aSFande Kong PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 54675c65b9ecSFande Kong { 54685c65b9ecSFande Kong Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5469bc8e477aSFande Kong Mat_SeqAIJ *p_oth; 5470bc8e477aSFande Kong IS rows,map; 5471bc8e477aSFande Kong PetscHMapI hamp; 5472bc8e477aSFande Kong PetscInt i,htsize,*rowindices,off,*mapping,key,count; 54735c65b9ecSFande Kong MPI_Comm comm; 54745c65b9ecSFande Kong PetscSF sf,osf; 5475bc8e477aSFande Kong PetscBool has; 54765c65b9ecSFande Kong 54775c65b9ecSFande Kong PetscFunctionBegin; 54789566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 54799566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 54805c65b9ecSFande Kong /* If it is the first time, create an index set of off-diag nonzero columns of A, 54815c65b9ecSFande Kong * and then create a submatrix (that often is an overlapping matrix) 54825c65b9ecSFande Kong * */ 54835c65b9ecSFande Kong if (reuse == MAT_INITIAL_MATRIX) { 54845c65b9ecSFande Kong /* Use a hash table to figure out unique keys */ 54859566063dSJacob Faibussowitsch PetscCall(PetscHMapICreate(&hamp)); 54869566063dSJacob Faibussowitsch PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 54879566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5488bc8e477aSFande Kong count = 0; 5489bc8e477aSFande Kong /* Assume that a->g is sorted, otherwise the following does not make sense */ 5490bc8e477aSFande Kong for (i=0;i<a->B->cmap->n;i++) { 5491bc8e477aSFande Kong key = a->garray[i]/dof; 54929566063dSJacob Faibussowitsch PetscCall(PetscHMapIHas(hamp,key,&has)); 5493bc8e477aSFande 
Kong if (!has) { 5494bc8e477aSFande Kong mapping[i] = count; 54959566063dSJacob Faibussowitsch PetscCall(PetscHMapISet(hamp,key,count++)); 5496bc8e477aSFande Kong } else { 5497bc8e477aSFande Kong /* Current 'i' has the same value the previous step */ 5498bc8e477aSFande Kong mapping[i] = count-1; 54995c65b9ecSFande Kong } 5500bc8e477aSFande Kong } 55019566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 55029566063dSJacob Faibussowitsch PetscCall(PetscHMapIGetSize(hamp,&htsize)); 550308401ef6SPierre Jolivet PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 55049566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(htsize,&rowindices)); 55055c65b9ecSFande Kong off = 0; 55069566063dSJacob Faibussowitsch PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 55079566063dSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&hamp)); 55089566063dSJacob Faibussowitsch PetscCall(PetscSortInt(htsize,rowindices)); 55099566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 55105c65b9ecSFande Kong /* In case, the matrix was already created but users want to recreate the matrix */ 55119566063dSJacob Faibussowitsch PetscCall(MatDestroy(P_oth)); 55129566063dSJacob Faibussowitsch PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 55139566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 55149566063dSJacob Faibussowitsch PetscCall(ISDestroy(&map)); 55159566063dSJacob Faibussowitsch PetscCall(ISDestroy(&rows)); 55165c65b9ecSFande Kong } else if (reuse == MAT_REUSE_MATRIX) { 55175c65b9ecSFande Kong /* If matrix was already created, we simply update values using SF objects 55185c65b9ecSFande Kong * that as attached to the matrix ealier. 
5519fff043a9SJunchao Zhang */ 5520fff043a9SJunchao Zhang const PetscScalar *pd_a,*po_a; 5521fff043a9SJunchao Zhang 55229566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 55239566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 552408401ef6SPierre Jolivet PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 55255c65b9ecSFande Kong p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 55265c65b9ecSFande Kong /* Update values in place */ 55279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 55289566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 55299566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 55309566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 55319566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 55329566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 55339566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 55349566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 55356718818eSStefano Zampini } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 55369566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 55375c65b9ecSFande Kong PetscFunctionReturn(0); 55385c65b9ecSFande Kong } 55395c65b9ecSFande Kong 554025616d81SHong Zhang /*@C 554132fba14fSHong Zhang MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 554225616d81SHong Zhang 554325616d81SHong Zhang Collective on Mat 554425616d81SHong Zhang 554525616d81SHong Zhang Input Parameters: 55466b867d5aSJose E. Roman + A - the first matrix in mpiaij format 55476b867d5aSJose E. Roman . 
B - the second matrix in mpiaij format 55486b867d5aSJose E. Roman - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 554925616d81SHong Zhang 5550f1a722f8SMatthew G. Knepley Output Parameters: 5551f1a722f8SMatthew G. Knepley + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5552f1a722f8SMatthew G. Knepley . colb - On input index sets of columns of B to extract (or NULL), modified on output 5553f1a722f8SMatthew G. Knepley - B_seq - the sequential matrix generated 555425616d81SHong Zhang 555525616d81SHong Zhang Level: developer 555625616d81SHong Zhang 555725616d81SHong Zhang @*/ 555866bfb163SHong Zhang PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 555925616d81SHong Zhang { 5560899cda47SBarry Smith Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5561b1d57f15SBarry Smith PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 556225616d81SHong Zhang IS isrowb,iscolb; 55630298fd71SBarry Smith Mat *bseq=NULL; 556425616d81SHong Zhang 556525616d81SHong Zhang PetscFunctionBegin; 5566d0f46423SBarry Smith if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 556798921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 556825616d81SHong Zhang } 55699566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0)); 557025616d81SHong Zhang 557125616d81SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 5572d0f46423SBarry Smith start = A->cmap->rstart; 557325616d81SHong Zhang cmap = a->garray; 5574d0f46423SBarry Smith nzA = a->A->cmap->n; 5575d0f46423SBarry Smith nzB = a->B->cmap->n; 55769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nzA+nzB, &idx)); 557725616d81SHong Zhang ncols = 0; 55780390132cSHong Zhang for (i=0; i<nzB; i++) { /* row < local row index */ 
557925616d81SHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 558025616d81SHong Zhang else break; 558125616d81SHong Zhang } 558225616d81SHong Zhang imark = i; 55830390132cSHong Zhang for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 55840390132cSHong Zhang for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 55859566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb)); 55869566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb)); 558725616d81SHong Zhang } else { 558808401ef6SPierre Jolivet PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 558925616d81SHong Zhang isrowb = *rowb; iscolb = *colb; 55909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1,&bseq)); 559125616d81SHong Zhang bseq[0] = *B_seq; 559225616d81SHong Zhang } 55939566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq)); 559425616d81SHong Zhang *B_seq = bseq[0]; 55959566063dSJacob Faibussowitsch PetscCall(PetscFree(bseq)); 559625616d81SHong Zhang if (!rowb) { 55979566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isrowb)); 559825616d81SHong Zhang } else { 559925616d81SHong Zhang *rowb = isrowb; 560025616d81SHong Zhang } 560125616d81SHong Zhang if (!colb) { 56029566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscolb)); 560325616d81SHong Zhang } else { 560425616d81SHong Zhang *colb = iscolb; 560525616d81SHong Zhang } 56069566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0)); 560725616d81SHong Zhang PetscFunctionReturn(0); 560825616d81SHong Zhang } 5609429d309bSHong Zhang 5610f8487c73SHong Zhang /* 5611f8487c73SHong Zhang MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 561201b7ae99SHong Zhang of the OFF-DIAGONAL portion of local A 5613429d309bSHong Zhang 5614429d309bSHong Zhang 
Collective on Mat 5615429d309bSHong Zhang 5616429d309bSHong Zhang Input Parameters: 5617429d309bSHong Zhang + A,B - the matrices in mpiaij format 5618598bc09dSHong Zhang - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5619429d309bSHong Zhang 5620429d309bSHong Zhang Output Parameter: 56210298fd71SBarry Smith + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 56220298fd71SBarry Smith . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 56230298fd71SBarry Smith . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5624598bc09dSHong Zhang - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5625429d309bSHong Zhang 56266eb45d04SBarry Smith Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 56276eb45d04SBarry Smith for this matrix. This is not desirable.. 56286eb45d04SBarry Smith 5629429d309bSHong Zhang Level: developer 5630429d309bSHong Zhang 5631f8487c73SHong Zhang */ 5632b7f45c76SHong Zhang PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5633429d309bSHong Zhang { 5634899cda47SBarry Smith Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 563587025532SHong Zhang Mat_SeqAIJ *b_oth; 56364b8d542aSHong Zhang VecScatter ctx; 5637ce94432eSBarry Smith MPI_Comm comm; 56383515ee7fSJunchao Zhang const PetscMPIInt *rprocs,*sprocs; 56393515ee7fSJunchao Zhang const PetscInt *srow,*rstarts,*sstarts; 5640277f51e8SBarry Smith PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5641f4259b30SLisandro Dalcin PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5642277f51e8SBarry Smith PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5643ddea5d60SJunchao Zhang MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5644ddea5d60SJunchao 
Zhang PetscMPIInt size,tag,rank,nreqs; 5645429d309bSHong Zhang 5646429d309bSHong Zhang PetscFunctionBegin; 56479566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 56489566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 5649a7c7454dSHong Zhang 5650c0aa6a63SJacob Faibussowitsch if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 565198921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5652429d309bSHong Zhang } 56539566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 56549566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5655a6b2eed2SHong Zhang 5656ec07b8f8SHong Zhang if (size == 1) { 5657ec07b8f8SHong Zhang startsj_s = NULL; 5658ec07b8f8SHong Zhang bufa_ptr = NULL; 565952f7967eSHong Zhang *B_oth = NULL; 5660ec07b8f8SHong Zhang PetscFunctionReturn(0); 5661ec07b8f8SHong Zhang } 5662ec07b8f8SHong Zhang 5663fa83eaafSHong Zhang ctx = a->Mvctx; 56644b8d542aSHong Zhang tag = ((PetscObject)ctx)->tag; 56654b8d542aSHong Zhang 56669566063dSJacob Faibussowitsch PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 56673515ee7fSJunchao Zhang /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 56689566063dSJacob Faibussowitsch PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 56699566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 56709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nreqs,&reqs)); 5671ddea5d60SJunchao Zhang rwaits = reqs; 5672ddea5d60SJunchao Zhang swaits = reqs + nrecvs; 
5673429d309bSHong Zhang 5674b7f45c76SHong Zhang if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5675429d309bSHong Zhang if (scall == MAT_INITIAL_MATRIX) { 5676a6b2eed2SHong Zhang /* i-array */ 5677a6b2eed2SHong Zhang /*---------*/ 5678a6b2eed2SHong Zhang /* post receives */ 56799566063dSJacob Faibussowitsch if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5680a6b2eed2SHong Zhang for (i=0; i<nrecvs; i++) { 568174268593SBarry Smith rowlen = rvalues + rstarts[i]*rbs; 5682e42f35eeSHong Zhang nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 56839566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5684429d309bSHong Zhang } 5685a6b2eed2SHong Zhang 5686a6b2eed2SHong Zhang /* pack the outgoing message */ 56879566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 56882205254eSKarl Rupp 56892205254eSKarl Rupp sstartsj[0] = 0; 56902205254eSKarl Rupp rstartsj[0] = 0; 5691a6b2eed2SHong Zhang len = 0; /* total length of j or a array to be sent */ 56923515ee7fSJunchao Zhang if (nsends) { 56933515ee7fSJunchao Zhang k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 56949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 56953515ee7fSJunchao Zhang } 5696a6b2eed2SHong Zhang for (i=0; i<nsends; i++) { 56973515ee7fSJunchao Zhang rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5698e42f35eeSHong Zhang nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 569987025532SHong Zhang for (j=0; j<nrows; j++) { 5700d0f46423SBarry Smith row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5701e42f35eeSHong Zhang for (l=0; l<sbs; l++) { 57029566063dSJacob Faibussowitsch PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 57032205254eSKarl Rupp 5704e42f35eeSHong Zhang rowlen[j*sbs+l] = ncols; 
57052205254eSKarl Rupp 5706e42f35eeSHong Zhang len += ncols; 57079566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5708e42f35eeSHong Zhang } 5709a6b2eed2SHong Zhang k++; 5710429d309bSHong Zhang } 57119566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 57122205254eSKarl Rupp 5713dea91ad1SHong Zhang sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5714429d309bSHong Zhang } 571587025532SHong Zhang /* recvs and sends of i-array are completed */ 57169566063dSJacob Faibussowitsch if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 57179566063dSJacob Faibussowitsch PetscCall(PetscFree(svalues)); 5718e42f35eeSHong Zhang 5719a6b2eed2SHong Zhang /* allocate buffers for sending j and a arrays */ 57209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len+1,&bufj)); 57219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len+1,&bufa)); 5722a6b2eed2SHong Zhang 572387025532SHong Zhang /* create i-array of B_oth */ 57249566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(aBn+2,&b_othi)); 57252205254eSKarl Rupp 572687025532SHong Zhang b_othi[0] = 0; 5727a6b2eed2SHong Zhang len = 0; /* total length of j or a array to be received */ 5728a6b2eed2SHong Zhang k = 0; 5729a6b2eed2SHong Zhang for (i=0; i<nrecvs; i++) { 57303515ee7fSJunchao Zhang rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 57313515ee7fSJunchao Zhang nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 573287025532SHong Zhang for (j=0; j<nrows; j++) { 573387025532SHong Zhang b_othi[k+1] = b_othi[k] + rowlen[j]; 57349566063dSJacob Faibussowitsch PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5735f91af8c7SBarry Smith k++; 5736a6b2eed2SHong Zhang } 5737dea91ad1SHong Zhang rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5738a6b2eed2SHong Zhang } 57399566063dSJacob Faibussowitsch PetscCall(PetscFree(rvalues)); 
5740a6b2eed2SHong Zhang 574187025532SHong Zhang /* allocate space for j and a arrrays of B_oth */ 57429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 57439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5744a6b2eed2SHong Zhang 574587025532SHong Zhang /* j-array */ 574687025532SHong Zhang /*---------*/ 5747a6b2eed2SHong Zhang /* post receives of j-array */ 5748a6b2eed2SHong Zhang for (i=0; i<nrecvs; i++) { 574987025532SHong Zhang nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 57509566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5751a6b2eed2SHong Zhang } 5752e42f35eeSHong Zhang 5753e42f35eeSHong Zhang /* pack the outgoing message j-array */ 57543515ee7fSJunchao Zhang if (nsends) k = sstarts[0]; 5755a6b2eed2SHong Zhang for (i=0; i<nsends; i++) { 5756e42f35eeSHong Zhang nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5757a6b2eed2SHong Zhang bufJ = bufj+sstartsj[i]; 575887025532SHong Zhang for (j=0; j<nrows; j++) { 5759d0f46423SBarry Smith row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5760e42f35eeSHong Zhang for (ll=0; ll<sbs; ll++) { 57619566063dSJacob Faibussowitsch PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5762a6b2eed2SHong Zhang for (l=0; l<ncols; l++) { 5763a6b2eed2SHong Zhang *bufJ++ = cols[l]; 576487025532SHong Zhang } 57659566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5766e42f35eeSHong Zhang } 576787025532SHong Zhang } 57689566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 576987025532SHong Zhang } 577087025532SHong Zhang 577187025532SHong Zhang /* recvs and sends of j-array are completed */ 57729566063dSJacob Faibussowitsch if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 577387025532SHong Zhang } else if (scall == 
MAT_REUSE_MATRIX) { 5774b7f45c76SHong Zhang sstartsj = *startsj_s; 57751d79065fSBarry Smith rstartsj = *startsj_r; 577687025532SHong Zhang bufa = *bufa_ptr; 577787025532SHong Zhang b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 57789566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5779ddea5d60SJunchao Zhang } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 578087025532SHong Zhang 578187025532SHong Zhang /* a-array */ 578287025532SHong Zhang /*---------*/ 578387025532SHong Zhang /* post receives of a-array */ 578487025532SHong Zhang for (i=0; i<nrecvs; i++) { 578587025532SHong Zhang nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 57869566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 578787025532SHong Zhang } 5788e42f35eeSHong Zhang 5789e42f35eeSHong Zhang /* pack the outgoing message a-array */ 57903515ee7fSJunchao Zhang if (nsends) k = sstarts[0]; 579187025532SHong Zhang for (i=0; i<nsends; i++) { 5792e42f35eeSHong Zhang nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 579387025532SHong Zhang bufA = bufa+sstartsj[i]; 579487025532SHong Zhang for (j=0; j<nrows; j++) { 5795d0f46423SBarry Smith row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5796e42f35eeSHong Zhang for (ll=0; ll<sbs; ll++) { 57979566063dSJacob Faibussowitsch PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 579887025532SHong Zhang for (l=0; l<ncols; l++) { 5799a6b2eed2SHong Zhang *bufA++ = vals[l]; 5800a6b2eed2SHong Zhang } 58019566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5802e42f35eeSHong Zhang } 5803a6b2eed2SHong Zhang } 58049566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5805a6b2eed2SHong Zhang } 580687025532SHong Zhang /* recvs and sends of a-array are completed */ 
58079566063dSJacob Faibussowitsch if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 58089566063dSJacob Faibussowitsch PetscCall(PetscFree(reqs)); 5809a6b2eed2SHong Zhang 581087025532SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 5811a6b2eed2SHong Zhang /* put together the new matrix */ 58129566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5813a6b2eed2SHong Zhang 5814a6b2eed2SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5815a6b2eed2SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 581687025532SHong Zhang b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5817e6b907acSBarry Smith b_oth->free_a = PETSC_TRUE; 5818e6b907acSBarry Smith b_oth->free_ij = PETSC_TRUE; 581987025532SHong Zhang b_oth->nonew = 0; 5820a6b2eed2SHong Zhang 58219566063dSJacob Faibussowitsch PetscCall(PetscFree(bufj)); 5822b7f45c76SHong Zhang if (!startsj_s || !bufa_ptr) { 58239566063dSJacob Faibussowitsch PetscCall(PetscFree2(sstartsj,rstartsj)); 58249566063dSJacob Faibussowitsch PetscCall(PetscFree(bufa_ptr)); 5825dea91ad1SHong Zhang } else { 5826b7f45c76SHong Zhang *startsj_s = sstartsj; 58271d79065fSBarry Smith *startsj_r = rstartsj; 582887025532SHong Zhang *bufa_ptr = bufa; 582987025532SHong Zhang } 5830fff043a9SJunchao Zhang } else if (scall == MAT_REUSE_MATRIX) { 58319566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5832dea91ad1SHong Zhang } 58333515ee7fSJunchao Zhang 58349566063dSJacob Faibussowitsch PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 58359566063dSJacob Faibussowitsch PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 58369566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5837429d309bSHong Zhang PetscFunctionReturn(0); 5838429d309bSHong 
Zhang } 5839ccd8e176SBarry Smith 5840cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5841cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5842ca9cdca7SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 58439779e05dSSatish Balay #if defined(PETSC_HAVE_MKL_SPARSE) 5844a84739b8SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5845191b95cbSRichard Tran Mills #endif 5846ae8d29abSPierre Jolivet PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5847cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 58485d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 5849cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 58505d7652ecSHong Zhang #endif 5851d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 5852d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5853d24d4204SJose E. 
Roman #endif 585463c07aadSStefano Zampini #if defined(PETSC_HAVE_HYPRE) 585563c07aadSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 585663c07aadSStefano Zampini #endif 58573338378cSStefano Zampini #if defined(PETSC_HAVE_CUDA) 58583338378cSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 58593338378cSStefano Zampini #endif 58603d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS) 58613d0639e7SStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 58623d0639e7SStefano Zampini #endif 5863d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 58644222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 58654222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 586617667f90SBarry Smith 5867fc4dec0aSBarry Smith /* 5868fc4dec0aSBarry Smith Computes (B'*A')' since computing B*A directly is untenable 5869fc4dec0aSBarry Smith 5870fc4dec0aSBarry Smith n p p 58712da392ccSBarry Smith [ ] [ ] [ ] 58722da392ccSBarry Smith m [ A ] * n [ B ] = m [ C ] 58732da392ccSBarry Smith [ ] [ ] [ ] 5874fc4dec0aSBarry Smith 5875fc4dec0aSBarry Smith */ 58766718818eSStefano Zampini static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5877fc4dec0aSBarry Smith { 5878fc4dec0aSBarry Smith Mat At,Bt,Ct; 5879fc4dec0aSBarry Smith 5880fc4dec0aSBarry Smith PetscFunctionBegin; 58819566063dSJacob Faibussowitsch PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 58829566063dSJacob Faibussowitsch PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 58839566063dSJacob Faibussowitsch PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 58849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&At)); 58859566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bt)); 58869566063dSJacob Faibussowitsch 
PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 58879566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Ct)); 5888fc4dec0aSBarry Smith PetscFunctionReturn(0); 5889fc4dec0aSBarry Smith } 5890fc4dec0aSBarry Smith 58916718818eSStefano Zampini static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5892fc4dec0aSBarry Smith { 58936718818eSStefano Zampini PetscBool cisdense; 5894fc4dec0aSBarry Smith 5895fc4dec0aSBarry Smith PetscFunctionBegin; 589608401ef6SPierre Jolivet PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 58979566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 58989566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C,A,B)); 58999566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 59006718818eSStefano Zampini if (!cisdense) { 59019566063dSJacob Faibussowitsch PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 59026718818eSStefano Zampini } 59039566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 5904f75ecaa4SHong Zhang 59054222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5906fc4dec0aSBarry Smith PetscFunctionReturn(0); 5907fc4dec0aSBarry Smith } 5908fc4dec0aSBarry Smith 5909fc4dec0aSBarry Smith /* ----------------------------------------------------------------*/ 59104222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5911fc4dec0aSBarry Smith { 59124222ddf1SHong Zhang Mat_Product *product = C->product; 59134222ddf1SHong Zhang Mat A = product->A,B=product->B; 5914fc4dec0aSBarry Smith 5915fc4dec0aSBarry Smith PetscFunctionBegin; 59164222ddf1SHong Zhang if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 591798921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local 
dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 59184222ddf1SHong Zhang 59194222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 59204222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 5921fc4dec0aSBarry Smith PetscFunctionReturn(0); 5922fc4dec0aSBarry Smith } 5923fc4dec0aSBarry Smith 59244222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 59254222ddf1SHong Zhang { 59264222ddf1SHong Zhang Mat_Product *product = C->product; 59274222ddf1SHong Zhang 59284222ddf1SHong Zhang PetscFunctionBegin; 59294222ddf1SHong Zhang if (product->type == MATPRODUCT_AB) { 59309566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 59316718818eSStefano Zampini } 59324222ddf1SHong Zhang PetscFunctionReturn(0); 59334222ddf1SHong Zhang } 5934394ed5ebSJunchao Zhang 5935394ed5ebSJunchao Zhang /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value 5936394ed5ebSJunchao Zhang is greater than value, or last if there is no such element. 
5937394ed5ebSJunchao Zhang */ 5938394ed5ebSJunchao Zhang static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper) 5939394ed5ebSJunchao Zhang { 5940394ed5ebSJunchao Zhang PetscCount it,step,count = last - first; 5941394ed5ebSJunchao Zhang 5942394ed5ebSJunchao Zhang PetscFunctionBegin; 5943394ed5ebSJunchao Zhang while (count > 0) { 5944394ed5ebSJunchao Zhang it = first; 5945394ed5ebSJunchao Zhang step = count / 2; 5946394ed5ebSJunchao Zhang it += step; 5947394ed5ebSJunchao Zhang if (!(value < array[it])) { 5948394ed5ebSJunchao Zhang first = ++it; 5949394ed5ebSJunchao Zhang count -= step + 1; 5950394ed5ebSJunchao Zhang } else count = step; 5951394ed5ebSJunchao Zhang } 5952394ed5ebSJunchao Zhang *upper = first; 5953394ed5ebSJunchao Zhang PetscFunctionReturn(0); 5954394ed5ebSJunchao Zhang } 5955394ed5ebSJunchao Zhang 5956*158ec288SJunchao Zhang /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 5957394ed5ebSJunchao Zhang 5958394ed5ebSJunchao Zhang Input Parameters: 5959394ed5ebSJunchao Zhang 5960394ed5ebSJunchao Zhang j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 5961394ed5ebSJunchao Zhang j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 5962394ed5ebSJunchao Zhang 5963*158ec288SJunchao Zhang mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 5964394ed5ebSJunchao Zhang 5965394ed5ebSJunchao Zhang For Set1, j1[] contains column indices of the nonzeros. 5966394ed5ebSJunchao Zhang For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 5967394ed5ebSJunchao Zhang respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 5968394ed5ebSJunchao Zhang but might have repeats. 
jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 5969394ed5ebSJunchao Zhang 5970394ed5ebSJunchao Zhang Similar for Set2. 5971394ed5ebSJunchao Zhang 5972394ed5ebSJunchao Zhang This routine merges the two sets of nonzeros row by row and removes repeats. 5973394ed5ebSJunchao Zhang 5974*158ec288SJunchao Zhang Output Parameters: (memory is allocated by the caller) 5975394ed5ebSJunchao Zhang 5976394ed5ebSJunchao Zhang i[],j[]: the CSR of the merged matrix, which has m rows. 5977394ed5ebSJunchao Zhang imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 5978394ed5ebSJunchao Zhang imap2[]: similar to imap1[], but for Set2. 5979394ed5ebSJunchao Zhang Note we order nonzeros row-by-row and from left to right. 5980394ed5ebSJunchao Zhang */ 5981394ed5ebSJunchao Zhang static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 5982394ed5ebSJunchao Zhang const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 5983394ed5ebSJunchao Zhang PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 5984394ed5ebSJunchao Zhang { 5985394ed5ebSJunchao Zhang PetscInt r,m; /* Row index of mat */ 5986394ed5ebSJunchao Zhang PetscCount t,t1,t2,b1,e1,b2,e2; 5987394ed5ebSJunchao Zhang 5988394ed5ebSJunchao Zhang PetscFunctionBegin; 59899566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat,&m,NULL)); 5990394ed5ebSJunchao Zhang t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 5991394ed5ebSJunchao Zhang i[0] = 0; 5992394ed5ebSJunchao Zhang for (r=0; r<m; r++) { /* Do row by row merging */ 5993394ed5ebSJunchao Zhang b1 = rowBegin1[r]; 5994394ed5ebSJunchao Zhang e1 = rowEnd1[r]; 5995394ed5ebSJunchao Zhang b2 = rowBegin2[r]; 5996394ed5ebSJunchao Zhang e2 = rowEnd2[r]; 5997394ed5ebSJunchao Zhang while (b1 < e1 && 
b2 < e2) { 5998394ed5ebSJunchao Zhang if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 5999394ed5ebSJunchao Zhang j[t] = j1[b1]; 6000394ed5ebSJunchao Zhang imap1[t1] = t; 6001394ed5ebSJunchao Zhang imap2[t2] = t; 6002394ed5ebSJunchao Zhang b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6003394ed5ebSJunchao Zhang b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6004394ed5ebSJunchao Zhang t1++; t2++; t++; 6005394ed5ebSJunchao Zhang } else if (j1[b1] < j2[b2]) { 6006394ed5ebSJunchao Zhang j[t] = j1[b1]; 6007394ed5ebSJunchao Zhang imap1[t1] = t; 6008394ed5ebSJunchao Zhang b1 += jmap1[t1+1] - jmap1[t1]; 6009394ed5ebSJunchao Zhang t1++; t++; 6010394ed5ebSJunchao Zhang } else { 6011394ed5ebSJunchao Zhang j[t] = j2[b2]; 6012394ed5ebSJunchao Zhang imap2[t2] = t; 6013394ed5ebSJunchao Zhang b2 += jmap2[t2+1] - jmap2[t2]; 6014394ed5ebSJunchao Zhang t2++; t++; 6015394ed5ebSJunchao Zhang } 6016394ed5ebSJunchao Zhang } 6017394ed5ebSJunchao Zhang /* Merge the remaining in either j1[] or j2[] */ 6018394ed5ebSJunchao Zhang while (b1 < e1) { 6019394ed5ebSJunchao Zhang j[t] = j1[b1]; 6020394ed5ebSJunchao Zhang imap1[t1] = t; 6021394ed5ebSJunchao Zhang b1 += jmap1[t1+1] - jmap1[t1]; 6022394ed5ebSJunchao Zhang t1++; t++; 6023394ed5ebSJunchao Zhang } 6024394ed5ebSJunchao Zhang while (b2 < e2) { 6025394ed5ebSJunchao Zhang j[t] = j2[b2]; 6026394ed5ebSJunchao Zhang imap2[t2] = t; 6027394ed5ebSJunchao Zhang b2 += jmap2[t2+1] - jmap2[t2]; 6028394ed5ebSJunchao Zhang t2++; t++; 6029394ed5ebSJunchao Zhang } 6030394ed5ebSJunchao Zhang i[r+1] = t; 6031394ed5ebSJunchao Zhang } 6032394ed5ebSJunchao Zhang PetscFunctionReturn(0); 6033394ed5ebSJunchao Zhang } 6034394ed5ebSJunchao Zhang 6035*158ec288SJunchao Zhang /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6036394ed5ebSJunchao Zhang 6037394ed5ebSJunchao Zhang Input Parameters: 6038394ed5ebSJunchao Zhang 
mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6039394ed5ebSJunchao Zhang n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6040394ed5ebSJunchao Zhang respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6041394ed5ebSJunchao Zhang 6042394ed5ebSJunchao Zhang i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6043394ed5ebSJunchao Zhang i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6044394ed5ebSJunchao Zhang 6045394ed5ebSJunchao Zhang Output Parameters: 6046394ed5ebSJunchao Zhang j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6047394ed5ebSJunchao Zhang rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6048394ed5ebSJunchao Zhang They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6049394ed5ebSJunchao Zhang and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6050394ed5ebSJunchao Zhang 6051394ed5ebSJunchao Zhang Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6052*158ec288SJunchao Zhang Atot: number of entries belonging to the diagonal block. 6053*158ec288SJunchao Zhang Annz: number of unique nonzeros belonging to the diagonal block. 6054394ed5ebSJunchao Zhang Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6055394ed5ebSJunchao Zhang repeats (i.e., same 'i,j' pair). 6056394ed5ebSJunchao Zhang Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6057394ed5ebSJunchao Zhang is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6058394ed5ebSJunchao Zhang 6059394ed5ebSJunchao Zhang Atot: number of entries belonging to the diagonal block 6060394ed5ebSJunchao Zhang Annz: number of unique nonzeros belonging to the diagonal block. 6061394ed5ebSJunchao Zhang 6062394ed5ebSJunchao Zhang Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6063394ed5ebSJunchao Zhang 6064*158ec288SJunchao Zhang Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6065394ed5ebSJunchao Zhang */ 6066394ed5ebSJunchao Zhang static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6067394ed5ebSJunchao Zhang PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6068394ed5ebSJunchao Zhang PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6069394ed5ebSJunchao Zhang PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6070394ed5ebSJunchao Zhang { 6071394ed5ebSJunchao Zhang PetscInt cstart,cend,rstart,rend,row,col; 6072394ed5ebSJunchao Zhang PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6073394ed5ebSJunchao Zhang PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6074394ed5ebSJunchao Zhang PetscCount k,m,p,q,r,s,mid; 6075394ed5ebSJunchao Zhang PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6076394ed5ebSJunchao Zhang 6077394ed5ebSJunchao Zhang PetscFunctionBegin; 60789566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 60799566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6080394ed5ebSJunchao Zhang m = rend - rstart; 6081394ed5ebSJunchao Zhang 6082394ed5ebSJunchao Zhang for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip 
negative rows */ 6083394ed5ebSJunchao Zhang 6084394ed5ebSJunchao Zhang /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6085394ed5ebSJunchao Zhang fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6086394ed5ebSJunchao Zhang */ 6087394ed5ebSJunchao Zhang while (k<n) { 6088394ed5ebSJunchao Zhang row = i[k]; 6089394ed5ebSJunchao Zhang /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6090394ed5ebSJunchao Zhang for (s=k; s<n; s++) if (i[s] != row) break; 6091394ed5ebSJunchao Zhang for (p=k; p<s; p++) { 6092394ed5ebSJunchao Zhang if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 609354c59aa7SJacob Faibussowitsch else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6094394ed5ebSJunchao Zhang } 60959566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6096*158ec288SJunchao Zhang PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6097394ed5ebSJunchao Zhang rowBegin[row-rstart] = k; 6098394ed5ebSJunchao Zhang rowMid[row-rstart] = mid; 6099394ed5ebSJunchao Zhang rowEnd[row-rstart] = s; 6100394ed5ebSJunchao Zhang 6101394ed5ebSJunchao Zhang /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6102394ed5ebSJunchao Zhang Atot += mid - k; 6103394ed5ebSJunchao Zhang Btot += s - mid; 6104394ed5ebSJunchao Zhang 6105394ed5ebSJunchao Zhang /* Count unique nonzeros of this diag/offdiag row */ 6106394ed5ebSJunchao Zhang for (p=k; p<mid;) { 6107394ed5ebSJunchao Zhang col = j[p]; 6108394ed5ebSJunchao Zhang do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6109394ed5ebSJunchao Zhang Annz++; 6110394ed5ebSJunchao Zhang } 
6111394ed5ebSJunchao Zhang 6112394ed5ebSJunchao Zhang for (p=mid; p<s;) { 6113394ed5ebSJunchao Zhang col = j[p]; 6114394ed5ebSJunchao Zhang do {p++;} while (p<s && j[p] == col); 6115394ed5ebSJunchao Zhang Bnnz++; 6116394ed5ebSJunchao Zhang } 6117394ed5ebSJunchao Zhang k = s; 6118394ed5ebSJunchao Zhang } 6119394ed5ebSJunchao Zhang 6120394ed5ebSJunchao Zhang /* Allocation according to Atot, Btot, Annz, Bnnz */ 6121*158ec288SJunchao Zhang PetscCall(PetscMalloc1(Atot,&Aperm)); 6122*158ec288SJunchao Zhang PetscCall(PetscMalloc1(Btot,&Bperm)); 6123*158ec288SJunchao Zhang PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6124*158ec288SJunchao Zhang PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6125394ed5ebSJunchao Zhang 6126394ed5ebSJunchao Zhang /* Re-scan indices and copy diag/offdiag permuation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6127394ed5ebSJunchao Zhang Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6128394ed5ebSJunchao Zhang for (r=0; r<m; r++) { 6129394ed5ebSJunchao Zhang k = rowBegin[r]; 6130394ed5ebSJunchao Zhang mid = rowMid[r]; 6131394ed5ebSJunchao Zhang s = rowEnd[r]; 61329566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 61339566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6134394ed5ebSJunchao Zhang Atot += mid - k; 6135394ed5ebSJunchao Zhang Btot += s - mid; 6136394ed5ebSJunchao Zhang 6137394ed5ebSJunchao Zhang /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6138394ed5ebSJunchao Zhang for (p=k; p<mid;) { 6139394ed5ebSJunchao Zhang col = j[p]; 6140394ed5ebSJunchao Zhang q = p; 6141394ed5ebSJunchao Zhang do {p++;} while (p<mid && j[p] == col); 6142394ed5ebSJunchao Zhang Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6143394ed5ebSJunchao Zhang Annz++; 6144394ed5ebSJunchao Zhang } 6145394ed5ebSJunchao Zhang 6146394ed5ebSJunchao Zhang for (p=mid; p<s;) { 6147394ed5ebSJunchao Zhang col = j[p]; 6148394ed5ebSJunchao Zhang q = p; 6149394ed5ebSJunchao Zhang 
do {p++;} while (p<s && j[p] == col); 6150394ed5ebSJunchao Zhang Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6151394ed5ebSJunchao Zhang Bnnz++; 6152394ed5ebSJunchao Zhang } 6153394ed5ebSJunchao Zhang } 6154394ed5ebSJunchao Zhang /* Output */ 6155394ed5ebSJunchao Zhang *Aperm_ = Aperm; 6156394ed5ebSJunchao Zhang *Annz_ = Annz; 6157394ed5ebSJunchao Zhang *Atot_ = Atot; 6158394ed5ebSJunchao Zhang *Ajmap_ = Ajmap; 6159394ed5ebSJunchao Zhang *Bperm_ = Bperm; 6160394ed5ebSJunchao Zhang *Bnnz_ = Bnnz; 6161394ed5ebSJunchao Zhang *Btot_ = Btot; 6162394ed5ebSJunchao Zhang *Bjmap_ = Bjmap; 6163394ed5ebSJunchao Zhang PetscFunctionReturn(0); 6164394ed5ebSJunchao Zhang } 6165394ed5ebSJunchao Zhang 6166*158ec288SJunchao Zhang /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6167*158ec288SJunchao Zhang 6168*158ec288SJunchao Zhang Input Parameters: 6169*158ec288SJunchao Zhang nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6170*158ec288SJunchao Zhang nnz: number of unique nonzeros in the merged matrix 6171*158ec288SJunchao Zhang imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6172*158ec288SJunchao Zhang jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6173*158ec288SJunchao Zhang 6174*158ec288SJunchao Zhang Output Parameter: (memory is allocated by the caller) 6175*158ec288SJunchao Zhang jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6176*158ec288SJunchao Zhang 6177*158ec288SJunchao Zhang Example: 6178*158ec288SJunchao Zhang nnz1 = 4 6179*158ec288SJunchao Zhang nnz = 6 6180*158ec288SJunchao Zhang imap = [1,3,4,5] 6181*158ec288SJunchao Zhang jmap = [0,3,5,6,7] 6182*158ec288SJunchao Zhang then, 6183*158ec288SJunchao Zhang jmap_new = [0,0,3,3,5,6,7] 6184*158ec288SJunchao Zhang */ 6185*158ec288SJunchao Zhang static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount 
imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6186*158ec288SJunchao Zhang { 6187*158ec288SJunchao Zhang PetscCount k,p; 6188*158ec288SJunchao Zhang 6189*158ec288SJunchao Zhang PetscFunctionBegin; 6190*158ec288SJunchao Zhang jmap_new[0] = 0; 6191*158ec288SJunchao Zhang p = nnz; /* p loops over jmap_new[] backwards */ 6192*158ec288SJunchao Zhang for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6193*158ec288SJunchao Zhang for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6194*158ec288SJunchao Zhang } 6195*158ec288SJunchao Zhang for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6196*158ec288SJunchao Zhang PetscFunctionReturn(0); 6197*158ec288SJunchao Zhang } 6198*158ec288SJunchao Zhang 6199394ed5ebSJunchao Zhang PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6200394ed5ebSJunchao Zhang { 6201394ed5ebSJunchao Zhang MPI_Comm comm; 6202394ed5ebSJunchao Zhang PetscMPIInt rank,size; 6203394ed5ebSJunchao Zhang PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6204394ed5ebSJunchao Zhang PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6205394ed5ebSJunchao Zhang Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6206394ed5ebSJunchao Zhang 6207394ed5ebSJunchao Zhang PetscFunctionBegin; 62089566063dSJacob Faibussowitsch PetscCall(PetscFree(mpiaij->garray)); 62099566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mpiaij->lvec)); 6210cbc6b225SStefano Zampini #if defined(PETSC_USE_CTABLE) 62119566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6212cbc6b225SStefano Zampini #else 62139566063dSJacob Faibussowitsch PetscCall(PetscFree(mpiaij->colmap)); 6214cbc6b225SStefano Zampini #endif 62159566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6216cbc6b225SStefano Zampini mat->assembled = PETSC_FALSE; 6217cbc6b225SStefano Zampini mat->was_assembled = PETSC_FALSE; 62189566063dSJacob Faibussowitsch 
PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6219cbc6b225SStefano Zampini 62209566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 62219566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 62229566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm,&rank)); 62239566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 62249566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 62259566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 62269566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 62279566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat,&m,&n)); 62289566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat,&M,&N)); 6229394ed5ebSJunchao Zhang 6230394ed5ebSJunchao Zhang /* ---------------------------------------------------------------------------*/ 6231394ed5ebSJunchao Zhang /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */ 6232394ed5ebSJunchao Zhang /* entries come first, then local rows, then remote rows. 
*/ 6233394ed5ebSJunchao Zhang /* ---------------------------------------------------------------------------*/ 6234394ed5ebSJunchao Zhang PetscCount n1 = coo_n,*perm1; 6235394ed5ebSJunchao Zhang PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 62369566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 62379566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 62389566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(j1,coo_j,n1)); 6239394ed5ebSJunchao Zhang for (k=0; k<n1; k++) perm1[k] = k; 6240394ed5ebSJunchao Zhang 6241394ed5ebSJunchao Zhang /* Manipulate indices so that entries with negative row or col indices will have smallest 6242394ed5ebSJunchao Zhang row indices, local entries will have greater but negative row indices, and remote entries 6243394ed5ebSJunchao Zhang will have positive row indices. 6244394ed5ebSJunchao Zhang */ 6245394ed5ebSJunchao Zhang for (k=0; k<n1; k++) { 6246394ed5ebSJunchao Zhang if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6247394ed5ebSJunchao Zhang else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 624854c59aa7SJacob Faibussowitsch else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6249394ed5ebSJunchao Zhang else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6250394ed5ebSJunchao Zhang } 6251394ed5ebSJunchao Zhang 6252394ed5ebSJunchao Zhang /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 62539566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6254394ed5ebSJunchao Zhang for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k 
to the first entry we need to take care of */ 62559566063dSJacob Faibussowitsch PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6256394ed5ebSJunchao Zhang for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6257394ed5ebSJunchao Zhang 6258394ed5ebSJunchao Zhang /* ---------------------------------------------------------------------------*/ 6259394ed5ebSJunchao Zhang /* Split local rows into diag/offdiag portions */ 6260394ed5ebSJunchao Zhang /* ---------------------------------------------------------------------------*/ 6261394ed5ebSJunchao Zhang PetscCount *rowBegin1,*rowMid1,*rowEnd1; 6262394ed5ebSJunchao Zhang PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6263394ed5ebSJunchao Zhang PetscCount Annz1,Bnnz1,Atot1,Btot1; 6264394ed5ebSJunchao Zhang 62659566063dSJacob Faibussowitsch PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 62669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 62679566063dSJacob Faibussowitsch PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6268394ed5ebSJunchao Zhang 6269394ed5ebSJunchao Zhang /* ---------------------------------------------------------------------------*/ 6270394ed5ebSJunchao Zhang /* Send remote rows to their owner */ 6271394ed5ebSJunchao Zhang /* ---------------------------------------------------------------------------*/ 6272394ed5ebSJunchao Zhang /* Find which rows should be sent to which remote ranks*/ 6273394ed5ebSJunchao Zhang PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6274394ed5ebSJunchao Zhang PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6275394ed5ebSJunchao Zhang PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6276394ed5ebSJunchao Zhang const PetscInt 
*ranges; 6277394ed5ebSJunchao Zhang PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6278394ed5ebSJunchao Zhang 62799566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 62809566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6281394ed5ebSJunchao Zhang for (k=rem; k<n1;) { 6282394ed5ebSJunchao Zhang PetscMPIInt owner; 6283394ed5ebSJunchao Zhang PetscInt firstRow,lastRow; 6284cbc6b225SStefano Zampini 6285394ed5ebSJunchao Zhang /* Locate a row range */ 6286394ed5ebSJunchao Zhang firstRow = i1[k]; /* first row of this owner */ 62879566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6288394ed5ebSJunchao Zhang lastRow = ranges[owner+1]-1; /* last row of this owner */ 6289394ed5ebSJunchao Zhang 6290394ed5ebSJunchao Zhang /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 62919566063dSJacob Faibussowitsch PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6292394ed5ebSJunchao Zhang 6293394ed5ebSJunchao Zhang /* All entries in [k,p) belong to this remote owner */ 6294394ed5ebSJunchao Zhang if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6295394ed5ebSJunchao Zhang PetscMPIInt *sendto2; 6296394ed5ebSJunchao Zhang PetscInt *nentries2; 6297394ed5ebSJunchao Zhang PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6298cbc6b225SStefano Zampini 62999566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 63009566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 63019566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 63029566063dSJacob Faibussowitsch PetscCall(PetscFree2(sendto,nentries2)); 6303394ed5ebSJunchao Zhang sendto = sendto2; 6304394ed5ebSJunchao Zhang nentries = nentries2; 6305394ed5ebSJunchao Zhang maxNsend = maxNsend2; 6306394ed5ebSJunchao Zhang } 6307394ed5ebSJunchao Zhang sendto[nsend] = owner; 6308394ed5ebSJunchao Zhang nentries[nsend] = p - k; 63099566063dSJacob Faibussowitsch PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6310394ed5ebSJunchao Zhang nsend++; 6311394ed5ebSJunchao Zhang k = p; 6312394ed5ebSJunchao Zhang } 6313394ed5ebSJunchao Zhang 6314394ed5ebSJunchao Zhang /* Build 1st SF to know offsets on remote to send data */ 6315394ed5ebSJunchao Zhang PetscSF sf1; 6316394ed5ebSJunchao Zhang PetscInt nroots = 1,nroots2 = 0; 6317394ed5ebSJunchao Zhang PetscInt nleaves = nsend,nleaves2 = 0; 6318394ed5ebSJunchao Zhang PetscInt *offsets; 6319394ed5ebSJunchao Zhang PetscSFNode *iremote; 6320394ed5ebSJunchao Zhang 63219566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm,&sf1)); 63229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsend,&iremote)); 63239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsend,&offsets)); 6324394ed5ebSJunchao Zhang for (k=0; k<nsend; k++) { 6325394ed5ebSJunchao Zhang iremote[k].rank = sendto[k]; 6326394ed5ebSJunchao Zhang iremote[k].index = 0; 6327394ed5ebSJunchao Zhang nleaves2 += nentries[k]; 632854c59aa7SJacob Faibussowitsch PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6329394ed5ebSJunchao Zhang } 63309566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 
63319566063dSJacob Faibussowitsch PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 63329566063dSJacob Faibussowitsch PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 63339566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf1)); 633463a3b9bcSJacob Faibussowitsch PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6335394ed5ebSJunchao Zhang 6336394ed5ebSJunchao Zhang /* Build 2nd SF to send remote COOs to their owner */ 6337394ed5ebSJunchao Zhang PetscSF sf2; 6338394ed5ebSJunchao Zhang nroots = nroots2; 6339394ed5ebSJunchao Zhang nleaves = nleaves2; 63409566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm,&sf2)); 63419566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf2)); 63429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves,&iremote)); 6343394ed5ebSJunchao Zhang p = 0; 6344394ed5ebSJunchao Zhang for (k=0; k<nsend; k++) { 634554c59aa7SJacob Faibussowitsch PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6346394ed5ebSJunchao Zhang for (q=0; q<nentries[k]; q++,p++) { 6347394ed5ebSJunchao Zhang iremote[p].rank = sendto[k]; 6348394ed5ebSJunchao Zhang iremote[p].index = offsets[k] + q; 6349394ed5ebSJunchao Zhang } 6350394ed5ebSJunchao Zhang } 63519566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6352394ed5ebSJunchao Zhang 6353394ed5ebSJunchao Zhang /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permuation which will be used to fill leafdata */ 63549566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6355394ed5ebSJunchao Zhang 6356394ed5ebSJunchao Zhang /* Send the remote COOs to their owner */ 6357394ed5ebSJunchao Zhang PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6358394ed5ebSJunchao Zhang PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 63599566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 63609566063dSJacob Faibussowitsch PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 63619566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 63629566063dSJacob Faibussowitsch PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 63639566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6364394ed5ebSJunchao Zhang 63659566063dSJacob Faibussowitsch PetscCall(PetscFree(offsets)); 63669566063dSJacob Faibussowitsch PetscCall(PetscFree2(sendto,nentries)); 6367394ed5ebSJunchao Zhang 6368394ed5ebSJunchao Zhang /* ---------------------------------------------------------------*/ 6369394ed5ebSJunchao Zhang /* Sort received COOs by row along with the permutation array */ 6370394ed5ebSJunchao Zhang /* ---------------------------------------------------------------*/ 6371394ed5ebSJunchao Zhang for (k=0; k<n2; k++) perm2[k] = k; 63729566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6373394ed5ebSJunchao Zhang 6374394ed5ebSJunchao Zhang /* ---------------------------------------------------------------*/ 6375394ed5ebSJunchao Zhang /* Split received COOs into diag/offdiag portions */ 6376394ed5ebSJunchao Zhang 
/* ---------------------------------------------------------------*/ 6377394ed5ebSJunchao Zhang PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6378394ed5ebSJunchao Zhang PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6379394ed5ebSJunchao Zhang PetscCount Annz2,Bnnz2,Atot2,Btot2; 6380394ed5ebSJunchao Zhang 63819566063dSJacob Faibussowitsch PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 63829566063dSJacob Faibussowitsch PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6383394ed5ebSJunchao Zhang 6384394ed5ebSJunchao Zhang /* --------------------------------------------------------------------------*/ 6385394ed5ebSJunchao Zhang /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6386394ed5ebSJunchao Zhang /* --------------------------------------------------------------------------*/ 6387394ed5ebSJunchao Zhang PetscInt *Ai,*Bi; 6388394ed5ebSJunchao Zhang PetscInt *Aj,*Bj; 6389394ed5ebSJunchao Zhang 63909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m+1,&Ai)); 63919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m+1,&Bi)); 63929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 63939566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6394394ed5ebSJunchao Zhang 6395394ed5ebSJunchao Zhang PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6396*158ec288SJunchao Zhang PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6397*158ec288SJunchao Zhang PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6398*158ec288SJunchao Zhang PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6399*158ec288SJunchao Zhang PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6400394ed5ebSJunchao Zhang 64019566063dSJacob Faibussowitsch PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 64029566063dSJacob Faibussowitsch 
PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6403*158ec288SJunchao Zhang 6404*158ec288SJunchao Zhang /* --------------------------------------------------------------------------*/ 6405*158ec288SJunchao Zhang /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6406*158ec288SJunchao Zhang /* expect nonzeros in A/B most likely have local contributing entries */ 6407*158ec288SJunchao Zhang /* --------------------------------------------------------------------------*/ 6408*158ec288SJunchao Zhang PetscInt Annz = Ai[m]; 6409*158ec288SJunchao Zhang PetscInt Bnnz = Bi[m]; 6410*158ec288SJunchao Zhang PetscCount *Ajmap1_new,*Bjmap1_new; 6411*158ec288SJunchao Zhang 6412*158ec288SJunchao Zhang PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6413*158ec288SJunchao Zhang PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6414*158ec288SJunchao Zhang 6415*158ec288SJunchao Zhang PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6416*158ec288SJunchao Zhang PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6417*158ec288SJunchao Zhang 6418*158ec288SJunchao Zhang PetscCall(PetscFree(Aimap1)); 6419*158ec288SJunchao Zhang PetscCall(PetscFree(Ajmap1)); 6420*158ec288SJunchao Zhang PetscCall(PetscFree(Bimap1)); 6421*158ec288SJunchao Zhang PetscCall(PetscFree(Bjmap1)); 64229566063dSJacob Faibussowitsch PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 64239566063dSJacob Faibussowitsch PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 64249566063dSJacob Faibussowitsch PetscCall(PetscFree3(i1,j1,perm1)); 64259566063dSJacob Faibussowitsch PetscCall(PetscFree3(i2,j2,perm2)); 6426394ed5ebSJunchao Zhang 6427*158ec288SJunchao Zhang Ajmap1 = Ajmap1_new; 6428*158ec288SJunchao Zhang Bjmap1 = Bjmap1_new; 6429*158ec288SJunchao Zhang 6430394ed5ebSJunchao Zhang /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6431394ed5ebSJunchao Zhang if (Annz < 
Annz1 + Annz2) { 6432394ed5ebSJunchao Zhang PetscInt *Aj_new; 64339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Annz,&Aj_new)); 64349566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 64359566063dSJacob Faibussowitsch PetscCall(PetscFree(Aj)); 6436394ed5ebSJunchao Zhang Aj = Aj_new; 6437394ed5ebSJunchao Zhang } 6438394ed5ebSJunchao Zhang 6439394ed5ebSJunchao Zhang if (Bnnz < Bnnz1 + Bnnz2) { 6440394ed5ebSJunchao Zhang PetscInt *Bj_new; 64419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 64429566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 64439566063dSJacob Faibussowitsch PetscCall(PetscFree(Bj)); 6444394ed5ebSJunchao Zhang Bj = Bj_new; 6445394ed5ebSJunchao Zhang } 6446394ed5ebSJunchao Zhang 6447394ed5ebSJunchao Zhang /* --------------------------------------------------------------------------------*/ 6448cbc6b225SStefano Zampini /* Create new submatrices for on-process and off-process coupling */ 6449394ed5ebSJunchao Zhang /* --------------------------------------------------------------------------------*/ 6450394ed5ebSJunchao Zhang PetscScalar *Aa,*Ba; 6451cbc6b225SStefano Zampini MatType rtype; 6452394ed5ebSJunchao Zhang Mat_SeqAIJ *a,*b; 64539566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 64549566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Bnnz,&Ba)); 6455394ed5ebSJunchao Zhang /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6456394ed5ebSJunchao Zhang if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 64579566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mpiaij->A)); 64589566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mpiaij->B)); 64599566063dSJacob Faibussowitsch PetscCall(MatGetRootType_Private(mat,&rtype)); 64609566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 64619566063dSJacob Faibussowitsch 
PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 64629566063dSJacob Faibussowitsch PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6463cbc6b225SStefano Zampini 6464394ed5ebSJunchao Zhang a = (Mat_SeqAIJ*)mpiaij->A->data; 6465394ed5ebSJunchao Zhang b = (Mat_SeqAIJ*)mpiaij->B->data; 6466394ed5ebSJunchao Zhang a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6467394ed5ebSJunchao Zhang a->free_a = b->free_a = PETSC_TRUE; 6468394ed5ebSJunchao Zhang a->free_ij = b->free_ij = PETSC_TRUE; 6469394ed5ebSJunchao Zhang 6470cbc6b225SStefano Zampini /* conversion must happen AFTER multiply setup */ 64719566063dSJacob Faibussowitsch PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 64729566063dSJacob Faibussowitsch PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 64739566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mpiaij->lvec)); 64749566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 64759566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6476cbc6b225SStefano Zampini 6477394ed5ebSJunchao Zhang mpiaij->coo_n = coo_n; 6478394ed5ebSJunchao Zhang mpiaij->coo_sf = sf2; 6479394ed5ebSJunchao Zhang mpiaij->sendlen = nleaves; 6480394ed5ebSJunchao Zhang mpiaij->recvlen = nroots; 6481394ed5ebSJunchao Zhang 6482*158ec288SJunchao Zhang mpiaij->Annz = Annz; 6483*158ec288SJunchao Zhang mpiaij->Bnnz = Bnnz; 6484*158ec288SJunchao Zhang 6485394ed5ebSJunchao Zhang mpiaij->Annz2 = Annz2; 6486394ed5ebSJunchao Zhang mpiaij->Bnnz2 = Bnnz2; 6487394ed5ebSJunchao Zhang 6488394ed5ebSJunchao Zhang mpiaij->Atot1 = Atot1; 6489394ed5ebSJunchao Zhang mpiaij->Atot2 = Atot2; 6490394ed5ebSJunchao Zhang mpiaij->Btot1 = Btot1; 6491394ed5ebSJunchao Zhang mpiaij->Btot2 = Btot2; 6492394ed5ebSJunchao Zhang 6493394ed5ebSJunchao Zhang mpiaij->Ajmap1 = Ajmap1; 6494394ed5ebSJunchao Zhang mpiaij->Aperm1 = Aperm1; 
6495*158ec288SJunchao Zhang 6496*158ec288SJunchao Zhang mpiaij->Bjmap1 = Bjmap1; 6497394ed5ebSJunchao Zhang mpiaij->Bperm1 = Bperm1; 6498*158ec288SJunchao Zhang 6499*158ec288SJunchao Zhang mpiaij->Aimap2 = Aimap2; 6500*158ec288SJunchao Zhang mpiaij->Ajmap2 = Ajmap2; 6501*158ec288SJunchao Zhang mpiaij->Aperm2 = Aperm2; 6502*158ec288SJunchao Zhang 6503*158ec288SJunchao Zhang mpiaij->Bimap2 = Bimap2; 6504*158ec288SJunchao Zhang mpiaij->Bjmap2 = Bjmap2; 6505394ed5ebSJunchao Zhang mpiaij->Bperm2 = Bperm2; 6506394ed5ebSJunchao Zhang 6507394ed5ebSJunchao Zhang mpiaij->Cperm1 = Cperm1; 6508394ed5ebSJunchao Zhang 6509394ed5ebSJunchao Zhang /* Allocate in preallocation. If not used, it has zero cost on host */ 65109566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6511394ed5ebSJunchao Zhang PetscFunctionReturn(0); 6512394ed5ebSJunchao Zhang } 6513394ed5ebSJunchao Zhang 6514394ed5ebSJunchao Zhang static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6515394ed5ebSJunchao Zhang { 6516394ed5ebSJunchao Zhang Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6517394ed5ebSJunchao Zhang Mat A = mpiaij->A,B = mpiaij->B; 6518*158ec288SJunchao Zhang PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6519394ed5ebSJunchao Zhang PetscScalar *Aa,*Ba; 6520394ed5ebSJunchao Zhang PetscScalar *sendbuf = mpiaij->sendbuf; 6521394ed5ebSJunchao Zhang PetscScalar *recvbuf = mpiaij->recvbuf; 6522*158ec288SJunchao Zhang const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6523*158ec288SJunchao Zhang const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6524394ed5ebSJunchao Zhang const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6525394ed5ebSJunchao Zhang const PetscCount *Cperm1 = mpiaij->Cperm1; 
6526394ed5ebSJunchao Zhang 6527394ed5ebSJunchao Zhang PetscFunctionBegin; 65289566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 65299566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(B,&Ba)); 6530394ed5ebSJunchao Zhang 6531394ed5ebSJunchao Zhang /* Pack entries to be sent to remote */ 6532394ed5ebSJunchao Zhang for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6533394ed5ebSJunchao Zhang 6534394ed5ebSJunchao Zhang /* Send remote entries to their owner and overlap the communication with local computation */ 65359566063dSJacob Faibussowitsch PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6536394ed5ebSJunchao Zhang /* Add local entries to A and B */ 6537*158ec288SJunchao Zhang for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6538*158ec288SJunchao Zhang PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6539*158ec288SJunchao Zhang for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6540*158ec288SJunchao Zhang Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum; 6541394ed5ebSJunchao Zhang } 6542*158ec288SJunchao Zhang for (PetscCount i=0; i<Bnnz; i++) { 6543*158ec288SJunchao Zhang PetscScalar sum = 0.0; 6544*158ec288SJunchao Zhang for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6545*158ec288SJunchao Zhang Ba[i] = (imode == INSERT_VALUES? 
0.0 : Ba[i]) + sum; 6546394ed5ebSJunchao Zhang } 65479566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6548394ed5ebSJunchao Zhang 6549394ed5ebSJunchao Zhang /* Add received remote entries to A and B */ 6550394ed5ebSJunchao Zhang for (PetscCount i=0; i<Annz2; i++) { 6551394ed5ebSJunchao Zhang for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6552394ed5ebSJunchao Zhang } 6553394ed5ebSJunchao Zhang for (PetscCount i=0; i<Bnnz2; i++) { 6554394ed5ebSJunchao Zhang for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6555394ed5ebSJunchao Zhang } 65569566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 65579566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6558394ed5ebSJunchao Zhang PetscFunctionReturn(0); 6559394ed5ebSJunchao Zhang } 6560394ed5ebSJunchao Zhang 65614222ddf1SHong Zhang /* ----------------------------------------------------------------*/ 65624222ddf1SHong Zhang 6563ccd8e176SBarry Smith /*MC 6564ccd8e176SBarry Smith MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6565ccd8e176SBarry Smith 6566ccd8e176SBarry Smith Options Database Keys: 6567ccd8e176SBarry Smith . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6568ccd8e176SBarry Smith 6569ccd8e176SBarry Smith Level: beginner 65700cd7f59aSBarry Smith 65710cd7f59aSBarry Smith Notes: 65720cd7f59aSBarry Smith MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 65730cd7f59aSBarry Smith in this case the values associated with the rows and columns one passes in are set to zero 65740cd7f59aSBarry Smith in the matrix 65750cd7f59aSBarry Smith 65760cd7f59aSBarry Smith MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 65770cd7f59aSBarry Smith space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6578ccd8e176SBarry Smith 657969b1f4b7SBarry Smith .seealso: MatCreateAIJ() 6580ccd8e176SBarry Smith M*/ 6581ccd8e176SBarry Smith 65828cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6583ccd8e176SBarry Smith { 6584ccd8e176SBarry Smith Mat_MPIAIJ *b; 6585ccd8e176SBarry Smith PetscMPIInt size; 6586ccd8e176SBarry Smith 6587ccd8e176SBarry Smith PetscFunctionBegin; 65889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 65892205254eSKarl Rupp 65909566063dSJacob Faibussowitsch PetscCall(PetscNewLog(B,&b)); 6591ccd8e176SBarry Smith B->data = (void*)b; 65929566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6593ccd8e176SBarry Smith B->assembled = PETSC_FALSE; 6594ccd8e176SBarry Smith B->insertmode = NOT_SET_VALUES; 6595ccd8e176SBarry Smith b->size = size; 65962205254eSKarl Rupp 65979566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6598ccd8e176SBarry Smith 6599ccd8e176SBarry Smith /* build cache for off array entries formed */ 66009566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 66012205254eSKarl Rupp 6602ccd8e176SBarry Smith b->donotstash = PETSC_FALSE; 6603f4259b30SLisandro Dalcin b->colmap = NULL; 6604f4259b30SLisandro Dalcin b->garray = NULL; 6605ccd8e176SBarry Smith b->roworiented = PETSC_TRUE; 6606ccd8e176SBarry Smith 6607ccd8e176SBarry Smith /* stuff used for matrix vector multiply */ 66080298fd71SBarry Smith b->lvec = NULL; 66090298fd71SBarry Smith b->Mvctx = NULL; 6610ccd8e176SBarry Smith 6611ccd8e176SBarry Smith /* stuff for MatGetRow() */ 6612f4259b30SLisandro Dalcin b->rowindices = NULL; 6613f4259b30SLisandro Dalcin b->rowvalues = NULL; 6614ccd8e176SBarry Smith b->getrowactive = PETSC_FALSE; 
6615ccd8e176SBarry Smith 6616f719121fSJed Brown /* flexible pointer used in CUSPARSE classes */ 66170298fd71SBarry Smith b->spptr = NULL; 6618f60c3dc2SHong Zhang 66199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 66209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 66219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 66229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 66239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 66249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 66259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 66269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 66279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 66289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 66293d0639e7SStefano Zampini #if defined(PETSC_HAVE_CUDA) 66309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 66313d0639e7SStefano Zampini #endif 66323d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS) 66339566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 66343d0639e7SStefano Zampini #endif 66359779e05dSSatish Balay #if defined(PETSC_HAVE_MKL_SPARSE) 66369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6637191b95cbSRichard Tran Mills #endif 66389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 66399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 66409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 66419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 66425d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 66439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 66445d7652ecSHong Zhang #endif 6645d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 66469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6647d24d4204SJose E. 
Roman #endif 66489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 66499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 66503dad0653Sstefano_zampini #if defined(PETSC_HAVE_HYPRE) 66519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 66529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 66533dad0653Sstefano_zampini #endif 66549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 66559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 66569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 66579566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 66589566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6659ccd8e176SBarry Smith PetscFunctionReturn(0); 6660ccd8e176SBarry Smith } 666181824310SBarry Smith 6662cce60c4dSBarry Smith /*@C 666303bfb495SBarry Smith MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 666403bfb495SBarry Smith and "off-diagonal" part of the matrix in CSR format. 666503bfb495SBarry Smith 6666d083f849SBarry Smith Collective 666703bfb495SBarry Smith 666803bfb495SBarry Smith Input Parameters: 666903bfb495SBarry Smith + comm - MPI communicator 667003bfb495SBarry Smith . 
m - number of local rows (Cannot be PETSC_DECIDE) 667103bfb495SBarry Smith . n - This value should be the same as the local size used in creating the 667203bfb495SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 667303bfb495SBarry Smith calculated if N is given) For square matrices n is almost always m. 667403bfb495SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 667503bfb495SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6676483a2f95SBarry Smith . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 667704ccdda3SJunchao Zhang . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 667803bfb495SBarry Smith . a - matrix values 6679483a2f95SBarry Smith . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 668004ccdda3SJunchao Zhang . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 668103bfb495SBarry Smith - oa - matrix values 668203bfb495SBarry Smith 668303bfb495SBarry Smith Output Parameter: 668403bfb495SBarry Smith . mat - the matrix 668503bfb495SBarry Smith 668603bfb495SBarry Smith Level: advanced 668703bfb495SBarry Smith 668803bfb495SBarry Smith Notes: 6689292fb18eSBarry Smith The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6690292fb18eSBarry Smith must free the arrays once the matrix has been destroyed and not before. 
669103bfb495SBarry Smith 669203bfb495SBarry Smith The i and j indices are 0 based 669303bfb495SBarry Smith 669469b1f4b7SBarry Smith See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 669503bfb495SBarry Smith 66967b55108eSBarry Smith This sets local rows and cannot be used to set off-processor values. 66977b55108eSBarry Smith 6698dca341c0SJed Brown Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6699dca341c0SJed Brown legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6700dca341c0SJed Brown not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6701dca341c0SJed Brown the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6702eeb24464SBarry Smith keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6703dca341c0SJed Brown communication if it is known that only local entries will be set. 
670403bfb495SBarry Smith 670503bfb495SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 67065f4d30c4SBarry Smith MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 67072b26979fSBarry Smith @*/ 67082205254eSKarl Rupp PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 670903bfb495SBarry Smith { 671003bfb495SBarry Smith Mat_MPIAIJ *maij; 671103bfb495SBarry Smith 671203bfb495SBarry Smith PetscFunctionBegin; 671308401ef6SPierre Jolivet PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 67142c71b3e2SJacob Faibussowitsch PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 67152c71b3e2SJacob Faibussowitsch PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 67169566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,mat)); 67179566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat,m,n,M,N)); 67189566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat,MATMPIAIJ)); 671903bfb495SBarry Smith maij = (Mat_MPIAIJ*) (*mat)->data; 67202205254eSKarl Rupp 67218d7a6e47SBarry Smith (*mat)->preallocated = PETSC_TRUE; 672203bfb495SBarry Smith 67239566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp((*mat)->rmap)); 67249566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp((*mat)->cmap)); 672503bfb495SBarry Smith 67269566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 67279566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 672803bfb495SBarry Smith 67299566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 67309566063dSJacob 
Faibussowitsch PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 67319566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 67329566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 67339566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 673403bfb495SBarry Smith PetscFunctionReturn(0); 673503bfb495SBarry Smith } 673603bfb495SBarry Smith 67374e84afc0SStefano Zampini typedef struct { 67384e84afc0SStefano Zampini Mat *mp; /* intermediate products */ 67394e84afc0SStefano Zampini PetscBool *mptmp; /* is the intermediate product temporary ? */ 67404e84afc0SStefano Zampini PetscInt cp; /* number of intermediate products */ 67414e84afc0SStefano Zampini 67424e84afc0SStefano Zampini /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 67434e84afc0SStefano Zampini PetscInt *startsj_s,*startsj_r; 67444e84afc0SStefano Zampini PetscScalar *bufa; 67454e84afc0SStefano Zampini Mat P_oth; 67464e84afc0SStefano Zampini 67474e84afc0SStefano Zampini /* may take advantage of merging product->B */ 6748ddea5d60SJunchao Zhang Mat Bloc; /* B-local by merging diag and off-diag */ 67494e84afc0SStefano Zampini 6750ddea5d60SJunchao Zhang /* cusparse does not have support to split between symbolic and numeric phases. 
67514e84afc0SStefano Zampini When api_user is true, we don't need to update the numerical values 67524e84afc0SStefano Zampini of the temporary storage */ 67534e84afc0SStefano Zampini PetscBool reusesym; 67544e84afc0SStefano Zampini 67554e84afc0SStefano Zampini /* support for COO values insertion */ 6756ddea5d60SJunchao Zhang PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6757ddea5d60SJunchao Zhang PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6758ddea5d60SJunchao Zhang PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6759ddea5d60SJunchao Zhang PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6760c215019aSStefano Zampini PetscSF sf; /* used for non-local values insertion and memory malloc */ 6761c215019aSStefano Zampini PetscMemType mtype; 67624e84afc0SStefano Zampini 67634e84afc0SStefano Zampini /* customization */ 67644e84afc0SStefano Zampini PetscBool abmerge; 6765abb89eb1SStefano Zampini PetscBool P_oth_bind; 67664e84afc0SStefano Zampini } MatMatMPIAIJBACKEND; 67674e84afc0SStefano Zampini 67684e84afc0SStefano Zampini PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 67694e84afc0SStefano Zampini { 67704e84afc0SStefano Zampini MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 67714e84afc0SStefano Zampini PetscInt i; 67724e84afc0SStefano Zampini 67734e84afc0SStefano Zampini PetscFunctionBegin; 67749566063dSJacob Faibussowitsch PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 67759566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata->bufa)); 67769566063dSJacob Faibussowitsch PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 67779566063dSJacob Faibussowitsch PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 67789566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mmdata->P_oth)); 67799566063dSJacob Faibussowitsch 
PetscCall(MatDestroy(&mmdata->Bloc)); 67809566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&mmdata->sf)); 67814e84afc0SStefano Zampini for (i = 0; i < mmdata->cp; i++) { 67829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mmdata->mp[i])); 67834e84afc0SStefano Zampini } 67849566063dSJacob Faibussowitsch PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 67859566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata->own[0])); 67869566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata->own)); 67879566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata->off[0])); 67889566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata->off)); 67899566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata)); 67904e84afc0SStefano Zampini PetscFunctionReturn(0); 67914e84afc0SStefano Zampini } 67924e84afc0SStefano Zampini 6793fff043a9SJunchao Zhang /* Copy selected n entries with indices in idx[] of A to v[]. 6794fff043a9SJunchao Zhang If idx is NULL, copy the whole data array of A to v[] 6795fff043a9SJunchao Zhang */ 6796c215019aSStefano Zampini static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6797c215019aSStefano Zampini { 6798c215019aSStefano Zampini PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6799c215019aSStefano Zampini 6800c215019aSStefano Zampini PetscFunctionBegin; 68019566063dSJacob Faibussowitsch PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6802c215019aSStefano Zampini if (f) { 68039566063dSJacob Faibussowitsch PetscCall((*f)(A,n,idx,v)); 6804c215019aSStefano Zampini } else { 6805c215019aSStefano Zampini const PetscScalar *vv; 6806c215019aSStefano Zampini 68079566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6808c215019aSStefano Zampini if (n && idx) { 6809c215019aSStefano Zampini PetscScalar *w = v; 6810c215019aSStefano Zampini const PetscInt *oi = idx; 6811c215019aSStefano Zampini PetscInt j; 6812c215019aSStefano Zampini 
6813c215019aSStefano Zampini for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6814c215019aSStefano Zampini } else { 68159566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(v,vv,n)); 6816c215019aSStefano Zampini } 68179566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6818c215019aSStefano Zampini } 6819c215019aSStefano Zampini PetscFunctionReturn(0); 6820c215019aSStefano Zampini } 6821c215019aSStefano Zampini 68224e84afc0SStefano Zampini static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 68234e84afc0SStefano Zampini { 68244e84afc0SStefano Zampini MatMatMPIAIJBACKEND *mmdata; 68254e84afc0SStefano Zampini PetscInt i,n_d,n_o; 68264e84afc0SStefano Zampini 68274e84afc0SStefano Zampini PetscFunctionBegin; 68284e84afc0SStefano Zampini MatCheckProduct(C,1); 682928b400f6SJacob Faibussowitsch PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 68304e84afc0SStefano Zampini mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 68314e84afc0SStefano Zampini if (!mmdata->reusesym) { /* update temporary matrices */ 68324e84afc0SStefano Zampini if (mmdata->P_oth) { 68339566063dSJacob Faibussowitsch PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 68344e84afc0SStefano Zampini } 68354e84afc0SStefano Zampini if (mmdata->Bloc) { 68369566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 68374e84afc0SStefano Zampini } 68384e84afc0SStefano Zampini } 68394e84afc0SStefano Zampini mmdata->reusesym = PETSC_FALSE; 6840abb89eb1SStefano Zampini 6841abb89eb1SStefano Zampini for (i = 0; i < mmdata->cp; i++) { 684208401ef6SPierre Jolivet PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 68439566063dSJacob Faibussowitsch 
PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6844abb89eb1SStefano Zampini } 68454e84afc0SStefano Zampini for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 68464e84afc0SStefano Zampini PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 68474e84afc0SStefano Zampini 68484e84afc0SStefano Zampini if (mmdata->mptmp[i]) continue; 68494e84afc0SStefano Zampini if (noff) { 6850c215019aSStefano Zampini PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6851c215019aSStefano Zampini 68529566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 68539566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 68544e84afc0SStefano Zampini n_o += noff; 68554e84afc0SStefano Zampini n_d += nown; 68564e84afc0SStefano Zampini } else { 6857c215019aSStefano Zampini Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6858c215019aSStefano Zampini 68599566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 68604e84afc0SStefano Zampini n_d += mm->nz; 68614e84afc0SStefano Zampini } 68624e84afc0SStefano Zampini } 6863c215019aSStefano Zampini if (mmdata->hasoffproc) { /* offprocess insertion */ 68649566063dSJacob Faibussowitsch PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 68659566063dSJacob Faibussowitsch PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 68664e84afc0SStefano Zampini } 68679566063dSJacob Faibussowitsch PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 68684e84afc0SStefano Zampini PetscFunctionReturn(0); 68694e84afc0SStefano Zampini } 68704e84afc0SStefano Zampini 68714e84afc0SStefano Zampini /* Support for Pt * A, A * P, or Pt * A * P */ 68724e84afc0SStefano Zampini #define MAX_NUMBER_INTERMEDIATE 4 68734e84afc0SStefano Zampini PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 68744e84afc0SStefano 
Zampini { 68754e84afc0SStefano Zampini Mat_Product *product = C->product; 6876ddea5d60SJunchao Zhang Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 68774e84afc0SStefano Zampini Mat_MPIAIJ *a,*p; 68784e84afc0SStefano Zampini MatMatMPIAIJBACKEND *mmdata; 68794e84afc0SStefano Zampini ISLocalToGlobalMapping P_oth_l2g = NULL; 68804e84afc0SStefano Zampini IS glob = NULL; 68814e84afc0SStefano Zampini const char *prefix; 68824e84afc0SStefano Zampini char pprefix[256]; 68834e84afc0SStefano Zampini const PetscInt *globidx,*P_oth_idx; 688482a78a4eSJed Brown PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 688582a78a4eSJed Brown PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6886ddea5d60SJunchao Zhang PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6887ddea5d60SJunchao Zhang /* type-0: consecutive, start from 0; type-1: consecutive with */ 6888ddea5d60SJunchao Zhang /* a base offset; type-2: sparse with a local to global map table */ 6889ddea5d60SJunchao Zhang const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6890ddea5d60SJunchao Zhang 68914e84afc0SStefano Zampini MatProductType ptype; 6892c215019aSStefano Zampini PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 68934e84afc0SStefano Zampini PetscMPIInt size; 68944e84afc0SStefano Zampini 68954e84afc0SStefano Zampini PetscFunctionBegin; 68964e84afc0SStefano Zampini MatCheckProduct(C,1); 689728b400f6SJacob Faibussowitsch PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 68984e84afc0SStefano Zampini ptype = product->type; 6899fa046f9fSJunchao Zhang if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6900fa046f9fSJunchao Zhang ptype = MATPRODUCT_AB; 6901fa046f9fSJunchao Zhang product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6902fa046f9fSJunchao 
Zhang } 69034e84afc0SStefano Zampini switch (ptype) { 69044e84afc0SStefano Zampini case MATPRODUCT_AB: 69054e84afc0SStefano Zampini A = product->A; 69064e84afc0SStefano Zampini P = product->B; 69074e84afc0SStefano Zampini m = A->rmap->n; 69084e84afc0SStefano Zampini n = P->cmap->n; 69094e84afc0SStefano Zampini M = A->rmap->N; 69104e84afc0SStefano Zampini N = P->cmap->N; 6911ddea5d60SJunchao Zhang hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 69124e84afc0SStefano Zampini break; 69134e84afc0SStefano Zampini case MATPRODUCT_AtB: 69144e84afc0SStefano Zampini P = product->A; 69154e84afc0SStefano Zampini A = product->B; 69164e84afc0SStefano Zampini m = P->cmap->n; 69174e84afc0SStefano Zampini n = A->cmap->n; 69184e84afc0SStefano Zampini M = P->cmap->N; 69194e84afc0SStefano Zampini N = A->cmap->N; 69204e84afc0SStefano Zampini hasoffproc = PETSC_TRUE; 69214e84afc0SStefano Zampini break; 69224e84afc0SStefano Zampini case MATPRODUCT_PtAP: 69234e84afc0SStefano Zampini A = product->A; 69244e84afc0SStefano Zampini P = product->B; 69254e84afc0SStefano Zampini m = P->cmap->n; 69264e84afc0SStefano Zampini n = P->cmap->n; 69274e84afc0SStefano Zampini M = P->cmap->N; 69284e84afc0SStefano Zampini N = P->cmap->N; 69294e84afc0SStefano Zampini hasoffproc = PETSC_TRUE; 69304e84afc0SStefano Zampini break; 69314e84afc0SStefano Zampini default: 693298921bdaSJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 69334e84afc0SStefano Zampini } 69349566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 69354e84afc0SStefano Zampini if (size == 1) hasoffproc = PETSC_FALSE; 69364e84afc0SStefano Zampini 69374e84afc0SStefano Zampini /* defaults */ 69384e84afc0SStefano Zampini for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 69394e84afc0SStefano Zampini mp[i] = NULL; 69404e84afc0SStefano Zampini mptmp[i] = PETSC_FALSE; 69414e84afc0SStefano Zampini 
rmapt[i] = -1; 69424e84afc0SStefano Zampini cmapt[i] = -1; 69434e84afc0SStefano Zampini rmapa[i] = NULL; 69444e84afc0SStefano Zampini cmapa[i] = NULL; 69454e84afc0SStefano Zampini } 69464e84afc0SStefano Zampini 69474e84afc0SStefano Zampini /* customization */ 69489566063dSJacob Faibussowitsch PetscCall(PetscNew(&mmdata)); 69494e84afc0SStefano Zampini mmdata->reusesym = product->api_user; 69504e84afc0SStefano Zampini if (ptype == MATPRODUCT_AB) { 69514e84afc0SStefano Zampini if (product->api_user) { 6952d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 69539566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 69549566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6955d0609cedSBarry Smith PetscOptionsEnd(); 69564e84afc0SStefano Zampini } else { 6957d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 69589566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 69599566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6960d0609cedSBarry Smith PetscOptionsEnd(); 6961abb89eb1SStefano Zampini } 6962abb89eb1SStefano Zampini } else if (ptype == MATPRODUCT_PtAP) { 6963abb89eb1SStefano Zampini if (product->api_user) { 6964d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 69659566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to 
CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6966d0609cedSBarry Smith PetscOptionsEnd(); 6967abb89eb1SStefano Zampini } else { 6968d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 69699566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6970d0609cedSBarry Smith PetscOptionsEnd(); 69714e84afc0SStefano Zampini } 69724e84afc0SStefano Zampini } 69734e84afc0SStefano Zampini a = (Mat_MPIAIJ*)A->data; 69744e84afc0SStefano Zampini p = (Mat_MPIAIJ*)P->data; 69759566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C,m,n,M,N)); 69769566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(C->rmap)); 69779566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(C->cmap)); 69789566063dSJacob Faibussowitsch PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 69799566063dSJacob Faibussowitsch PetscCall(MatGetOptionsPrefix(C,&prefix)); 6980ddea5d60SJunchao Zhang 6981ddea5d60SJunchao Zhang cp = 0; 69824e84afc0SStefano Zampini switch (ptype) { 69834e84afc0SStefano Zampini case MATPRODUCT_AB: /* A * P */ 69849566063dSJacob Faibussowitsch PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 69854e84afc0SStefano Zampini 6986ddea5d60SJunchao Zhang /* A_diag * P_local (merged or not) */ 6987ddea5d60SJunchao Zhang if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 69884e84afc0SStefano Zampini /* P is product->B */ 69899566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 69909566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 69919566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 69929566063dSJacob 
Faibussowitsch PetscCall(MatProductSetFill(mp[cp],product->fill)); 69939566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 69949566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 69959566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 69964e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 69979566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 699808401ef6SPierre Jolivet PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 69999566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 70009566063dSJacob Faibussowitsch PetscCall(ISGetIndices(glob,&globidx)); 70014e84afc0SStefano Zampini rmapt[cp] = 1; 70024e84afc0SStefano Zampini cmapt[cp] = 2; 70034e84afc0SStefano Zampini cmapa[cp] = globidx; 70044e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 70054e84afc0SStefano Zampini cp++; 7006ddea5d60SJunchao Zhang } else { /* A_diag * P_diag and A_diag * P_off */ 70079566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 70089566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 70099566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp],product->fill)); 70109566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 70119566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 70129566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 70134e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 70149566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 701508401ef6SPierre Jolivet PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op 
for %s",MatProductTypes[mp[cp]->product->type]); 70169566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 70174e84afc0SStefano Zampini rmapt[cp] = 1; 70184e84afc0SStefano Zampini cmapt[cp] = 1; 70194e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 70204e84afc0SStefano Zampini cp++; 70219566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 70229566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 70239566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp],product->fill)); 70249566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 70259566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 70269566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 70274e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 70289566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 702908401ef6SPierre Jolivet PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 70309566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 70314e84afc0SStefano Zampini rmapt[cp] = 1; 70324e84afc0SStefano Zampini cmapt[cp] = 2; 70334e84afc0SStefano Zampini cmapa[cp] = p->garray; 70344e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 70354e84afc0SStefano Zampini cp++; 70364e84afc0SStefano Zampini } 7037ddea5d60SJunchao Zhang 7038ddea5d60SJunchao Zhang /* A_off * P_other */ 70394e84afc0SStefano Zampini if (mmdata->P_oth) { 70409566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 70419566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 70429566063dSJacob Faibussowitsch 
PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 70439566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 70449566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 70459566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 70469566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp],product->fill)); 70479566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 70489566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 70499566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 70504e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 70519566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 705208401ef6SPierre Jolivet PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 70539566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 70544e84afc0SStefano Zampini rmapt[cp] = 1; 70554e84afc0SStefano Zampini cmapt[cp] = 2; 70564e84afc0SStefano Zampini cmapa[cp] = P_oth_idx; 70574e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 70584e84afc0SStefano Zampini cp++; 70594e84afc0SStefano Zampini } 70604e84afc0SStefano Zampini break; 7061ddea5d60SJunchao Zhang 70624e84afc0SStefano Zampini case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 70634e84afc0SStefano Zampini /* A is product->B */ 70649566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7065ddea5d60SJunchao Zhang if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 70669566063dSJacob Faibussowitsch PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 70679566063dSJacob Faibussowitsch 
PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 70689566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp],product->fill)); 70699566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 70709566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 70719566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 70724e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 70739566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 707408401ef6SPierre Jolivet PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 70759566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 70769566063dSJacob Faibussowitsch PetscCall(ISGetIndices(glob,&globidx)); 70774e84afc0SStefano Zampini rmapt[cp] = 2; 70784e84afc0SStefano Zampini rmapa[cp] = globidx; 70794e84afc0SStefano Zampini cmapt[cp] = 2; 70804e84afc0SStefano Zampini cmapa[cp] = globidx; 70814e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 70824e84afc0SStefano Zampini cp++; 70834e84afc0SStefano Zampini } else { 70849566063dSJacob Faibussowitsch PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 70859566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 70869566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp],product->fill)); 70879566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 70889566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 70899566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 70904e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 70919566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 709208401ef6SPierre Jolivet 
PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 70939566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 70949566063dSJacob Faibussowitsch PetscCall(ISGetIndices(glob,&globidx)); 70954e84afc0SStefano Zampini rmapt[cp] = 1; 70964e84afc0SStefano Zampini cmapt[cp] = 2; 70974e84afc0SStefano Zampini cmapa[cp] = globidx; 70984e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 70994e84afc0SStefano Zampini cp++; 71009566063dSJacob Faibussowitsch PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 71019566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 71029566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp],product->fill)); 71039566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 71049566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 71059566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 71064e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 71079566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 710808401ef6SPierre Jolivet PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 71099566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 71104e84afc0SStefano Zampini rmapt[cp] = 2; 71114e84afc0SStefano Zampini rmapa[cp] = p->garray; 71124e84afc0SStefano Zampini cmapt[cp] = 2; 71134e84afc0SStefano Zampini cmapa[cp] = globidx; 71144e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 71154e84afc0SStefano Zampini cp++; 71164e84afc0SStefano Zampini } 71174e84afc0SStefano Zampini break; 71184e84afc0SStefano Zampini case MATPRODUCT_PtAP: 71199566063dSJacob Faibussowitsch 
PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 71204e84afc0SStefano Zampini /* P is product->B */ 71219566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 71229566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 71239566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 71249566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp],product->fill)); 71259566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 71269566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 71279566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 71284e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 71299566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 713008401ef6SPierre Jolivet PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 71319566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 71329566063dSJacob Faibussowitsch PetscCall(ISGetIndices(glob,&globidx)); 71334e84afc0SStefano Zampini rmapt[cp] = 2; 71344e84afc0SStefano Zampini rmapa[cp] = globidx; 71354e84afc0SStefano Zampini cmapt[cp] = 2; 71364e84afc0SStefano Zampini cmapa[cp] = globidx; 71374e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 71384e84afc0SStefano Zampini cp++; 71394e84afc0SStefano Zampini if (mmdata->P_oth) { 71409566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 71419566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 71429566063dSJacob Faibussowitsch PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 
71439566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 71449566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 71459566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 71469566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp],product->fill)); 71479566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 71489566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 71499566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 71504e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 71519566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 715208401ef6SPierre Jolivet PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 71539566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 71544e84afc0SStefano Zampini mptmp[cp] = PETSC_TRUE; 71554e84afc0SStefano Zampini cp++; 71569566063dSJacob Faibussowitsch PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 71579566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 71589566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp],product->fill)); 71599566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 71609566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 71619566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 71624e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 71639566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 716408401ef6SPierre Jolivet 
PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 71659566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 71664e84afc0SStefano Zampini rmapt[cp] = 2; 71674e84afc0SStefano Zampini rmapa[cp] = globidx; 71684e84afc0SStefano Zampini cmapt[cp] = 2; 71694e84afc0SStefano Zampini cmapa[cp] = P_oth_idx; 71704e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 71714e84afc0SStefano Zampini cp++; 71724e84afc0SStefano Zampini } 71734e84afc0SStefano Zampini break; 71744e84afc0SStefano Zampini default: 717598921bdaSJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 71764e84afc0SStefano Zampini } 71774e84afc0SStefano Zampini /* sanity check */ 71782c71b3e2SJacob Faibussowitsch if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 71794e84afc0SStefano Zampini 71809566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7181ddea5d60SJunchao Zhang for (i = 0; i < cp; i++) { 7182ddea5d60SJunchao Zhang mmdata->mp[i] = mp[i]; 7183ddea5d60SJunchao Zhang mmdata->mptmp[i] = mptmp[i]; 7184ddea5d60SJunchao Zhang } 71854e84afc0SStefano Zampini mmdata->cp = cp; 71864e84afc0SStefano Zampini C->product->data = mmdata; 71874e84afc0SStefano Zampini C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 71884e84afc0SStefano Zampini C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 71894e84afc0SStefano Zampini 7190c215019aSStefano Zampini /* memory type */ 7191c215019aSStefano Zampini mmdata->mtype = PETSC_MEMTYPE_HOST; 71929566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 71939566063dSJacob Faibussowitsch 
PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7194c215019aSStefano Zampini if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 71953214990dSStefano Zampini else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7196c215019aSStefano Zampini 71974e84afc0SStefano Zampini /* prepare coo coordinates for values insertion */ 7198ddea5d60SJunchao Zhang 7199ddea5d60SJunchao Zhang /* count total nonzeros of those intermediate seqaij Mats 7200ddea5d60SJunchao Zhang ncoo_d: # of nonzeros of matrices that do not have offproc entries 7201ddea5d60SJunchao Zhang ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7202ddea5d60SJunchao Zhang ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7203ddea5d60SJunchao Zhang */ 72044e84afc0SStefano Zampini for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 72054e84afc0SStefano Zampini Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 72064e84afc0SStefano Zampini if (mptmp[cp]) continue; 7207ddea5d60SJunchao Zhang if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 72084e84afc0SStefano Zampini const PetscInt *rmap = rmapa[cp]; 72094e84afc0SStefano Zampini const PetscInt mr = mp[cp]->rmap->n; 72104e84afc0SStefano Zampini const PetscInt rs = C->rmap->rstart; 72114e84afc0SStefano Zampini const PetscInt re = C->rmap->rend; 72124e84afc0SStefano Zampini const PetscInt *ii = mm->i; 72134e84afc0SStefano Zampini for (i = 0; i < mr; i++) { 72144e84afc0SStefano Zampini const PetscInt gr = rmap[i]; 72154e84afc0SStefano Zampini const PetscInt nz = ii[i+1] - ii[i]; 7216ddea5d60SJunchao Zhang if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7217ddea5d60SJunchao Zhang else ncoo_oown += nz; /* this row is local */ 72184e84afc0SStefano Zampini } 72194e84afc0SStefano Zampini } else ncoo_d += mm->nz; 72204e84afc0SStefano 
Zampini } 7221ddea5d60SJunchao Zhang 7222ddea5d60SJunchao Zhang /* 7223ddea5d60SJunchao Zhang ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7224ddea5d60SJunchao Zhang 7225ddea5d60SJunchao Zhang ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7226ddea5d60SJunchao Zhang 7227ddea5d60SJunchao Zhang off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7228ddea5d60SJunchao Zhang 7229ddea5d60SJunchao Zhang off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7230ddea5d60SJunchao Zhang own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7231ddea5d60SJunchao Zhang so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7232ddea5d60SJunchao Zhang 7233ddea5d60SJunchao Zhang coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7234ddea5d60SJunchao Zhang Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7235ddea5d60SJunchao Zhang */ 72369566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 72379566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7238ddea5d60SJunchao Zhang 7239ddea5d60SJunchao Zhang /* gather (i,j) of nonzeros inserted by remote procs */ 7240ddea5d60SJunchao Zhang if (hasoffproc) { 72414e84afc0SStefano Zampini PetscSF msf; 72424e84afc0SStefano Zampini PetscInt ncoo2,*coo_i2,*coo_j2; 72434e84afc0SStefano Zampini 72449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 72459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 72469566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7247ddea5d60SJunchao Zhang 72484e84afc0SStefano Zampini for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 72494e84afc0SStefano Zampini Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 72504e84afc0SStefano Zampini PetscInt *idxoff = mmdata->off[cp]; 72514e84afc0SStefano Zampini PetscInt *idxown = mmdata->own[cp]; 7252ddea5d60SJunchao Zhang if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 72534e84afc0SStefano Zampini const PetscInt *rmap = rmapa[cp]; 72544e84afc0SStefano Zampini const PetscInt *cmap = cmapa[cp]; 72554e84afc0SStefano Zampini const PetscInt *ii = mm->i; 72564e84afc0SStefano Zampini PetscInt *coi = coo_i + ncoo_o; 72574e84afc0SStefano Zampini PetscInt *coj = coo_j + ncoo_o; 72584e84afc0SStefano Zampini const PetscInt mr = mp[cp]->rmap->n; 72594e84afc0SStefano Zampini const PetscInt rs = C->rmap->rstart; 72604e84afc0SStefano Zampini const PetscInt re = C->rmap->rend; 72614e84afc0SStefano Zampini const PetscInt cs = C->cmap->rstart; 72624e84afc0SStefano Zampini for (i = 0; i < mr; i++) { 72634e84afc0SStefano Zampini const PetscInt *jj = mm->j + ii[i]; 72644e84afc0SStefano Zampini const PetscInt gr = rmap[i]; 
72654e84afc0SStefano Zampini const PetscInt nz = ii[i+1] - ii[i]; 7266ddea5d60SJunchao Zhang if (gr < rs || gr >= re) { /* this is an offproc row */ 72674e84afc0SStefano Zampini for (j = ii[i]; j < ii[i+1]; j++) { 72684e84afc0SStefano Zampini *coi++ = gr; 72694e84afc0SStefano Zampini *idxoff++ = j; 72704e84afc0SStefano Zampini } 72714e84afc0SStefano Zampini if (!cmapt[cp]) { /* already global */ 72724e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = jj[j]; 72734e84afc0SStefano Zampini } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 72744e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 72754e84afc0SStefano Zampini } else { /* offdiag */ 72764e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 72774e84afc0SStefano Zampini } 72784e84afc0SStefano Zampini ncoo_o += nz; 7279ddea5d60SJunchao Zhang } else { /* this is a local row */ 72804e84afc0SStefano Zampini for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 72814e84afc0SStefano Zampini } 72824e84afc0SStefano Zampini } 72834e84afc0SStefano Zampini } 72844e84afc0SStefano Zampini mmdata->off[cp + 1] = idxoff; 72854e84afc0SStefano Zampini mmdata->own[cp + 1] = idxown; 72864e84afc0SStefano Zampini } 72874e84afc0SStefano Zampini 72889566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 72899566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 72909566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 72919566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 72924e84afc0SStefano Zampini ncoo = ncoo_d + ncoo_oown + ncoo2; 72939566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 72949566063dSJacob Faibussowitsch PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back 
*/ 72959566063dSJacob Faibussowitsch PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 72969566063dSJacob Faibussowitsch PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 72979566063dSJacob Faibussowitsch PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 72989566063dSJacob Faibussowitsch PetscCall(PetscFree2(coo_i,coo_j)); 7299ddea5d60SJunchao Zhang /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 73009566063dSJacob Faibussowitsch PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 73014e84afc0SStefano Zampini coo_i = coo_i2; 73024e84afc0SStefano Zampini coo_j = coo_j2; 73034e84afc0SStefano Zampini } else { /* no offproc values insertion */ 73044e84afc0SStefano Zampini ncoo = ncoo_d; 73059566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7306c215019aSStefano Zampini 73079566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 73089566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 73099566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(mmdata->sf)); 73104e84afc0SStefano Zampini } 7311c215019aSStefano Zampini mmdata->hasoffproc = hasoffproc; 73124e84afc0SStefano Zampini 7313ddea5d60SJunchao Zhang /* gather (i,j) of nonzeros inserted locally */ 73144e84afc0SStefano Zampini for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 73154e84afc0SStefano Zampini Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 73164e84afc0SStefano Zampini PetscInt *coi = coo_i + ncoo_d; 73174e84afc0SStefano Zampini PetscInt *coj = coo_j + ncoo_d; 73184e84afc0SStefano Zampini const PetscInt *jj = mm->j; 73194e84afc0SStefano Zampini const PetscInt *ii = mm->i; 73204e84afc0SStefano Zampini const PetscInt *cmap = cmapa[cp]; 73214e84afc0SStefano Zampini const PetscInt *rmap = 
rmapa[cp]; 73224e84afc0SStefano Zampini const PetscInt mr = mp[cp]->rmap->n; 73234e84afc0SStefano Zampini const PetscInt rs = C->rmap->rstart; 73244e84afc0SStefano Zampini const PetscInt re = C->rmap->rend; 73254e84afc0SStefano Zampini const PetscInt cs = C->cmap->rstart; 73264e84afc0SStefano Zampini 73274e84afc0SStefano Zampini if (mptmp[cp]) continue; 7328ddea5d60SJunchao Zhang if (rmapt[cp] == 1) { /* consecutive rows */ 7329ddea5d60SJunchao Zhang /* fill coo_i */ 73304e84afc0SStefano Zampini for (i = 0; i < mr; i++) { 73314e84afc0SStefano Zampini const PetscInt gr = i + rs; 73324e84afc0SStefano Zampini for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 73334e84afc0SStefano Zampini } 7334ddea5d60SJunchao Zhang /* fill coo_j */ 7335ddea5d60SJunchao Zhang if (!cmapt[cp]) { /* type-0, already global */ 73369566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7337ddea5d60SJunchao Zhang } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7338ddea5d60SJunchao Zhang for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7339ddea5d60SJunchao Zhang } else { /* type-2, local to global for sparse columns */ 73404e84afc0SStefano Zampini for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 73414e84afc0SStefano Zampini } 73424e84afc0SStefano Zampini ncoo_d += mm->nz; 7343ddea5d60SJunchao Zhang } else if (rmapt[cp] == 2) { /* sparse rows */ 73444e84afc0SStefano Zampini for (i = 0; i < mr; i++) { 73454e84afc0SStefano Zampini const PetscInt *jj = mm->j + ii[i]; 73464e84afc0SStefano Zampini const PetscInt gr = rmap[i]; 73474e84afc0SStefano Zampini const PetscInt nz = ii[i+1] - ii[i]; 7348ddea5d60SJunchao Zhang if (gr >= rs && gr < re) { /* local rows */ 73494e84afc0SStefano Zampini for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7350ddea5d60SJunchao Zhang if (!cmapt[cp]) { /* type-0, already global */ 73514e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = jj[j]; 73524e84afc0SStefano Zampini } else if 
(cmapt[cp] == 1) { /* local to global for owned columns of C */ 73534e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7354ddea5d60SJunchao Zhang } else { /* type-2, local to global for sparse columns */ 73554e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 73564e84afc0SStefano Zampini } 73574e84afc0SStefano Zampini ncoo_d += nz; 73584e84afc0SStefano Zampini } 73594e84afc0SStefano Zampini } 73604e84afc0SStefano Zampini } 73614e84afc0SStefano Zampini } 73624e84afc0SStefano Zampini if (glob) { 73639566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(glob,&globidx)); 73644e84afc0SStefano Zampini } 73659566063dSJacob Faibussowitsch PetscCall(ISDestroy(&glob)); 73664e84afc0SStefano Zampini if (P_oth_l2g) { 73679566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 73684e84afc0SStefano Zampini } 73699566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7370ddea5d60SJunchao Zhang /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 73719566063dSJacob Faibussowitsch PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 73724e84afc0SStefano Zampini 73734e84afc0SStefano Zampini /* preallocate with COO data */ 73749566063dSJacob Faibussowitsch PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 73759566063dSJacob Faibussowitsch PetscCall(PetscFree2(coo_i,coo_j)); 73764e84afc0SStefano Zampini PetscFunctionReturn(0); 73774e84afc0SStefano Zampini } 73784e84afc0SStefano Zampini 73794e84afc0SStefano Zampini PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 73804e84afc0SStefano Zampini { 73814e84afc0SStefano Zampini Mat_Product *product = mat->product; 73824e84afc0SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 73834e84afc0SStefano Zampini PetscBool match = PETSC_FALSE; 7384abb89eb1SStefano Zampini PetscBool usecpu = PETSC_FALSE; 73854e84afc0SStefano 
Zampini #else 73864e84afc0SStefano Zampini PetscBool match = PETSC_TRUE; 73874e84afc0SStefano Zampini #endif 73884e84afc0SStefano Zampini 73894e84afc0SStefano Zampini PetscFunctionBegin; 73904e84afc0SStefano Zampini MatCheckProduct(mat,1); 73914e84afc0SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 73924e84afc0SStefano Zampini if (!product->A->boundtocpu && !product->B->boundtocpu) { 73939566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 73944e84afc0SStefano Zampini } 739565e4b4d4SStefano Zampini if (match) { /* we can always fallback to the CPU if requested */ 7396abb89eb1SStefano Zampini switch (product->type) { 7397abb89eb1SStefano Zampini case MATPRODUCT_AB: 7398abb89eb1SStefano Zampini if (product->api_user) { 7399d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 74009566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7401d0609cedSBarry Smith PetscOptionsEnd(); 7402abb89eb1SStefano Zampini } else { 7403d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 74049566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7405d0609cedSBarry Smith PetscOptionsEnd(); 7406abb89eb1SStefano Zampini } 7407abb89eb1SStefano Zampini break; 7408abb89eb1SStefano Zampini case MATPRODUCT_AtB: 7409abb89eb1SStefano Zampini if (product->api_user) { 7410d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 74119566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7412d0609cedSBarry Smith PetscOptionsEnd(); 7413abb89eb1SStefano 
Zampini } else { 7414d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 74159566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7416d0609cedSBarry Smith PetscOptionsEnd(); 7417abb89eb1SStefano Zampini } 7418abb89eb1SStefano Zampini break; 7419abb89eb1SStefano Zampini case MATPRODUCT_PtAP: 7420abb89eb1SStefano Zampini if (product->api_user) { 7421d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 74229566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7423d0609cedSBarry Smith PetscOptionsEnd(); 7424abb89eb1SStefano Zampini } else { 7425d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 74269566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7427d0609cedSBarry Smith PetscOptionsEnd(); 7428abb89eb1SStefano Zampini } 7429abb89eb1SStefano Zampini break; 7430abb89eb1SStefano Zampini default: 7431abb89eb1SStefano Zampini break; 7432abb89eb1SStefano Zampini } 7433abb89eb1SStefano Zampini match = (PetscBool)!usecpu; 7434abb89eb1SStefano Zampini } 74354e84afc0SStefano Zampini #endif 74364e84afc0SStefano Zampini if (match) { 74374e84afc0SStefano Zampini switch (product->type) { 74384e84afc0SStefano Zampini case MATPRODUCT_AB: 74394e84afc0SStefano Zampini case MATPRODUCT_AtB: 74404e84afc0SStefano Zampini case MATPRODUCT_PtAP: 74414e84afc0SStefano Zampini mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 74424e84afc0SStefano Zampini break; 74434e84afc0SStefano Zampini default: 74444e84afc0SStefano Zampini break; 74454e84afc0SStefano Zampini } 74464e84afc0SStefano Zampini } 
74474e84afc0SStefano Zampini /* fallback to MPIAIJ ops */ 74489566063dSJacob Faibussowitsch if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 74494e84afc0SStefano Zampini PetscFunctionReturn(0); 745081824310SBarry Smith } 745198921bdaSJacob Faibussowitsch 745298921bdaSJacob Faibussowitsch /* 745398921bdaSJacob Faibussowitsch Special version for direct calls from Fortran 745498921bdaSJacob Faibussowitsch */ 745598921bdaSJacob Faibussowitsch #include <petsc/private/fortranimpl.h> 745698921bdaSJacob Faibussowitsch 745798921bdaSJacob Faibussowitsch /* Change these macros so can be used in void function */ 74589566063dSJacob Faibussowitsch /* Identical to PetscCallVoid, except it assigns to *_ierr */ 74599566063dSJacob Faibussowitsch #undef PetscCall 74609566063dSJacob Faibussowitsch #define PetscCall(...) do { \ 74615f80ce2aSJacob Faibussowitsch PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 746298921bdaSJacob Faibussowitsch if (PetscUnlikely(ierr_msv_mpiaij)) { \ 746398921bdaSJacob Faibussowitsch *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 746498921bdaSJacob Faibussowitsch return; \ 746598921bdaSJacob Faibussowitsch } \ 746698921bdaSJacob Faibussowitsch } while (0) 746798921bdaSJacob Faibussowitsch 746898921bdaSJacob Faibussowitsch #undef SETERRQ 746998921bdaSJacob Faibussowitsch #define SETERRQ(comm,ierr,...) 
do { \ 747098921bdaSJacob Faibussowitsch *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 747198921bdaSJacob Faibussowitsch return; \ 747298921bdaSJacob Faibussowitsch } while (0) 747398921bdaSJacob Faibussowitsch 747498921bdaSJacob Faibussowitsch #if defined(PETSC_HAVE_FORTRAN_CAPS) 747598921bdaSJacob Faibussowitsch #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 747698921bdaSJacob Faibussowitsch #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 747798921bdaSJacob Faibussowitsch #define matsetvaluesmpiaij_ matsetvaluesmpiaij 747898921bdaSJacob Faibussowitsch #else 747998921bdaSJacob Faibussowitsch #endif 748098921bdaSJacob Faibussowitsch PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 748198921bdaSJacob Faibussowitsch { 748298921bdaSJacob Faibussowitsch Mat mat = *mmat; 748398921bdaSJacob Faibussowitsch PetscInt m = *mm, n = *mn; 748498921bdaSJacob Faibussowitsch InsertMode addv = *maddv; 748598921bdaSJacob Faibussowitsch Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 748698921bdaSJacob Faibussowitsch PetscScalar value; 748798921bdaSJacob Faibussowitsch 748898921bdaSJacob Faibussowitsch MatCheckPreallocated(mat,1); 748998921bdaSJacob Faibussowitsch if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 74905f80ce2aSJacob Faibussowitsch else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 749198921bdaSJacob Faibussowitsch { 749298921bdaSJacob Faibussowitsch PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 749398921bdaSJacob Faibussowitsch PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 749498921bdaSJacob Faibussowitsch PetscBool roworiented = aij->roworiented; 749598921bdaSJacob Faibussowitsch 749698921bdaSJacob Faibussowitsch /* Some Variables required in the macro */ 
749798921bdaSJacob Faibussowitsch Mat A = aij->A; 749898921bdaSJacob Faibussowitsch Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 749998921bdaSJacob Faibussowitsch PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 750098921bdaSJacob Faibussowitsch MatScalar *aa; 750198921bdaSJacob Faibussowitsch PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 750298921bdaSJacob Faibussowitsch Mat B = aij->B; 750398921bdaSJacob Faibussowitsch Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 750498921bdaSJacob Faibussowitsch PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 750598921bdaSJacob Faibussowitsch MatScalar *ba; 750698921bdaSJacob Faibussowitsch /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 750798921bdaSJacob Faibussowitsch * cannot use "#if defined" inside a macro. */ 750898921bdaSJacob Faibussowitsch PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 750998921bdaSJacob Faibussowitsch 751098921bdaSJacob Faibussowitsch PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 751198921bdaSJacob Faibussowitsch PetscInt nonew = a->nonew; 751298921bdaSJacob Faibussowitsch MatScalar *ap1,*ap2; 751398921bdaSJacob Faibussowitsch 751498921bdaSJacob Faibussowitsch PetscFunctionBegin; 75159566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(A,&aa)); 75169566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(B,&ba)); 751798921bdaSJacob Faibussowitsch for (i=0; i<m; i++) { 751898921bdaSJacob Faibussowitsch if (im[i] < 0) continue; 75196bdcaf15SBarry Smith PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 752098921bdaSJacob Faibussowitsch if (im[i] >= rstart && im[i] < rend) { 752198921bdaSJacob Faibussowitsch row = im[i] - rstart; 752298921bdaSJacob 
Faibussowitsch lastcol1 = -1; 752398921bdaSJacob Faibussowitsch rp1 = aj + ai[row]; 752498921bdaSJacob Faibussowitsch ap1 = aa + ai[row]; 752598921bdaSJacob Faibussowitsch rmax1 = aimax[row]; 752698921bdaSJacob Faibussowitsch nrow1 = ailen[row]; 752798921bdaSJacob Faibussowitsch low1 = 0; 752898921bdaSJacob Faibussowitsch high1 = nrow1; 752998921bdaSJacob Faibussowitsch lastcol2 = -1; 753098921bdaSJacob Faibussowitsch rp2 = bj + bi[row]; 753198921bdaSJacob Faibussowitsch ap2 = ba + bi[row]; 753298921bdaSJacob Faibussowitsch rmax2 = bimax[row]; 753398921bdaSJacob Faibussowitsch nrow2 = bilen[row]; 753498921bdaSJacob Faibussowitsch low2 = 0; 753598921bdaSJacob Faibussowitsch high2 = nrow2; 753698921bdaSJacob Faibussowitsch 753798921bdaSJacob Faibussowitsch for (j=0; j<n; j++) { 753898921bdaSJacob Faibussowitsch if (roworiented) value = v[i*n+j]; 753998921bdaSJacob Faibussowitsch else value = v[i+j*m]; 754098921bdaSJacob Faibussowitsch if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 754198921bdaSJacob Faibussowitsch if (in[j] >= cstart && in[j] < cend) { 754298921bdaSJacob Faibussowitsch col = in[j] - cstart; 754398921bdaSJacob Faibussowitsch MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 754498921bdaSJacob Faibussowitsch } else if (in[j] < 0) continue; 754598921bdaSJacob Faibussowitsch else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 754698921bdaSJacob Faibussowitsch /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 754763a3b9bcSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 754898921bdaSJacob Faibussowitsch } else { 754998921bdaSJacob Faibussowitsch if (mat->was_assembled) { 755098921bdaSJacob Faibussowitsch if (!aij->colmap) { 75519566063dSJacob Faibussowitsch PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 755298921bdaSJacob Faibussowitsch } 
755398921bdaSJacob Faibussowitsch #if defined(PETSC_USE_CTABLE) 75549566063dSJacob Faibussowitsch PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 755598921bdaSJacob Faibussowitsch col--; 755698921bdaSJacob Faibussowitsch #else 755798921bdaSJacob Faibussowitsch col = aij->colmap[in[j]] - 1; 755898921bdaSJacob Faibussowitsch #endif 755998921bdaSJacob Faibussowitsch if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 75609566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPIAIJ(mat)); 756198921bdaSJacob Faibussowitsch col = in[j]; 756298921bdaSJacob Faibussowitsch /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 756398921bdaSJacob Faibussowitsch B = aij->B; 756498921bdaSJacob Faibussowitsch b = (Mat_SeqAIJ*)B->data; 756598921bdaSJacob Faibussowitsch bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 756698921bdaSJacob Faibussowitsch rp2 = bj + bi[row]; 756798921bdaSJacob Faibussowitsch ap2 = ba + bi[row]; 756898921bdaSJacob Faibussowitsch rmax2 = bimax[row]; 756998921bdaSJacob Faibussowitsch nrow2 = bilen[row]; 757098921bdaSJacob Faibussowitsch low2 = 0; 757198921bdaSJacob Faibussowitsch high2 = nrow2; 757298921bdaSJacob Faibussowitsch bm = aij->B->rmap->n; 757398921bdaSJacob Faibussowitsch ba = b->a; 757498921bdaSJacob Faibussowitsch inserted = PETSC_FALSE; 757598921bdaSJacob Faibussowitsch } 757698921bdaSJacob Faibussowitsch } else col = in[j]; 757798921bdaSJacob Faibussowitsch MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 757898921bdaSJacob Faibussowitsch } 757998921bdaSJacob Faibussowitsch } 758098921bdaSJacob Faibussowitsch } else if (!aij->donotstash) { 758198921bdaSJacob Faibussowitsch if (roworiented) { 75829566063dSJacob Faibussowitsch PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 758398921bdaSJacob Faibussowitsch } else { 75849566063dSJacob Faibussowitsch 
PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 758598921bdaSJacob Faibussowitsch } 758698921bdaSJacob Faibussowitsch } 758798921bdaSJacob Faibussowitsch } 75889566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(A,&aa)); 75899566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(B,&ba)); 759098921bdaSJacob Faibussowitsch } 759198921bdaSJacob Faibussowitsch PetscFunctionReturnVoid(); 759298921bdaSJacob Faibussowitsch } 759398921bdaSJacob Faibussowitsch /* Undefining these here since they were redefined from their original definition above! No 759498921bdaSJacob Faibussowitsch * other PETSc functions should be defined past this point, as it is impossible to recover the 759598921bdaSJacob Faibussowitsch * original definitions */ 75969566063dSJacob Faibussowitsch #undef PetscCall 759798921bdaSJacob Faibussowitsch #undef SETERRQ 7598